Merge 4c1c0b2 into 7f2456d

dnstwister · Dec 28, 2018 · c42cfb1 · c42cfb1
2 parents 7f2456d + 4c1c0b2
commit c42cfb1
Show file tree

Hide file tree

Showing 18 changed files with 1,335 additions and 74 deletions.
diff --git a/build/build_fed.py b/build/build_fed.py
@@ -37,6 +37,11 @@
         'jquery-1.11.3.min.js',
         'report.js',
     ),
+    'search.min.js': (
+        'jsonpipe.js',
+        'search-ui.js',
+        'search.js',
+    ),
     'analyse.min.js': (
         'jquery-1.11.3.min.js',
         'analyse.js',

diff --git a/dnstwister/api/__init__.py b/dnstwister/api/__init__.py
@@ -1,6 +1,7 @@
 """The analysis API endpoint."""
-import binascii
+import json
 import urlparse
+
 import whois as whois_mod
 
 import flask
@@ -163,3 +164,23 @@ def fuzz(hexdomain):
     payload = standard_api_values(domain, skip='fuzz')
     payload['fuzzy_domains'] = fuzz_payload
     return flask.jsonify(payload)
+
+
+@app.route('/fuzz_chunked/<hexdomain>')
+def fuzz_chunked(hexdomain):
+    """Stream the fuzz of a domain as a sequence of JSON objects.
+
+    One JSON object is emitted per fuzzed domain, each followed by a blank
+    line ('\\n\\n') so a jsonpipe (eBay) client can split and parse the
+    objects as they arrive.
+
+    Each object has two keys: 'd', the fuzzed domain, and 'ed', the same
+    domain passed through tools.encode_domain().
+
+    Aborts with HTTP 400 when tools.parse_domain() returns None for
+    `hexdomain`.
+    """
+    domain = tools.parse_domain(hexdomain)
+    if domain is None:
+        flask.abort(
+            400,
+            'Malformed domain or domain not represented in hexadecimal format.'
+        )
+
+    def generate():
+        """Serialise each fuzz result lazily, as it is produced."""
+        for result in tools.fuzzy_domains_iter(domain):
+            yield json.dumps({
+                'd': result.domain,
+                'ed': tools.encode_domain(result.domain)
+            }) + '\n\n'
+
+    # The payload is JSON, so label it as such rather than inheriting
+    # flask.Response's default text/html mimetype.
+    return flask.Response(generate(), mimetype='application/json')
diff --git a/dnstwister/dnstwist/dnstwist.py b/dnstwister/dnstwist/dnstwist.py
@@ -56,6 +56,30 @@ class InvalidDomain(Exception):
     pass
 
 
+class Result(object):
+    """A single fuzzed domain, tagged with the fuzzer that produced it.
+
+    Both values are stored at construction and exposed as read-only
+    properties.
+    """
+
+    def __init__(self, fuzzer, domain):
+        self._fuzzer = fuzzer
+        self._domain = domain
+
+    def __repr__(self):
+        # Makes instances readable in logs and failing-test output.
+        return '%s(%r, %r)' % (
+            type(self).__name__, self._fuzzer, self._domain
+        )
+
+    @property
+    def fuzzer(self):
+        """The fuzzer tag given to the constructor."""
+        return self._fuzzer
+
+    @property
+    def domain(self):
+        """The domain given to the constructor."""
+        return self._domain
+
+class ResultBuilder(object):
+    """Factory for Result objects that all share one TLD."""
+
+    def __init__(self, tld):
+        self._tld = tld
+
+    def build(self, fuzzer, domain):
+        """Return a Result for `domain` with '.' and the stored TLD appended."""
+        suffix = '.' + self._tld
+        full_domain = domain + suffix
+        return Result(fuzzer, full_domain)
+
+
 def is_valid_domain(domain):
     """Validate a domain - including unicode domains."""
     try:
@@ -150,7 +174,6 @@ def __filter_domains(self):
         idna.core.check_label = old_func
 
     def __bitsquatting(self):
-        result = []
         masks = [1, 2, 4, 8, 16, 32, 64, 128]
         for i in range(0, len(self.domain)):
             c = self.domain[i]
@@ -163,11 +186,9 @@ def __bitsquatting(self):
                 b = chr(ord(c) ^ masks[j])
                 o = ord(b)
                 if (o >= 48 and o <= 57) or (o >= 97 and o <= 122) or o == 45:
-                    result.append(self.domain[:i] + b + self.domain[i+1:])
-
-        return result
+                    yield self.domain[:i] + b + self.domain[i+1:]
 
-    def __homoglyph(self):
+    def __homoglyph(self, MAX=1000):
         glyphs = {
         'a': [u'à', u'á', u'â', u'ã', u'ä', u'å', u'ɑ', u'а', u'ạ', u'ǎ', u'ă', u'ȧ', u'ӓ'],
         'b': ['d', 'lb', 'ib', u'ʙ', u'Ь', u'b̔', u'ɓ', u'Б'],
@@ -197,7 +218,8 @@ def __homoglyph(self):
         'z': [u'ʐ', u'ż', u'ź', u'ʐ', u'ᴢ']
         }
 
-        result = set()
+        yielded = 0
+        seen = set()
 
         for ws in range(0, len(self.domain)):
             for i in range(0, (len(self.domain)-ws)+1):
@@ -210,106 +232,102 @@ def __homoglyph(self):
                         win_copy = win
                         for g in glyphs[c]:
                             win = win.replace(c, g)
-                            result.add(self.domain[:i] + win + self.domain[i+ws:])
+                            candidate = self.domain[:i] + win + self.domain[i+ws:]
+                            if candidate not in seen:
+                                seen.add(candidate)
+                                yield candidate
+                                yielded += 1
                             win = win_copy
 
                             # Very long domains have terrible complexity when
                             # ran through this algorithm.
-                            if len(result) >= 1000:
-                                return result
+                            if MAX is not None and yielded >= MAX:
+                                return
                     j += 1
 
-        return result
-
     def __hyphenation(self):
+        """Yield the domain with a '-' inserted at each interior position."""
-        result = []
-
         for i in range(1, len(self.domain)):
-            result.append(self.domain[:i] + '-' + self.domain[i:])
-
-        return result
+            yield self.domain[:i] + '-' + self.domain[i:]
 
     def __insertion(self):
+        """Yield insertion variants built from self.keyboards, each once.
+
+        For every index from 1 to len(domain) - 2 whose character is a key in
+        one of the self.keyboards mappings, each mapped character is inserted
+        immediately before and immediately after that character.
+        """
-        result = set()
+        seen = set()
 
         for i in range(1, len(self.domain)-1):
             for keys in self.keyboards:
                 if self.domain[i] in keys:
                     for c in keys[self.domain[i]]:
-                        result.add(self.domain[:i] + c + self.domain[i] + self.domain[i+1:])
-                        result.add(self.domain[:i] + self.domain[i] + c + self.domain[i+1:])
+                        first = self.domain[:i] + c + self.domain[i] + self.domain[i+1:]
+                        second = self.domain[:i] + self.domain[i] + c + self.domain[i+1:]
+
+                        if first not in seen:
+                            seen.add(first)
+                            yield first
 
-        return result
+                        if second not in seen:
+                            seen.add(second)
+                            yield second
 
     def __omission(self):
+        """Yield omission variants of the domain, each distinct string once.
+
+        First the domain with each single character removed, then the domain
+        with every run of a repeated character collapsed to one character,
+        provided that differs from the domain and was not already yielded.
+        """
-        result = set()
+        seen = set()
 
         for i in range(0, len(self.domain)):
-            result.add(self.domain[:i] + self.domain[i+1:])
+            candidate = self.domain[:i] + self.domain[i+1:]
+            if candidate not in seen:
+                seen.add(candidate)
+                yield candidate
 
         n = re.sub(r'(.)\1+', r'\1', self.domain)
 
-        if n not in result and n != self.domain:
-            result.add(n)
-
-        return result
+        if n not in seen and n != self.domain:
+            yield n
 
     def __repetition(self):
+        """Yield the domain with each alphabetic character doubled, each once."""
-        result = set()
+        seen = set()
 
         for i in range(0, len(self.domain)):
             if self.domain[i].isalpha():
-                result.add(self.domain[:i] + self.domain[i] + self.domain[i] + self.domain[i+1:])
-
-        return result
+                candidate = self.domain[:i] + self.domain[i] + self.domain[i] + self.domain[i+1:]
+                if candidate not in seen:
+                    seen.add(candidate)
+                    yield candidate
 
     def __replacement(self):
+        """Yield replacement variants built from self.keyboards, each once.
+
+        Each character that is a key in one of the self.keyboards mappings is
+        replaced, in turn, by every character that mapping associates with it.
+        """
-        result = set()
+        seen = set()
 
         for i in range(0, len(self.domain)):
             for keys in self.keyboards:
                 if self.domain[i] in keys:
                     for c in keys[self.domain[i]]:
-                        result.add(self.domain[:i] + c + self.domain[i+1:])
-
-        return result
+                        candidate = self.domain[:i] + c + self.domain[i+1:]
+                        if candidate not in seen:
+                            seen.add(candidate)
+                            yield candidate
 
     def __subdomain(self):
+        """Yield the domain with a '.' inserted at each interior position.
+
+        Positions where either neighbouring character is '-' or '.' are
+        skipped.
+        """
-        result = []
-
         for i in range(1, len(self.domain)):
             if self.domain[i] not in ['-', '.'] and self.domain[i-1] not in ['-', '.']:
-                result.append(self.domain[:i] + '.' + self.domain[i:])
-
-        return result
+                yield self.domain[:i] + '.' + self.domain[i:]
 
     def __transposition(self):
+        """Yield the domain with each pair of adjacent, differing characters swapped."""
-        result = []
-
         for i in range(0, len(self.domain)-1):
             if self.domain[i+1] != self.domain[i]:
-                result.append(self.domain[:i] + self.domain[i+1] + self.domain[i] + self.domain[i+2:])
-
-        return result
+                yield self.domain[:i] + self.domain[i+1] + self.domain[i] + self.domain[i+2:]
 
     def __vowel_swap(self):
+        """Yield the domain with each vowel replaced by each of 'aeiou', each once.
+
+        A vowel is also swapped for itself, so the unmodified domain is among
+        the results whenever the domain contains a vowel.
+        """
         vowels = 'aeiou'
-        result = set()
+        seen = set()
 
         for i in range(0, len(self.domain)):
             for vowel in vowels:
                 if self.domain[i] in vowels:
-                    result.add(self.domain[:i] + vowel + self.domain[i+1:])
-
-        return result
+                    candidate = self.domain[:i] + vowel + self.domain[i+1:]
+                    if candidate not in seen:
+                        seen.add(candidate)
+                        yield candidate
 
     def __addition(self):
+        """Yield the domain with each letter 'a' to 'z' appended."""
-        result = []
-
         for i in range(97, 123):
-            result.append(self.domain + chr(i))
-
-        return result
+            yield self.domain + chr(i)
 
     def fuzz(self):
         """ Perform a domain fuzz.
@@ -352,3 +370,66 @@ def fuzz(self):
             self.domains.append({ 'fuzzer': 'Various', 'domain-name': self.domain + '-' + self.tld + '.com' })
 
         self.__filter_domains()
+
+    def fuzz_iter(self, de_dupe=False):
+        """Return an iterator of the fuzz, yielding Result objects.
+
+        The intent is to reduce memory usage and to allow the fuzzed domains
+        to be streamed to the front-end using HTTP chunking.
+
+        Generating lazily costs a little performance, but that cost is
+        dwarfed by the time taken to resolve each domain - an additional
+        0.001 sec per domain here is irrelevant if it takes 1 second to
+        resolve each one in the browser.
+
+        The original domain is yielded first (tagged 'Original*'), then the
+        output of each fuzzer in the order declared below, then the 'Various'
+        www-prefix and TLD variants. Fuzzer candidates that fail
+        is_valid_domain() are dropped.
+
+        When `de_dupe` is true, a fuzzer candidate that equals the original
+        domain or an earlier fuzzer candidate is skipped; this uses more
+        memory as every candidate seen is remembered. The 'Various' entries
+        are never de-duplicated.
+        """
+        builder = ResultBuilder(self.tld)
+
+        # Seeded with the original so that de_dupe also suppresses fuzzers
+        # that reproduce the input (e.g. swapping a vowel for itself).
+        seen = set([self.domain])
+
+        yield builder.build('Original*', self.domain)
+
+        # A tuple of pairs rather than a dict, so results are streamed in
+        # this declared order regardless of dict ordering.
+        fuzzers = (
+            ('Addition', self.__addition),
+            ('Bitsquatting', self.__bitsquatting),
+            # No cap: results are streamed rather than accumulated.
+            ('Homoglyph', lambda: self.__homoglyph(MAX=None)),
+            ('Hyphenation', self.__hyphenation),
+            ('Insertion', self.__insertion),
+            ('Omission', self.__omission),
+            ('Repetition', self.__repetition),
+            ('Replacement', self.__replacement),
+            ('Subdomain', self.__subdomain),
+            ('Transposition', self.__transposition),
+            ('Vowel swap', self.__vowel_swap),
+        )
+
+        for (tag, fuzzer_func) in fuzzers:
+
+            for domain in fuzzer_func():
+                if de_dupe:
+                    if domain in seen:
+                        continue
+                    seen.add(domain)
+
+                if not is_valid_domain(domain + '.' + self.tld):
+                    continue
+
+                yield builder.build(tag, domain)
+
+        if not self.domain.startswith('www.'):
+            yield builder.build('Various', 'ww' + self.domain)
+            yield builder.build('Various', 'www' + self.domain)
+            yield builder.build('Various', 'www-' + self.domain)
+
+        if '.' in self.tld:
+            # Multi-part TLD: try its last label alone, and the TLD glued
+            # directly onto the domain.
+            yield Result('Various', self.domain + '.' + self.tld.split('.')[-1])
+            yield Result('Various', self.domain + self.tld)
+        else:
+            # builder.build() appends '.' + tld, giving <domain><tld>.<tld>.
+            yield builder.build('Various', self.domain + self.tld)
+
+            if self.tld != 'com':
+                yield Result('Various', self.domain + '-' + self.tld + '.com')
diff --git a/dnstwister/static/report.min.css b/dnstwister/static/report.min.css