Merge branch 'master' of https://github.com/laramies/theHarvester

laramies · Apr 16, 2018 · fad1941 · fad1941
2 parents df9d031 + abe266b
commit fad1941
Show file tree

Hide file tree

Showing 7 changed files with 155 additions and 8 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,6 @@
 *.pyc
 *.idea
 tests/myparser.py
+stash.sqlite
 *.sqlite
 .vscode
diff --git a/discovery/__init__.py b/discovery/__init__.py
@@ -18,4 +18,6 @@
            "virustotal",
            "threatcrowd",
            "wfuzz_search",
+           "port_scanner",
+           "takeover",
            "googleCSE"]
diff --git a/discovery/pgpsearch.py b/discovery/pgpsearch.py
@@ -9,12 +9,13 @@ class search_pgp:
     def __init__(self, word):
         self.word = word
         self.results = ""
-        self.server = "pgp.mit.edu"
-        #self.server = "pgp.rediris.es:11371" Not  working at the moment
-        self.hostname = "pgp.mit.edu"
+        #self.server = "pgp.mit.edu"
+        self.server = "pgp.rediris.es"
+        self.hostname = "pgp.rediris.es"
         self.userAgent = "(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
 
     def process(self):
+        print "\tSearching PGP results..."
         h = httplib.HTTP(self.server)
         h.putrequest('GET', "/pks/lookup?search=" + self.word + "&op=index")
         h.putheader('Host', self.hostname)

diff --git a/discovery/port_scanner.py b/discovery/port_scanner.py
@@ -0,0 +1,41 @@
+import threading
+import socket
+
+class port_scan():
+    """TCP connect scanner: finds which of `ports` accept connections on `host`."""
+
+    def __init__(self, host, ports):
+        self.threads = 25
+        self.host = host
+        self.ports = ports
+        # Semaphore sized by self.threads; port_scanner() holds one slot per scan.
+        self.lock = threading.BoundedSemaphore(value=self.threads)
+
+    def port_scanner(self, host, ports):
+        """Return the entries of `ports` for which a TCP connect to `host` succeeds.
+
+        Each attempt uses a 2 second timeout. Errors (for example a port that
+        is not an integer) are printed and the scan continues with the next port.
+        """
+        openports = []
+        # `with` releases the semaphore even if something unexpected escapes.
+        with self.lock:
+            for port in ports:
+                s = None
+                try:
+                    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+                    s.settimeout(2)
+                    if s.connect_ex((host, int(port))) == 0:
+                        openports.append(port)
+                except Exception as e:
+                    print e
+                finally:
+                    # connect_ex() can still raise (e.g. name resolution failure),
+                    # so close here rather than on the success path only.
+                    if s is not None:
+                        s.close()
+        return openports
+
+    def process(self):
+        """Scan the host and ports given at construction; return the open ports."""
+        return self.port_scanner(self.host, self.ports)
diff --git a/discovery/s3_scanner.py b/discovery/s3_scanner.py
@@ -0,0 +1,72 @@
+import string
+import requests
+import sys
+import myparser
+import re
+from requests.packages.urllib3.exceptions import InsecureRequestWarning
+requests.packages.urllib3.disable_warnings(InsecureRequestWarning) 
+
+
+
+
+
+class s3_scanner:
+    """Check whether https://<host> serves a known "unclaimed resource" page.
+
+    Despite the class name, the working path (process -> do_s3) is a
+    fingerprint-based takeover check; __check_http is an unfinished S3 probe
+    that nothing in this file calls.
+    """
+
+    def __init__(self, host):
+        self.host = host
+        self.results = ""
+        self.totalresults = ""
+        # Literal fragments of provider error pages (Heroku, Squarespace, GitHub Pages and others).
+        self.fingerprints = ["www.herokucdn.com/error-pages/no-such-app.html","<title>Squarespace - No Such Account</title>","<p> If you're trying to publish one, <a href=\"https://help.github.com/pages/\">read the full documentation</a> to learn how to set up <strong>GitHub Pages</strong> for your repository, organization, or user account. </p>","<p> If you\'re trying to publish one, <a href=\"https://help.github.com/pages/\">read the full documentation</a> to learn how to set up <strong>GitHub Pages</strong> for your repository, organization, or user account. </p>","<span class=\"title\">Bummer. It looks like the help center that you are trying to reach no longer exists.</span>","<head> <title>The page you\'re looking for could not be found (404)</title> <style> body { color: #666; text-align: center; font-family: \"Helvetica Neue\", Helvetica, Arial, sans-serif; margin: 0; width: 800px; margin: auto; font-size: 14px; } h1 { font-size: 56px; line-height: 100px; font-weight: normal; color: #456; } h2 { font-size: 24px; color: #666; line-height: 1.5em; } h3 { color: #456; font-size: 20px; font-weight: normal; line-height: 28px; } hr { margin: 18px 0; border: 0; border-top: 1px solid #EEE; border-bottom: 1px solid white; } </style> </head>"]
+
+    def __check_http(self, bucket_url):
+        # NOTE(review): self.session, S3_URL, ARGS, KEYWORDS, cprint and self.__log
+        # are not defined or imported in this file, so calling this method would
+        # fail. The code is left as committed; wire up those names before use.
+        check_response = self.session.head(
+            S3_URL, timeout=3, headers={"Host": bucket_url})
+
+#       if not ARGS.ignore_rate_limiting\
+#              and (check_response.status_code == 503 and check_response.reason == "Slow Down"):
+#            self.q.rate_limited = True
+            # add it back to the bucket for re-processing
+ #           self.q.put(bucket_url)
+        if check_response.status_code == 307:  # valid bucket, lets check if its public
+            new_bucket_url = check_response.headers["Location"]
+            bucket_response = requests.request(
+                "GET" if ARGS.only_interesting else "HEAD", new_bucket_url, timeout=3)
+
+            if bucket_response.status_code == 200\
+                    and (not ARGS.only_interesting or
+                             (ARGS.only_interesting and any(keyword in bucket_response.text for keyword in KEYWORDS))):
+                cprint("Found bucket '{}'".format(new_bucket_url), "green", attrs=["bold"])
+                self.__log(new_bucket_url)
+
+    def do_s3(self):
+        """Fetch https://<self.host> and print a warning if any fingerprint occurs in the body."""
+        try:
+            print "\t Searching takeovers for "  + self.host
+            # A timeout keeps one unresponsive host from stalling the whole run.
+            r = requests.get('https://' + self.host, verify=False, timeout=10)
+            for x in self.fingerprints:
+                # Fingerprints are literal text with regex metacharacters such as
+                # "(404)", so escape them; unescaped, the parentheses form a group.
+                take_reg = re.compile(re.escape(x))
+                self.temp = take_reg.findall(r.text)
+                if self.temp != []:
+                    print "\t\033[91m Takeover detected! - " + self.host + "\033[1;32;40m "
+                    # One report per host; the list contains a duplicated entry.
+                    break
+        except Exception as e:
+            print e
+
+
+    def process(self):
+        """Run the takeover fingerprint check for self.host."""
+        self.do_s3()
diff --git a/discovery/takeover.py b/discovery/takeover.py
@@ -0,0 +1,45 @@
+import string
+import requests
+import sys
+import myparser
+import re
+from requests.packages.urllib3.exceptions import InsecureRequestWarning
+requests.packages.urllib3.disable_warnings(InsecureRequestWarning) 
+
+class take_over:
+    """Report hosts whose HTTPS page contains a known "unclaimed resource" fingerprint."""
+
+    def __init__(self, host):
+        self.host = host
+        self.results = ""
+        self.totalresults = ""
+        # Literal fragments of provider error pages (Squarespace, Heroku, GitHub Pages and others).
+        self.fingerprints = ["<title>Squarespace - Domain Not Claimed</title>","www.herokucdn.com/error-pages/no-such-app.html","<title>Squarespace - No Such Account</title>","<p> If you're trying to publish one, <a href=\"https://help.github.com/pages/\">read the full documentation</a> to learn how to set up <strong>GitHub Pages</strong> for your repository, organization, or user account. </p>","<p> If you\'re trying to publish one, <a href=\"https://help.github.com/pages/\">read the full documentation</a> to learn how to set up <strong>GitHub Pages</strong> for your repository, organization, or user account. </p>","<span class=\"title\">Bummer. It looks like the help center that you are trying to reach no longer exists.</span>","<head> <title>The page you\'re looking for could not be found (404)</title> <style> body { color: #666; text-align: center; font-family: \"Helvetica Neue\", Helvetica, Arial, sans-serif; margin: 0; width: 800px; margin: auto; font-size: 14px; } h1 { font-size: 56px; line-height: 100px; font-weight: normal; color: #456; } h2 { font-size: 24px; color: #666; line-height: 1.5em; } h3 { color: #456; font-size: 20px; font-weight: normal; line-height: 28px; } hr { margin: 18px 0; border: 0; border-top: 1px solid #EEE; border-bottom: 1px solid white; } </style> </head>"]
+
+    def do_take(self):
+        """Fetch https://<self.host> and print a warning if any fingerprint occurs in the body."""
+        try:
+            print "\t Searching takeovers for "  + self.host
+            # A timeout keeps one unresponsive host from stalling the whole run.
+            r = requests.get('https://' + self.host, verify=False, timeout=10)
+            for x in self.fingerprints:
+                # Fingerprints are literal text with regex metacharacters such as
+                # "(404)", so escape them; unescaped, the parentheses form a group.
+                take_reg = re.compile(re.escape(x))
+                self.temp = take_reg.findall(r.text)
+                if self.temp != []:
+                    print "\t\033[91m Takeover detected! - " + self.host + "\033[1;32;40m "
+                    # One report per host; the list contains a duplicated entry.
+                    break
+        except requests.exceptions.RequestException:
+            # Hosts that are unreachable over HTTPS are expected; skip them quietly.
+            pass
+        except Exception as e:
+            # Anything else is unexpected, so surface it instead of hiding it.
+            print e
+
+
+    def process(self):
+        """Run the takeover check for self.host."""
+        self.do_take()
+
diff --git a/theHarvester.py b/theHarvester.py
@@ -26,7 +26,7 @@
 print "* | |_| | | |  __/ / __  / (_| | |   \ V /  __/\__ \ ||  __/ |    *"
 print "*  \__|_| |_|\___| \/ /_/ \__,_|_|    \_/ \___||___/\__\___|_|    *"
 print "*                                                                 *"
-print "* TheHarvester Ver. 2.7.2                                         *"
+print "* TheHarvester Ver. 3.0                                           *"
 print "* Coded by Christian Martorella                                   *"
 print "* Edge-Security Research                                          *"
 print "* cmartorella@edge-security.com                                   *"
@@ -52,6 +52,7 @@ def usage():
     print "       -c: Perform a DNS brute force for the domain name"
     print "       -t: Perform a DNS TLD expansion discovery"
     print "       -e: Use this DNS server"
+    print "       -p: port scan the detected hosts and check for Takeovers (80,443,22,21,8080)"
     print "       -l: Limit the number of results to work with(bing goes from 50 to 50 results,"
     print "            google 100 to 100, and pgp doesn't use this option)"
     print "       -h: use SHODAN database to query discovered hosts"
@@ -67,7 +68,7 @@ def start(argv):
         usage()
         sys.exit()
     try:
-        opts, args = getopt.getopt(argv, "l:d:b:s:vf:nhcte:")
+        opts, args = getopt.getopt(argv, "l:d:b:s:vf:nhcpte:")
     except getopt.GetoptError:
         usage()
         sys.exit()
@@ -86,6 +87,8 @@ def start(argv):
     shodan = False
     vhost = []
     virtual = False
+    ports_scanning = False
+    takeover_check = False
     limit = 500
     dnsserver = ""
     for opt, arg in opts:
@@ -107,6 +110,8 @@ def start(argv):
             shodan = True
         elif opt == '-e':
             dnsserver = arg
+        elif opt == '-p':
+            ports_scanning = True
         elif opt == '-t':
             dnstld = True
         elif opt == '-b':
@@ -139,8 +144,7 @@ def start(argv):
         all_emails = []
         db=stash.stash_manager()
         db.store_all(word,all_hosts,'host','netcraft')
-
-
+
 
     if engine == "threatcrowd":
         print "[-] Searching in Threatcrowd:"
@@ -271,7 +275,7 @@ def start(argv):
         print "Full harvest on " + word
         all_emails = []
         all_hosts = []
-        virtual = "basic"
+
 
         print "[-] Searching in Google.."
         search = googlesearch.search_google(word, limit, start)
@@ -378,6 +382,28 @@ def start(argv):
                 pass
             else:
                 host_ip.append(ip.lower())
+    #Port Scanning #################################################
+        if ports_scanning == True:
+            print("\n\n\033[1;32;40m[-] Scanning ports (Active):\n")
+            takeover_check = True  # -p always pairs the port scan with a takeover check
+            ports = [80,443,22,8080,21]
+            for x in full:
+                host = x.split(' : ')[1]
+                domain = x.split(' : ')[0]
+                if host != "empty" :
+                    print "- Scanning : " + host
+                    try:
+                        scan = port_scanner.port_scan(host,ports)
+                        openports = scan.process()
+                        # A single open port must be reported too, hence "> 0" rather than "> 1".
+                        if len(openports) > 0:
+                            print "\t\033[91m Detected open ports: " + ','.join(str(e) for e in openports) +  "\033[1;32;40m"
+                            if takeover_check:
+                                search_take = takeover.take_over(domain)
+                                search_take.process()
+                    except Exception as e:
+                        print e
+
 
     #DNS reverse lookup#################################################
     dnsrev = []