Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/laramies/theHarvester
Browse files Browse the repository at this point in the history
  • Loading branch information
laramies committed Apr 16, 2018
2 parents df9d031 + abe266b commit fad1941
Show file tree
Hide file tree
Showing 7 changed files with 155 additions and 8 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -1,5 +1,6 @@
*.pyc
*.idea
tests/myparser.py
stash.sqlite
*.sqlite
.vscode
2 changes: 2 additions & 0 deletions discovery/__init__.py
Expand Up @@ -18,4 +18,6 @@
"virustotal",
"threatcrowd",
"wfuzz_search",
"port_scanner",
"takeover",
"googleCSE"]
7 changes: 4 additions & 3 deletions discovery/pgpsearch.py
Expand Up @@ -9,12 +9,13 @@ class search_pgp:
def __init__(self, word):
self.word = word
self.results = ""
self.server = "pgp.mit.edu"
#self.server = "pgp.rediris.es:11371" Not working at the moment
self.hostname = "pgp.mit.edu"
#self.server = "pgp.mit.edu"
self.server = "pgp.rediris.es"
self.hostname = "pgp.rediris.es"
self.userAgent = "(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"

def process(self):
print "\tSearching PGP results..."
h = httplib.HTTP(self.server)
h.putrequest('GET', "/pks/lookup?search=" + self.word + "&op=index")
h.putheader('Host', self.hostname)
Expand Down
31 changes: 31 additions & 0 deletions discovery/port_scanner.py
@@ -0,0 +1,31 @@
import threading
import socket

class port_scan():
    """Sequential TCP connect scanner for one host.

    Each port in ``ports`` is probed with a plain TCP connect using a
    2 second timeout; ports that accept the connection are reported as open.
    """

    def __init__(self, host, ports):
        """Store the scan target.

        host  -- hostname or IP address to probe.
        ports -- iterable of port numbers (anything ``int()`` accepts).
        """
        self.threads = 25
        self.host = host
        self.ports = ports

        # Per-instance semaphore sized by self.threads; it only limits
        # concurrent port_scanner() calls made on this same instance.
        self.lock = threading.BoundedSemaphore(value=self.threads)

    def port_scanner(self, host, ports):
        """Return the entries of ``ports`` that accept a TCP connection on ``host``.

        Errors for an individual port (bad port value, resolution failure,
        socket error) are printed and that port is skipped; the scan then
        continues with the next port.
        """
        openports = []
        # "with" guarantees the semaphore is released on every exit path.
        with self.lock:
            for port in ports:
                s = None
                try:
                    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                    s.settimeout(2)
                    # connect_ex returns 0 on success instead of raising.
                    if s.connect_ex((host, int(port))) == 0:
                        openports.append(port)
                except Exception as e:
                    print(e)
                finally:
                    # Close the socket even when the probe raised, so a
                    # failing port does not leak a file descriptor.
                    if s is not None:
                        s.close()
        return openports

    def process(self):
        """Scan ``self.ports`` on ``self.host`` and return the open ones."""
        return self.port_scanner(self.host, self.ports)
54 changes: 54 additions & 0 deletions discovery/s3_scanner.py
@@ -0,0 +1,54 @@
import string
import requests
import sys
import myparser
import re
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)





class s3_scanner:
    """Fetch ``https://<host>`` and look for known "unclaimed service" pages.

    The fingerprints are literal HTML snippets served by hosting providers
    when a domain points at an account or app that no longer exists.
    """

    # Seconds to wait for the HTTPS request before giving up on a host.
    REQUEST_TIMEOUT = 10

    def __init__(self, host):
        """Remember the host to check and set up the fingerprint list."""
        self.host = host
        self.results = ""
        self.totalresults = ""
        self.fingerprints = [
            "www.herokucdn.com/error-pages/no-such-app.html",
            "<title>Squarespace - No Such Account</title>",
            "<p> If you're trying to publish one, <a href=\"https://help.github.com/pages/\">read the full documentation</a> to learn how to set up <strong>GitHub Pages</strong> for your repository, organization, or user account. </p>",
            "<p> If you\'re trying to publish one, <a href=\"https://help.github.com/pages/\">read the full documentation</a> to learn how to set up <strong>GitHub Pages</strong> for your repository, organization, or user account. </p>",
            "<span class=\"title\">Bummer. It looks like the help center that you are trying to reach no longer exists.</span>",
            "<head> <title>The page you\'re looking for could not be found (404)</title> <style> body { color: #666; text-align: center; font-family: \"Helvetica Neue\", Helvetica, Arial, sans-serif; margin: 0; width: 800px; margin: auto; font-size: 14px; } h1 { font-size: 56px; line-height: 100px; font-weight: normal; color: #456; } h2 { font-size: 24px; color: #666; line-height: 1.5em; } h3 { color: #456; font-size: 20px; font-weight: normal; line-height: 28px; } hr { margin: 18px 0; border: 0; border-top: 1px solid #EEE; border-bottom: 1px solid white; } </style> </head>",
        ]

    def __check_http(self, bucket_url):
        """Probe an S3 bucket URL (currently non-functional, never called here).

        NOTE(review): this method uses ``self.session``, ``S3_URL``, ``ARGS``,
        ``KEYWORDS``, ``cprint`` and ``self.__log``, none of which are defined
        in this class or imported by the lines shown for this file, so
        calling it will fail. It is kept unchanged pending a decision to
        finish or remove it.
        """
        check_response = self.session.head(
            S3_URL, timeout=3, headers={"Host": bucket_url})

        # if not ARGS.ignore_rate_limiting\
        #         and (check_response.status_code == 503 and check_response.reason == "Slow Down"):
        #     self.q.rate_limited = True
        #     add it back to the bucket for re-processing
        #     self.q.put(bucket_url)
        if check_response.status_code == 307:  # valid bucket, lets check if its public
            new_bucket_url = check_response.headers["Location"]
            bucket_response = requests.request(
                "GET" if ARGS.only_interesting else "HEAD", new_bucket_url, timeout=3)

            if bucket_response.status_code == 200\
                    and (not ARGS.only_interesting or
                         (ARGS.only_interesting and any(keyword in bucket_response.text for keyword in KEYWORDS))):
                cprint("Found bucket '{}'".format(new_bucket_url), "green", attrs=["bold"])
                self.__log(new_bucket_url)

    def _matching_fingerprints(self, text):
        """Return the distinct fingerprints that occur verbatim in ``text``.

        The fingerprints are literal HTML, so a substring test is used
        instead of compiling them as regular expressions (characters such as
        "(", ")" and "." would otherwise be treated as regex syntax and the
        "(404)" page could never match).
        """
        if not text:
            return []
        hits = []
        for fingerprint in self.fingerprints:
            # The list contains a repeated entry; report each snippet once.
            if fingerprint in text and fingerprint not in hits:
                hits.append(fingerprint)
        return hits

    def do_s3(self):
        """Request the host over HTTPS and print an alert on a fingerprint hit.

        Any failure (connection error, timeout, ...) is printed and the
        method returns normally. ``self.temp`` holds the matched fingerprints
        of the last successful fetch.
        """
        try:
            print("\t Searching takeovers for " + self.host)
            # Certificate checks are disabled on purpose: dangling hosts
            # usually present a certificate for the provider, not the domain.
            r = requests.get('https://' + self.host, verify=False,
                             timeout=self.REQUEST_TIMEOUT)
            self.temp = self._matching_fingerprints(r.text)
            if self.temp:
                print("\t\033[91m Takeover detected! - " + self.host + "\033[1;32;40m ")
        except Exception as e:
            print(e)

    def process(self):
        """Run the check for ``self.host``."""
        # Previously called self.do_take(), which does not exist on this class.
        self.do_s3()
32 changes: 32 additions & 0 deletions discovery/takeover.py
@@ -0,0 +1,32 @@
import string
import requests
import sys
import myparser
import re
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

class take_over:
    """Check whether a host serves a known "unclaimed service" page.

    A hit suggests the domain still points at a third-party service
    (Heroku, Squarespace, GitHub Pages, ...) where the account or app no
    longer exists.
    """

    # Seconds to wait for the HTTPS request before giving up on a host.
    REQUEST_TIMEOUT = 10

    def __init__(self, host):
        """Remember the host to check and set up the fingerprint list."""
        self.host = host
        self.results = ""
        self.totalresults = ""
        self.fingerprints = [
            "<title>Squarespace - Domain Not Claimed</title>",
            "www.herokucdn.com/error-pages/no-such-app.html",
            "<title>Squarespace - No Such Account</title>",
            "<p> If you're trying to publish one, <a href=\"https://help.github.com/pages/\">read the full documentation</a> to learn how to set up <strong>GitHub Pages</strong> for your repository, organization, or user account. </p>",
            "<p> If you\'re trying to publish one, <a href=\"https://help.github.com/pages/\">read the full documentation</a> to learn how to set up <strong>GitHub Pages</strong> for your repository, organization, or user account. </p>",
            "<span class=\"title\">Bummer. It looks like the help center that you are trying to reach no longer exists.</span>",
            "<head> <title>The page you\'re looking for could not be found (404)</title> <style> body { color: #666; text-align: center; font-family: \"Helvetica Neue\", Helvetica, Arial, sans-serif; margin: 0; width: 800px; margin: auto; font-size: 14px; } h1 { font-size: 56px; line-height: 100px; font-weight: normal; color: #456; } h2 { font-size: 24px; color: #666; line-height: 1.5em; } h3 { color: #456; font-size: 20px; font-weight: normal; line-height: 28px; } hr { margin: 18px 0; border: 0; border-top: 1px solid #EEE; border-bottom: 1px solid white; } </style> </head>",
        ]

    def _matching_fingerprints(self, text):
        """Return the distinct fingerprints that occur verbatim in ``text``.

        The fingerprints are literal HTML, so a substring test is used
        instead of compiling them as regular expressions (characters such as
        "(", ")" and "." would otherwise be treated as regex syntax and the
        "(404)" page could never match).
        """
        if not text:
            return []
        hits = []
        for fingerprint in self.fingerprints:
            # The list contains a repeated entry; report each snippet once.
            if fingerprint in text and fingerprint not in hits:
                hits.append(fingerprint)
        return hits

    def do_take(self):
        """Request the host over HTTPS and print an alert on a fingerprint hit.

        ``self.temp`` holds the matched fingerprints of the last successful
        fetch. Failures are ignored so that one unreachable host does not
        interrupt the caller.
        """
        try:
            print("\t Searching takeovers for " + self.host)
            # Certificate checks are disabled on purpose: dangling hosts
            # usually present a certificate for the provider, not the domain.
            r = requests.get('https://' + self.host, verify=False,
                             timeout=self.REQUEST_TIMEOUT)
            self.temp = self._matching_fingerprints(r.text)
            if self.temp:
                print("\t\033[91m Takeover detected! - " + self.host + "\033[1;32;40m ")
        except Exception:
            # Deliberate best-effort: hosts without a working HTTPS endpoint
            # are simply skipped without reporting.
            pass

    def process(self):
        """Run the takeover check for ``self.host``."""
        self.do_take()

36 changes: 31 additions & 5 deletions theHarvester.py
Expand Up @@ -26,7 +26,7 @@
print "* | |_| | | | __/ / __ / (_| | | \ V / __/\__ \ || __/ | *"
print "* \__|_| |_|\___| \/ /_/ \__,_|_| \_/ \___||___/\__\___|_| *"
print "* *"
print "* TheHarvester Ver. 2.7.2 *"
print "* TheHarvester Ver. 3.0 *"
print "* Coded by Christian Martorella *"
print "* Edge-Security Research *"
print "* cmartorella@edge-security.com *"
Expand All @@ -52,6 +52,7 @@ def usage():
print " -c: Perform a DNS brute force for the domain name"
print " -t: Perform a DNS TLD expansion discovery"
print " -e: Use this DNS server"
print " -p: port scan the detected hosts and check for Takeovers (80,443,22,21,8080)"
print " -l: Limit the number of results to work with(bing goes from 50 to 50 results,"
print " google 100 to 100, and pgp doesn't use this option)"
print " -h: use SHODAN database to query discovered hosts"
Expand All @@ -67,7 +68,7 @@ def start(argv):
usage()
sys.exit()
try:
opts, args = getopt.getopt(argv, "l:d:b:s:vf:nhcte:")
opts, args = getopt.getopt(argv, "l:d:b:s:vf:nhcpte:")
except getopt.GetoptError:
usage()
sys.exit()
Expand All @@ -86,6 +87,8 @@ def start(argv):
shodan = False
vhost = []
virtual = False
ports_scanning = False
takeover_check = False
limit = 500
dnsserver = ""
for opt, arg in opts:
Expand All @@ -107,6 +110,8 @@ def start(argv):
shodan = True
elif opt == '-e':
dnsserver = arg
elif opt == '-p':
ports_scanning = True
elif opt == '-t':
dnstld = True
elif opt == '-b':
Expand Down Expand Up @@ -139,8 +144,7 @@ def start(argv):
all_emails = []
db=stash.stash_manager()
db.store_all(word,all_hosts,'host','netcraft')




if engine == "threatcrowd":
print "[-] Searching in Threatcrowd:"
Expand Down Expand Up @@ -271,7 +275,7 @@ def start(argv):
print "Full harvest on " + word
all_emails = []
all_hosts = []
virtual = "basic"


print "[-] Searching in Google.."
search = googlesearch.search_google(word, limit, start)
Expand Down Expand Up @@ -378,6 +382,28 @@ def start(argv):
pass
else:
host_ip.append(ip.lower())
#Port Scanning #################################################
if ports_scanning == True:
print("\n\n\033[1;32;40m[-] Scanning ports (Active):\n")
for x in full:
host = x.split(' : ')[1]
domain = x.split(' : ')[0]
if host != "empty" :
print "- Scanning : " + host
ports = [80,443,22,8080,21]
try:
scan = port_scanner.port_scan(host,ports)
openports = scan.process()
if len(openports) > 1:
print "\t\033[91m Detected open ports: " + ','.join(str(e) for e in openports) + "\033[1;32;40m"
takeover_check = 'True'
if takeover_check == 'True':
if len(openports) > 0:
search_take = takeover.take_over(domain)
search_take.process()
except Exception, e:
print e


#DNS reverse lookup#################################################
dnsrev = []
Expand Down

0 comments on commit fad1941

Please sign in to comment.