Commit

weakfilescan
Dynamic multi-threaded sensitive-information disclosure detection tool
ring04h committed Apr 2, 2015
0 parents commit db9b98e
Showing 106 changed files with 37,684 additions and 0 deletions.
203 changes: 203 additions & 0 deletions common.py
@@ -0,0 +1,203 @@
# encoding: utf-8
# Global helper functions
# email: ringzero@0x557.org

from config import *
import re
import urlparse
import threading
from bs4 import BeautifulSoup
from libs.tldextract import extract, TLDExtract
import libs.requests as requests
from libs.FuzzUrlGenerator import UrlGenerator
from libs.UrlSplitParser import UrlSplitParser

if allow_http_session:
    requests = requests.Session()

def get_basedomain(url):
    try:
        # return urlparse.urlparse(url).netloc
        return extract(url).registered_domain
        # return extract(url).domain  # groups related subdomains together more broadly
    except Exception:
        pass

def get_baseurl(link):
    netloc = urlparse.urlparse(link).netloc
    if netloc:
        split_url = link.split(netloc)
        baseurl = '%s%s' % (split_url[0], netloc)
        return baseurl

def http_request_get(url, body_content_workflow=False):
    result = requests.get(url,
                          stream=body_content_workflow,
                          headers=headers,
                          timeout=timeout,
                          proxies=proxies,
                          allow_redirects=allow_redirects)
    return result

def http_request_post(url, payload, body_content_workflow=False):
    """
    payload = {'key1': 'value1', 'key2': 'value2'}
    """
    result = requests.post(url,
                           data=payload,
                           headers=headers,
                           stream=body_content_workflow,
                           timeout=timeout,
                           proxies=proxies,
                           allow_redirects=allow_redirects)
    return result

def checksite_possibility(siteurl):  # estimate how reliably the site distinguishes real paths from fake ones
    temp_weburls = [
        '/ea63a430b109194d/',
        '/ea63a430b109194d1/',
        '/ea63a430b109194d.'+default_extion,
        '/ea63a430b109194d1.'+default_extion,
    ]

    req_result = {}
    for tempurl in temp_weburls:
        httpres = http_request_get(siteurl.rstrip('/')+tempurl)
        is_redirect = len(httpres.history) > 0
        req_result[tempurl] = {
            'status_code': httpres.status_code,
            'is_redirect': is_redirect,
            'text': httpres.text,
            'history': httpres.history,
            'request': httpres.url,
            'text_size': len(httpres.text),
        }

    possibility = 100
    refer_to_val = 0
    regex = re.compile(page_not_found_reg)

    dir1 = temp_weburls[0]
    dir2 = temp_weburls[1]
    file1 = temp_weburls[2]
    file2 = temp_weburls[3]

    # Analyse the directory probe responses
    if req_result[dir1]['status_code'] != 404 and req_result[dir2]['status_code'] != 404:
        possibility -= 10  # status code is not 404
    if not regex.findall(req_result[dir1]['text']) and not regex.findall(req_result[file1]['text']):
        possibility -= 10  # neither the file nor the directory error page carries a 'not found' marker
    else:
        refer_to_val += 50  # a usable 'not found' signature exists
    if req_result[dir1]['text_size'] != req_result[dir2]['text_size']:
        possibility -= 10  # the two responses differ in size
    if dir1 in req_result[dir1]['text'] and file1 in req_result[file1]['text']:
        possibility -= 10  # the requested name is echoed back in the response body

    if req_result[dir1]['request'] == req_result[dir2]['request']:
        possibility -= 10  # both requests resolved to the same final URL

    # Analyse the file probe responses
    if req_result[file1]['status_code'] != 404 and req_result[file2]['status_code'] != 404:
        possibility -= 10  # status code is not 404
    if not regex.findall(req_result[dir1]['text']) and not regex.findall(req_result[file1]['text']):
        possibility -= 10  # neither the file nor the directory error page carries a 'not found' marker
    else:
        refer_to_val += 50  # a usable 'not found' signature exists
    if req_result[file1]['text_size'] != req_result[file2]['text_size']:
        possibility -= 10  # the two responses differ in size
    if dir1 in req_result[dir1]['text'] and file1 in req_result[file1]['text']:
        possibility -= 10  # the requested name is echoed back in the response body

    if req_result[file1]['request'] == req_result[file2]['request']:
        possibility -= 10  # both requests resolved to the same final URL

    if refer_to_val < 50 and possibility < 65:
        return {'considered': False, 'possibility': possibility, 'refer_to_val': refer_to_val}
    else:
        return {'considered': True, 'possibility': possibility, 'refer_to_val': refer_to_val}
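
# A verdict like {'considered': True, 'possibility': 80, 'refer_to_val': 50} means the
# site's error responses are distinguishable enough for fuzzing results to be trusted.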

def get_segments(url):
    url_webdirs = []
    parser_obj = UrlSplitParser(urlparse.urlparse(url))
    for segment in parser_obj.get_paths()['segment']:
        url_webdirs.append(parser_obj.baseurl + segment)
    return url_webdirs
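
# For example, get_segments('http://example.com/a/b/index.php') yields the parent
# directory URLs under the base URL; the exact output depends on the bundled UrlSplitParser.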

class LinksParser(object):
    """Collects a, link, img and script URLs from an HTML response."""
    def __init__(self, html_content):
        super(LinksParser, self).__init__()
        self.html_content = html_content
        self.url_links = {
            'a': [],
            'link': [],
            'img': [],
            'script': []
        }
        self.url = self.html_content.url
        self.baseurl = get_baseurl(self.url)
        self.soup = BeautifulSoup(self.html_content.text, 'lxml')

    def complet_url(self, link):
        if link.startswith('/') or link.startswith('.'):
            return urlparse.urljoin(self.baseurl, link)
        elif link.startswith('http'):  # covers both http:// and https:// URLs
            return link
        elif link.startswith('#'):  # tolerate unusual fragment-only URI patterns
            return urlparse.urljoin(self.url, link)
        else:
            return False

    def getall(self):
        self.get_tag_a()
        self.get_tag_link()
        self.get_tag_img()
        self.get_tag_script()
        # de-duplicate the collected links
        for child in self.url_links.keys():
            self.url_links[child] = list(set(self.url_links[child]))
        return {self.url: self.url_links}

    def get_tag_a(self):
        # collect <a> href links
        for tag in self.soup.find_all('a'):
            if tag.attrs.has_key('href'):
                link = tag.attrs['href']
                # link = urlparse.urldefrag(tag.attrs['href'])[0]  # strip #fragment info
                complet_link = self.complet_url(link.strip())
                if complet_link:
                    self.url_links['a'].append(complet_link)
        return self.url_links

    def get_tag_link(self):
        # collect <link> href resources
        for tag in self.soup.find_all('link'):
            if tag.attrs.has_key('href'):
                link = tag.attrs['href']
                complet_link = self.complet_url(link.strip())
                if complet_link:
                    self.url_links['link'].append(complet_link)
        return self.url_links

    def get_tag_img(self):
        # collect <img> src resources
        for tag in self.soup.find_all('img'):
            if tag.attrs.has_key('src'):
                link = tag.attrs['src']
                complet_link = self.complet_url(link.strip())
                if complet_link:
                    self.url_links['img'].append(complet_link)
        return self.url_links

    def get_tag_script(self):
        # collect <script> src resources
        for tag in self.soup.find_all('script'):
            if tag.attrs.has_key('src'):
                link = tag.attrs['src']
                complet_link = self.complet_url(link.strip())
                if complet_link:
                    self.url_links['script'].append(complet_link)
        return self.url_links
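
A minimal usage sketch (not part of this commit), assuming common.py and its bundled libs are importable; the target URL is hypothetical:

# Hypothetical usage of the helpers above (example.com stands in for a real target)
from common import http_request_get, checksite_possibility, LinksParser

site = 'http://example.com'
verdict = checksite_possibility(site)       # probe with fake paths first
if verdict['considered']:                   # the site returns distinguishable 404s
    response = http_request_get(site)
    links = LinksParser(response).getall()  # {url: {'a': [...], 'link': [...], ...}}
    print links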



106 changes: 106 additions & 0 deletions config.py
@@ -0,0 +1,106 @@
# encoding: utf-8
# Global configuration

import random

# -------------------------------------------------
# TODO: build a proper regex parser and generate the dictionaries from patterns
# -------------------------------------------------

# Dictionary word lists loaded from files
directory_dict = './dict/directory.lst'
directory_common_dict = './dict/directory_common.lst'
filename_dict = './dict/filename.lst'
package_ext_dict = './dict/package_ext.lst'
tempfile_ext_dict = './dict/tmpfile_ext.lst'

# extion = './dict/filename.lst'
# Status codes that indicate a file or directory exists
exclude_status = [200, 403]

# Default file extension
default_extion = 'php'

# Regex used to detect 'page not found' responses
page_not_found_reg = r'404|not found|不存在|未找到'

# Database dump file names and extensions
sqlfile = ['data', 'install', 'web', 'user', 'members']
sqlfile_ext = ['.sql', '.bak', '.sql.tar.gz', '.sql.zip', '.sql.rar']

# Number of worker threads
threads_count = 10

# -------------------------------------------------
# requests options
# -------------------------------------------------

# Request timeout in seconds
timeout = 10

# Follow HTTP redirects
allow_redirects = True

# Use a requests Session so cookies persist across all outgoing requests
allow_http_session = True

# Randomize the User-Agent header
allow_random_useragent = True

# Randomize the X-Forwarded-For header
allow_random_x_forward = True

# Proxy settings
proxies = {
    # "http": "http://user:pass@10.10.1.10:3128/",
    # "https": "http://10.10.1.10:1080",
    # "http": "http://127.0.0.1:8118",  # Tor onion router
    # 'http': 'http://admin:admin*888@42.62.52.62:8080',
    # 'https': 'https://admin:admin*888@42.62.52.62:8080'
}

# Pool of User-Agent strings used for randomization
USER_AGENTS = [
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
"Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
"Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
"Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
"Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
"Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
"Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
"Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
"Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
"Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
"Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
]

# Pick a random User-Agent (falls back to the first entry when disabled)
def random_useragent(condition=False):
    if condition:
        return random.choice(USER_AGENTS)
    else:
        return USER_AGENTS[0]

# Random X-Forwarded-For value to simulate a changing client IP
def random_x_forwarded_for(condition=False):
    if condition:
        return '%d.%d.%d.%d' % (random.randint(1, 254), random.randint(1, 254), random.randint(1, 254), random.randint(1, 254))
    else:
        return '8.8.8.8'

# HTTP headers sent with every request
headers = {
    'User-Agent': random_useragent(allow_random_useragent),
    'X_FORWARDED_FOR': random_x_forwarded_for(allow_random_x_forward),
    # 'Referer': 'http://www.google.com',
    # 'Cookie': 'whoami=wyscan_dirfuzz',
}
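
A quick sanity check (hypothetical, not part of this commit) of how the toggles above shape outgoing headers:

# Verify header randomization; values vary per run when the allow_* flags are True
from config import headers, random_useragent, random_x_forwarded_for

print headers['User-Agent']         # chosen once, at import time
print headers['X_FORWARDED_FOR']    # '8.8.8.8' unless allow_random_x_forward is True
print random_useragent(True)        # may differ on every call
print random_x_forwarded_for(True)  # a random dotted quad, e.g. '42.17.203.9'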


