Commit

weakfilescan
Dynamic multi-threaded sensitive-information disclosure detection tool
ring04h committed Apr 2, 2015
0 parents commit db9b98e
Showing 106 changed files with 37,684 additions and 0 deletions.
203 changes: 203 additions & 0 deletions common.py
@@ -0,0 +1,203 @@
# encoding: utf-8
# Global helper functions
# email: ringzero@0x557.org

from config import *
import re
import urlparse
import threading
from bs4 import BeautifulSoup
from libs.tldextract import extract, TLDExtract
import libs.requests as requests
from libs.FuzzUrlGenerator import UrlGenerator
from libs.UrlSplitParser import UrlSplitParser

if allow_http_session:
    requests = requests.Session()

def get_basedomain(url):
    try:
        # return urlparse.urlparse(url).netloc
        return extract(url).registered_domain
        # return extract(url).domain  # groups related subdomains together more broadly
    except Exception:
        pass

def get_baseurl(link):
    netloc = urlparse.urlparse(link).netloc
    if netloc:
        split_url = link.split(netloc)
        baseurl = '%s%s' % (split_url[0], netloc)
        return baseurl

def http_request_get(url, body_content_workflow=False):
    result = requests.get(url,
                          stream=body_content_workflow,
                          headers=headers,
                          timeout=timeout,
                          proxies=proxies,
                          allow_redirects=allow_redirects)
    return result

def http_request_post(url, payload, body_content_workflow=False):
    """
    payload = {'key1': 'value1', 'key2': 'value2'}
    """
    result = requests.post(url,
                           data=payload,
                           headers=headers,
                           stream=body_content_workflow,
                           timeout=timeout,
                           proxies=proxies,
                           allow_redirects=allow_redirects)
    return result

def checksite_possibility(siteurl):  # estimate how reliably the site distinguishes real paths from fake ones
    temp_weburls = [
        '/ea63a430b109194d/',
        '/ea63a430b109194d1/',
        '/ea63a430b109194d.'+default_extion,
        '/ea63a430b109194d1.'+default_extion,
    ]

    req_result = {}
    for tempurl in temp_weburls:
        httpres = http_request_get(siteurl.rstrip('/')+tempurl)
        is_redirect = len(httpres.history) > 0
        req_result[tempurl] = {
            'status_code': httpres.status_code,
            'is_redirect': is_redirect,
            'text': httpres.text,
            'history': httpres.history,
            'request': httpres.url,
            'text_size': len(httpres.text),
        }

    possibility = 100
    refer_to_val = 0
    regex = re.compile(page_not_found_reg)

    dir1 = temp_weburls[0]
    dir2 = temp_weburls[1]
    file1 = temp_weburls[2]
    file2 = temp_weburls[3]

    # Analyse the directory probe responses
    if req_result[dir1]['status_code'] != 404 and req_result[dir2]['status_code'] != 404:
        possibility -= 10  # status code is not 404
    if not regex.findall(req_result[dir1]['text']) and not regex.findall(req_result[file1]['text']):
        possibility -= 10  # neither the file nor the directory error page carries a 'not found' marker
    else:
        refer_to_val += 50  # a usable 'not found' signature exists
    if req_result[dir1]['text_size'] != req_result[dir2]['text_size']:
        possibility -= 10  # the two responses differ in size
    if dir1 in req_result[dir1]['text'] and file1 in req_result[file1]['text']:
        possibility -= 10  # the requested name is echoed back in the response body

    if req_result[dir1]['request'] == req_result[dir2]['request']:
        possibility -= 10  # both requests resolved to the same final URL

    # Analyse the file probe responses
    if req_result[file1]['status_code'] != 404 and req_result[file2]['status_code'] != 404:
        possibility -= 10  # status code is not 404
    if not regex.findall(req_result[dir1]['text']) and not regex.findall(req_result[file1]['text']):
        possibility -= 10  # neither the file nor the directory error page carries a 'not found' marker
    else:
        refer_to_val += 50  # a usable 'not found' signature exists
    if req_result[file1]['text_size'] != req_result[file2]['text_size']:
        possibility -= 10  # the two responses differ in size
    if dir1 in req_result[dir1]['text'] and file1 in req_result[file1]['text']:
        possibility -= 10  # the requested name is echoed back in the response body

    if req_result[file1]['request'] == req_result[file2]['request']:
        possibility -= 10  # both requests resolved to the same final URL

    if refer_to_val < 50 and possibility < 65:
        return {'considered': False, 'possibility': possibility, 'refer_to_val': refer_to_val}
    else:
        return {'considered': True, 'possibility': possibility, 'refer_to_val': refer_to_val}
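
# A verdict like {'considered': True, 'possibility': 80, 'refer_to_val': 50} means the
# site's error responses are distinguishable enough for fuzzing results to be trusted.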

def get_segments(url):
    url_webdirs = []
    parser_obj = UrlSplitParser(urlparse.urlparse(url))
    for segment in parser_obj.get_paths()['segment']:
        url_webdirs.append(parser_obj.baseurl + segment)
    return url_webdirs
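
# For example, get_segments('http://example.com/a/b/index.php') yields the parent
# directory URLs under the base URL; the exact output depends on the bundled UrlSplitParser.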

class LinksParser(object):
    """Collects a, link, img and script URLs from an HTML response."""
    def __init__(self, html_content):
        super(LinksParser, self).__init__()
        self.html_content = html_content
        self.url_links = {
            'a': [],
            'link': [],
            'img': [],
            'script': []
        }
        self.url = self.html_content.url
        self.baseurl = get_baseurl(self.url)
        self.soup = BeautifulSoup(self.html_content.text, 'lxml')

    def complet_url(self, link):
        if link.startswith('/') or link.startswith('.'):
            return urlparse.urljoin(self.baseurl, link)
        elif link.startswith('http'):  # covers both http:// and https:// URLs
            return link
        elif link.startswith('#'):  # tolerate unusual fragment-only URI patterns
            return urlparse.urljoin(self.url, link)
        else:
            return False

    def getall(self):
        self.get_tag_a()
        self.get_tag_link()
        self.get_tag_img()
        self.get_tag_script()
        # de-duplicate the collected links
        for child in self.url_links.keys():
            self.url_links[child] = list(set(self.url_links[child]))
        return {self.url: self.url_links}

    def get_tag_a(self):
        # collect <a> href links
        for tag in self.soup.find_all('a'):
            if tag.attrs.has_key('href'):
                link = tag.attrs['href']
                # link = urlparse.urldefrag(tag.attrs['href'])[0]  # strip #fragment info
                complet_link = self.complet_url(link.strip())
                if complet_link:
                    self.url_links['a'].append(complet_link)
        return self.url_links

    def get_tag_link(self):
        # collect <link> href resources
        for tag in self.soup.find_all('link'):
            if tag.attrs.has_key('href'):
                link = tag.attrs['href']
                complet_link = self.complet_url(link.strip())
                if complet_link:
                    self.url_links['link'].append(complet_link)
        return self.url_links

    def get_tag_img(self):
        # collect <img> src resources
        for tag in self.soup.find_all('img'):
            if tag.attrs.has_key('src'):
                link = tag.attrs['src']
                complet_link = self.complet_url(link.strip())
                if complet_link:
                    self.url_links['img'].append(complet_link)
        return self.url_links

    def get_tag_script(self):
        # collect <script> src resources
        for tag in self.soup.find_all('script'):
            if tag.attrs.has_key('src'):
                link = tag.attrs['src']
                complet_link = self.complet_url(link.strip())
                if complet_link:
                    self.url_links['script'].append(complet_link)
        return self.url_links
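
A minimal usage sketch (not part of this commit), assuming common.py and its bundled libs are importable; the target URL is hypothetical:

# Hypothetical usage of the helpers above (example.com stands in for a real target)
from common import http_request_get, checksite_possibility, LinksParser

site = 'http://example.com'
verdict = checksite_possibility(site)       # probe with fake paths first
if verdict['considered']:                   # the site returns distinguishable 404s
    response = http_request_get(site)
    links = LinksParser(response).getall()  # {url: {'a': [...], 'link': [...], ...}}
    print links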



106 changes: 106 additions & 0 deletions config.py
@@ -0,0 +1,106 @@
# encoding: utf-8
# Global configuration

import random

# -------------------------------------------------
# TODO: build a proper regex parser and generate the dictionaries from patterns
# -------------------------------------------------

# Dictionary word lists loaded from files
directory_dict = './dict/directory.lst'
directory_common_dict = './dict/directory_common.lst'
filename_dict = './dict/filename.lst'
package_ext_dict = './dict/package_ext.lst'
tempfile_ext_dict = './dict/tmpfile_ext.lst'

# extion = './dict/filename.lst'
# Status codes that indicate a file or directory exists
exclude_status = [200, 403]

# Default file extension
default_extion = 'php'

# Regex used to detect 'page not found' responses
page_not_found_reg = r'404|not found|不存在|未找到'

# Database dump file names and extensions
sqlfile = ['data', 'install', 'web', 'user', 'members']
sqlfile_ext = ['.sql', '.bak', '.sql.tar.gz', '.sql.zip', '.sql.rar']

# Number of worker threads
threads_count = 10

# -------------------------------------------------
# requests options
# -------------------------------------------------

# Request timeout in seconds
timeout = 10

# Follow HTTP redirects
allow_redirects = True

# Use a requests Session so cookies persist across all outgoing requests
allow_http_session = True

# Randomize the User-Agent header
allow_random_useragent = True

# Randomize the X-Forwarded-For header
allow_random_x_forward = True

# Proxy settings
proxies = {
    # "http": "http://user:pass@10.10.1.10:3128/",
    # "https": "http://10.10.1.10:1080",
    # "http": "http://127.0.0.1:8118",  # Tor onion router
    # 'http': 'http://admin:admin*888@42.62.52.62:8080',
    # 'https': 'https://admin:admin*888@42.62.52.62:8080'
}

# Pool of User-Agent strings used for randomization
USER_AGENTS = [
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
"Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
"Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
"Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
"Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
"Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
"Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
"Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
"Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
"Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
"Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
]

# Pick a random User-Agent (falls back to the first entry when disabled)
def random_useragent(condition=False):
    if condition:
        return random.choice(USER_AGENTS)
    else:
        return USER_AGENTS[0]

# Random X-Forwarded-For value to simulate a changing client IP
def random_x_forwarded_for(condition=False):
    if condition:
        return '%d.%d.%d.%d' % (random.randint(1, 254), random.randint(1, 254), random.randint(1, 254), random.randint(1, 254))
    else:
        return '8.8.8.8'

# HTTP headers sent with every request
headers = {
    'User-Agent': random_useragent(allow_random_useragent),
    'X_FORWARDED_FOR': random_x_forwarded_for(allow_random_x_forward),
    # 'Referer': 'http://www.google.com',
    # 'Cookie': 'whoami=wyscan_dirfuzz',
}
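
A quick sanity check (hypothetical, not part of this commit) of how the toggles above shape outgoing headers:

# Verify header randomization; values vary per run when the allow_* flags are True
from config import headers, random_useragent, random_x_forwarded_for

print headers['User-Agent']         # chosen once, at import time
print headers['X_FORWARDED_FOR']    # '8.8.8.8' unless allow_random_x_forward is True
print random_useragent(True)        # may differ on every call
print random_x_forwarded_for(True)  # a random dotted quad, e.g. '42.17.203.9'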


