Skip to content

Commit

Permalink
feat: add proxy
Browse files Browse the repository at this point in the history
  • Loading branch information
staugur committed Jun 24, 2019
1 parent a61ca04 commit de22fe6
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 15 deletions.
17 changes: 12 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,25 +20,30 @@ cd grab_huaban_board

1. pip install requests

2. python grab_huaban_board.py

2. python grab_huaban_board.py --help
```
usage: grab_huaban_board.py [-h] [-a ACTION] [-u USER] [-p PASSWORD] [-v]
[--board_id BOARD_ID] [--user_id USER_ID]
[--debug] [--proxy] [--proxy_apiurl PROXY_APIURL]
optional arguments:
-h, --help show this help message and exit
-a ACTION, --action ACTION
脚本动作 -> 1. getBoard: 抓取单画板(默认);
2. getUser: 抓取单用户
脚本动作 -> getBoard: 抓取单画板(默认); getUser: 抓取单用户
-u USER, --user USER 花瓣网账号-手机/邮箱
-p PASSWORD, --password PASSWORD
花瓣网账号对应密码
-v, --version 查看版本号
--board_id BOARD_ID 花瓣网单个画板id, action=getBoard时使用
--user_id USER_ID 花瓣网单个用户id, action=getUser时使用
--debug 开启debug输出
--proxy 开启IP代理池
--proxy_apiurl PROXY_APIURL
IP代理池接口:开启IP代理池后,设置此选项使用非默认接口
```

*温馨提示:目前默认IP代理池接口,获取的代理IP质量较低,所以若开启IP代理池,强烈建议使用自己的代理池接口!*

* 详细使用文档请参考: [https://blog.saintic.com/blog/204.html](https://blog.saintic.com/blog/204.html "https://blog.saintic.com/blog/204.html")


Expand All @@ -60,7 +65,9 @@ optional arguments:
1. --board_ids 多画板
2. --user_ids 多用户
3. --ignore 指定忽略画板
4. But,以上todo暂无计划,py版目前只针对bug
4. ~~ip代理池~~

不过,以上 todo 暂无开发计划,py 版目前只修复 bug


## 友情链接
Expand Down
45 changes: 35 additions & 10 deletions grab_huaban_board.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf8 -*-

__version__ = "5.0.2"
__version__ = "5.0.3"
__author__ = "Mr.tao"
__doc__ = "https://blog.saintic.com/blog/204.html"

Expand All @@ -22,14 +22,17 @@
# Site root; also sent as the Referer header on the shared session below.
BASE_URL = 'https://huaban.com'
# Short pause between downloads, in seconds.
SLEEP_TIME = 1
# Whether the IP proxy pool is enabled; main() switches it on for --proxy.
WITH_IP_POOL = False
# Proxy-pool endpoint queried by get_proxy(); --proxy_apiurl overrides it.
IP_POOL_API = "http://118.24.52.95:5010/get/"

# Log INFO and above to huaban.log, appending to any existing file.
logging.basicConfig(level=logging.INFO,
format='[ %(levelname)s ] %(asctime)s %(filename)s:%(threadName)s:%(process)d:%(lineno)d %(message)s',
datefmt='%Y-%m-%d %H:%M:%S',
filename='huaban.log',
filemode='a')

# NOTE(review): two assignments to `debug` appear here, presumably the old and
# new lines of a diff shown together -- confirm. As written the later one wins,
# so debug output is off unless main() enables it for --debug.
debug = True
debug = False
# Shared requests session; certificate verification is explicitly kept on.
request = requests.Session()
request.verify = True
# Default headers for requests made through the session.
request.headers.update({'X-Request': 'JSON', 'X-Requested-With': 'XMLHttpRequest', 'Referer': BASE_URL, 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36'})
Expand All @@ -44,6 +47,19 @@
"Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10.5; en-US; rv:1.9.2.15) Gecko/20110303 Firefox/3.6.15",
]

def get_proxy():
    """Fetch one proxy address from the IP-pool API and build a proxies mapping.

    The pool is only queried when ``WITH_IP_POOL`` is ``True``. The response
    body is treated as a single proxy address; ``http://`` is prepended when
    the address carries no scheme.

    Returns:
        dict: ``{"http": url, "https": url}``, ready to be passed to
        ``request.proxies.update``, when a usable address was obtained;
        an empty dict when the pool is disabled, the request failed, or the
        API returned an empty body.
    """
    if WITH_IP_POOL is not True:
        return {}

    proxies = {}
    try:
        reply = request.get(IP_POOL_API, timeout=5)
        # An HTTP error page must not be mistaken for a proxy address.
        # HTTPError is a RequestException, so it is handled just below.
        reply.raise_for_status()
    except requests.exceptions.RequestException as e:
        # logging.warn is a deprecated alias of logging.warning.
        logging.warning(e, exc_info=True)
    else:
        # Strip surrounding whitespace (e.g. a trailing newline) so it does
        # not end up inside the proxy URL.
        address = reply.text.strip()
        if address:
            if not address.startswith(("http://", "https://")):
                address = "http://%s" % address
            proxies = {"http": address, "https": address}
        else:
            # Without this guard an empty body would become the proxy "http://".
            logging.warning("ip_proxy_pool returned an empty response: %s", IP_POOL_API)
    logging.info("Start ip_proxy_pool, get result: %s", proxies)
    return proxies

def printcolor(msg, color=None):
if color == "green":
Expand Down Expand Up @@ -217,18 +233,24 @@ def _crawl_user(user_id):
pool.join() # 主进程阻塞等待子进程的退出
printcolor("Current user {}, download over".format(user_id), "green")


def main(parser):
global WITH_IP_POOL,IP_POOL_API,request,debug
args = parser.parse_args()
if not args.action:
parser.print_help()
return
action = args.action or "getBoard"
action = args.action
user = args.user
password = args.password
version = args.version
board_id = args.board_id
user_id = args.user_id
if args.debug is True:
debug = True
if args.proxy is True:
WITH_IP_POOL = args.proxy
IP_POOL_API = args.proxy_apiurl or IP_POOL_API
request.proxies.update(get_proxy())
if version:
printcolor("https://github.com/staugur/grab_huaban_board, v{}".format(__version__))
return
Expand Down Expand Up @@ -264,10 +286,13 @@ def main(parser):
# Script entry point: declare the command-line options and hand the parser to
# main(), which parses it.
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
# NOTE(review): the six add_argument calls directly below are repeated further
# down with u"..." help strings -- this looks like the old and new lines of a
# diff shown together; confirm that only one set is meant to exist.
parser.add_argument("-a", "--action", default="getBoard", help="脚本动作 -> 1. getBoard: 抓取单画板(默认); 2. getUser: 抓取单用户")
parser.add_argument("-u", "--user", help="花瓣网账号-手机/邮箱")
parser.add_argument("-p", "--password", help="花瓣网账号对应密码")
parser.add_argument("-v", "--version", help="查看版本号", action='store_true')
parser.add_argument("--board_id", help="花瓣网单个画板id, action=getBoard时使用")
parser.add_argument("--user_id", help="花瓣网单个用户id, action=getUser时使用")
parser.add_argument("-a", "--action", default="getBoard", help=u"脚本动作 -> getBoard: 抓取单画板(默认); getUser: 抓取单用户")
parser.add_argument("-u", "--user", help=u"花瓣网账号-手机/邮箱")
parser.add_argument("-p", "--password", help=u"花瓣网账号对应密码")
parser.add_argument("-v", "--version", help=u"查看版本号", action='store_true')
parser.add_argument("--board_id", help=u"花瓣网单个画板id, action=getBoard时使用")
parser.add_argument("--user_id", help=u"花瓣网单个用户id, action=getUser时使用")
# Boolean switches: present on the command line means True.
parser.add_argument("--debug", help=u"开启debug输出", action='store_true')
parser.add_argument("--proxy", help=u"开启IP代理池", action='store_true')
# Optional replacement for the default IP_POOL_API endpoint.
parser.add_argument("--proxy_apiurl", help=u"IP代理池接口:开启IP代理池后,设置此选项使用非默认接口")
main(parser)

0 comments on commit de22fe6

Please sign in to comment.