-
Notifications
You must be signed in to change notification settings - Fork 1
/
cmdline_start_spider.py
71 lines (62 loc) · 2.38 KB
/
cmdline_start_spider.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# -*- coding: utf-8 -*- #
"""
Created on 2018年9月19日
@author: Leo
"""
# Python内置库
import os
import sys
# Python第三方库
# 通过调用命令行进行调试
# 调用execute这个函数可调用scrapy脚本
from scrapy.cmdline import execute
# 项目内部库
from BeiKeZuFangSpider.settings import ITEM_PIPELINES
def main():
    """
    Interactively collect the crawl parameters and launch the spider.

    Prompts for a city name (required), then for an area name. When the
    area is left empty, a metro line is requested instead, so at most one
    of the two filters is non-empty. Finally asks whether the Kafka
    pipeline should replace the pipelines configured in the project
    settings, then hands everything to ``start_spider``.

    Leading and trailing whitespace is stripped from every answer, so a
    whitespace-only reply counts as empty.

    :raises ValueError: if the city name is empty
    """
    print("启动爬虫...")
    city_name = input("请输入城市名称:").strip()
    print("您输入的城市名称为: {}".format(city_name))
    if city_name == "":
        raise ValueError("城市名称不能为空!")

    # Area and metro line are alternative filters: the metro line is only
    # requested when no area was given.
    area_name = input("请输入区域名称(可以忽略):").strip()
    if area_name != "":
        print("您输入的区域名称为: {}".format(area_name))
        metro_name = ""
    else:
        metro_name = input("请输入地铁线名称:").strip()
        print("您输入的地铁线为: {}".format(metro_name))

    is_kafka = input("是否使用Kafka(输入Y(y)/N(n), 也可以忽略):").strip()
    if is_kafka.upper() == "Y":
        print("您选择了Kafka进行PIPELINE.数据将通过Kafka进行传输.")
        # Mutate the dict imported from the project settings module in place
        # (presumably so the crawl started below picks up the change).
        # clear() alone removes every configured pipeline; popping the
        # default pipeline first would only add a KeyError risk when that
        # key is absent from the settings.
        ITEM_PIPELINES.clear()
        # NOTE(review): "BeikeZuFang..." is cased differently from the
        # project name "BeiKeZuFang..." - confirm against pipelines.py.
        ITEM_PIPELINES['BeiKeZuFangSpider.pipelines.BeikeZuFangSpiderKafkaPipeline'] = 1

    print("城市: {}, 区域: {}, 地铁线: {}".format(city_name, area_name, metro_name))
    start_spider(city_name=city_name, area_name=area_name, metro_name=metro_name)
def start_spider(
        city_name: str,
        area_name: str,
        metro_name: str):
    """
    Launch the crawl through ``scrapy.cmdline.execute``.

    Builds the equivalent of
    ``scrapy crawl BeiKeErShouFang -a city=... -a area=... -a metro=...``
    and passes it to ``execute``.

    :param city_name: city name, passed to the spider as ``city``
    :param area_name: area name, passed to the spider as ``area``
    :param metro_name: metro line name, passed to the spider as ``metro``
    """
    # Add the directory containing this file to the import path before
    # running the scrapy command.
    project_dir = os.path.dirname(os.path.abspath(__file__))
    sys.path.append(project_dir)

    # Assemble the command line: one "-a key=value" pair per spider argument.
    argv = ["scrapy", "crawl", "BeiKeErShouFang"]
    spider_args = (
        ("city", city_name),
        ("area", area_name),
        ("metro", metro_name),
    )
    for key, value in spider_args:
        argv.append("-a")
        argv.append("{}={}".format(key, value))

    # Run it.
    execute(argv)
if __name__ == "__main__":
    # Interactive entry point: prompt for the crawl parameters, then crawl.
    main()
    # For debugging, the prompts can be bypassed with a direct call:
    # start_spider(city_name="深圳市", area_name="南山区", metro_name="")