-
Notifications
You must be signed in to change notification settings - Fork 0
/
settings.py
70 lines (57 loc) · 1.43 KB
/
settings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# -*- coding: utf-8 -*-
# Scrapy settings for reddit project
#
# For simplicity, this file contains only the most important settings by
# default. All the other settings are documented here:
#
# https://docs.scrapy.org/en/latest/topics/settings.html
#
# Name of the bot implemented by this Scrapy project.
BOT_NAME = "reddit"

# Package searched for spiders; `genspider` also creates new spiders there.
SPIDER_MODULES = ["reddit.spiders"]
NEWSPIDER_MODULE = "reddit.spiders"
# HTTP status codes for which a request is retried. The original ordering is
# preserved; the codes are only grouped by class for readability.
RETRY_HTTP_CODES = [
    500, 502, 503, 504,  # 5xx server errors
    400, 403, 408, 429,  # 4xx client errors
    301, 303,  # 3xx redirects
]

# Maximum number of times to retry, in addition to the first download.
RETRY_TIMES = 100

# Seconds to wait between consecutive requests to the same website.
DOWNLOAD_DELAY = 50

# Proxy URL. PROXY is not a built-in Scrapy setting; presumably it is read by
# reddit.middlewares.ProxyMiddleware -- TODO confirm against that module.
PROXY = "http://localhost:8118"
# Downloader middlewares keyed by import path. An integer is the middleware's
# order value; None disables the middleware.
DOWNLOADER_MIDDLEWARES = {
    "reddit.middlewares.RandomUserAgentMiddleware": 400,
    "reddit.middlewares.ProxyMiddleware": 410,
    # Switch off the stock user-agent middleware.
    # NOTE(review): this is the legacy "scrapy.contrib" import path; newer
    # Scrapy releases expose it under "scrapy.downloadermiddlewares.useragent".
    # Confirm which Scrapy version the project targets before changing it.
    "scrapy.contrib.downloadermiddleware.useragent.UserAgentMiddleware": None,
}
# Item pipelines keyed by import path; the integer is the pipeline's order.
ITEM_PIPELINES = {
    "reddit.pipelines.RedditPipeline": 5,
}
# Feed export formats mapped to the import path of the exporter class used
# for each format.
FEED_EXPORTERS = {
    "csv": "reddit.feedexport.CSVkwItemExporter",
    "csv-comment": "reddit.feedexport.CSVkwCommentItemExporter",
}
# Fields to export, listed explicitly so that the CSV export honors this
# column order rather than using a random order.
# Presumably consumed by the exporters in reddit.feedexport -- TODO confirm.
EXPORT_FIELDS = [
    # "url" is currently commented out, so it is not part of this list.
    # "url",
    "thread",
    "op",
    "subreddit",
    "date",
    "time",
    "sender",
    "receiver",
    "dogecoin",
    "usd",
]
# Ordered field names for the comment export.
# Presumably paired with the "csv-comment" feed exporter -- TODO confirm
# against reddit.feedexport.
COMMENT_EXPORT_FIELDS = [
    "url",
    "thread",
    "op",
    "thread_date",
    "textpost",
    "comments",
    "vote_points",
    "upvoted",
    "comment",
    "user",
    "time",
]
# Field separator used when writing the CSV file.
CSV_DELIMITER = ","