forked from GallenQiu/FinanceReportAnalysis
-
Notifications
You must be signed in to change notification settings - Fork 1
/
新浪财经数据_mg_year.py
127 lines (97 loc) · 3.77 KB
/
新浪财经数据_mg_year.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# -*- coding: utf-8 -*-
__author__ = "Gallen_qiu"
'''最近5年的财报'''
import requests,json,time,pymongo
from bs4 import BeautifulSoup
from multiprocessing import Queue
from concurrent.futures import ThreadPoolExecutor
from pymongo.collection import Collection
class Xinalang():
    """Download yearly financial statements from Sina Finance and store them in MongoDB.

    A task is a JSON string describing one (stock, year) pair; it must contain
    the keys ``SECCODE``, ``SECNAME`` and ``year``.  For every task the three
    statement pages listed in ``URL_TEMPLATES`` are fetched, their table rows
    are merged into the task's dict, and the resulting record is collected in
    ``dict_list`` until ``write_json`` inserts everything into MongoDB.
    """

    # Statement page URL templates; each is formatted with (stock code, year).
    URL_TEMPLATES = (
        'http://money.finance.sina.com.cn/corp/go.php/vFD_BalanceSheet/stockid/{}/ctrl/{}/displaytype/4.phtml',
        'http://money.finance.sina.com.cn/corp/go.php/vFD_ProfitStatement/stockid/{}/ctrl/{}/displaytype/4.phtml',
        'http://money.finance.sina.com.cn/corp/go.php/vFD_CashFlow/stockid/{}/ctrl/{}/displaytype/4.phtml',
    )
    # Default input file: one JSON object per line.  Raw string so the
    # backslashes are never interpreted as escape sequences.
    STOCK_CODE_FILE = r"D:\python文件库\项目\Financal analysis\A股数据分析\stockCode.txt"
    # Report years crawled when the caller does not supply its own list.
    YEARS = (2014, 2015, 2016, 2017, 2018)
    # Seconds to wait for each HTTP request.
    REQUEST_TIMEOUT = 5
    # Worker threads used for the first crawl pass.
    MAX_WORKERS = 8
    # Upper bound on sequential retry passes, so a host that keeps timing
    # out cannot keep the crawler looping forever.
    MAX_RETRY_ROUNDS = 3

    def __init__(self):
        # Pending tasks (JSON strings) waiting to be handed to the pool.
        self.queue = Queue()
        # Tasks whose request timed out and that should be retried.
        self.info = []
        # Completed records, ready to be written to MongoDB.
        self.dict_list = []

    @staticmethod
    def _parse_value(text):
        """Convert the text of a table cell into a float when possible.

        The placeholder ``"--"`` becomes ``0.0``.  Text that ``float`` cannot
        parse is returned unchanged.
        """
        if text == "--":
            return 0.0
        try:
            return float(text)
        except ValueError:
            return text

    def req(self, ninfo):
        """Fetch all statement pages for one task and collect the merged record.

        Args:
            ninfo: JSON string with the keys ``SECCODE``, ``SECNAME`` and
                ``year``.

        On success the task dict, extended with one entry per table row
        (first cell text -> parsed second cell), is appended to
        ``self.dict_list``.  When a request times out the original task
        string is appended to ``self.info`` for a later retry.  Any other
        failure is reported on stdout and the task is dropped.
        """
        # Decode the task outside the main try block: a malformed task can
        # neither be retried nor described, so report it and stop here.
        try:
            info = json.loads(ninfo)
        except (TypeError, ValueError) as exc:
            print("其他错误", exc)
            return
        if not isinstance(info, dict):
            print("其他错误", ninfo)
            return

        try:
            scode = info["SECCODE"]
            year = info["year"]
            data = {}
            for template in self.URL_TEMPLATES:
                url = template.format(scode, year)
                response = requests.get(url, headers={}, timeout=self.REQUEST_TIMEOUT)
                soup = BeautifulSoup(response.content.decode("gb2312"), "lxml")
                for tr in soup.select("tbody tr"):
                    tds = tr.select("td")
                    # Rows without a label cell and a value cell carry no data.
                    if len(tds) < 2:
                        continue
                    data[tds[0].text] = self._parse_value(tds[1].text)
            info.update(data)
            print(info["SECNAME"], info["year"])
            self.dict_list.append(info)
        except (requests.exceptions.Timeout, TimeoutError):
            # requests signals timeouts with its own exception class, which
            # is not a subclass of the builtin TimeoutError.
            print("超时")
            self.info.append(ninfo)
        except Exception as exc:
            # Best effort: report the failing task and keep crawling.
            print("其他错误", exc)
            print(info.get("SECNAME"), info.get("year"))

    def scheduler(self, stock_file=None, year_list=None):
        """Run the whole crawl: build tasks, fetch them, retry timeouts, write results.

        Args:
            stock_file: Path of a UTF-8 text file holding one JSON object per
                line.  Defaults to ``STOCK_CODE_FILE``.
            year_list: Iterable of report years to fetch for every stock.
                Defaults to ``YEARS``.
        """
        if stock_file is None:
            stock_file = self.STOCK_CODE_FILE
        if year_list is None:
            year_list = self.YEARS

        # Count the tasks while queueing them so the queue can be drained
        # without Queue.qsize(), which is not implemented on every platform.
        queued = 0
        with open(stock_file, encoding="utf8") as f:
            for line in f:
                if not line.strip():
                    continue  # tolerate blank lines in the input file
                info = json.loads(line)
                for year in year_list:
                    info["year"] = year
                    self.queue.put(json.dumps(info))
                    queued += 1

        # Leaving the with-block waits for every submitted task to finish.
        with ThreadPoolExecutor(max_workers=self.MAX_WORKERS) as pool:
            for _ in range(queued):
                pool.submit(self.req, self.queue.get())

        print("剩下:" + str(len(self.info)))
        # Retry timed-out tasks sequentially; req() re-adds a task to
        # self.info when it times out again.
        for _ in range(self.MAX_RETRY_ROUNDS):
            if not self.info:
                break
            pending, self.info = self.info, []
            while pending:
                self.req(pending.pop())
        if self.info:
            print("剩下:" + str(len(self.info)))
        self.write_json()

    def write_json(self):
        """Insert every collected record into MongoDB.

        Records go to the ``FinanceReport_data`` collection of the
        ``XinlangFinance`` database on ``localhost:27017``.  Failures are
        reported on stdout instead of being raised.
        """
        client = None
        try:
            client = pymongo.MongoClient('localhost', 27017)
            db = client["XinlangFinance"]
            mgtable = Collection(db, 'FinanceReport_data')
            mgtable.insert_many(self.dict_list)
        except Exception as exc:
            print("写入出错!!", exc)
        finally:
            # Release the connection whether or not the insert succeeded.
            if client is not None:
                client.close()
if __name__ == '__main__':
    # Script entry point: run one full crawl and report the wall-clock time.
    began = time.time()
    crawler = Xinalang()
    crawler.scheduler()
    elapsed = time.time() - began
    print("总耗时:{}秒".format(elapsed))