In [10]:
import numpy as np
import pandas as pd
from pandas import Series,DataFrame
from datetime import datetime
import matplotlib.pyplot as plt
import requests
import requests_cache
import time
import math
from sqlalchemy import create_engine
import execjs
import random
import io
import json
import os

# SQLAlchemy engine for the local SQLite file 'stock.sqlite', plus one
# connection opened as soon as this cell runs and reused by later cells.
engine = create_engine('sqlite:///stock.sqlite')
conn = engine.connect()

# Relative directories for NetEase CSV files. tmp_path is where
# download_historical_prices writes and where update_prices reads back from.
historical_data_csv_path = 'netease/historical_data/'
tmp_path = 'tmp/netease/'

# Exchange name (上交所 = Shanghai Stock Exchange, 深交所 = Shenzhen Stock
# Exchange) -> integer code.
# NOTE(review): presumably the market prefix NetEase expects in front of the
# stock code — confirm against the API.
exchange_types = {
    '上交所': 0,
    '深交所': 1
}

def update_prices(start, end):
    """Download prices for every stock in ``stock_list`` and load them into SQLite.

    For each row of the ``stock_list`` table a CSV is downloaded into
    ``tmp_path`` via ``download_historical_prices``. Every file found under
    ``tmp_path`` is then read, cleaned with ``regular`` and appended to the
    ``historical_prices`` table.

    Args:
        start: First date to request, as a ``'YYYYMMDD'`` string.
        end: Last date to request, as a ``'YYYYMMDD'`` string.

    Returns:
        list[str]: File names (not full paths) whose CSV could not be parsed,
        cleaned or inserted.
    """
    # Hand pandas the engine so it opens and closes its own connection; the
    # previous inline engine.connect() was never closed.
    allstocks = pd.read_sql_table('stock_list', engine)
    # Plain loop: the downloads are side effects, no result frame is needed.
    for _, stock in allstocks.iterrows():
        download_historical_prices(stock['代码'], stock['交易所'], start, end)

    # NOTE(review): everything under tmp_path is loaded, including files left
    # over from earlier runs, and rows are appended without de-duplication.
    error_stocks = []
    for root, dirs, files in os.walk(tmp_path, topdown=False):
        for file in files:
            # os.walk yields sub-directory roots without a trailing separator,
            # so plain string concatenation would build a wrong path there.
            filepath = os.path.join(root, file)
            try:
                df = pd.read_csv(filepath)
                regular(df).to_sql('historical_prices', engine, index=False, if_exists='append')
            except Exception:
                # Best effort: remember the failing file and keep going, but
                # let KeyboardInterrupt / SystemExit propagate.
                error_stocks.append(file)
    return error_stocks

def download_historical_prices(code, exchange, start, end, fields=None):
    """Download one stock's historical price CSV from NetEase into ``tmp_path``.

    Args:
        code: Stock code; converted with ``str`` and used as the file name.
        exchange: Exchange prefix; ``str(exchange) + str(code)`` is sent as
            the ``code`` form field.
        start: First date to request (``'YYYYMMDD'`` string).
        end: Last date to request (``'YYYYMMDD'`` string).
        fields: Semicolon-separated field list to request. ``None`` selects
            the default set defined below.

    Returns:
        None. The response body is written to ``<tmp_path>/<code>.csv`` as UTF-8.
    """
    if fields is None:
        fields = 'TCLOSE;LCLOSE;TURNOVER;VOTURNOVER;VATURNOVER;TCAP;MCAP'
    url = 'http://quotes.money.163.com/service/chddata.html'
    headers = {
        'X-Requested-With': 'XMLHttpRequest',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) ' 'Chrome/56.0.2924.87 Safari/537.36',
    }
    data = {
        'code': str(exchange) + str(code),
        'start': start,
        'end': end,
        'fields': fields,
    }
    # NOTE(review): no timeout or HTTP status check here; a failed response is
    # still written to disk and only surfaces later when the CSV is parsed.
    r = requests.post(url, data=data, headers=headers)
    # Decode the body as GB2312 before re-encoding it as UTF-8 on disk.
    r.encoding = 'gb2312'
    # Create the target directory on first use so the write cannot fail on a
    # fresh checkout.
    os.makedirs(tmp_path, exist_ok=True)
    filepath = os.path.join(tmp_path, str(code) + '.csv')
    # The with-block closes the file; no explicit close() is needed.
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(r.text)
        
def update_today_prices():
    """Run ``update_prices`` with today's local date as both start and end."""
    now = datetime.now()
    stamp = now.strftime('%Y%m%d')
    update_prices(start=stamp, end=stamp)

def get_netease_zycwzb(code):
    """Fetch the NetEase ``zycwzb`` report for one stock as a DataFrame.

    Args:
        code: Stock code; its ``str`` form is inserted into the request URL.

    Returns:
        pandas.DataFrame: The response body parsed as CSV, with the first
        column used as the index.
    """
    endpoint = f'http://quotes.money.163.com/service/zycwzb_{code!s}.html'
    request_headers = {
        'X-Requested-With': 'XMLHttpRequest',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) ' 'Chrome/56.0.2924.87 Safari/537.36',
        'Referer': 'http://www.sse.com.cn/assortment/stock/list/share/'
    }
    form = {'type': 'report'}
    response = requests.post(endpoint, data=form, headers=request_headers)
    # NOTE(review): unlike download_historical_prices, no response encoding is
    # set here, so the text is decoded with whatever requests infers — confirm
    # that Chinese labels come through intact.
    csv_buffer = io.StringIO(response.text)
    return pd.read_csv(csv_buffer, index_col=0)

def update_historical_data_from_netease(end=None):
    """Call ``get_netease_historical_data`` for every stock in ``stock_list``.

    Args:
        end: Last date to request as a ``'YYYYMMDD'`` string. ``None`` means
            today's local date.

    Returns:
        None.
    """
    if end is None:
        end = datetime.now().strftime('%Y%m%d')
    stocks = pd.read_sql_table('stock_list', conn, index_col='index')
    # NOTE(review): get_netease_historical_data is not defined in the visible
    # part of this notebook — confirm it exists before running this.
    for _, row in stocks.iterrows():
        # The exchange name is translated to its integer code first.
        get_netease_historical_data(exchange_types[row['交易所']], row['代码'], end=end)

def regular(df):
    """Clean a raw price frame in place and return it.

    Steps, in order:
      1. Sort rows by index.
      2. Strip leading apostrophes from the '股票代码' (stock code) column.
      3. Treat a '收盘价' (close) of 0 as missing and forward-fill it from the
         preceding row.
      4. Drop every row that still contains a missing value in any column.

    Args:
        df: DataFrame with at least the columns '股票代码' (strings) and
            '收盘价' (numeric). It is modified in place.

    Returns:
        pandas.DataFrame: The same ``df`` object, cleaned.

    Raises:
        AttributeError: If '股票代码' does not hold string values.
    """
    df.sort_index(inplace=True)
    # Vectorised strip; missing codes stay missing and are dropped below.
    df['股票代码'] = df['股票代码'].str.lstrip("'")
    # Assign the result back to the frame: an inplace call on df['收盘价']
    # operates on a temporary under pandas Copy-on-Write and can silently
    # leave df untouched. np.nan / .ffill() replace the removed np.NaN alias
    # and the deprecated fillna(method='ffill').
    # NOTE(review): forward-fill copies from the previous row in index order;
    # if the source CSV is newest-first this fills from a later date — confirm.
    df['收盘价'] = df['收盘价'].replace(0, np.nan).ffill()
    df.dropna(inplace=True)
    return df

def restoration_of_rights(df):
    """Add return ('收益率') and back-adjusted price ('后复权') columns in place.

    '收益率' is ``收盘价 / 前收盘 - 1`` with the first row blanked out.
    '后复权' is the first row's '收盘价' multiplied by the cumulative product
    of ``1 + 收益率``.

    Args:
        df: DataFrame with numeric columns '收盘价' (close) and '前收盘'
            (previous close), ordered so that row 0 is the base row.
            It is modified in place.

    Returns:
        pandas.DataFrame: The same ``df`` object with both columns set.
        An empty frame gets two empty columns.
    """
    df['收益率'] = df['收盘价'] / df['前收盘'] - 1
    if df.empty:
        # No base row to anchor on; still expose the expected column.
        df['后复权'] = df['收益率'].copy()
        return df
    # Address the column by name. The previous positional -1 hit '后复权'
    # instead whenever the function was run a second time on the same frame,
    # leaving the first return in place and skewing every adjusted price.
    df.iloc[0, df.columns.get_loc('收益率')] = np.nan
    base_close = df['收盘价'].iloc[0]
    # NOTE(review): because the first return is NaN, the first '后复权' value
    # is NaN as well rather than the base close — confirm that is intended.
    df['后复权'] = base_close * (1 + df['收益率']).cumprod()
    return df

In [11]:
# Download today's prices for every stock in stock_list and append them to
# the historical_prices table.
update_today_prices()

In [18]:
%%time
sql = 'select * from historical_prices where 日期="2020-12-16"'
pd.read_sql_query(sql, conn)

CPU times: user 3.01 s, sys: 3.59 s, total: 6.6 s
Wall time: 34.2 s


Unnamed: 0,日期,股票代码,名称,收盘价,前收盘,换手率,成交量,成交金额,总市值,流通市值
0,2020-12-16,000514,渝 开 发,3.80,3.82,0.1751,1477576,5.626066e+06,3.206330e+09,3.206330e+09
1,2020-12-16,002138,顺络电子,25.06,25.39,0.8590,6926407,1.746170e+08,2.020634e+10,2.020634e+10
2,2020-12-16,000903,云内动力,4.19,4.17,2.0912,41212604,1.735113e+08,8.257656e+09,8.257656e+09
3,2020-12-16,601882,海天精工,11.92,11.80,0.2812,1467932,1.739449e+07,6.222240e+09,6.222240e+09
4,2020-12-16,002296,辉煌科技,7.08,7.26,0.6925,2608500,1.870117e+07,2.758767e+09,2.666727e+09
...,...,...,...,...,...,...,...,...,...,...
4115,2020-12-16,002481,双塔食品,16.24,15.41,1.6158,20090541,3.214563e+08,2.019265e+10,2.019265e+10
4116,2020-12-16,603016,新宏泰,24.98,24.60,0.4452,659635,1.633112e+07,3.701037e+09,3.701037e+09
4117,2020-12-16,600856,*ST中天,1.49,1.49,0.6422,8627988,1.303700e+07,2.036315e+09,2.001699e+09
4118,2020-12-16,603203,快克股份,31.70,31.47,0.6140,961192,3.031842e+07,4.962121e+09,4.962121e+09


In [3]:
# Full backfill from 1990-01-01 through 2020-12-16. The return value lists the
# temp CSV files that could not be parsed, cleaned or inserted.
update_prices('19900101', '20201216')

['603618.csv',
 '000565.csv',
 '300643.csv',
 '300647.csv',
 '000507.csv',
 '688013.csv']

In [4]:
# pd.read_csv('tmp/netease/688018.csv')
# f = open('tmp/netease/600856.csv', 'r')
# a = f.read()
# print(a)
# f.close()

In [5]:
%%time
dfs = pd.read_sql_table('stock_list', conn)
dfs['代码'][0]

CPU times: user 36.6 ms, sys: 8.1 ms, total: 44.7 ms
Wall time: 44.3 ms


'600000'