In [1]:
import os
from io import StringIO

import requests
from bs4 import BeautifulSoup

In [2]:
from arctic import Arctic, CHUNK_STORE
# Connect to the Arctic store on 'localhost' and open its 'fund' library.
lib = Arctic('localhost')
fund = lib.get_library('fund')



In [3]:
import pandas as pd

In [4]:
# All symbols stored in the 'fund' library; these drive the download loop below.
symbols = fund.list_symbols()

In [67]:
def get_fund_split_data(symbol, timeout=30):
    """Fetch the split history of a fund from fundf10.eastmoney.com.

    Parameters
    ----------
    symbol : str
        Fund code; interpolated into the page URL.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up (default 30).
        Without a timeout ``requests`` can block indefinitely.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``date`` (ascending) with a single float column ``amount``
        holding the part of the split ratio after the ':' separator.
        An empty DataFrame is returned when the page reports no split
        information.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    ValueError
        If the expected split table is not present in the page.
    """
    url = f'http://fundf10.eastmoney.com/fhsp_{symbol}.html'
    res = requests.get(url, timeout=timeout)
    res.raise_for_status()

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed (and to avoid bs4's warning).
    html = BeautifulSoup(res.text, 'html.parser')
    split_data = html.find('table', attrs={'class': 'w782 comm fhxq'})
    if split_data is None:
        # Fail with a clear message instead of feeding the string 'None'
        # to read_html.
        raise ValueError(f'split table not found for symbol {symbol}')

    # Wrap in StringIO: passing literal HTML strings to read_html is
    # deprecated in recent pandas releases.
    split = pd.read_html(StringIO(str(split_data)))[0]

    # The page shows this placeholder ("no split information yet") in the
    # first cell when the fund has never been split.
    if split.empty or split.iloc[0, 0] == '暂无拆分信息!':
        return pd.DataFrame()

    # Translate the Chinese column headers: year, split conversion date,
    # split type, split conversion ratio.
    rename = {'年份' : 'year', 
              '拆分折算日' : 'date', 
              '拆分类型' : 'splitType', 
              '拆分折算比例' : 'amount'}

    split = split.rename(columns=rename)

    split['date'] = pd.to_datetime(split['date'])
    # The ratio is a string of the form "a:b"; keep b as a float.
    split['amount'] = split['amount'].str.split(':', expand=True)[1].astype(float)

    split = split.drop(['year', 'splitType'], axis=1).set_index('date').sort_index()

    return split

In [73]:
def get_fund_div_data(symbol, timeout=30):
    """Fetch the dividend history of a fund from the Sina fund-info API.

    Parameters
    ----------
    symbol : str
        Fund code; sent as the ``symbol`` query parameter.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up (default 30).
        Without a timeout ``requests`` can block indefinitely.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``date`` (ascending, taken from the API's ``djr`` field)
        with a float ``amount`` column (the API's ``mffh`` field), restricted
        to rows whose amount is strictly positive. Any other fields returned
        by the API, except ``fhr`` which is dropped, are kept as columns.
        An empty DataFrame is returned when the API has no dividend records.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    url = 'https://stock.finance.sina.com.cn/fundInfo/api/openapi.php/FundPageInfoService.tabfh'

    data_input = {
        'symbol' : symbol, 
        'format' : 'json',
    }

    resp = requests.get(url, params=data_input, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()
    fhdata = data['result']['data']['fhdata']

    # Treat a null payload the same as an empty one; len(None) would raise.
    if not fhdata:
        return pd.DataFrame()

    div = pd.DataFrame(fhdata).astype({'mffh':float})

    # Keep only records that actually paid something.
    div = div[div['mffh'] > 0.0]

    # NOTE(review): 'fhr' and 'djr' look like two different dates of the same
    # dividend event; only 'djr' is kept as the index — confirm this is the
    # date adjustments should be keyed on.
    div = div.drop('fhr', axis=1)

    RENAME = {
        'djr' : 'date',
        'mffh' : 'amount',
    }

    div = div.rename(columns = RENAME)
    div['date'] = pd.to_datetime(div['date'])
    # sort div data frame from old to new
    div = div.set_index('date').sort_index()

    return div

In [74]:
def get_fund_adj_data(symbol):
    """Return the split and dividend records of ``symbol`` in one frame.

    Each source frame is tagged with a ``type`` column ('split' or 'div'),
    the two are concatenated, and the result is sorted by its index.
    """
    tagged_frames = [
        get_fund_split_data(symbol).assign(type='split'),
        get_fund_div_data(symbol).assign(type='div'),
    ]
    return pd.concat(tagged_frames).sort_index()

In [6]:
import tqdm

In [92]:
# Download the adjustment data of each symbol and cache it as one CSV per
# symbol under adj_temp/.
# NOTE(review): the 2131 offset looks like a manual resume point left over
# from an earlier, interrupted run — confirm before re-running from scratch.
os.makedirs('adj_temp', exist_ok=True)  # to_csv does not create missing directories
for symbol in tqdm.tqdm(symbols[2131:]):
    adj = get_fund_adj_data(symbol)
    adj.to_csv(f'adj_temp/{symbol}.csv')

100%|██████████| 1668/1668 [45:08<00:00,  1.62s/it]  


In [93]:
import os

In [115]:
# Create the 'fund_adj' library, backed by Arctic's chunk store.
lib.initialize_library('fund_adj', CHUNK_STORE)



In [116]:
# Handle to the 'fund_adj' library that the load loop below writes into.
fund_adj = lib.get_library('fund_adj')

In [122]:
# Load every cached CSV from adj_temp/ into the 'fund_adj' library.
# Symbols already present in the library and CSVs without rows are skipped.
adj_path = 'adj_temp/'
csv_files = [entry for entry in os.listdir(adj_path) if entry.endswith('.csv')]
for csv in csv_files:
    # The symbol is the file name up to the first dot.
    symbol = csv.partition('.')[0]
    if not fund_adj.has_symbol(symbol):
        df = pd.read_csv(os.path.join(adj_path, csv), index_col=0, parse_dates=True)
        df.index.name = 'date'
        if not df.empty:
            print(f'Appending symbol {symbol} to db')
            fund_adj.append(symbol, df, upsert=True, chunk_size='M')

Appending symbol 000655 to db
Appending symbol 002407 to db
Appending symbol 002265 to db
Appending symbol 288002 to db
Appending symbol 000025 to db
Appending symbol 240004 to db
Appending symbol 000245 to db
Appending symbol 163001 to db
Appending symbol 000334 to db
Appending symbol 003487 to db
Appending symbol 162006 to db
Appending symbol 540006 to db
Appending symbol 519733 to db
Appending symbol 000190 to db
Appending symbol 370023 to db
Appending symbol 000128 to db
Appending symbol 180020 to db
Appending symbol 002435 to db
Appending symbol 002667 to db
Appending symbol 510030 to db
Appending symbol 290007 to db
Appending symbol 519697 to db
Appending symbol 003665 to db
Appending symbol 003615 to db
Appending symbol 001202 to db
Appending symbol 379010 to db
Appending symbol 450004 to db
Appending symbol 320008 to db
Appending symbol 541005 to db
Appending symbol 163823 to db
Appending symbol 160633 to db
Appending symbol 510120 to db
Appending symbol 002009 to db
Appending 

In [123]:
# Number of symbols now stored in the 'fund_adj' library.
len(fund_adj.list_symbols())

2391