In [1]:
import baostock as bs
import pandas as pd
from datetime import datetime, timedelta

In [2]:
# Open the baostock session and echo the status fields of the login response.
lg = bs.login()
print(f'login respond code: {lg.error_code}')
print(f'login respond msg: {lg.error_msg}')

# Comma-separated field list passed as `fields=` to the history queries below.
params = ','.join((
    'date',
    'open',
    'high',
    'low',
    'close',
    'preclose',
    'volume',
    'amount',
    'turn',
    'tradestatus',
    'pctChg',
    'isST',
))

login success!
login respond code: 0
login respond msg: success


In [3]:
def query_history_k_data_plus_with_df(**kwargs) -> pd.DataFrame:
    """Run ``bs.query_history_k_data_plus`` and collect its rows into a DataFrame.

    Args:
        **kwargs: Forwarded unchanged to ``bs.query_history_k_data_plus``.

    Returns:
        A DataFrame with one row per record yielded by the result set, using
        ``rs.fields`` as the column names.

    Raises:
        Exception: If the result set reports an ``error_code`` other than
            ``'0'``, either right after the query or while rows are being read.
    """
    rs = bs.query_history_k_data_plus(**kwargs)
    if rs.error_code != '0':
        raise Exception(f'error in fetch message: {rs.error_msg}')

    data_list = []
    while rs.error_code == '0' and rs.next():
        data_list.append(rs.get_row_data())

    # The loop also stops when error_code changes while iterating. Returning
    # the rows gathered so far would silently hand back (and let callers cache)
    # a truncated history, so report the failure instead.
    if rs.error_code != '0':
        raise Exception(f'error in fetch message: {rs.error_msg}')
    return pd.DataFrame(data_list, columns=rs.fields)

In [4]:
def fill_suspension(raw_df: pd.DataFrame, start_date: str, end_date: str) -> pd.DataFrame:
    """Add a placeholder row for every calendar day that has no record.

    Every day from ``start_date`` up to ``end_date`` (inclusive) that is missing
    from ``raw_df`` gets a synthetic row: the price columns (``open``, ``high``,
    ``low``, ``close``, ``preclose``) repeat the most recent close, and
    ``volume``, ``amount``, ``turn``, ``tradestatus``, ``pctChg`` and ``isST``
    are zero. Days before the first record use that record's ``preclose``.
    Existing rows are kept as they are.

    Args:
        raw_df: Frame with at least ``date`` (``YYYY-MM-DD`` strings), ``close``
            and ``preclose`` columns. The first row is treated as the earliest
            record.
        start_date: First day to cover, formatted ``YYYY-MM-DD``.
        end_date: Last day to cover, formatted ``YYYY-MM-DD``.

    Returns:
        A new DataFrame with the same columns as ``raw_df``, sorted by date.
        An empty ``raw_df`` yields an empty copy, because there is no price to
        fill with.

    Raises:
        ValueError: If a date string does not match ``YYYY-MM-DD`` or the
            ``date`` / ``close`` column is missing.
    """
    date_format = '%Y-%m-%d'
    one_day = timedelta(days=1)
    start = datetime.strptime(start_date, date_format)
    end = datetime.strptime(end_date, date_format)
    columns = raw_df.columns.tolist()
    date_index = columns.index('date')
    close_index = columns.index('close')
    if raw_df.empty:
        return raw_df.copy()

    price_columns = ('open', 'high', 'low', 'close', 'preclose')
    idle_values = {'volume': 0, 'amount': 0.0, 'turn': 0.0,
                   'tradestatus': 0, 'pctChg': 0.0, 'isST': 0}

    def placeholder_row(day, price):
        # Built by column name so the row stays aligned with whatever column
        # order or subset raw_df has; columns not listed above are left as None.
        row = []
        for name in columns:
            if name == 'date':
                row.append(day)
            elif name in price_columns:
                row.append(price)
            else:
                row.append(idle_values.get(name))
        return row

    rows_by_date = {row[date_index]: list(row) for row in raw_df.values}

    first_record = raw_df.iloc[0]
    first_date = datetime.strptime(first_record['date'], date_format)

    # Days before the first record carry that record's previous close.
    last_close = first_record['preclose']
    current = start
    while current < first_date:
        day = current.strftime(date_format)
        rows_by_date[day] = placeholder_row(day, last_close)
        current += one_day

    # If the data starts before start_date, the day right before start_date may
    # have no row; seed the carried price from the latest earlier record.
    if current > first_date:
        current_day = current.strftime(date_format)
        earlier_days = [day for day in rows_by_date if day < current_day]
        if earlier_days:
            last_close = rows_by_date[max(earlier_days)][close_index]

    while current <= end:
        day = current.strftime(date_format)
        row = rows_by_date.get(day)
        if row is None:
            rows_by_date[day] = placeholder_row(day, last_close)
        else:
            last_close = row[close_index]
        current += one_day

    # ISO-formatted date strings sort chronologically.
    new_data = sorted(rows_by_date.values(), key=lambda row: row[date_index])
    return pd.DataFrame(new_data, columns=columns)

In [5]:
import os
import csv

def load_history_k_data_plus_with_df(**kwargs) -> pd.DataFrame:
    """Return gap-filled history data, using a CSV file under ``./resources`` as cache.

    On a cache miss the data is fetched with
    ``query_history_k_data_plus_with_df``, passed through ``fill_suspension``
    and written to ``./resources/<code>-<frequency>-<adjustflag>.csv``. On a
    cache hit the CSV file is read back instead of querying again.

    Args:
        **kwargs: Query arguments. ``code``, ``frequency`` and ``adjustflag``
            form the cache file name; ``start_date`` and ``end_date`` are also
            passed to ``fill_suspension``; all of them are forwarded to the
            query function.

    Returns:
        The filled DataFrame, either freshly built or loaded from the cache.
    """
    code = kwargs.get('code')
    frequency = kwargs.get('frequency')
    adjust = kwargs.get('adjustflag')
    cache_dir = os.path.join('.', 'resources')
    # NOTE: the file name does not include the date range or field list, so a
    # cached file is reused even when those arguments differ from the first call.
    path = os.path.join(cache_dir, f'{code}-{frequency}-{adjust}.csv')
    if os.path.exists(path):
        return pd.read_csv(path, quoting=csv.QUOTE_NONNUMERIC)

    rs = query_history_k_data_plus_with_df(**kwargs)
    rs = fill_suspension(rs, kwargs.get('start_date'), kwargs.get('end_date'))
    # Create the cache directory on first use; without it to_csv fails on a
    # fresh checkout where ./resources does not exist yet.
    os.makedirs(cache_dir, exist_ok=True)
    rs.to_csv(path, index=False, encoding='utf-8', quoting=csv.QUOTE_NONNUMERIC)
    return rs

In [6]:

# Query window (YYYY-MM-DD) passed as start_date / end_date to the loader calls.
start_date, end_date = '2006-01-01', '2019-10-20'

In [7]:
# back adjust
rs_ba = load_history_k_data_plus_with_df(code="sh.000001",start_date=start_date, end_date=end_date,fields=params,frequency='d', adjustflag = '1')
# front adjust
rs_ba = load_history_k_data_plus_with_df(code="sh.000001",start_date=start_date, end_date=end_date,fields=params,frequency='d', adjustflag = '2')
# no adjust
rs_no  = load_history_k_data_plus_with_df(code="sh.000001",start_date=start_date, end_date=end_date,fields=params,frequency='d', adjustflag = '3')

In [8]:
# Preview the leading rows of the frame bound to rs_ba as a sanity check.
rs_ba.head()

Unnamed: 0,date,open,high,low,close,preclose,volume,amount,turn,tradestatus,pctChg,isST
0,2006-01-01,1161.057,1161.057,1161.057,1161.057,1161.057,0,0.0,0.0,0,0.0,0
1,2006-01-02,1161.057,1161.057,1161.057,1161.057,1161.057,0,0.0,0.0,0,0.0,0
2,2006-01-03,1161.057,1161.057,1161.057,1161.057,1161.057,0,0.0,0.0,0,0.0,0
3,2006-01-04,1163.878,1181.004,1161.906,1180.963,1161.057,2325854200,11970425035.0,0.015049,1,1.714473,0
4,2006-01-05,1183.305,1197.837,1180.451,1197.269,1180.963,2948447900,14767201450.0,0.019075,1,1.38074,0
