# Stock Trade Information

In [22]:
import importlib
import logging
import sys

import akshare as ak
import numpy as np
import pandas as pd
from joblib import Parallel, delayed

from libs import AkShare, format_code

## Get daily stock trade data

In [3]:
# benchmark: 25min
# Download the daily history of every listed instrument from `start` onwards and
# persist the combined frame as a single gzip-compressed Parquet file.
start = '20050101'
codes = [format_code(raw_code) for raw_code in AkShare.stock_quote(code_only=True)]

# One task per instrument on a thread pool sized to all available cores
# (presumably the per-code fetch is I/O-bound — confirm against AkShare.market_daily).
stock_data = Parallel(n_jobs=-1, backend='threading')(
    delayed(AkShare.market_daily)(code, start=start) for code in codes
)

# Stack the per-instrument frames under an (instrument, datetime) MultiIndex,
# then flip the levels so the sorted index is ordered by datetime first.
data = (
    pd.concat(stock_data, axis=0, keys=codes, names=['instrument', 'datetime'], sort=True)
    .swaplevel()
    .sort_index()
)
data.to_parquet('../data/raw_data/em_daily_stock/daily_stock.parquet', compression='gzip')

The cell above fetches the data into the in-memory `data` variable and then dumps it to disk as a gzip-compressed Parquet file.

## Stock Financial Report Collector

In [None]:
def get_balance_sheet(code):
    """Fetch one stock's balance sheet and return its numeric columns on a quarterly grid.

    Parameters
    ----------
    code : str
        Symbol forwarded unchanged to ``ak.stock_balance_sheet_by_report_em``.

    Returns
    -------
    pandas.DataFrame or None
        One row per quarter-end between the earliest and latest ``REPORT_DATE``
        found in the source (quarters missing from the source become NaN rows).
        Columns are those whose dtype is not ``object`` after an attempted float
        cast, plus ``SECUCODE``, ``REPORT_DATE`` and ``NOTICE_DATE``; ``SECUCODE``
        is filled on every row with its first non-null value.
        ``None`` when the source frame is empty or when any step fails; failures
        are logged as warnings rather than raised, so a bulk crawl keeps going.
    """
    key_columns = ['SECUCODE', 'REPORT_DATE', 'NOTICE_DATE']
    try:
        sheet = ak.stock_balance_sheet_by_report_em(symbol=code)
        if sheet.empty:
            return None

        # Keep only the columns that survive a float cast, plus the identifying keys.
        # Index.union sorts, so the resulting column order is alphabetical.
        is_numeric = sheet.astype('f8', errors='ignore').dtypes != 'object'
        sheet = sheet.loc[:, sheet.columns[is_numeric].union(key_columns)]
        sheet = sheet.replace({None: np.nan})
        sheet = sheet.astype('f8', errors='ignore')
        sheet[['REPORT_DATE', 'NOTICE_DATE']] = sheet[['REPORT_DATE', 'NOTICE_DATE']].astype('datetime64[ns]')

        # Re-grid onto consecutive calendar quarter-ends. An explicit offset object is
        # used instead of the 'q' string alias, which pandas has deprecated; a rejected
        # alias would raise here and silently turn every result into None.
        sheet = sheet.set_index('REPORT_DATE')
        quarter_ends = pd.date_range(
            sheet.index.min(),
            sheet.index.max(),
            freq=pd.offsets.QuarterEnd(startingMonth=12),
        )
        sheet = sheet.reindex(quarter_ends)
        sheet.index.name = "REPORT_DATE"
        sheet = sheet.reset_index()

        # Rows created by the reindex have no SECUCODE; broadcast the known one.
        sheet['SECUCODE'] = sheet['SECUCODE'].dropna().iloc[0]
        return sheet
    except Exception as exc:
        # Best-effort by design: one bad symbol must not abort the whole crawl.
        # Only Exception is caught so KeyboardInterrupt / SystemExit still stop the run.
        logging.getLogger(__name__).warning(
            "get_balance_sheet(%r) failed: %r", code, exc
        )
        return None

In [None]:
# Ticker codes come from the '代码' ("code") column of the spot-quote table.
codes = ak.stock_zh_a_spot_em()['代码'].tolist()
# Fetch each symbol's balance sheet across 8 workers; results keep the order of
# `codes`, and a symbol whose fetch is empty or fails yields None.
joblibres = Parallel(n_jobs=8)(
    delayed(get_balance_sheet)(symbol) for symbol in codes
)