# Stock Trade Information

In [None]:
import sys
import importlib
import numpy as np
import pandas as pd
import akshare as ak
from functools import partial
from joblib import Parallel, delayed
from libs import AkShare, format_code

## Get the daily stock trade data

In [None]:
# Download the daily trade data for every quoted stock and save it as parquet.
# benchmark: ~25 min
start = '20050101'
codes = [format_code(raw_code) for raw_code in AkShare.stock_quote(code_only=True)]
# One AkShare.market_daily call per code, fanned out over a thread pool
# (n_jobs=-1 lets joblib pick the worker count).
stock_data = Parallel(n_jobs=-1, backend='threading')(
    delayed(AkShare.market_daily)(code, start=start) for code in codes
)
# Stack the per-stock frames under an ('instrument', 'datetime') index, then
# swap the two innermost index levels and sort the result by index.
data = (
    pd.concat(
        stock_data,
        axis=0,
        keys=codes,
        names=['instrument', 'datetime'],
        sort=True,
    )
    .swaplevel()
    .sort_index()
)
data.to_parquet(
    '../data/raw_data/em_daily_stock/daily_stock.parquet',
    compression='gzip',
)

After fetching the data and storing it in the in-memory `data` variable, we need to dump it to disk.

## Stock Financial Report Collector

In [None]:
# Collect the balance sheet of every listed stock and save it as parquet.
# benchmark: 23 min+
# Each code is rendered through format_code with the "{market}{code}" template.
format_code_partial = partial(format_code, format_str="{market}{code}")
codes = [
    format_code_partial(raw_code)
    for raw_code in ak.stock_zh_a_spot_em()['代码'].to_list()
]
# NOTE: comment out the tqdm part at
# `akshare/stock_feature/stock_three_report_em.py:56` before running this cell.
joblibres = Parallel(n_jobs=12, backend='loky')(
    delayed(AkShare.balance_sheet)(code) for code in codes
)
# Merge the per-stock results into one frame ordered by its index.
data = pd.concat(joblibres).sort_index()
data.to_parquet(
    '../data/financials/balance_sheet.parquet',
    compression='gzip',
)

In [None]:
# Collect the profit sheet of every listed stock and save it as parquet.
# benchmark: 18 min+
# Each code is rendered through format_code with the "{market}{code}" template.
format_code_partial = partial(format_code, format_str="{market}{code}")
codes = [
    format_code_partial(raw_code)
    for raw_code in ak.stock_zh_a_spot_em()['代码'].to_list()
]
# NOTE: comment out the tqdm part at
# `akshare/stock_feature/stock_three_report_em.py:56` before running this cell.
joblibres = Parallel(n_jobs=12, backend='loky')(
    delayed(AkShare.profit_sheet)(code) for code in codes
)
# Merge the per-stock results into one frame ordered by its index.
data = pd.concat(joblibres).sort_index()
data.to_parquet(
    '../data/financials/profit_sheet.parquet',
    compression='gzip',
)

In [None]:
# Collect the cash-flow sheet of every listed stock and save it as parquet.
# benchmark: 18 min+
# Each code is rendered through format_code with the "{market}{code}" template.
format_code_partial = partial(format_code, format_str="{market}{code}")
codes = [
    format_code_partial(raw_code)
    for raw_code in ak.stock_zh_a_spot_em()['代码'].to_list()
]
# NOTE: comment out the tqdm part at
# `akshare/stock_feature/stock_three_report_em.py:56` before running this cell.
joblibres = Parallel(n_jobs=12, backend='loky')(
    delayed(AkShare.cashflow_sheet)(code) for code in codes
)
# Merge the per-stock results into one frame ordered by its index.
data = pd.concat(joblibres).sort_index()
data.to_parquet(
    '../data/financials/cashflow_sheet.parquet',
    compression='gzip',
)