In [1]:
import pandas as pd
import numpy as np
from IPython.display import display

## External DB
### 1) **Simulatable** instruments
- Filtering conditions:

- e.g., `itemtype` may be ETF, stock, etc.

In [2]:
# Instrument-type label for the externally loaded price/volume data;
# it is written into the `itemtype` column of every df_db[freq] frame below.
itemtype = 'ETF'

#### Load external data formatted as plain files, and then pickle them.
- We <span class="mark">drop</span> any instrument unless:
  - `min_datapoints`: <span class="mark">it has at least 3 years' worth of data points.</span>

In [3]:
# Load every raw price/volume table (daily, weekly, monthly) into `df`,
# keyed by the file stem.
data_files = ['price_d', 'volume_d', 'price_w', 'volume_w', 'price_m', 'volume_m']
filepath = '../../data/external/'
df = {}
for filename in data_files:
    # The column-name row sits on line 8 of the file (header=7).
    raw = pd.read_csv(f"{filepath}{filename}.dat", header=7)
    # The first six records after the header are not observations; discard them
    # and rename the leading 'Code' column, which holds the dates.
    frame = raw.drop(index=range(0, 6)).rename(columns={'Code': 'date'})
    frame['date'] = pd.to_datetime(frame['date'])
    # Every remaining column is one instrument; unparsable cells become NaN.
    name_cols = frame.columns[1:]
    frame[name_cols] = frame[name_cols].apply(pd.to_numeric, errors='coerce', axis=1)
    df[filename] = frame


#### Merge data sets and melt them into one tall and thin data frame.
- `df_db[freq]` is your final data frame.
  - `freq` = {d|w|m} for different frequencies.
- df_db[freq].`trading_amt_mln` is a three-month average trading amount.

In [4]:
# Build the tall (long-format) database df_db[freq] for each frequency:
# one row per (date, itemcode) with price, volume, the rolling average
# trading amount in millions, the log return and the instrument type.
df_pr = {}
df_vol = {}
df_db = {}

frequency = ['d', 'w', 'm']
# We need one extra record for the return calculation.
# NOTE(review): the daily threshold counts rows of the source file; if that file
# holds business days only, 365*3+1 rows span more than three calendar years,
# and a 90-row window spans more than three months -- confirm these are intended.
min_datapoints = {'d': 365*3+1, 'w': 52*3+1, 'm': 12*3+1}
window_3m = {'d': 90, 'w': 12, 'm': 3}

for freq in frequency:
    # Drop instruments (columns) with too few observations, then keep only the
    # dates on which every remaining instrument has a value.
    pr = df['price_'+freq].dropna(thresh=min_datapoints[freq], axis=1).dropna()
    vol = df['volume_'+freq].dropna(thresh=min_datapoints[freq], axis=1).dropna()
    df_pr[freq] = pd.melt(pr, id_vars=['date'], var_name='itemcode', value_name='price')
    df_vol[freq] = pd.melt(vol, id_vars=['date'], var_name='itemcode', value_name='volume')
    db = pd.merge(df_pr[freq], df_vol[freq], on=['date', 'itemcode'], how='outer')

    # The rolling mean must be taken per instrument. Rolling over the whole
    # melted frame would mix the last rows of one instrument into the first
    # rows of the next one.
    window = window_3m[freq]
    amount_mln = (db.price * db.volume).divide(10**6)
    trading_amt_mln = amount_mln.groupby(db.itemcode, sort=False).transform(
        lambda s: s.rolling(window, min_periods=1).mean()
    )

    df_db[freq] = db.assign(
        trading_amt_mln=trading_amt_mln,
        # Log return per instrument; the first row of each instrument is NaN.
        ret=np.log(1 + db.groupby('itemcode').price.pct_change()),
        itemtype=itemtype,
    )

### 2) **Non-tradable** instruments
- e.g.: rates

In [5]:
# Weekly call-rate series, reshaped to the df_db column layout:
# the 'call' column becomes 'price' and the rows are tagged as the
# risk-free instrument 'CALL'.
df['ecos_w'] = (
    pd.read_csv(filepath + 'ecos_w' + '.dat', header=3, parse_dates=['date'])
    .assign(itemcode='CALL', itemtype='riskfree')
    .rename(columns={'call': 'price'})
)

In [6]:
# Append the risk-free series to the weekly database.
# Rows whose itemcode already comes from `ecos_w` are removed first, so
# re-running this cell does not append the same series a second time.
# The original index labels of both frames are kept, so labels may repeat.
weekly_without_ecos = df_db['w'][~df_db['w'].itemcode.isin(df['ecos_w'].itemcode)]
df_db['w'] = pd.concat([weekly_without_ecos, df['ecos_w']])

## Pickle final results

`price_db_#.pkl`

- Upsampling (business days -> everyday)

In [7]:
# Upsample the daily frame to calendar days, per instrument, carrying the last
# available row forward. `Resampler.ffill()` replaces the deprecated
# `Resampler.fillna('pad')`.
# NOTE(review): every column is forward-filled, including `ret` and `volume`,
# so non-trading days repeat the previous day's return -- confirm downstream
# code expects that.
daily = df_db['d'].set_index('date').groupby('itemcode').resample('D').ffill()
# Depending on the pandas version the grouping column is either still present
# as a data column (then the duplicate index level is discarded) or only
# present as the outer index level (then it is restored as a column).
itemcode_only_in_index = 'itemcode' not in daily.columns
daily = daily.reset_index(level=0, drop=not itemcode_only_in_index)
df_db['d'] = daily.reset_index()

In [8]:
# Persist one pickle per frequency: price_db_d.pkl, price_db_w.pkl, price_db_m.pkl.
for freq in frequency:
    target = f"{filepath}price_db_{freq}.pkl"
    df_db[freq].to_pickle(target)

`simulatable_instruments.pkl`

In [9]:
# Snapshot of the simulatable instruments: the weekly rows of the configured
# instrument type on the most recent date, without the date and return columns.
# The filter uses `itemtype` (the label written into df_db above) rather than a
# hard-coded 'ETF', so changing the configuration keeps this cell consistent.
df_simulatables = df_db['w'][df_db['w'].itemtype == itemtype]
latest_date = df_simulatables.date.max()
simulatable_instruments = (
    df_simulatables.loc[df_simulatables.date == latest_date]
    .drop(['date', 'ret'], axis=1)
    .reset_index(drop=True)
)
simulatable_instruments.to_pickle(filepath + 'simulatable_instruments.pkl')

In [10]:
# Render the daily, weekly and monthly databases one after another.
display(*(df_db[freq] for freq in frequency))

Unnamed: 0,date,itemcode,price,volume,trading_amt_mln,ret,itemtype
0,2016-08-17,A069500,23459.0,2999724.0,70370.525316,,ETF
1,2016-08-18,A069500,23629.0,3610366.0,77839.931765,0.007221,ETF
2,2016-08-19,A069500,23684.0,3608250.0,80379.218843,0.002325,ETF
3,2016-08-20,A069500,23684.0,3608250.0,80379.218843,0.002325,ETF
4,2016-08-21,A069500,23684.0,3608250.0,80379.218843,0.002325,ETF
...,...,...,...,...,...,...,...
39348,2021-04-19,A251350,18470.0,93497.0,2958.409974,0.004885,ETF
39349,2021-04-20,A251350,18395.0,112891.0,2976.973377,-0.004069,ETF
39350,2021-04-21,A251350,18180.0,188809.0,3012.059288,-0.011757,ETF
39351,2021-04-22,A251350,18425.0,29159.0,3015.940633,0.013386,ETF


Unnamed: 0,date,itemcode,price,volume,trading_amt_mln,ret,itemtype
0,2018-03-30,A069500,29992.0000,8620284.0,258539.557728,,ETF
1,2018-04-06,A069500,29664.0000,6384268.0,223961.241840,-0.010996,ETF
2,2018-04-13,A069500,29997.0000,6843943.0,217740.080617,0.011163,ETF
3,2018-04-20,A069500,30278.0000,4767086.0,199389.517940,0.009324,ETF
4,2018-04-27,A069500,30550.0000,7854372.0,207501.827272,0.008943,ETF
...,...,...,...,...,...,...,...
742,2021-03-26,CALL,0.0048,,,,riskfree
743,2021-04-02,CALL,0.0044,,,,riskfree
744,2021-04-09,CALL,0.0049,,,,riskfree
745,2021-04-16,CALL,0.0048,,,,riskfree


Unnamed: 0,date,itemcode,price,volume,trading_amt_mln,ret,itemtype
0,2018-03-30,A069500,29992.0,8620284.0,258539.557728,,ETF
1,2018-04-30,A069500,30750.0,4494562.0,198373.669614,0.024959,ETF
2,2018-05-31,A069500,29607.0,18580502.0,315620.087314,-0.037879,ETF
3,2018-06-29,A069500,28603.0,14304926.0,365828.167531,-0.034499,ETF
4,2018-07-31,A069500,28436.0,5386124.0,370812.181052,-0.005856,ETF
...,...,...,...,...,...,...,...
1068,2020-11-30,A237370,11225.0,5554.0,56.002922,0.027547,ETF
1069,2020-12-30,A237370,11485.0,15390.0,91.878707,0.022898,ETF
1070,2021-01-29,A237370,11670.0,7987.0,110.768697,0.015980,ETF
1071,2021-02-26,A237370,11730.0,10908.0,132.637760,0.005128,ETF


In [11]:
# Show the latest-date instrument snapshot that was pickled above.
display(simulatable_instruments)

Unnamed: 0,itemcode,price,volume,trading_amt_mln,itemtype
0,A069500,43425.0,4199882.0,339086.018967,ETF
1,A105190,43770.0,173908.0,16654.415431,ETF
2,A102110,43290.0,770037.0,44294.68989,ETF
3,A232080,15085.0,90507.0,2925.09328,ETF
4,A278540,13850.0,347864.0,5962.748135,ETF
5,A229200,15025.0,1534186.0,47941.22846,ETF
6,A196230,104410.0,69091.0,55410.362766,ETF
7,A293180,43440.0,11583.0,2707.662064,ETF
8,A130730,101070.0,1048977.0,88507.386146,ETF
9,A122260,101290.0,13766.0,29709.41133,ETF


In [13]:
# Inspect the wide daily price table as loaded: a `date` column followed by
# one column per instrument code.
df['price_d']

Unnamed: 0,date,A069500,A105190,A102110,A232080,A278540,A229200,A196230,A371460,A293180,...,A153130,A364990,A266370,A114260,A195930,A273130,A245340,A329750,A329650,A237370
6,2006-12-28,14900.0,,,,,,,,,...,,,,,,,,,,
7,2007-01-02,14952.0,,,,,,,,,...,,,,,,,,,,
8,2007-01-03,14739.0,,,,,,,,,...,,,,,,,,,,
9,2007-01-04,14526.0,,,,,,,,,...,,,,,,,,,,
10,2007-01-05,14424.0,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3540,2021-04-19,43850.0,44205.0,43730.0,15330.0,13980.0,15250.0,104410.0,10990.0,43865.0,...,102800.0,11920.0,21995.0,58100.0,13625.0,109325.0,19200.0,9575.0,11135.0,12035.0
3541,2021-04-20,44125.0,44465.0,43960.0,15275.0,14070.0,15190.0,104405.0,10965.0,44120.0,...,102805.0,11920.0,22060.0,58055.0,13585.0,109195.0,19160.0,9535.0,11100.0,12065.0
3542,2021-04-21,43335.0,43680.0,43215.0,15060.0,13850.0,14990.0,104405.0,11035.0,43355.0,...,102805.0,11700.0,21640.0,58085.0,13315.0,109160.0,19080.0,9570.0,11070.0,12015.0
3543,2021-04-22,43400.0,43760.0,43285.0,15115.0,13850.0,15020.0,104415.0,11055.0,43430.0,...,102810.0,11565.0,21660.0,58100.0,13495.0,109240.0,19215.0,9570.0,11095.0,12055.0


In [14]:
# Ad-hoc check: load the pickle named 'price_db_d-old.pkl' for comparison.
# NOTE(review): presumably an earlier export of the daily database; no cell
# visible here writes this file -- confirm where it comes from.
pd.read_pickle('../../data/external/price_db_d-old.pkl')

Unnamed: 0,date,itemcode,price,volume,trading_amt_mln,ret,itemtype
0,2018-03-30,A069500,29992.0000,8620284.0,258539.557728,,ETF
1,2018-04-02,A069500,29922.0000,5388437.0,209886.184821,-0.002337,ETF
2,2018-04-03,A069500,29861.0000,11930407.0,258675.417690,-0.002041,ETF
3,2018-04-04,A069500,29401.0000,5962195.0,237830.187066,-0.015525,ETF
4,2018-04-05,A069500,29809.0000,7580651.0,235458.474785,0.013782,ETF
...,...,...,...,...,...,...,...
3510,2021-03-16,CALL,0.0047,,,,riskfree
3511,2021-03-17,CALL,0.0046,,,,riskfree
3512,2021-03-18,CALL,0.0048,,,,riskfree
3513,2021-03-19,CALL,0.0047,,,,riskfree


In [16]:
# Ad-hoc check: read back the daily database pickle from the external data folder.
pd.read_pickle('../../data/external/price_db_d.pkl')

Unnamed: 0,date,itemcode,price,volume,trading_amt_mln,ret,itemtype
0,2018-03-30,A069500,29992.0000,8620284.0,258539.557728,,ETF
1,2018-03-31,A069500,29992.0000,8620284.0,258539.557728,,ETF
2,2018-04-01,A069500,29992.0000,8620284.0,258539.557728,,ETF
3,2018-04-02,A069500,29922.0000,5388437.0,209886.184821,-0.002337,ETF
4,2018-04-03,A069500,29861.0000,11930407.0,258675.417690,-0.002041,ETF
...,...,...,...,...,...,...,...
37735,2021-04-19,CALL,0.0049,,,,riskfree
37736,2021-04-20,CALL,0.0047,,,,riskfree
37737,2021-04-21,CALL,0.0048,,,,riskfree
37738,2021-04-22,CALL,0.0049,,,,riskfree


In [17]:
# Ad-hoc check: load the processed advised-portfolio table.
# No cell visible here writes this file.
pd.read_pickle('../../data/processed/advised_portfolios.pkl')

Unnamed: 0,date,risk_profile,itemcode,weights,tracking_code,itemname,price,volume,trading_amt_mln,asset_class
0,2019-01-02,2,A273130,0.240000,ACT_BOND_D4,KODEX 종합채권(AA-이상)액티브,104320.0,26468.0,858.104496,Fixed Income
1,2019-01-02,2,A214980,0.240000,MM_PLUS,KODEX 단기채권PLUS,100050.0,10160.0,508.874866,Fixed Income
2,2019-01-02,2,A196230,0.207716,BOK_D.4,KBSTAR 단기통안채,101423.0,393085.0,16568.430895,Fixed Income
3,2019-01-02,2,A114260,0.100000,KTB_D3,KODEX 국고채3년,55690.0,5449.0,251.127486,Fixed Income
4,2019-01-02,2,A122260,0.092284,BOK_D1,KOSEF 통안채1년,97761.0,42453.0,13238.397304,Fixed Income
...,...,...,...,...,...,...,...,...,...,...
19314,2021-04-27,4,A237370,0.240000,BAL_KO3KTB7,KODEX 배당성장채권혼합,12120.0,6424.0,123.481189,Alternative
19315,2021-04-27,4,A278540,0.240000,MSCI_KR,KODEX MSCI Korea TR,13980.0,320774.0,16041.347167,Equity
19316,2021-04-27,4,A266370,0.136000,IT,KODEX IT,21835.0,18570.0,1113.118976,Equity
19317,2021-04-27,4,A292150,0.084000,TOP10,TIGER TOP10,14880.0,244322.0,15103.241318,Equity


In [18]:
# Ad-hoc check: load the '-updated' variant of the advised-portfolio table,
# for comparison with the file loaded in the previous cell.
pd.read_pickle('../../data/processed/advised_portfolios-updated.pkl')

Unnamed: 0,date,risk_profile,itemcode,weights,tracking_code,itemname,price,volume,trading_amt_mln,asset_class
0,2019-01-02,2,A273130,0.240000,ACT_BOND_D4,KODEX 종합채권(AA-이상)액티브,104320.0,26468.0,858.104496,Fixed Income
1,2019-01-02,2,A214980,0.240000,MM_PLUS,KODEX 단기채권PLUS,100050.0,10160.0,508.874866,Fixed Income
2,2019-01-02,2,A196230,0.207716,BOK_D.4,KBSTAR 단기통안채,101423.0,393085.0,16568.430895,Fixed Income
3,2019-01-02,2,A114260,0.100000,KTB_D3,KODEX 국고채3년,55690.0,5449.0,251.127486,Fixed Income
4,2019-01-02,2,A122260,0.092284,BOK_D1,KOSEF 통안채1년,97761.0,42453.0,13238.397304,Fixed Income
...,...,...,...,...,...,...,...,...,...,...
19314,2021-04-27,4,A237370,0.240000,BAL_KO3KTB7,KODEX 배당성장채권혼합,12120.0,6424.0,123.481189,Alternative
19315,2021-04-27,4,A278540,0.240000,MSCI_KR,KODEX MSCI Korea TR,13980.0,320774.0,16041.347167,Equity
19316,2021-04-27,4,A266370,0.136000,IT,KODEX IT,21835.0,18570.0,1113.118976,Equity
19317,2021-04-27,4,A292150,0.084000,TOP10,TIGER TOP10,14880.0,244322.0,15103.241318,Equity


In [19]:
# Scratch cell: evaluates to 0 and does not touch any notebook data.
len([])

0