In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import statsmodels.api as sm

In [2]:
krx = '/data/hun/KRX_marketdata/'

## 1. 파일 가져오기

#### Ticker

In [3]:
#이건 industry code가 없음
tickers = pd.read_csv(krx+'ticker_recent.csv', encoding ='cp949')
tickers.columns = ('full_ticker', 'ticker', 'name_full', 'name',
              'name_eng', 'listed_data', 'market', 'security_category',
              'related_department', 'preferred', 'face_value', 'shares')
tickers = tickers.drop(['related_department'], axis=1).dropna().reset_index(drop=True)

#### Price

In [23]:
prices = pd.read_csv(krx+"krx_marketdata_monthly.csv")
prices['ticker'] = prices.ticker.apply(lambda x: '0'*(6-len(str(x)))+str(x))

  interactivity=interactivity, compiler=compiler, result=result)


In [25]:
prices = pd.merge(prices, tickers[['ticker','shares']], how = 'left', on='ticker')
prices.dropna(inplace = True)

#### Finance

In [31]:
fin = pd.read_csv(krx+"krx_fin_monthly2.csv")
fin['date'] = fin.date.apply(lambda x: f'{str(x)[:4]}/{str(x)[5:7]}')
fin['ticker'] = fin.ticker.apply(lambda x: '0'*(6-len(str(x)))+str(x))
fin.drop(['close','daily_change','daily_return','date2'], axis=1, inplace = True)
fin.rename(columns={'주당배당금': 'div', '배당수익률':'div_ret'}, inplace = True)

In [32]:
prices = pd.merge(prices, fin[["date","ticker","PER","PBR","div_ret"]], how='left', on=['ticker','date'])

#### Macro

In [35]:
#이자율
interest1 = pd.read_excel(krx+'stat_107302.xls')
interest2 = pd.read_excel(krx+'stat_107301.xls')
interest = pd.concat([interest1.transpose(),interest2.transpose()]).reset_index()
int_col = ['date','tb3y','tb5y','tb10y','cb3y','cd3m','call','br']
interest.columns = int_col
interest.drop([0],axis=0, inplace=True)
interest["date"] = interest.date.apply(lambda x: f'{x[:4]}/{x[4:6]}')
interest.replace('-', np.nan, inplace=True)

In [36]:
interest.drop(['tb10y','br'], axis=1, inplace =True)

In [38]:
prices = pd.merge(prices, interest, how = 'left', on ='date')

In [80]:
prices[interest.columns[1:]] = prices[interest.columns[1:]].astype('float')

In [39]:
# bpr = nan인 경우, 우량주 등
prices.drop(prices[prices.PBR.isna()].index, inplace = True)

## 팩터 전처리

In [475]:
# prices.to_csv(krx+'price_before_factor_from2010', index = False)

In [41]:
prices['size'] = prices.close * prices.shares

In [42]:
prices['ret'] = 0
for i in tqdm(tickers.ticker):
    stock = prices[prices.ticker == i]
    ind = stock.index
    prices.loc[ind, 'ret'] = stock.close.pct_change(-1).shift(1)

100%|██████████| 2600/2600 [01:00<00:00, 43.32it/s]


In [43]:
dates = prices.date.unique()
len(dates)

297

In [44]:
def size_bin(data: pd.Series) -> pd.Series:
    size_rnk = data.rank(pct=True)
    size_bin = pd.cut(size_rnk, bins=[0, 0.2, 0.4, 0.6, 0.8, 1], include_lowest=True, labels=[1,2,3,4,5])
    return size_bin.astype('int')

def size_bin_factor(data: pd.Series) -> pd.Series:
    size_rnk = data.rank(pct=True)
    size_bin = pd.cut(size_rnk, bins=[0, 0.5, 1], include_lowest=True, labels=[1,2])
    return size_bin.astype('int')

In [45]:
##시총가중 월별수익률
def size_weighted_return(data: pd.DataFrame) -> int:
    size_sum = data['size'].sum()
    weighted_return = data.apply(lambda x: x['size']*x['ret'], axis=1).sum()
    return weighted_return/size_sum

In [52]:
# size_bin, size_weighted_return
prices['size_bin'] = 0
prices['SW_ret'] = 0
for i in tqdm(dates):
    stock = prices[prices.date == i]
    if len(stock) ==0:
        continue
    else:
        ind = stock.index
        prices.loc[ind, 'size_bin'] =  size_bin(stock['size'])
        prices.loc[ind, 'SW_ret'] = size_weighted_return(stock)

100%|██████████| 297/297 [00:14<00:00, 20.77it/s]


In [66]:
prices['RM_RF'] = prices['SW_ret'] - prices['cd3m'].astype('float')/1200

In [67]:
def inv(x):
    if x != 0:
        return 1/x
    else:
        return 0
def HML_bin(data: pd.Series) -> pd.Series:
    data = data.apply(inv)
    size_rnk = data.rank(pct=True)
    size_bin = pd.cut(size_rnk, bins=[0, 0.2, 0.4, 0.6, 0.8, 1], include_lowest=True, labels=[1,2,3,4,5])
    try:
        return size_bin.astype('int')
    except:
        print(data[data == np.NaN])
def HML_bin_factor(data: pd.Series) -> pd.Series:
    data = data.apply(inv)
    size_rnk = data.rank(pct=True)
    size_bin = pd.cut(size_rnk, bins=[0, 0.4, 0.7, 1], include_lowest=True, labels=[1,2,3])
    return size_bin.astype('int')

In [68]:
# HML bin
prices.bpr_bin = 0
prices.bpr_bin_factor = 0
prices.size_bin_factor = 0
for i in tqdm(dates):
    stock = prices[prices.date == i]
    ind = stock.index
    prices.loc[ind, 'bpr_bin'] =  HML_bin(stock['PBR'])
    prices.loc[ind, 'bpr_bin_factor'] =  HML_bin_factor(stock['PBR'])
    prices.loc[ind, 'size_bin_factor'] =  size_bin_factor(stock['size'])

100%|██████████| 297/297 [00:10<00:00, 28.47it/s]


In [69]:
prices['smb*hml'] = prices.apply(lambda x: (x.size_bin_factor-1)*3 + x.bpr_bin_factor, axis=1)

Beta

In [88]:
def get_beta(data: pd.DataFrame):
    X = data.RM_RF.astype('float')
    Y = data.ret
    X = sm.add_constant(X)
    model = sm.OLS(Y,X)
    results = model.fit()
    params = results.params
    if len(params==1):
        params['const']=0
    mse_resid = results.mse_resid
    return params, mse_resid

In [89]:
prices['beta'] = 0
prices['const'] = 0
prices['ido_vol'] = 0
for i in tqdm(tickers.ticker):
    stock = prices[prices.ticker == i]
    if len(stock) <= 1:
        if len(stock) ==1 :
            print(stock.ticker[0], stock.name[0])
            pass
    else:
        ind = stock.index[1:]
        stock = stock.loc[ind]
        for j in range(len(ind),0,-1):
            ind_temp = ind[max(j-12,0):j]
            temp = stock.loc[ind_temp]
            params, mse_resid = get_beta(temp)
            prices.loc[ind_temp, 'const'] = params['const']
            prices.loc[ind_temp, 'beta'] = params['RM_RF']
            prices.loc[ind_temp, 'ido_vol'] = mse_resid
prices['ido_vol'] = prices['ido_vol'].fillna(0)

100%|██████████| 2600/2600 [51:04<00:00,  1.18s/it] 


In [90]:
prices.to_csv(krx+'prices_final.csv',index=False)

## 팩터 포트폴리오

In [279]:
port = pd.DataFrame(columns = [1,2,3,4,5], index = [1,2,3,4,5])
port.columns.name = 'BPR'
port.index.name = 'Size'

In [280]:
port

BPR,1,2,3,4,5
Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,,,,,
2,,,,,
3,,,,,
4,,,,,
5,,,,,


## 팩터 계산

In [70]:
factors = prices[['date','ticker','name','ret']].copy()

In [71]:
# factors.to_csv(krx+'factors+until2010.csv')

In [72]:
factors

Unnamed: 0,date,ticker,name,ret
0,2021/09,098120,마이크로컨텍솔,
1,2021/08,098120,마이크로컨텍솔,-0.114160
2,2021/07,098120,마이크로컨텍솔,0.130273
3,2021/06,098120,마이크로컨텍솔,0.034660
4,2021/05,098120,마이크로컨텍솔,0.209627
...,...,...,...,...
430267,2017/11,238490,힘스,-0.106343
430268,2017/10,238490,힘스,0.237875
430269,2017/09,238490,힘스,0.021226
430270,2017/08,238490,힘스,-0.179884


In [81]:
#RM-RF
factors['RM_RF'] = prices['RM_RF']

In [103]:
def smb_hml(data):
    data = data.dropna(subset = ['ret'])
    res = []
    for i in range(1,7):
        res.append(data[data['smb*hml'] == i].ret.mean())
    smb = np.mean(res[:3]) - np.mean(res[3:])
    hml = (res[2]+res[5])/2 - (res[0]+res[3])/2
    return res, smb, hml

In [110]:
### HML, SMB
prices.smb = 0
factors.hml = 0
smb_hml_port = pd.DataFrame(columns = ["SL", "SM", "SH", "BL", "BM", "BH"])
for i in tqdm(dates):
    stock = prices[prices.date == i]
    ind = stock.index
    res, smb, hml = smb_hml(stock)
    factors.loc[ind, 'smb'] = smb
    factors.loc[ind, 'hml'] = hml
# factors.to_csv(krx+"ff3.csv", index = False)

  2%|▏         | 6/297 [00:00<00:10, 27.73it/s]

2021/09 nan nan
2021/08 -0.005351680756814747 0.03771549528274665
2021/07 -0.023582022247856948 -0.0042843020641569265
2021/06 -0.0008562602206512084 -0.020802207125245248
2021/05 0.03937651286454319 0.020717338638420103
2021/04 0.11671746496550137 0.06037768775197677
2021/03 0.05208198489480465 0.032682897484419965


  4%|▍         | 12/297 [00:00<00:09, 28.61it/s]

2021/02 0.0496615469684615 0.07460991985843698
2021/01 0.004102893299664501 0.05050261742661352
2020/12 0.035239475986280605 0.05512396470125142
2020/11 -0.017059823328327783 0.019468297427194813
2020/10 -0.013116964687386393 0.0256596496738129
2020/09 0.05742883746115121 0.05091469930619171
2020/08 0.04675258906823273 0.00808644783254273


  6%|▌         | 18/297 [00:00<00:09, 29.14it/s]

2020/07 -0.008581945464083557 -0.007440447121422417
2020/06 0.05504440818650684 -0.0015727718065611518
2020/05 0.019466959039572518 -0.08390335021980927
2020/04 0.019384234157256153 -0.02415026649422039
2020/03 0.06320607281391824 0.05394439940118706
2020/02 0.00488849810081371 -0.06317614925925816
2020/01 0.01082362000463058 -0.024290340625517506


  9%|▉         | 26/297 [00:00<00:09, 29.64it/s]

2019/12 0.04583747270298766 -0.022085509378947696
2019/11 -0.002785251483620313 -0.01353722101716203
2019/10 0.019089855447158136 0.031010448060014243
2019/09 0.005243937130359122 -0.053412353842870026
2019/08 0.04196035632390663 0.03997918210517329
2019/07 0.026723652370407726 -0.0005786248023134464
2019/06 0.002914531009349519 0.005010984085345588


 11%|█         | 33/297 [00:01<00:08, 29.90it/s]

2019/05 0.027477945650852362 0.02453833569390957
2019/04 0.05595567940194918 0.01936470523519454
2019/03 0.053520896077250815 -0.014821767446613038
2019/02 0.011774782410722252 -0.030284021450255823
2019/01 -0.00934462102920764 -0.004424726735810682
2018/12 0.03495738708667395 0.013136950090431365
2018/11 0.023503802243398552 0.05195503938019242


 14%|█▍        | 41/297 [00:01<00:08, 30.11it/s]

2018/10 0.046273236737487344 -0.010740055109642543
2018/09 -0.012260380099555324 0.04026044032706594
2018/08 -0.006483057756342794 0.0069470640258752344
2018/07 0.017019086141630477 -0.021534681764194687
2018/06 0.005405473047156526 0.0022141324709133367
2018/05 0.0027005507844392873 0.006952227456054991
2018/04 0.08386445681258237 0.00136275573655191


 16%|█▌        | 48/297 [00:01<00:08, 30.19it/s]

2018/03 0.027959260053438233 0.05111504861838406
2018/02 0.014863073300543346 -0.02545617695442782
2018/01 0.04052080604298205 -0.0030602199879395595
2017/12 0.014072848478910188 -0.014060671272889125
2017/11 0.002816537761017984 -0.022863340965671337
2017/10 -0.006211302907636038 -0.048664858666254904
2017/09 -0.004622772208771702 -0.023640507951782436


 19%|█▉        | 56/297 [00:01<00:07, 30.26it/s]

2017/08 0.009419345816781263 0.0008924137653097339
2017/07 0.007989291116029796 -0.02522503326632817
2017/06 -0.010032573237437423 0.017067508756435337
2017/05 -0.0016953930567762433 0.001528463181079436
2017/04 0.03889543282302232 0.0335008783423732
2017/03 -0.00409246974164972 -0.0005236049076696846
2017/02 0.0214715514177184 -0.005079650392732315


 20%|██        | 60/297 [00:02<00:07, 30.29it/s]

2017/01 -0.020098466813098143 0.03084670665992138
2016/12 0.03752612155122531 0.029925323524760357
2016/11 0.01578742780429903 -0.0006932957522091873
2016/10 0.030307639213669378 0.05210995196088031
2016/09 0.013398901225375869 0.029431718237271087
2016/08 0.04041316859314646 0.017073535775486356
2016/07 0.011098663678610891 0.054137482610807544


 23%|██▎       | 68/297 [00:02<00:07, 30.41it/s]

2016/06 0.03227823088775562 0.03530402083538646
2016/05 0.01895226879618412 -0.004487226674311033
2016/04 0.034094488711731985 -0.020757486817676682
2016/03 0.016367255083928608 0.013329471736356575
2016/02 0.013155857950875358 0.03219611326221705
2016/01 0.026399910715900397 0.04118145995287778
2015/12 -0.006432329413420836 -0.045237897454194806


 26%|██▌       | 76/297 [00:02<00:07, 30.46it/s]

2015/11 0.0260001199620733 0.0455512246286366
2015/10 0.03573695728267134 0.013881579820248803
2015/09 0.06175363601807953 0.0014759325292092798
2015/08 0.04539606702226718 0.0324521416638127
2015/07 0.00011495410989781352 -0.004113450634867302
2015/06 0.043556839595292156 -0.004533337116621235
2015/05 0.15735291351071465 -0.12528220340715426


 28%|██▊       | 84/297 [00:02<00:06, 30.54it/s]

2015/04 0.022559412606333276 -0.03742571235557763
2015/03 0.03493402020764374 0.016985657593241
2015/02 0.10120491455433345 0.0006143462834566626
2015/01 0.04631733313293894 0.009070842030335008
2014/12 0.05516641716228892 -0.04193808587236175
2014/11 0.03670346467443364 -0.016567810759363415
2014/10 0.00466065426263368 0.02065970854888593


 30%|██▉       | 88/297 [00:02<00:06, 30.61it/s]

2014/09 0.004430672172666475 -0.015358025601565253
2014/08 0.015350510977689047 -0.03548604722837665
2014/07 0.006688650421367348 -0.001061952823793566
2014/06 0.008912669237491237 0.019998357377100193
2014/05 -0.0008263700500417081 0.0006993646353342753
2014/04 0.03845615260818204 0.01763514948690452
2014/03 0.0221505428872799 0.014131890442460202


 32%|███▏      | 96/297 [00:03<00:06, 30.64it/s]

2014/02 0.03522435564797098 -0.0007343333619673834
2014/01 0.062467954453122386 -0.03536359148178905
2013/12 0.04159359939580786 -0.017348056608887937
2013/11 -0.010261288826212914 0.03348627548031599
2013/10 -0.014807333236551042 0.011207952031648358
2013/09 8.706074095569082e-05 0.005886752431055582
2013/08 0.0029647393937059507 -0.029088715065518682


 35%|███▌      | 104/297 [00:03<00:06, 30.68it/s]

2013/07 0.03493257062379076 0.06993388151540447
2013/06 0.0016726589471083991 -0.003025993315149751
2013/05 -0.01408470540332879 0.022798445222269675
2013/04 0.08633714497374431 -0.04719737730330569
2013/03 0.03354966178640151 0.03644822258789239
2013/02 0.02004251197616247 -0.013950617472490254
2013/01 -0.004707399261184679 -0.01747760640395865


 38%|███▊      | 112/297 [00:03<00:06, 30.76it/s]

2012/12 0.1326981947524069 -0.03219317894509792
2012/11 0.03129694000091288 0.07503338393642961
2012/10 -0.014612312143100655 0.027785480727551208
2012/09 -0.008334641959067548 0.012181884773342123
2012/08 0.004145680436363659 0.03359729418548524
2012/07 0.014861685160186676 -0.033385512362440306
2012/06 0.08823511869417827 -0.0841160289383513


 39%|███▉      | 116/297 [00:03<00:05, 30.68it/s]

2012/05 0.04671813391440256 -0.0034097684466802836
2012/04 0.03990158900367376 -0.01087250007517334
2012/03 0.029160229606485427 0.048918611118849674
2012/02 0.045973184174657754 -0.004471173382036876
2012/01 0.06153359072606965 0.09064926421872349
2011/12 0.06860896879643616 0.0770374779643257
2011/11 0.0261969389180458 0.03288325007674618


 42%|████▏     | 124/297 [00:04<00:05, 30.80it/s]

2011/10 0.01635433402083106 0.018982194247363818
2011/09 0.019125006856879304 -0.030700312756777565
2011/08 -0.0008484709351770092 0.004071520140106713
2011/07 0.048049386660615215 -0.019734465597087414
2011/06 0.03931897240259259 0.01047998511795678
2011/05 0.012434255013819447 0.008112261852432854
2011/04 0.03734223536537419 0.019743488209872723


 44%|████▍     | 132/297 [00:04<00:05, 30.84it/s]

2011/03 0.013185690887404986 -0.00625620639717735
2011/02 -0.00029714093789798973 -0.026440894356317947
2011/01 0.024145464317662034 -0.009730846485090643
2010/12 0.03850687096236731 0.020414463689850663
2010/11 -0.01883360236190514 0.024721145702063248
2010/10 0.031626636948130095 0.028838365274122934
2010/09 0.02852713524411246 -0.004891115108246261


 47%|████▋     | 140/297 [00:04<00:05, 30.87it/s]

2010/08 -0.013394837353205048 0.008723736983411251
2010/07 0.06977426472876361 0.02384615358914873
2010/06 0.14418984245928237 0.15735523781625305
2010/05 -0.016476352670880955 0.02042445963190504
2010/04 0.015792221320609354 -0.000146728281735245
2010/03 0.033141887157415936 0.02154232710004682
2010/02 -5.732391104953938e-05 0.034573780858207365


 48%|████▊     | 144/297 [00:04<00:04, 30.85it/s]

2010/01 0.029668935736745266 -0.00769056241774848
2009/12 0.12163399440517542 -0.14100225819618423
2009/11 0.02106201511895342 -0.05687070588650853
2009/10 0.02523466518515277 0.04415257897032197
2009/09 0.016535393559249798 0.022983878125973894
2009/08 0.0495155789179372 -0.004475597039258636
2009/07 0.018097393783061185 -0.0018490633666987458


 51%|█████     | 152/297 [00:05<00:04, 30.98it/s]

2009/06 0.07425819438820726 -0.05283280774897353
2009/05 -0.002470126212885237 0.035777251328353826
2009/04 0.08553971558164059 0.04366928117152814
2009/03 0.09439847238595014 0.06466418197577489
2009/02 0.043433944139817576 0.01232955482474879
2009/01 0.034922485694272275 0.0384974266976792
2008/12 0.08360341993670611 -0.056405170589792664


 54%|█████▍    | 160/297 [00:05<00:04, 30.98it/s]

2008/11 0.008553583944947873 -0.0031586499685216374
2008/10 0.001651996333231551 -0.005512780178336446
2008/09 0.017122877595702357 0.0045503586874651725
2008/08 -0.009765487848659903 -0.019435632545981886
2008/07 -0.0057443759789369275 0.024442029651805988
2008/06 -0.0003086919066849908 0.01984907248879776
2008/05 0.01885626546696515 0.026812711303976056


 56%|█████▌    | 167/297 [00:05<00:04, 29.70it/s]

2008/04 0.12057028963050628 0.16793495450149287
2008/03 0.004617824981625836 0.011327843969476187
2008/02 0.005438492180713631 -0.001855020096617449
2008/01 0.007888808851110596 -0.0372214301007239
2007/12 0.03550424498362208 0.04810330299487943
2007/11 0.014704032690852742 0.04230615365237707
2007/10 0.01344292994368805 0.008088956228885447


 59%|█████▊    | 174/297 [00:05<00:04, 29.93it/s]

2007/09 -0.032884921594993954 -0.05126838160904051
2007/08 0.018225564339834473 0.012134984573117728
2007/07 0.28604318617768043 -0.30651877108540604
2007/06 -0.005969936314990543 -0.0066185675853905335
2007/05 0.028756262914469262 -0.062092897547325804
2007/04 0.07775826907068933 -0.07824344112246936
2007/03 -0.025278924506192874 0.022347617850063603


 61%|██████▏   | 182/297 [00:05<00:03, 30.58it/s]

2007/02 0.06030367030142529 0.05279992482542832
2007/01 0.01635033890516946 0.024429185602143647
2006/12 0.046290055550059696 0.02788982929342298
2006/11 0.02330809513329161 0.030895564587778476
2006/10 -0.005798192386556544 0.08826490815225391
2006/09 0.0045134846822832255 0.01755407494159197
2006/08 0.013590379549376662 0.021645951682896347


 63%|██████▎   | 186/297 [00:06<00:03, 30.74it/s]

2006/07 0.04370180765556057 -0.03618820785903768
2006/06 0.021083844660373198 -0.0006005449299783944
2006/05 0.008237812577778905 0.018002992857881303
2006/04 0.06799362538562158 0.05672117740210042
2006/03 0.038834986703187045 0.022757520651305686
2006/02 0.16101376734577336 -0.1781397479425488
2006/01 0.0077031226030504765 -0.00296839115646367


 65%|██████▌   | 194/297 [00:06<00:03, 31.08it/s]

2005/12 0.002036651636938693 -0.010662028695519495
2005/11 -0.0018947748025652834 0.06950522478532645
2005/10 0.027535838026131465 -0.023184385436323895
2005/09 0.10163259710040554 -0.03745832331067142
2005/08 0.34116964789328197 -0.47908321608065785
2005/07 -0.02903826745603861 0.03775060111012549
2005/06 0.09324095761769487 0.01643447759735442


 68%|██████▊   | 202/297 [00:06<00:03, 31.27it/s]

2005/05 0.0631530671500779 -0.12676135642494352
2005/04 0.020331468706499517 -0.08313224976826647
2005/03 0.013343243858855353 0.04314692453115526
2005/02 0.06296384996710735 0.02059167655674121
2005/01 0.13563408367391883 0.10143460762113815
2004/12 0.2039162479694063 -0.16928916661985066
2004/11 0.06354878616681776 -0.05153094866355023


 71%|███████   | 210/297 [00:06<00:02, 31.34it/s]

2004/10 0.01642475079859939 0.0249045863960161
2004/09 0.03524572442265677 0.02168794367128823
2004/08 -0.0038384384590526704 0.05168589295757654
2004/07 0.0028481722028079193 0.014453559984308487
2004/06 0.04231845463534595 0.05085109665131205
2004/05 -0.015207748830396772 0.05159165887948408
2004/04 0.11435885955075555 0.037471787484902576


 72%|███████▏  | 214/297 [00:07<00:02, 31.23it/s]

2004/03 0.04696099853240751 0.002778060933886599
2004/02 0.014606562933246157 0.008477026994431092
2004/01 -0.025875960072821252 0.01559643300835759
2003/12 0.01704830310727661 0.04074271767272522
2003/11 0.06411244413403659 0.12096276699918423
2003/10 0.00498101834998638 0.008647729707718647
2003/09 -0.00467614466459737 -0.01274574056527418


 75%|███████▍  | 222/297 [00:07<00:02, 31.43it/s]

2003/08 0.04013746560256148 -0.008292110001496183
2003/07 -0.02796364614282361 0.049311994725545
2003/06 0.17071838711349177 -0.010814286532218591
2003/05 0.013242574207510252 -0.00208217144365333
2003/04 0.11020005053324286 0.049730810102228185
2003/03 -0.04610539278252132 0.04957738999046371
2003/02 0.013495513368322123 0.010298210023226825


 77%|███████▋  | 230/297 [00:07<00:02, 31.52it/s]

2003/01 -0.01014584777837188 -0.07319653500133684
2002/12 0.03855781706521098 -0.01431086206817612
2002/11 0.0013453964766016735 -0.011800977058546464
2002/10 -0.020325340916025322 0.018693371800517326
2002/09 0.02322618128991604 -0.0226510728256026
2002/08 -0.03083500598775546 0.0878872952449066
2002/07 0.008084252352698572 -0.003978871832137826


 80%|████████  | 238/297 [00:07<00:01, 30.82it/s]

2002/06 0.09424117349868669 0.029430541530524757
2002/05 0.02644062057207762 0.012386107812575278
2002/04 nan nan
2002/03 nan nan
2002/02 nan nan
2002/01 nan nan
2001/12 nan nan


 83%|████████▎ | 246/297 [00:08<00:01, 31.42it/s]

2001/11 nan nan
2001/10 nan nan
2001/09 nan nan
2001/08 nan nan
2001/07 nan nan
2001/06 nan nan
2001/05 nan nan
2001/04 nan nan


 86%|████████▌ | 254/297 [00:08<00:01, 31.80it/s]

2001/03 nan nan
2001/02 nan nan
2001/01 nan nan
2000/12 nan nan
2000/11 nan nan
2000/10 nan nan
2000/09 nan nan
2000/08 nan nan


 88%|████████▊ | 262/297 [00:08<00:01, 32.02it/s]

2000/07 nan nan
2000/06 nan nan
2000/05 nan nan
2000/04 nan nan
2000/03 nan nan
2000/02 nan nan
2000/01 nan nan
1999/12 nan nan


 91%|█████████ | 270/297 [00:08<00:00, 32.20it/s]

1999/11 nan nan
1999/10 nan nan
1999/09 nan nan
1999/08 nan nan
1999/07 nan nan
1999/06 nan nan
1999/05 nan nan
1999/04 nan nan


 94%|█████████▎| 278/297 [00:09<00:00, 32.26it/s]

1999/03 nan nan
1999/02 nan nan
1999/01 nan nan
1998/12 nan nan
1998/11 nan nan
1998/10 nan nan
1998/09 nan nan
1998/08 nan nan


 96%|█████████▋| 286/297 [00:09<00:00, 32.25it/s]

1998/07 nan nan
1998/06 nan nan
1998/04 nan nan
1998/03 nan nan
1998/02 nan nan
1998/01 nan nan
1997/12 nan nan
1997/11 nan nan


 99%|█████████▉| 294/297 [00:09<00:00, 32.28it/s]

1998/05 nan nan
1997/09 nan nan
1997/07 nan nan
1997/06 nan nan
1997/05 nan nan
1997/04 nan nan
1997/03 nan nan
1997/02 nan nan


100%|██████████| 297/297 [00:09<00:00, 30.87it/s]

1997/01 nan nan
1997/10 nan nan
1997/08 nan nan





Unnamed: 0,date,ticker,name,close,high,low,total_volume,avg_volume,total_transactions,avg_transactions,...,size_bin,SW_ret,RM_RF,bpr_bin,bpr_bin_factor,size_bin_factor,smb*hml,beta,const,ido_vol


In [119]:
# factors.to_csv(krx+"ff3.csv", index = False)

In [86]:
# prices.to_csv(krx+'ff3_prices.csv', index=False)

In [91]:
len(factors), len(prices)

(418562, 418562)

In [99]:
factors['div_ret'] = factors.div_ret.fillna(0)

In [93]:
# Dividend to price
factors['div_ret'] = prices['div_ret']

In [139]:
#Beta, Beta_squared
factors['beta'] = prices['beta']
factors['beta_seq'] = prices['beta']**2

In [541]:
len(factors), len(prices)

(205304, 205304)

In [140]:
#Idiosyncratic return volatiliy, FF3 return vol
factors['ido_vol'] = prices['ido_vol']

In [120]:
#EPR (PER 역수)
factors['EPR'] = prices['PER'].apply(inv)
factors['BPR'] = prices['PBR'].apply(inv)

In [131]:
#Size
prices['size_factor'] =0
for i in tqdm(dates):
    stock = prices[prices.date == i]
    ind = stock.index
    size_rnk = stock["size"].rank(pct=True)
    prices.loc[ind, 'size_factor'] = size_rnk

100%|██████████| 297/297 [00:07<00:00, 41.22it/s]


In [134]:
factors['size']=prices["size_factor"]

In [136]:
#Share turnover = transaction/supply
factors['share_turn'] =0
for i in tqdm(tickers.ticker):
    share = tickers[tickers.ticker ==i].shares.values[0]
    stock = prices[prices.ticker == i]
    ind = stock.index
    factors.loc[ind, 'share_turn'] = stock.total_volume/share

100%|██████████| 2600/2600 [01:00<00:00, 43.04it/s]


In [137]:
#Momentum 1m, 12m, 36m, 6m, change in 6m momentum
factors['mom1'] =0
factors['mom6'] =0
factors['mom12'] =0
for i in tqdm(tickers.ticker):
    stock = prices[prices.ticker == i]
    ind = stock.index
    temp = stock[::-1]
    mom1 = stock.ret.shift(-1)
    mom6 = temp.ret.rolling(6, min_periods=1).mean()[::-1]
    mom12 = temp.ret.rolling(12, min_periods=1).mean()[::-1]
    factors.loc[ind, 'mom1'] = mom1
    factors.loc[ind, 'mom6'] = mom6
    factors.loc[ind, 'mom12'] = mom12

100%|██████████| 2600/2600 [01:10<00:00, 36.96it/s]


In [142]:
factors.to_csv(krx+'factors.csv', index=False)