In [24]:
import FinanceDataReader as fdr
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

pd.options.display.float_format = '{:,.3f}'.format

### 매도 전략 데이터 프로세싱
모델 개발을 위해서는 매도 전략에 따는 수익을 계산을 할 수 있어야 합니다. 이번 장에서는 기본적인 몇 가지 전략의 수익율을 계산해보겠습니다. 저장해 둔 mdl_data pickle 파일을 읽습니다.


In [25]:
mdl_data = pd.read_pickle('mdl_data.pkl')

### <br>매도 전략 1 - 모든 종목 종가 매수 후, 5 영업일 기간 6% 익절 매도
한가지 전략을 테스트 해 보겠습니다. 모든 종목을 같은 금액으로 매일 종가 매수합니다. 매수 후 5 영업일 동안 수익율이 6% 이상되면 곧바로 익절합니다. 나머지 종목은 5 영업일에 전부 종가 매도하면 수익율은 어떻게 될까요?   

In [40]:
kosdaq_list = pd.read_pickle('kosdaq_list.pkl')

data_all_1 = pd.DataFrame()

ub = 1.06

for code in kosdaq_list['code']:
    
    # 종목별 처리
    data = mdl_data[mdl_data['code']==code].sort_index().copy()
    
    # 고가, 저가, 종가 수익율
    for i in [1,2,3,4,5]:

        data['high_r' + str(i)] = data['high'].shift(-1*i)/data['close']      
        data['low_r' + str(i)] = data['low'].shift(-1*i)/data['close']   
        data['close_r' + str(i)] = data['close'].shift(-1*i)/data['close']    
        
    data['max_high']  = (data[['high_r1','high_r2','high_r3','high_r4','high_r5']].max(axis=1) > ub).astype(int)          
    data['ub_return'] = np.where(data['max_high']==1, ub, data['close_r5'])
       
    data.dropna(subset=['close_r1','close_r2','close_r3','close_r4','close_r5'], inplace=True)   
    data_all_1 = pd.concat([data, data_all_1], axis=0)

data_all_1.to_pickle('data_all_1.pkl')    
data_all_1.head()    

Unnamed: 0_level_0,open,high,low,close,volume,change,code,name,kosdaq_return,return,...,low_r3,close_r3,high_r4,low_r4,close_r4,high_r5,low_r5,close_r5,max_high,ub_return
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-01-04,13000,13300,12750,13050,297172,0.02,238490,힘스,,,...,0.996,1.004,1.015,0.981,0.992,1.0,0.954,1.0,0,1.0
2021-01-05,13000,13050,12750,12900,190192,-0.011,238490,힘스,1.008,0.989,...,0.992,1.004,1.012,0.965,1.012,1.081,1.008,1.039,1,1.06
2021-01-06,13050,13300,12700,13100,287008,0.016,238490,힘스,0.996,1.016,...,0.95,0.996,1.065,0.992,1.023,1.046,1.004,1.015,1,1.06
2021-01-07,13200,13350,13000,13100,203149,0.0,238490,힘스,1.008,1.0,...,0.992,1.023,1.046,1.004,1.015,1.019,1.0,1.008,1,1.06
2021-01-08,13200,13250,12800,12950,209722,-0.011,238490,힘스,0.999,0.989,...,1.015,1.027,1.031,1.012,1.019,1.073,1.019,1.027,1,1.06


In [41]:
data_all_1 = pd.read_pickle('data_all_1.pkl')
print(data_all_1['ub_return'].describe(percentiles=[0.01, 0.1, 0.5, 0.9, 0.99]))
print(data_all_1.groupby('max_high')['ub_return'].describe())

count   422,037.000
mean          1.002
std           0.055
min           0.291
1%            0.843
10%           0.931
50%           1.002
90%           1.060
99%           1.060
max           1.060
Name: ub_return, dtype: float64
               count  mean   std   min   25%   50%   75%   max
max_high                                                      
0        281,895.000 0.974 0.046 0.291 0.952 0.984 1.002 1.060
1        140,142.000 1.060 0.000 1.060 1.060 1.060 1.060 1.060


### <br>매도 전략 2 - 모든 종목을 종가 매수 후, 아래와 같은 순서로 매도  
1. (익일 고가/매수 종가) 값이 (당일 고가/ 매수 종가) 값 보다 크면 2 영업일 시가 매도   
2.  1 조건 만족하지 않으면 2 영업일 종가 매도  

위와 같은 매도 전략은 수익율이 어떻게 될까요?   

In [23]:
kosdaq_list = pd.read_pickle('kosdaq_list.pkl')

data_all_2 = pd.DataFrame()

def final_r(x):
    
    if   x['high_r0'] < x['high_r1']:  #  (당일 고가/매수 종가) 비율이 (익일 고가/매수 종가) 비율 값이 작으면 2 영업일 시가 매도     
        return x['open_r2']    
    
    else:
        return x['close_r2'] # 매도 안된 종목은 전부 2 영업일 종가 매도         
    
for code in kosdaq_list['code']:    
    
    # 종목별 처리
    data = mdl_data[mdl_data['code']==code].sort_index().copy()
    
    # 최고/최저 수익율
    for i in [0, 1, 2]:

        data['high_r' + str(i)] = data['high'].shift(-1*i)/data['close']        
        data['close_r' + str(i)] = data['close'].shift(-1*i)/data['close']
        data['open_r' + str(i)] = data['open'].shift(-1*i)/data['close']
        
    data['final_return'] = data.apply(final_r, axis=1)
                                                                                                                                                 
    data.dropna(subset=['close_r0','close_r1', 'close_r2'], inplace=True)   
    data_all_2 = pd.concat([data, data_all_2], axis=0)

data_all_2.to_pickle('data_all_2.pkl')    
data_all_2.head()    

Unnamed: 0_level_0,open,high,low,close,volume,change,code,name,kosdaq_return,return,...,high_r0,close_r0,open_r0,high_r1,close_r1,open_r1,high_r2,close_r2,open_r2,final_return
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-01-04,13000,13300,12750,13050,297172,0.02,238490,힘스,,,...,1.019,1.0,0.996,1.0,0.989,0.996,1.019,1.004,1.0,1.004
2021-01-05,13000,13050,12750,12900,190192,-0.011,238490,힘스,1.008,0.989,...,1.012,1.0,1.008,1.031,1.016,1.012,1.035,1.016,1.023,1.023
2021-01-06,13050,13300,12700,13100,287008,0.016,238490,힘스,0.996,1.016,...,1.015,1.0,0.996,1.019,1.0,1.008,1.011,0.989,1.008,1.008
2021-01-07,13200,13350,13000,13100,203149,0.0,238490,힘스,1.008,1.0,...,1.019,1.0,1.008,1.011,0.989,1.008,0.996,0.996,0.981,0.996
2021-01-08,13200,13250,12800,12950,209722,-0.011,238490,힘스,0.999,0.989,...,1.023,1.0,1.019,1.008,1.008,0.992,1.077,1.035,1.023,1.035


<br>만약 5 영업일 기간동안 익절라인(6%)과 손절라인(6%) 동시에 도달하는 경우는 수익율 계산에서 제외하겠습니다. 아래를 value_count 로 비율을 보니 전체 데이터의 5.5% 에 해당합니다. 

In [42]:
data_all_2 = pd.read_pickle('data_all_2.pkl')  
data_all_2['final_return'].describe(percentiles=[0.01, 0.1, 0.5, 0.9, 0.99])

count   426,300.000
mean          1.001
std           0.048
min           0.000
1%            0.892
10%           0.955
50%           1.000
90%           1.045
99%           1.146
max           1.690
Name: final_return, dtype: float64