# feature はできたのでTargetを作成する
基準日付の終値から20営業日の間の最高値への変化率
https://japanexchangegroup.github.io/J-Quants-Tutorial/

In [1]:
import datetime
import numpy as np
import pandas as pd
import yfinance as yf
import seaborn as sns
import matplotlib.pyplot as plt
import time,sys,os
today = datetime.date.today()

prod = '1y'
dys = 365
dates = '2021-01-08'

last = today +datetime.timedelta(days=-dys)
last = pd.to_datetime(last)

os.makedirs('./csv/'+str(today),exist_ok = True)


In [2]:
"""TOPIX リストの読み込み"""
Core30 = pd.read_csv('./csv/JPX_data/TOPIX_Core30_20210608.csv')
data =pd.DataFrame(Core30) 

stocks = [str(s)+".T" for s in data.code]
#stocks.append("^N225")
tickers = yf.Tickers(" ".join(stocks))
stocks

['3382.T',
 '4063.T',
 '4452.T',
 '4502.T',
 '4503.T',
 '4568.T',
 '6098.T',
 '6367.T',
 '6501.T',
 '6594.T',
 '6758.T',
 '6861.T',
 '6954.T',
 '6981.T',
 '7203.T',
 '7267.T',
 '7741.T',
 '7974.T',
 '8001.T',
 '8031.T',
 '8058.T',
 '8306.T',
 '8316.T',
 '8411.T',
 '8766.T',
 '9022.T',
 '9432.T',
 '9433.T',
 '9984.T']

In [3]:
len(stocks)

29

In [4]:
closes= []

for i in range(len(tickers.tickers)):
    closes.append(tickers.tickers[i].history(period = prod).Close)

closes = pd.DataFrame(closes).T.ffill()
closes.columns = stocks
print(closes)

                 3382.T        4063.T       4452.T       4502.T       4503.T  \
Date                                                                           
2020-06-17  3626.668213  12315.406250  8496.340820  3826.471680  1736.043823   
2020-06-18  3572.102783  12192.301758  8458.332031  3817.864502  1714.099487   
2020-06-19  3613.027100  12216.923828  8409.604492  3848.468506  1726.778442   
2020-06-22  3613.027100  12157.832031  8430.070312  3855.163086  1754.574585   
2020-06-23  3553.589355  12079.044922  8411.553711  3833.166260  1794.562134   
...                 ...           ...          ...          ...          ...   
2021-06-11  4899.000000  18470.000000  6869.000000  3779.000000  1928.500000   
2021-06-14  4910.000000  18905.000000  6827.000000  3737.000000  1911.500000   
2021-06-15  5006.000000  19175.000000  6885.000000  3800.000000  1944.500000   
2021-06-16  5000.000000  19085.000000  6875.000000  3798.000000  1951.500000   
2021-06-17  5000.000000  18880.000000  6

## 予測用データとモデル作成用データに分ける

In [91]:
ttl_closes = closes.copy()
_closes = closes.copy()
feat_closes = _closes.iloc[:-120,:]
val_closes = _closes.iloc[-120:-100,:]
target_closes = _closes.iloc[-100:-20,:]
test_closes = _closes.iloc[-20:,:]


In [92]:
print(feat_closes.shape,val_closes.shape,target_closes.shape,test_closes.shape)

(125, 29) (20, 29) (80, 29) (20, 29)


## 特徴量作成　終値

In [93]:
#終値の営業日リターン
return_1month = feat_closes.copy().pct_change(20).T
return_2month = feat_closes.copy().pct_change(40).T
return_3month = feat_closes.copy().pct_change(60).T
# 終値の営業日ボラティリティ
volatility_1month = feat_closes.copy().diff().rolling(20).std().T
volatility_2month = feat_closes.copy().diff().rolling(40).std().T
volatility_3month = feat_closes.copy().diff().rolling(60).std().T
# 終値と営業日の単純移動平均線の乖離
ma_gap_1month = feat_closes.copy().rolling(20).mean().T
ma_gap_2month = feat_closes.copy().rolling(40).mean().T
ma_gap_3month = feat_closes.copy().rolling(60).mean().T

#終値の営業日リターン
target_return_1month = target_closes.copy().pct_change(20).T
target_return_2month = target_closes.copy().pct_change(40).T
target_return_3month = target_closes.copy().pct_change(60).T
# 終値の営業日ボラティリティ
target_volatility_1month = target_closes.copy().diff().rolling(20).std().T
target_volatility_2month = target_closes.copy().diff().rolling(40).std().T
target_volatility_3month = target_closes.copy().diff().rolling(60).std().T
# 終値と営業日の単純移動平均線の乖離
target_ma_gap_1month = target_closes.copy().rolling(20).mean().T
target_ma_gap_2month = target_closes.copy().rolling(40).mean().T
target_ma_gap_3month = target_closes.copy().rolling(60).mean().T



In [94]:
df_feat = pd.DataFrame()
df_feat['return_1month'] = return_1month.iloc[:,-1]
df_feat['return_2month'] = return_2month.iloc[:,-1]
df_feat['return_3month'] = return_3month.iloc[:,-1]

df_feat['volatility_1month'] = volatility_1month.iloc[:,-1]
df_feat['volatility_2month'] = volatility_2month.iloc[:,-1]
df_feat['volatility_3month'] = volatility_3month.iloc[:,-1]

df_feat['ma_gap_1month'] = ma_gap_1month.iloc[:,-1]
df_feat['ma_gap_2month'] = ma_gap_2month.iloc[:,-1]
df_feat['ma_gap_3month'] = ma_gap_3month.iloc[:,-1]

df_target = pd.DataFrame()
df_target['return_1month'] = target_return_1month.iloc[:,-1]
df_target['return_2month'] = target_return_2month.iloc[:,-1]
df_target['return_3month'] = target_return_3month.iloc[:,-1]

df_target['volatility_1month'] = target_volatility_1month.iloc[:,-1]
df_target['volatility_2month'] = target_volatility_2month.iloc[:,-1]
df_target['volatility_3month'] = target_volatility_3month.iloc[:,-1]

df_target['ma_gap_1month'] = target_ma_gap_1month.iloc[:,-1]
df_target['ma_gap_2month'] = target_ma_gap_2month.iloc[:,-1]
df_target['ma_gap_3month'] = target_ma_gap_3month.iloc[:,-1]

In [95]:
df_feat

Unnamed: 0,return_1month,return_2month,return_3month,volatility_1month,volatility_2month,volatility_3month,ma_gap_1month,ma_gap_2month,ma_gap_3month
3382.T,0.074738,0.007567,0.082831,64.452933,74.324109,70.135696,3440.60863,3364.354303,3368.34775
4063.T,0.134418,0.179434,0.222543,281.985745,257.688651,222.101495,16918.560254,15804.677539,15171.153662
4452.T,-0.007125,0.00567,-0.036305,93.843738,96.519929,94.06206,7710.605005,7650.793066,7684.322135
4502.T,0.069517,0.096127,-0.005723,34.179669,53.998462,57.168955,3722.279614,3573.077307,3587.683553
4503.T,-0.035703,0.019536,-0.041219,29.859095,28.727042,25.856932,1511.709174,1519.884451,1517.615529
4568.T,-0.074167,0.208923,0.048351,87.056681,84.352394,77.105459,3511.616431,3308.177612,3195.966903
6098.T,-0.062584,-0.035862,-0.022462,82.505185,104.350711,91.749962,4240.832422,4311.991986,4290.05245
6367.T,-0.096037,0.087812,0.122856,426.176848,451.226046,410.047153,23150.670898,22310.67627,21260.223242
6501.T,0.060812,0.165512,0.161722,55.10065,86.255892,80.44957,4047.377515,3872.149017,3770.029879
6594.T,0.128194,0.238994,0.356353,241.825846,261.416882,227.974149,12647.276221,11862.418188,11237.285954


In [96]:
df_target

Unnamed: 0,return_1month,return_2month,return_3month,volatility_1month,volatility_2month,volatility_3month,ma_gap_1month,ma_gap_2month,ma_gap_3month
3382.T,0.0596,0.070667,0.150041,103.218968,103.813183,96.736538,4676.75,4580.35,4481.714551
4063.T,-0.041063,-0.01557,-0.01026,330.058209,304.892159,333.610915,18502.25,18677.915674,18390.583854
4452.T,-0.062017,-0.069853,-0.095016,81.732217,78.726971,84.751699,6942.8,7124.525,7149.3
4502.T,0.015389,-0.100579,0.027841,38.78724,49.603655,54.153161,3694.6,3815.033502,3810.515381
4503.T,0.024272,-0.05315,-0.039848,23.723739,24.311487,27.01565,1664.8,1674.132385,1701.908704
4568.T,-0.125957,-0.212042,-0.204676,63.024071,60.060904,62.027292,2824.625,2966.869916,3036.296407
6098.T,0.010559,-0.003426,-0.025189,143.743961,119.809604,121.395658,5037.65,5134.477014,5160.656063
6367.T,-0.080962,-0.096997,-0.101074,592.126576,495.998267,506.269694,21855.25,22104.838184,22053.961654
6501.T,0.046219,0.006488,0.138799,108.149738,115.704171,110.863773,5272.55,5204.251233,5182.213826
6594.T,-0.120745,-0.09343,-0.157697,322.397577,263.725602,309.576463,12705.5,13182.032031,13247.231966


## 特徴量作成　損益計算書

In [97]:
#tickers.tickers[3].financials.T["Net Income"]
tickers.tickers[3].quarterly_financials.T['Net Income']


2021-03-31    1.97098e+11
2020-12-31     9.2359e+10
2020-09-30      4.037e+09
2020-06-30     8.2511e+10
Name: Net Income, dtype: object

In [98]:
'''当期・前期　当期純利益データフレームの作成'''
earnings = []
prev_earnings = []

dummy = tickers.tickers[0].quarterly_financials.T["Net Income"]
dummy[:] = np.nan

for i in range(len(tickers.tickers)):
    try:
        latest_earnings = tickers.tickers[i].quarterly_financials\
        .T["Net Income"]
        previous_earnings = tickers.tickers[i].quarterly_financials\
        .T["Net Income"][1:]
        earnings.append(latest_earnings)
        prev_earnings.append(previous_earnings)
    except:
        earnings.append(dummy)  
        prev_earnings.append(dummy) # エラー発生時はダミーを入れる

earnings = pd.DataFrame(earnings).T  # DataFrame化
prev_earnings = pd.DataFrame(prev_earnings).T  # DataFrame化
earnings.columns = stocks            # カラム名の設定
prev_earnings.columns = stocks            # カラム名の設定

_earnings = earnings.ffill().fillna(0).T.iloc[:,-1]
_prev_earnings = prev_earnings.ffill().fillna(0).T.iloc[:,-1]

print(_earnings)
print(_prev_earnings)

3382.T    4.827500e+10
4063.T    8.041100e+10
4452.T    2.571900e+10
4502.T    1.970980e+11
4503.T   -1.232800e+10
4568.T    1.520000e+08
6098.T    3.443000e+10
6367.T    2.750900e+10
6501.T    1.937350e+11
6594.T    3.834100e+10
6758.T    1.070000e+11
6861.T    6.492600e+10
6954.T    3.295000e+10
6981.T    6.073400e+10
7203.T    7.771970e+11
7267.T    2.133230e+11
7741.T    3.101800e+10
7974.T    1.635420e+11
8001.T    3.711500e+10
8031.T    1.365210e+11
8058.T    3.404000e+09
8306.T    1.699810e+11
8316.T    1.637990e+11
8411.T    1.166160e+11
8766.T    4.902100e+10
9022.T   -9.006200e+10
9432.T    8.500700e+10
9433.T    1.027700e+11
9984.T    1.171951e+12
Name: 2021-03-31 00:00:00, dtype: float64
3382.T    5.846800e+10
4063.T    7.301500e+10
4452.T    4.020100e+10
4502.T    9.235900e+10
4503.T    6.007900e+10
4568.T    2.413900e+10
6098.T    5.504900e+10
6367.T    4.413200e+10
6501.T    5.712300e+10
6594.T    3.485300e+10
6758.T    3.718910e+11
6861.T    4.804000e+10
6954.T    2.723

In [99]:
#当期・前 自己資本データフレームの作成
equity   = [] # 自己資本
prev_equity = []

dummy = tickers.tickers[0].quarterly_balance_sheet.T["Total Stockholder Equity"]
dummy[:] = np.nan

for i in range(len(tickers.tickers)):
    try:
        latest_equity = tickers.tickers[i].quarterly_balance_sheet\
        .T["Total Stockholder Equity"]
        previous_equity = tickers.tickers[i].quarterly_balance_sheet\
        .T["Total Stockholder Equity"][1:]
        equity.append(latest_equity)
        prev_equity.append(previous_equity)
    except:
        equity.append(dummy)  
        prev_equity.append(dummy)   # エラー発生時はダミーを入れる

equity = pd.DataFrame(equity).T      # DataFrame化
prev_equity = pd.DataFrame(prev_equity).T   
equity.columns = stocks              # カラム名の設定
prev_equity.columns = stocks 

_equity = equity.ffill().fillna(0).T.iloc[:,-1]
_prev_equity = prev_equity.ffill().fillna(0).T.iloc[:,-1]

print(_equity)
print(_prev_equity)

3382.T    0.000000e+00
4063.T    2.815153e+12
4452.T    9.087920e+11
4502.T    5.173037e+12
4503.T    1.386115e+12
4568.T    1.272053e+12
6098.T    1.091571e+12
6367.T    1.667707e+12
6501.T    3.525502e+12
6594.T    1.096054e+12
6758.T    5.575839e+12
6861.T    1.912844e+12
6954.T    1.425479e+12
6981.T    1.920805e+12
7203.T    2.340455e+13
7267.T    9.082306e+12
7741.T    6.880000e+11
7974.T    1.736023e+12
8001.T    3.316281e+12
8031.T    4.570420e+12
8058.T    5.613647e+12
8306.T    1.680257e+13
8316.T    1.144562e+13
8411.T    9.256410e+12
8766.T    3.666399e+12
9022.T    3.642515e+12
9432.T    7.437733e+12
9433.T    4.759720e+12
9984.T    7.625039e+12
Name: 2021-03-31 00:00:00, dtype: float64
3382.T    0.000000e+00
4063.T    2.745017e+12
4452.T    9.236870e+11
4502.T    4.635353e+12
4503.T    1.368609e+12
4568.T    1.291308e+12
6098.T    1.059796e+12
6367.T    1.556515e+12
6501.T    2.928327e+12
6594.T    9.823960e+11
6758.T    5.390560e+12
6861.T    1.843959e+12
6954.T    1.373

In [100]:
#発行株数データフレームの作成
shares   = [] # 発行株数

for i in range(len(tickers.tickers)):
    try:
        shares.append(tickers.tickers[i].info["sharesOutstanding"])
    except:
        shares.append(np.nan)        # エラー発生時はNAN値を入れる

shares = pd.Series(shares)           # Series化
shares.index = stocks                # インデックス名の設定

print(shares)

3382.T      882966976
4063.T      415240992
4452.T      474879008
4502.T     1576390016
4503.T     1857510016
4568.T     1916169984
6098.T     1634189952
6367.T      292584992
6501.T      966835008
6594.T      585488000
6758.T     1239030016
6861.T      242527008
6954.T      191812992
6981.T      639816000
7203.T     2795960064
7267.T     1726610048
7741.T      365756000
7974.T      119124000
8001.T     1487430016
8031.T     1638579968
8058.T     1478989952
8306.T    12844699648
8316.T     1370429952
8411.T     2535269888
8766.T      693273024
9022.T      196355008
9432.T     3622010112
9433.T     2275399936
9984.T     1738520064
dtype: int64


In [101]:
#EPS、ROEデータフレームの作成
eps = _earnings/shares.values      # EPS
prev_eps = _prev_earnings/shares.values      # previous EPS
roe = _earnings/_equity    # ROE
prev_roe = _prev_earnings/_prev_equity   # previous ROE

eps = eps.ffill().fillna(0).replace([np.inf,-np.inf],0)                 # 欠損データの補完
prev_eps = prev_eps.ffill().fillna(0) .replace([np.inf,-np.inf],0)  
roe = roe.ffill().fillna(0).replace([np.inf,-np.inf],0)  
prev_roe = prev_roe.ffill().fillna(0).replace([np.inf,-np.inf],0)  

#eps = eps.drop(["^N225"], axis=1) # ^N225カラムは削除しておく
#roe = roe.drop(["^N225"], axis=1)

print(eps)
print(prev_eps)
print(roe)
print(prev_roe)

3382.T      54.673619
4063.T     193.648993
4452.T      54.159059
4502.T     125.031241
4503.T      -6.636842
4568.T       0.079325
6098.T      21.068542
6367.T      94.020544
6501.T     200.380622
6594.T      65.485544
6758.T      86.357876
6861.T     267.706267
6954.T     171.781899
6981.T      94.924166
7203.T     277.971424
7267.T     123.550190
7741.T      84.805171
7974.T    1372.871965
8001.T      24.952434
8031.T      83.316654
8058.T       2.301571
8306.T      13.233552
8316.T     119.523803
8411.T      45.997470
8766.T      70.709516
9022.T    -458.669228
9432.T      23.469565
9433.T      45.165686
9984.T     674.108412
Name: 2021-03-31 00:00:00, dtype: float64
3382.T     66.217652
4063.T    175.837649
4452.T     84.655248
4502.T     58.588927
4503.T     32.343836
4568.T     12.597525
6098.T     33.685803
6367.T    150.834804
6501.T     59.082470
6594.T     59.528120
6758.T    300.146885
6861.T    198.081032
6954.T    141.971614
6981.T    119.507796
7203.T    299.967089
7267.

In [102]:
df_feat['eps'] = prev_eps
df_feat['roe'] = prev_roe

df_target['eps'] = eps
df_target['roe'] = roe

In [103]:
df_feat

Unnamed: 0,return_1month,return_2month,return_3month,volatility_1month,volatility_2month,volatility_3month,ma_gap_1month,ma_gap_2month,ma_gap_3month,eps,roe
3382.T,0.074738,0.007567,0.082831,64.452933,74.324109,70.135696,3440.60863,3364.354303,3368.34775,66.217652,0.0
4063.T,0.134418,0.179434,0.222543,281.985745,257.688651,222.101495,16918.560254,15804.677539,15171.153662,175.837649,0.026599
4452.T,-0.007125,0.00567,-0.036305,93.843738,96.519929,94.06206,7710.605005,7650.793066,7684.322135,84.655248,0.043522
4502.T,0.069517,0.096127,-0.005723,34.179669,53.998462,57.168955,3722.279614,3573.077307,3587.683553,58.588927,0.019925
4503.T,-0.035703,0.019536,-0.041219,29.859095,28.727042,25.856932,1511.709174,1519.884451,1517.615529,32.343836,0.043898
4568.T,-0.074167,0.208923,0.048351,87.056681,84.352394,77.105459,3511.616431,3308.177612,3195.966903,12.597525,0.018693
6098.T,-0.062584,-0.035862,-0.022462,82.505185,104.350711,91.749962,4240.832422,4311.991986,4290.05245,33.685803,0.051943
6367.T,-0.096037,0.087812,0.122856,426.176848,451.226046,410.047153,23150.670898,22310.67627,21260.223242,150.834804,0.028353
6501.T,0.060812,0.165512,0.161722,55.10065,86.255892,80.44957,4047.377515,3872.149017,3770.029879,59.08247,0.019507
6594.T,0.128194,0.238994,0.356353,241.825846,261.416882,227.974149,12647.276221,11862.418188,11237.285954,59.52812,0.035478


In [104]:
df_target

Unnamed: 0,return_1month,return_2month,return_3month,volatility_1month,volatility_2month,volatility_3month,ma_gap_1month,ma_gap_2month,ma_gap_3month,eps,roe
3382.T,0.0596,0.070667,0.150041,103.218968,103.813183,96.736538,4676.75,4580.35,4481.714551,54.673619,0.0
4063.T,-0.041063,-0.01557,-0.01026,330.058209,304.892159,333.610915,18502.25,18677.915674,18390.583854,193.648993,0.028564
4452.T,-0.062017,-0.069853,-0.095016,81.732217,78.726971,84.751699,6942.8,7124.525,7149.3,54.159059,0.0283
4502.T,0.015389,-0.100579,0.027841,38.78724,49.603655,54.153161,3694.6,3815.033502,3810.515381,125.031241,0.038101
4503.T,0.024272,-0.05315,-0.039848,23.723739,24.311487,27.01565,1664.8,1674.132385,1701.908704,-6.636842,-0.008894
4568.T,-0.125957,-0.212042,-0.204676,63.024071,60.060904,62.027292,2824.625,2966.869916,3036.296407,0.079325,0.000119
6098.T,0.010559,-0.003426,-0.025189,143.743961,119.809604,121.395658,5037.65,5134.477014,5160.656063,21.068542,0.031542
6367.T,-0.080962,-0.096997,-0.101074,592.126576,495.998267,506.269694,21855.25,22104.838184,22053.961654,94.020544,0.016495
6501.T,0.046219,0.006488,0.138799,108.149738,115.704171,110.863773,5272.55,5204.251233,5182.213826,200.380622,0.054952
6594.T,-0.120745,-0.09343,-0.157697,322.397577,263.725602,309.576463,12705.5,13182.032031,13247.231966,65.485544,0.034981


# 目的変数
20日分の株価　四本値 (始値、高値、安値、終値)から
翌営業日以降N（5，10，20）営業日間における最高値及び最安値への変化率
## 120日以前のデータでモデリング
## 119日から100日前の20日分のデータで、120日目の終値と比べてどの程度変化（最高値・最安値）を20，10，5日で算出

In [107]:
high= []
low = []

for i in range(len(tickers.tickers)):
    high.append(tickers.tickers[i].history(period = prod).High)
    low.append(tickers.tickers[i].history(period = prod).Low)

high = pd.DataFrame(high).T.ffill()
low = pd.DataFrame(low).T.ffill()
high.columns = stocks
low.columns = stocks
print(high)
print(low)

                 3382.T        4063.T       4452.T       4502.T       4503.T  \
Date                                                                           
2020-06-15          NaN           NaN          NaN          NaN          NaN   
2020-06-16          NaN           NaN          NaN          NaN          NaN   
2020-06-17  3677.336334  12487.752999  8588.925401  3902.025607  1756.525239   
2020-06-18  3624.719682  12290.785617  8563.586077  3837.948459  1743.358654   
2020-06-19  3620.822196  12354.801243  8487.570463  3857.075916  1737.994456   
...                 ...           ...          ...          ...          ...   
2021-06-11  4905.000000  18605.000000  6869.000000  3779.000000  1949.000000   
2021-06-14  4927.000000  18925.000000  6892.000000  3783.000000  1927.500000   
2021-06-15  5027.000000  19240.000000  6886.000000  3811.000000  1944.500000   
2021-06-16  5045.000000  19180.000000  6928.000000  3824.000000  1958.000000   
2021-06-17  5010.000000  18975.000000  6

In [145]:
_high = high.copy()
_low = low.copy()
val_zeroday = _closes.iloc[-121:-120,:] #基準の終値

_val_high = _high.iloc[-120:-100,:]
val_high = pd.concat([val_zeroday,_val_high])

_val_low = _low.iloc[-120:-100,:]
val_low = pd.concat([val_zeroday,_val_low])

test_zeroday = _closes.iloc[-21:-20,:] #基準の終値
_test_high = _high.iloc[-20:,:]
test_high = pd.concat([test_zeroday,_test_high])

_test_low = _low.iloc[-20:,:]
test_low = pd.concat([test_zeroday,_test_low])

In [162]:
(test_low.iloc[:5,:].min() / test_zeroday).T

Date,2021-05-20
3382.T,0.985264
4063.T,0.999451
4452.T,0.99838
4502.T,0.988301
4503.T,0.977488
4568.T,0.974491
6098.T,1.0
6367.T,0.99712
6501.T,1.0
6594.T,0.99511


In [170]:
test_zeroday

Unnamed: 0_level_0,3382.T,4063.T,4452.T,4502.T,4503.T,4568.T,6098.T,6367.T,6501.T,6594.T,...,8031.T,8058.T,8306.T,8316.T,8411.T,8766.T,9022.T,9432.T,9433.T,9984.T
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-05-20,4818.0,18215.0,6791.0,3761.0,1688.0,2626.5,5168.0,20830.0,5410.0,12270.0,...,2389.0,2921.5,631.900024,3998.0,1692.0,5299.0,15795.0,2858.0,3678.0,8500.0


In [181]:
df_val = pd.DataFrame()
df_val['zeroday_price'] = val_low.iloc[0,:].T
df_val['label_low_5'] = (val_low.iloc[:5,:].min() / test_zeroday).T
df_val['label_low_10'] = (val_low.iloc[:10,:].min() / test_zeroday).T
df_val['label_low_20'] = (val_low.iloc[:20,:].min() / test_zeroday).T

df_val['label_high_5'] = (val_high.iloc[:5,:].max() / test_zeroday).T
df_val['label_high_10'] = (val_high.iloc[:10,:].max() / test_zeroday).T
df_val['label_high_20'] = (val_high.iloc[:20,:].max() / test_zeroday).T

df_test = pd.DataFrame()
df_test['zeroday_price'] = test_low.iloc[0,:].T
df_test['label_low_5'] = (test_low.iloc[:5,:].min() / test_zeroday).T
df_test['label_low_10'] = (test_low.iloc[:10,:].min() / test_zeroday).T
df_test['label_low_20'] = (test_low.iloc[:20,:].min() / test_zeroday).T

df_test['label_high_5'] = (test_high.iloc[:5,:].max() / test_zeroday).T
df_test['label_high_10'] = (test_high.iloc[:10,:].max() / test_zeroday).T
df_test['label_high_20'] = (test_high.iloc[:20,:].max() / test_zeroday).T


In [182]:
df_val

Unnamed: 0,zeroday_price,label_low_5,label_low_10,label_low_20,label_high_5,label_high_10,label_high_20
3382.T,3552.112305,0.71511,0.71511,0.71511,0.742181,0.762688,0.824622
4063.T,16964.470703,0.921537,0.921537,0.921537,0.961047,1.001919,1.07222
4452.T,7667.717285,1.120998,1.120998,1.120998,1.148053,1.187748,1.187748
4502.T,3793.162842,0.970312,0.958605,0.932592,1.016616,1.016616,1.016616
4503.T,1520.946533,0.89167,0.89167,0.89167,0.911862,0.945223,0.978584
4568.T,3319.356689,1.240665,1.240665,1.240665,1.310433,1.375273,1.424565
6098.T,4185.741211,0.793519,0.793519,0.793519,0.821135,0.849524,0.9007
6367.T,21478.154297,1.031116,1.031116,1.031116,1.081106,1.123919,1.15286
6501.T,4160.749023,0.727374,0.725545,0.725545,0.779878,0.779878,0.804393
6594.T,12776.236328,0.998567,0.998567,0.998567,1.046544,1.074191,1.186815


In [183]:
df_test

Unnamed: 0,zeroday_price,label_low_5,label_low_10,label_low_20,label_high_5,label_high_10,label_high_20
3382.T,4818.0,0.985264,0.967414,0.967414,1.019925,1.019925,1.047115
4063.T,18215.0,0.999451,0.999451,0.999451,1.039528,1.067801,1.082624
4452.T,6791.0,0.99838,0.957738,0.955824,1.018701,1.018701,1.020174
4502.T,3761.0,0.988301,0.952672,0.952672,1.018878,1.018878,1.018878
4503.T,1688.0,0.977488,0.977488,0.977488,1.005332,1.099526,1.159953
4568.T,2626.5,0.974491,0.919855,0.898153,1.023415,1.023415,1.023415
6098.T,5168.0,1.0,1.0,1.0,1.10565,1.118228,1.128483
6367.T,20830.0,0.99712,0.99712,0.955593,1.026164,1.065771,1.065771
6501.T,5410.0,1.0,1.0,1.0,1.058226,1.090758,1.158965
6594.T,12270.0,0.99511,0.99511,0.988998,1.02934,1.042787,1.047677
