## Class 및 func 정리

In [1]:
import numpy as np
import random
import os
import math
import pandas as pd
import torch
from tqdm import tqdm

In [2]:
class moving_avg(torch.nn.Module):
    """Moving average along the time axis with edge-replication padding.

    The input is padded by repeating its first and last time step so that,
    with ``stride=1``, the output keeps the input's number of time steps for
    any ``kernel_size`` (odd or even).

    Args:
        kernel_size: Width of the averaging window, in time steps.
        stride: Stride handed to ``torch.nn.AvgPool1d``.
    """

    def __init__(self, kernel_size, stride):
        super(moving_avg, self).__init__()
        self.kernel_size = kernel_size
        self.avg = torch.nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        """Return the moving average of ``x``.

        Args:
            x: 3-D tensor laid out as (batch, time, channel); dim 1 is the
                axis that gets padded and pooled.

        Returns:
            Tensor laid out as (batch, time, channel). With ``stride=1`` the
            time length equals that of ``x``.
        """
        # kernel_size - 1 padded steps in total keep the length unchanged at
        # stride 1. For an odd kernel both sides get (kernel_size - 1) // 2
        # steps; an even kernel needs one extra step, which goes to the end.
        pad_front = (self.kernel_size - 1) // 2
        pad_end = self.kernel_size // 2
        front = x[:, 0:1, :].repeat(1, pad_front, 1)
        end = x[:, -1:, :].repeat(1, pad_end, 1)
        x = torch.cat([front, x, end], dim=1)
        # AvgPool1d pools the last dimension, so move time there and back.
        x = self.avg(x.permute(0, 2, 1))
        x = x.permute(0, 2, 1)
        return x

In [3]:
class series_decomp(torch.nn.Module):
    """Split a series into its moving average and what is left over.

    Args:
        kernel_size: Window width of the stride-1 moving average.
    """

    def __init__(self, kernel_size):
        super().__init__()
        self.moving_avg = moving_avg(kernel_size, stride=1)

    def forward(self, x):
        """Return ``(moving_mean, residual)`` with ``residual = x - moving_mean``."""
        smoothed = self.moving_avg(x)
        return smoothed, x - smoothed

In [4]:
class LTSF_DLinear(torch.nn.Module):
    """Decomposition + linear forecaster.

    The input window is split by ``series_decomp`` into a moving-average part
    (called trend here) and the residual (called seasonal here). Each part is
    mapped from ``window_size`` to ``forcast_size`` time steps by a linear
    layer applied along the time axis, and the two results are summed.

    Args:
        window_size: Number of input time steps.
        forcast_size: Number of predicted time steps.
        kernel_size: Moving-average window used for the decomposition.
        individual: If truthy, every channel gets its own pair of linear
            layers; otherwise one pair is shared by all channels.
        feature_size: Number of channels (stored as ``self.channels``).
    """

    def __init__(self, window_size, forcast_size, kernel_size, individual, feature_size):
        super().__init__()
        self.window_size = window_size
        self.forcast_size = forcast_size
        self.decompsition = series_decomp(kernel_size)
        self.individual = individual
        self.channels = feature_size

        if not self.individual:
            self.Linear_Trend = self._averaging_linear()
            self.Linear_Seasonal = self._averaging_linear()
            return

        self.Linear_Seasonal = torch.nn.ModuleList()
        self.Linear_Trend = torch.nn.ModuleList()
        for _ in range(self.channels):
            self.Linear_Trend.append(self._averaging_linear())
            self.Linear_Seasonal.append(self._averaging_linear())

    def _averaging_linear(self):
        """Build Linear(window_size -> forcast_size) whose weights all equal 1/window_size."""
        layer = torch.nn.Linear(self.window_size, self.forcast_size)
        # The bias keeps torch's default initialisation; only the weight is replaced.
        layer.weight = torch.nn.Parameter(
            (1/self.window_size)*torch.ones([self.forcast_size, self.window_size])
        )
        return layer

    def _apply_per_channel(self, layers, series):
        """Run ``layers[idx]`` on channel ``idx`` of ``series`` (batch, channel, time)."""
        result = torch.zeros(
            [series.size(0), series.size(1), self.forcast_size],
            dtype=series.dtype,
            device=series.device,
        )
        for idx in range(self.channels):
            result[:, idx, :] = layers[idx](series[:, idx, :])
        return result

    def forward(self, x):
        """Forecast from ``x`` laid out as (batch, window_size, channel).

        Returns:
            Tensor laid out as (batch, forcast_size, channel).
        """
        # Move time to the last axis so the linear layers act along it.
        trend_init, seasonal_init = (
            part.permute(0, 2, 1) for part in self.decompsition(x)
        )
        if self.individual:
            trend_output = self._apply_per_channel(self.Linear_Trend, trend_init)
            seasonal_output = self._apply_per_channel(self.Linear_Seasonal, seasonal_init)
        else:
            trend_output = self.Linear_Trend(trend_init)
            seasonal_output = self.Linear_Seasonal(seasonal_init)
        return (seasonal_output + trend_output).permute(0, 2, 1)

In [5]:
from torch.utils.data import DataLoader, Dataset
class Data(Dataset):
    """Index-aligned pairing of inputs ``X`` and targets ``Y``."""

    def __init__(self, X, Y):
        self.X, self.Y = X, Y

    def __len__(self):
        # The sample count is taken from the targets.
        return len(self.Y)

    def __getitem__(self, idx):
        sample, label = self.X[idx], self.Y[idx]
        return sample, label

In [6]:
def time_slide_df(df, window_size, forcast_size, date, target):
    """Cut a time-ordered frame into sliding (input window, forecast window) pairs.

    Rows are addressed by position, so ``df`` may carry any index (it does not
    have to be a default 0..n-1 RangeIndex). ``df`` is not modified.

    Args:
        df: Frame whose rows are in time order.
        window_size: Number of consecutive rows in each input window.
        forcast_size: Number of rows, directly after the input window, that
            form the forecast target.
        date: Name of the column holding the date of each row.
        target: Name of the column holding the value to forecast.

    Returns:
        Tuple ``(x, y, dates)`` with ``n = len(df) - window_size - forcast_size + 1``
        samples:
            x: float32 array, shape (n, window_size, 1), input values.
            y: float32 array, shape (n, forcast_size), target values.
            dates: array, shape (n, forcast_size), dates of the target rows.
        If ``n <= 0`` all three arrays are empty with shape (0,).
    """
    target_values = df[target].values
    date_values = df[date].values
    n_samples = len(target_values) - window_size - forcast_size + 1

    data_list = []
    dap_list = []
    date_list = []
    for start in range(n_samples):
        split = start + window_size  # first row of the forecast window
        data_list.append(target_values[start:split].reshape(window_size, 1))
        dap_list.append(target_values[split:split + forcast_size])
        date_list.append(date_values[split:split + forcast_size])
    return np.array(data_list, dtype='float32'), np.array(dap_list, dtype='float32'), np.array(date_list)

## 데이터 불러오기

In [7]:
pip install -U finance-datareader

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Note: you may need to restart the kernel to use updated packages.


In [None]:
# Runtime noted by the author: 21 min 1 s.

import FinanceDataReader as fdr

df_krx = fdr.StockListing('KRX')  # listing of domestic (KRX) stocks
df_krx_list = df_krx.loc[df_krx['Market'] == 'KOSPI', 'Code']  # codes of the rows whose Market is KOSPI

# Data from 2017 onward plus (per the author) 10 extra earlier rows that the
# RSI and Sharpe ratio calculations consume as warm-up.
start = '2016-12-16'

Close_prices = fdr.DataReader(df_krx_list, start).reset_index(drop=True)
# Back-fill missing prices with the next available value in the same column.
# DataFrame.bfill() replaces the deprecated fillna(method='bfill', inplace=True).
Close_prices = Close_prices.bfill()

Close_prices.head(3)

Unnamed: 0,005930,000660,373220,005380,207940,005935,000270,068270,005490,105560,...,009415,011155,004415,000227,002787,014915,002785,001527,001525,002995
0,35860,46400,505000.0,139500,142809,28400,39250,92646,265500,44100,...,3520,17500,1638,5988,9774,34000,4712,12900,14800,18200
1,35900,45150,505000.0,142000,148244,28600,39250,92646,258500,44250,...,3665,17800,1636,6039,9664,34650,4700,13150,14550,17900
2,36240,45150,505000.0,143500,149727,28760,39350,90408,259000,44000,...,3515,17800,1622,6241,9688,33400,4725,13000,14800,18050


In [None]:
# Cache the downloaded prices as CSV under `path`.
# to_csv is called with its defaults, so the row index is written as the first
# CSV column.
path = '/content/drive/MyDrive/BITamin/Project(2024.3~2024.6)'
Close_prices.to_csv(path + '/0525.csv')

In [None]:
# Reload the cached prices instead of downloading them again.
# read_csv is called without index_col, so the saved row index comes back as
# the first column; the filtering cell below skips it with columns[1:] / iloc[:, 1:].
path = '/content/drive/MyDrive/BITamin/Project(2024.3~2024.6)'
Close_prices = pd.read_csv(path + '/0525.csv')

In [None]:
# Remove stocks at risk of a trading halt: a column whose last three values are
# all equal is treated as halted and dropped.
# columns[1:] skips the first column, which is not a ticker (the original comment
# calls it the date column; assumes Close_prices was re-read from the CSV cache,
# where the first column is the saved row index).
price_cols = Close_prices.columns[1:]
stop_col = [
    col for col in price_cols
    if Close_prices[col].iloc[-1] == Close_prices[col].iloc[-2]
    and Close_prices[col].iloc[-1] == Close_prices[col].iloc[-3]
]

halted = set(stop_col)  # set membership instead of scanning the list per column
Close = Close_prices[[col for col in price_cols if col not in halted]]
# Back-fill gaps, then work with log(1 + price).
# DataFrame.bfill() replaces the deprecated fillna(method='bfill').
Close = np.log1p(Close.bfill())
Close.tail(3)  # shape noted by the author: (1828,927)

Unnamed: 0,005930,000660,373220,005380,207940,005935,000270,068270,005490,105560,...,012205,011155,004415,000227,002787,014915,002785,001527,001525,002995
1825,11.268316,12.206078,12.799402,12.511721,13.554147,11.074436,11.699413,12.119975,12.867474,11.255462,...,8.343078,10.01909,7.453562,9.356257,9.433564,8.719481,8.358432,9.31479,8.61993,9.320181
1826,11.237185,12.199053,12.782689,12.496879,13.529162,11.036517,11.695255,12.100162,12.851869,11.250274,...,8.343078,10.051951,7.46049,9.350189,9.430359,8.712924,8.332308,9.321076,8.601718,9.318387
1827,11.22792,12.240479,12.785494,12.509878,13.52516,11.031675,11.726431,12.096821,12.837347,11.250274,...,8.331105,10.038936,7.456455,9.338822,9.429556,8.712924,8.325064,9.297527,8.603554,9.31479


In [None]:
# Alternative check left commented out by the author (their note: 22 stocks):
#sum(Close.iloc[-190,:]==Close.iloc[-180,:])
#Close.columns[Close.iloc[-190,:]==Close.iloc[-180,:]]

# Count the stocks whose value in the first row equals the value 190 rows from
# the end. Author's notes: these are the 10 stocks that were not yet listed by
# the start of the test data; such matches mean low volatility or a new listing.
(Close.iloc[0, :] == Close.iloc[-190, :]).sum()

10

In [None]:
# Stocks whose first-row value equals the value 190 rows from the end
# (author's note: not yet listed by the start of the test data).
same_as_first_row = Close.iloc[0, :] == Close.iloc[-190, :]
list_1 = Close.columns[same_as_first_row]
Close[list_1]

Unnamed: 0,443060,450080,454910,278470,475150,017860,092790,003830,111380,465770
0,12.007018,10.954327,10.847413,12.668236,9.460398,12.899222,9.281823,11.571204,10.280244,10.392098
1,12.007018,10.954327,10.847413,12.668236,9.460398,12.899222,9.281823,11.571204,10.280244,10.392098
2,12.007018,10.954327,10.847413,12.668236,9.460398,12.899222,9.281823,11.571204,10.280244,10.392098
3,12.007018,10.954327,10.847413,12.668236,9.460398,12.899222,9.281823,11.566476,10.280244,10.392098
4,12.007018,10.954327,10.847413,12.668236,9.460398,12.899222,9.281823,11.561725,10.280244,10.392098
...,...,...,...,...,...,...,...,...,...,...
1823,12.019749,11.408687,11.188593,12.723380,10.062668,11.640448,8.970940,11.614588,10.106469,9.012011
1824,11.983560,11.356283,11.207772,12.668236,10.323874,11.620892,8.973478,11.635152,10.098273,9.022926
1825,11.916395,11.349241,11.194110,12.711386,10.328788,11.601861,8.981053,11.618195,10.104426,9.020511
1826,11.908347,11.330216,11.178864,12.799402,10.252418,11.585255,8.998508,11.621789,10.075380,8.984819


## RSI

In [None]:
def RSICalculator(df, window_size=10):
    """Compute a relative strength index on a 0-1 scale for every column of ``df``.

    Per column: row-over-row relative changes (``pct_change``, first row
    back-filled) are split into gains (change >= 0) and losses (magnitude of
    change < 0). Both are smoothed with an exponentially weighted mean using
    ``alpha = 1 / window_size`` and ``min_periods = window_size``, and the
    index is ``AU / (AU + AD)`` with AU / AD the smoothed gains / losses.

    Args:
        df: Frame with one numeric series per column, rows in time order.
            Column labels may be of any type.
        window_size: Smoothing window; also the number of leading rows dropped
            from the result.

    Returns:
        DataFrame with the same columns as ``df``, the first ``window_size``
        rows removed and the index reset to 0..n-1. An entry is NaN where both
        smoothed gains and smoothed losses are zero.
    """
    pct = df.pct_change().bfill()

    # NaN changes fail both comparisons and therefore count as 0 in both series.
    gains = pct.where(pct >= 0, 0)
    losses = pct.abs().where(pct < 0, 0)

    smoothing = dict(alpha=1 / window_size, min_periods=window_size)
    avg_up = gains.ewm(**smoothing).mean()
    avg_down = losses.ewm(**smoothing).mean()

    rsi = avg_up / (avg_up + avg_down)
    # Drop the warm-up rows (the author's comment: removes the 2016 data).
    return rsi.iloc[window_size:, :].reset_index(drop=True)

In [None]:
# RSI for every stock. RSICalculator yields NaN where smoothed gains and losses
# are both zero; those entries are back-filled from the next row.
# DataFrame.bfill() replaces the deprecated fillna(method='bfill').
RSI = RSICalculator(Close, window_size=10).bfill()
RSI # shape noted by the author: (1815,1405)

Unnamed: 0,005930,000660,373220,005380,207940,005935,000270,068270,005490,105560,...,012205,011155,004415,000227,014915,002787,002785,001527,001525,002995
0,0.572520,0.451355,0.000000,0.863195,0.777484,0.644697,0.580530,0.773471,0.433041,0.101165,...,0.233908,0.636661,0.221961,0.880073,0.440476,0.354057,0.196182,0.594412,0.263366,0.332297
1,0.675373,0.604015,0.000000,0.892077,0.792857,0.635400,0.755010,0.667027,0.361593,0.522843,...,0.389497,0.646747,0.277163,0.946099,0.473329,0.304989,0.352346,0.617634,0.443057,0.332297
2,0.551454,0.521489,0.000000,0.899797,0.671886,0.641150,0.795551,0.636255,0.340512,0.605320,...,0.293431,0.591845,0.194668,0.961568,0.435428,0.271222,0.359439,0.629348,0.521979,0.275215
3,0.397447,0.561608,0.000000,0.865478,0.713643,0.439279,0.745951,0.647115,0.260412,0.382722,...,0.445526,0.637419,0.284258,0.965189,0.441678,0.314336,0.333637,0.691518,0.451084,0.518692
4,0.547407,0.638810,0.000000,0.712888,0.713643,0.544081,0.664780,0.576760,0.324205,0.365776,...,0.427951,0.715360,0.235639,0.965189,0.571003,0.278429,0.333637,0.632996,0.401729,0.365393
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1810,0.461061,0.685650,0.475633,0.564966,0.482498,0.443864,0.504691,0.615186,0.524138,0.676208,...,0.499282,0.864544,0.491217,0.674159,0.502366,0.474674,0.429042,0.501721,0.397547,0.406388
1811,0.419322,0.633030,0.425331,0.511474,0.515083,0.415802,0.444786,0.581599,0.468487,0.644490,...,0.460538,0.646926,0.491217,0.668501,0.478410,0.464624,0.414843,0.528435,0.460761,0.451398
1812,0.511186,0.635056,0.384146,0.601710,0.531406,0.445077,0.515168,0.490802,0.507340,0.672832,...,0.456540,0.561455,0.550884,0.662319,0.463023,0.520892,0.482824,0.571276,0.460761,0.355213
1813,0.483108,0.655043,0.324883,0.630309,0.440322,0.414308,0.499329,0.475890,0.421347,0.672832,...,0.501761,0.516194,0.436921,0.629871,0.446824,0.520892,0.493555,0.583499,0.610416,0.325946


## Sharpe Ratio

In [None]:
def SharpeCaculator(df, window_size=10, risk_free_rate=0.035):
    """Compute a rolling Sharpe-style ratio for every column of ``df``.

    Per column: row-over-row relative changes (``pct_change``, first row
    back-filled) are summarised over a rolling window of ``window_size`` rows,
    and the ratio is ``(rolling mean - risk-free term) / rolling std``.

    Args:
        df: Frame with one numeric series per column, rows in time order.
        window_size: Rolling window length; also the number of leading rows
            dropped from the result. The original code hard-coded 10 for the
            window regardless of this argument; the default keeps that value.
        risk_free_rate: Rate the risk-free term is derived from (the original
            comment describes it as the 10-year treasury yield).

    Returns:
        DataFrame with the same columns as ``df``, the first ``window_size``
        rows removed and the index reset to 0..n-1. Entries are +/-inf or NaN
        where the rolling standard deviation is zero.
    """
    pct = df.pct_change().bfill()

    # Original comment: (10-year treasury yield / 10 years) * (10 of 365 days).
    # The day count now follows window_size instead of a fixed 10.
    # NOTE(review): the "/ 10" is kept as in the original - confirm it is intended.
    window_risk_free = (risk_free_rate / 10) * (window_size / 365)

    windows = pct.rolling(window=window_size)
    # The original filled NaN rolling means with the column mean before dividing;
    # the rolling std is NaN on exactly the same rows, so that fill never affected
    # the ratio and is omitted.
    sharpe = (windows.mean() - window_risk_free) / windows.std()

    # Drop the warm-up rows (the author's comment: removes the 2016 data).
    return sharpe.iloc[window_size:, :].reset_index(drop=True)

In [None]:
# Sharpe-style ratio for every stock. Division by a zero rolling std yields
# +/-inf; those entries are set to 0.
Sharpe = SharpeCaculator(Close, window_size=10, risk_free_rate=0.035)
Sharpe = Sharpe.replace([np.inf, -np.inf], int(0))
Sharpe # shape noted by the author: (1815, 1405)

Unnamed: 0,005930,000660,373220,005380,207940,005935,000270,068270,005490,105560,...,012205,011155,004415,000227,014915,002787,002785,001527,001525,002995
0,-0.044566,-0.148000,0.000000,0.510524,0.277231,0.045618,-0.043759,0.202095,-0.176223,-0.919606,...,-0.567961,0.023606,-0.646313,0.469415,-0.160102,-0.293183,-0.454494,0.078143,-0.344268,-0.357230
1,0.073331,0.223071,0.000000,0.517858,0.180415,-0.039381,0.212109,0.126692,-0.132899,-0.219792,...,-0.353670,-0.044651,-0.564628,0.526846,-0.190931,-0.256130,-0.283852,0.027892,-0.103409,-0.215379
2,-0.148190,0.113055,0.000000,0.486577,0.034872,-0.095629,0.288396,0.237859,-0.180655,-0.063006,...,-0.336243,-0.085046,-0.610059,0.526914,-0.126335,-0.331756,-0.302102,0.098094,-0.102116,-0.439202
3,-0.257244,0.189361,0.000000,0.450723,0.223541,-0.213413,0.267878,0.385263,-0.466991,-0.257941,...,-0.166276,-0.016900,-0.463119,0.634264,-0.201560,-0.266353,-0.401904,0.210911,-0.191456,-0.034764
4,-0.082082,0.242226,0.000000,0.373741,0.397295,-0.006100,0.298737,0.358935,-0.332137,-0.282536,...,-0.275046,0.214261,-0.635437,0.531785,-0.104734,-0.112284,-0.266430,0.092653,-0.070115,-0.116261
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1810,0.048951,0.400749,-0.035988,-0.192725,-0.015121,-0.073634,-0.183913,0.220475,-0.251148,0.152973,...,-0.043362,0.824420,-0.407528,0.400078,-0.040665,-0.204049,-0.139713,0.042756,-0.066436,-0.146774
1811,-0.066861,0.356114,-0.527238,-0.253085,-0.043633,-0.172624,-0.245217,0.010215,-0.302998,0.160540,...,-0.169492,0.313997,-0.407528,0.222937,0.019340,-0.331145,-0.327224,0.117264,0.041066,-0.185216
1812,0.004282,0.381963,-0.643410,-0.031892,0.070216,-0.117147,-0.214316,-0.082855,-0.081491,0.467329,...,-0.682461,0.207997,-0.020169,0.268073,-0.015441,-0.289727,-0.044404,0.403008,0.041066,-0.142425
1813,-0.002129,0.449215,-0.873530,0.262680,-0.026584,-0.117425,-0.037151,-0.156322,-0.374243,0.383552,...,-0.513528,0.078799,-0.352946,0.270142,-0.044434,-0.140539,-0.262697,0.312194,0.205158,-0.219943


## 3차 데이터 생성

In [None]:
# Drop Close's leading warm-up rows last, after RSI and the Sharpe ratio have been
# computed from them (author's note: this removes the 2016 data). RSI and Sharpe
# already lost their first 10 rows inside their calculators.
# The offset is derived from the length difference instead of a hard-coded 10 so
# that re-running this cell is a no-op rather than trimming Close a second time.
Close = Close.iloc[len(Close) - len(RSI):, :]
assert Close.shape == RSI.shape == Sharpe.shape, (
    "Close, RSI and Sharpe must have identical shapes before stacking; "
    "re-run the cells above from the point where Close is created."
)

# Merge into one 3-D array indexed as (feature, time, stock) with features in the
# order Close, RSI, Sharpe.
main = np.stack((Close, RSI, Sharpe), axis=0)
main.shape

(3, 1815, 1405)

In [None]:
# Number of past time steps in each model input window.
window_size = 10
# Number of future time steps predicted from each window.
forcast_size= 5
# Batch size of the training DataLoader.
batch_size = 32
# Name of the date column in the per-stock frames.
date = 'Date'

In [None]:
# Number of time steps (axis 1 of `main`).
main_len = main.shape[1]

# Test span: 180 rows to forecast plus one leading input window.
test_len = 180 + window_size
# Everything before the test span is available for training.
train_len = main_len - test_len
# One third of the training span, rounded; the training cells hold out this many
# trailing windows for validation.
valid_len = int(round(train_len*(1/3)))

test_len,train_len,valid_len

(190, 1625, 542)

In [None]:
# Number of test rows that receive a forecast (test span minus the leading window).
test_y_size = test_len - window_size
# Integer positions 0..main_len-1 stand in for dates.
days = pd.DataFrame({'Date': list(range(main_len))})
# Date rows of the forecast period; the training cells start their result frames
# from this. The slice needs the trailing colon: without it iloc selects the
# single row at -test_y_size instead of the last test_y_size rows.
ex = days.iloc[-test_y_size:, :].reset_index(drop=True)

## 임시

## 실행 (1/3)

In [None]:
# Runtime noted by the author: 28 min 15 s.
# Part 1 of 3: fit one DLinear model per stock for column positions 0-400 of
# `Close` and append each stock's test-period forecast to `ex` as a column.
ex = days.iloc[-test_y_size:, :].reset_index(drop=True)

epoch = 30  # epochs per training pass (kept separate from the loop variable)
lr = 0.001

for i in range(401):

    targets = Close.columns[i]

    # main[:, rows, i] has shape (3, rows): log close, RSI and Sharpe ratio of
    # stock i. Transposed, it becomes one row per time step.
    feature_columns = [targets, 'RSI', 'Sharpe_ratio']
    train_df_fe = pd.concat(
        [days[['Date']].iloc[:-test_len].reset_index(drop=True),
         pd.DataFrame(np.transpose(main[:, :-test_len, i]), columns=feature_columns)],
        axis=1)
    test_df_fe = pd.concat(
        [days[['Date']].iloc[-test_len:].reset_index(drop=True),
         pd.DataFrame(np.transpose(main[:, -test_len:, i]), columns=feature_columns)],
        axis=1)

    # time_slide_df reads only the date and target columns, so the RSI and
    # Sharpe_ratio columns are not part of the model input.
    train_x1, train_y1, train_date1 = time_slide_df(train_df_fe, window_size, forcast_size, date, targets)
    test_x1, test_y1, test_date1 = time_slide_df(test_df_fe, window_size, forcast_size, date, targets)

    # Chronological split: the last valid_len windows are held out for validation.
    train_ds1 = Data(train_x1[:-valid_len], train_y1[:-valid_len])
    valid_ds1 = Data(train_x1[-valid_len:], train_y1[-valid_len:])
    test_ds1 = Data(test_x1, test_y1)

    train_dl1 = DataLoader(train_ds1, batch_size=batch_size, shuffle=False)
    # Validation and test data are each served as a single full batch.
    valid_dl1 = DataLoader(valid_ds1, batch_size=train_x1[-valid_len:].shape[0], shuffle=False)
    test_dl1 = DataLoader(test_ds1, batch_size=test_x1.shape[0], shuffle=False)

    train_loss_list = []
    valid_loss_list = []
    test_loss_list = []
    DLinear_model = LTSF_DLinear(
        window_size=window_size,
        forcast_size=forcast_size,
        kernel_size=25,
        individual=False,
        feature_size=1,
    )
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(DLinear_model.parameters(), lr=lr)
    max_loss = 9999999

    # First pass: `epoch` epochs of training without evaluation.
    # NOTE(review): the loop below trains for another `epoch` epochs, so each
    # model sees 2 * epoch epochs in total. This looks like an accidental
    # duplicate; it is kept because removing it changes the fitted models.
    for epoch_idx in tqdm(range(1, epoch + 1)):
        loss_list = []
        DLinear_model.train()
        for data, target in train_dl1:
            optimizer.zero_grad()
            output = DLinear_model(data)
            # Targets are (batch, forcast_size); add the channel axis of the output.
            loss = criterion(output, target.unsqueeze(-1))
            loss.backward()
            optimizer.step()
            loss_list.append(loss.item())
        train_loss_list.append(np.mean(loss_list))

    # Second pass: train, then evaluate on validation and test data every epoch.
    for epoch_idx in tqdm(range(1, epoch + 1)):
        loss_list = []
        DLinear_model.train()
        for data, target in train_dl1:
            optimizer.zero_grad()
            output = DLinear_model(data)
            loss = criterion(output, target.unsqueeze(-1))
            loss.backward()
            optimizer.step()
            loss_list.append(loss.item())
        train_loss_list.append(np.mean(loss_list))

        DLinear_model.eval()
        with torch.no_grad():
            for data, target in valid_dl1:
                valid_loss = criterion(DLinear_model(data), target.unsqueeze(-1)).item()
                valid_loss_list.append(valid_loss)

            for data, target in test_dl1:
                test_output = DLinear_model(data)
                test_loss = criterion(test_output, target.unsqueeze(-1)).item()
                test_loss_list.append(test_loss)

        # Checkpoint whenever the validation loss improves.
        if valid_loss < max_loss:
            torch.save(DLinear_model, 'DLinear_model.pth')
            max_loss = valid_loss
            dlinear_best_epoch = epoch_idx
            dlinear_best_train_loss = np.mean(loss_list)
            dlinear_best_valid_loss = valid_loss
            dlinear_best_test_loss = test_loss

    # test_output holds the last epoch's predictions for every test window
    # (the test loader yields one batch). Consecutive windows overlap, so every
    # forcast_size-th window is taken to tile the test period without overlap.
    # NOTE(review): these are last-epoch predictions; the checkpoint saved at the
    # best validation loss is never reloaded - confirm this is intended.
    final_output = test_output[::forcast_size].reshape(-1)
    final = pd.DataFrame(final_output.detach().cpu().numpy(), columns=[targets])
    ex = pd.concat([ex, final], axis=1)

100%|██████████| 30/30 [00:02<00:00, 13.02it/s]
100%|██████████| 30/30 [00:02<00:00, 14.27it/s]
100%|██████████| 30/30 [00:01<00:00, 19.12it/s]
100%|██████████| 30/30 [00:01<00:00, 16.14it/s]
100%|██████████| 30/30 [00:01<00:00, 18.77it/s]
100%|██████████| 30/30 [00:01<00:00, 16.40it/s]
100%|██████████| 30/30 [00:02<00:00, 14.76it/s]
100%|██████████| 30/30 [00:02<00:00, 12.36it/s]
100%|██████████| 30/30 [00:01<00:00, 18.99it/s]
100%|██████████| 30/30 [00:01<00:00, 16.39it/s]
100%|██████████| 30/30 [00:01<00:00, 19.11it/s]
100%|██████████| 30/30 [00:01<00:00, 16.16it/s]
100%|██████████| 30/30 [00:01<00:00, 18.09it/s]
100%|██████████| 30/30 [00:02<00:00, 11.51it/s]
100%|██████████| 30/30 [00:01<00:00, 19.33it/s]
100%|██████████| 30/30 [00:01<00:00, 16.28it/s]
100%|██████████| 30/30 [00:01<00:00, 18.89it/s]
100%|██████████| 30/30 [00:01<00:00, 16.21it/s]
100%|██████████| 30/30 [00:01<00:00, 18.92it/s]
100%|██████████| 30/30 [00:02<00:00, 12.87it/s]
100%|██████████| 30/30 [00:02<00:00, 14.

In [None]:
# Save the part-1 forecasts; `path` is the directory set in the CSV cache cells.
ex.to_csv(path+'/res_1.csv')

## 실행 (2/3)

In [None]:
# Runtime noted by the author: 28 min 6 s.
# Part 2 of 3: fit one DLinear model per stock for column positions 401-800 of
# `Close` and append each stock's test-period forecast to `ex1` as a column.
ex1 = days.iloc[-test_y_size:, :].reset_index(drop=True)

epoch = 30  # epochs per training pass (kept separate from the loop variable)
lr = 0.001

for i in range(401, 801):

    targets = Close.columns[i]

    # main[:, rows, i] has shape (3, rows): log close, RSI and Sharpe ratio of
    # stock i. Transposed, it becomes one row per time step.
    feature_columns = [targets, 'RSI', 'Sharpe_ratio']
    train_df_fe = pd.concat(
        [days[['Date']].iloc[:-test_len].reset_index(drop=True),
         pd.DataFrame(np.transpose(main[:, :-test_len, i]), columns=feature_columns)],
        axis=1)
    test_df_fe = pd.concat(
        [days[['Date']].iloc[-test_len:].reset_index(drop=True),
         pd.DataFrame(np.transpose(main[:, -test_len:, i]), columns=feature_columns)],
        axis=1)

    # time_slide_df reads only the date and target columns, so the RSI and
    # Sharpe_ratio columns are not part of the model input.
    train_x1, train_y1, train_date1 = time_slide_df(train_df_fe, window_size, forcast_size, date, targets)
    test_x1, test_y1, test_date1 = time_slide_df(test_df_fe, window_size, forcast_size, date, targets)

    # Chronological split: the last valid_len windows are held out for validation.
    train_ds1 = Data(train_x1[:-valid_len], train_y1[:-valid_len])
    valid_ds1 = Data(train_x1[-valid_len:], train_y1[-valid_len:])
    test_ds1 = Data(test_x1, test_y1)

    train_dl1 = DataLoader(train_ds1, batch_size=batch_size, shuffle=False)
    # Validation and test data are each served as a single full batch.
    valid_dl1 = DataLoader(valid_ds1, batch_size=train_x1[-valid_len:].shape[0], shuffle=False)
    test_dl1 = DataLoader(test_ds1, batch_size=test_x1.shape[0], shuffle=False)

    train_loss_list = []
    valid_loss_list = []
    test_loss_list = []
    DLinear_model = LTSF_DLinear(
        window_size=window_size,
        forcast_size=forcast_size,
        kernel_size=25,
        individual=False,
        feature_size=1,
    )
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(DLinear_model.parameters(), lr=lr)
    max_loss = 9999999

    # First pass: `epoch` epochs of training without evaluation.
    # NOTE(review): the loop below trains for another `epoch` epochs, so each
    # model sees 2 * epoch epochs in total. This looks like an accidental
    # duplicate; it is kept because removing it changes the fitted models.
    for epoch_idx in tqdm(range(1, epoch + 1)):
        loss_list = []
        DLinear_model.train()
        for data, target in train_dl1:
            optimizer.zero_grad()
            output = DLinear_model(data)
            # Targets are (batch, forcast_size); add the channel axis of the output.
            loss = criterion(output, target.unsqueeze(-1))
            loss.backward()
            optimizer.step()
            loss_list.append(loss.item())
        train_loss_list.append(np.mean(loss_list))

    # Second pass: train, then evaluate on validation and test data every epoch.
    for epoch_idx in tqdm(range(1, epoch + 1)):
        loss_list = []
        DLinear_model.train()
        for data, target in train_dl1:
            optimizer.zero_grad()
            output = DLinear_model(data)
            loss = criterion(output, target.unsqueeze(-1))
            loss.backward()
            optimizer.step()
            loss_list.append(loss.item())
        train_loss_list.append(np.mean(loss_list))

        DLinear_model.eval()
        with torch.no_grad():
            for data, target in valid_dl1:
                valid_loss = criterion(DLinear_model(data), target.unsqueeze(-1)).item()
                valid_loss_list.append(valid_loss)

            for data, target in test_dl1:
                test_output = DLinear_model(data)
                test_loss = criterion(test_output, target.unsqueeze(-1)).item()
                test_loss_list.append(test_loss)

        # Checkpoint whenever the validation loss improves.
        if valid_loss < max_loss:
            torch.save(DLinear_model, 'DLinear_model.pth')
            max_loss = valid_loss
            dlinear_best_epoch = epoch_idx
            dlinear_best_train_loss = np.mean(loss_list)
            dlinear_best_valid_loss = valid_loss
            dlinear_best_test_loss = test_loss

    # test_output holds the last epoch's predictions for every test window
    # (the test loader yields one batch). Consecutive windows overlap, so every
    # forcast_size-th window is taken to tile the test period without overlap.
    # NOTE(review): these are last-epoch predictions; the checkpoint saved at the
    # best validation loss is never reloaded - confirm this is intended.
    final_output = test_output[::forcast_size].reshape(-1)
    final = pd.DataFrame(final_output.detach().cpu().numpy(), columns=[targets])
    ex1 = pd.concat([ex1, final], axis=1)

100%|██████████| 30/30 [00:01<00:00, 18.91it/s]
100%|██████████| 30/30 [00:01<00:00, 16.43it/s]
100%|██████████| 30/30 [00:01<00:00, 19.00it/s]
100%|██████████| 30/30 [00:01<00:00, 16.12it/s]
100%|██████████| 30/30 [00:01<00:00, 19.01it/s]
100%|██████████| 30/30 [00:02<00:00, 11.91it/s]
100%|██████████| 30/30 [00:01<00:00, 16.76it/s]
100%|██████████| 30/30 [00:01<00:00, 16.12it/s]
100%|██████████| 30/30 [00:01<00:00, 18.99it/s]
100%|██████████| 30/30 [00:01<00:00, 16.36it/s]
100%|██████████| 30/30 [00:01<00:00, 18.84it/s]
100%|██████████| 30/30 [00:02<00:00, 13.77it/s]
100%|██████████| 30/30 [00:02<00:00, 13.82it/s]
100%|██████████| 30/30 [00:01<00:00, 16.22it/s]
100%|██████████| 30/30 [00:01<00:00, 19.19it/s]
100%|██████████| 30/30 [00:01<00:00, 16.03it/s]
100%|██████████| 30/30 [00:01<00:00, 18.86it/s]
100%|██████████| 30/30 [00:01<00:00, 16.15it/s]
100%|██████████| 30/30 [00:02<00:00, 13.71it/s]
100%|██████████| 30/30 [00:02<00:00, 13.65it/s]
100%|██████████| 30/30 [00:01<00:00, 18.

In [None]:
# Preview the first rows of the part-2 forecasts.
ex1.head()

Unnamed: 0,Date,000490,001630,122900,000480,092220,006390,004380,011760,013890,...,900140_y,093050_y,432320_y,026890_y,034310_y,071970_y,009470_y,005810_y,020000_y,011930_y
0,1635,9.257268,10.898163,9.116613,9.028359,7.438001,9.69894,9.378936,9.91058,10.016859,...,7.805194,9.561024,8.184832,8.894729,9.463487,9.40059,9.819673,10.057652,9.807188,7.991758
1,1636,9.243171,10.918475,9.115921,9.04192,7.384022,9.749578,9.3655,9.911255,10.03345,...,7.830549,9.575493,8.190775,8.865266,9.455796,9.319863,9.816538,10.072473,9.797405,7.938889
2,1637,9.266404,10.919818,9.109998,9.040177,7.395966,9.718382,9.380121,9.917602,10.006729,...,7.834327,9.5693,8.183854,8.867268,9.464965,9.326224,9.791315,10.080141,9.83605,7.959924
3,1638,9.270108,10.875464,9.113674,9.050629,7.371703,9.716755,9.397004,9.933447,10.05016,...,7.832393,9.576867,8.193782,8.889754,9.458014,9.335649,9.793776,10.087221,9.818611,7.96833
4,1639,9.279221,10.907961,9.122095,9.062838,7.459572,9.720527,9.41439,9.943252,10.010218,...,7.827249,9.569177,8.19065,8.866941,9.462282,9.35236,9.75828,10.092585,9.801464,8.006415


In [None]:
# Save the part-2 forecasts; `path` is the directory set in the CSV cache cells.
ex1.to_csv(path+'/res_2.csv')

## 실행 (3/3)

In [None]:
# Runtime noted by the author: 42 min 16 s.
# Part 3 of 3: fit one DLinear model per stock for column positions 801-1404 of
# `Close` and append each stock's test-period forecast to `ex2` as a column.
ex2 = days.iloc[-test_y_size:, :].reset_index(drop=True)

epoch = 30  # epochs per training pass (kept separate from the loop variable)
lr = 0.001

for i in range(801, 1405):

    targets = Close.columns[i]

    # main[:, rows, i] has shape (3, rows): log close, RSI and Sharpe ratio of
    # stock i. Transposed, it becomes one row per time step.
    feature_columns = [targets, 'RSI', 'Sharpe_ratio']
    train_df_fe = pd.concat(
        [days[['Date']].iloc[:-test_len].reset_index(drop=True),
         pd.DataFrame(np.transpose(main[:, :-test_len, i]), columns=feature_columns)],
        axis=1)
    test_df_fe = pd.concat(
        [days[['Date']].iloc[-test_len:].reset_index(drop=True),
         pd.DataFrame(np.transpose(main[:, -test_len:, i]), columns=feature_columns)],
        axis=1)

    # time_slide_df reads only the date and target columns, so the RSI and
    # Sharpe_ratio columns are not part of the model input.
    train_x1, train_y1, train_date1 = time_slide_df(train_df_fe, window_size, forcast_size, date, targets)
    test_x1, test_y1, test_date1 = time_slide_df(test_df_fe, window_size, forcast_size, date, targets)

    # Chronological split: the last valid_len windows are held out for validation.
    train_ds1 = Data(train_x1[:-valid_len], train_y1[:-valid_len])
    valid_ds1 = Data(train_x1[-valid_len:], train_y1[-valid_len:])
    test_ds1 = Data(test_x1, test_y1)

    train_dl1 = DataLoader(train_ds1, batch_size=batch_size, shuffle=False)
    # Validation and test data are each served as a single full batch.
    valid_dl1 = DataLoader(valid_ds1, batch_size=train_x1[-valid_len:].shape[0], shuffle=False)
    test_dl1 = DataLoader(test_ds1, batch_size=test_x1.shape[0], shuffle=False)

    train_loss_list = []
    valid_loss_list = []
    test_loss_list = []
    DLinear_model = LTSF_DLinear(
        window_size=window_size,
        forcast_size=forcast_size,
        kernel_size=25,
        individual=False,
        feature_size=1,
    )
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(DLinear_model.parameters(), lr=lr)
    max_loss = 9999999

    # First pass: `epoch` epochs of training without evaluation.
    # NOTE(review): the loop below trains for another `epoch` epochs, so each
    # model sees 2 * epoch epochs in total. This looks like an accidental
    # duplicate; it is kept because removing it changes the fitted models.
    for epoch_idx in tqdm(range(1, epoch + 1)):
        loss_list = []
        DLinear_model.train()
        for data, target in train_dl1:
            optimizer.zero_grad()
            output = DLinear_model(data)
            # Targets are (batch, forcast_size); add the channel axis of the output.
            loss = criterion(output, target.unsqueeze(-1))
            loss.backward()
            optimizer.step()
            loss_list.append(loss.item())
        train_loss_list.append(np.mean(loss_list))

    # Second pass: train, then evaluate on validation and test data every epoch.
    for epoch_idx in tqdm(range(1, epoch + 1)):
        loss_list = []
        DLinear_model.train()
        for data, target in train_dl1:
            optimizer.zero_grad()
            output = DLinear_model(data)
            loss = criterion(output, target.unsqueeze(-1))
            loss.backward()
            optimizer.step()
            loss_list.append(loss.item())
        train_loss_list.append(np.mean(loss_list))

        DLinear_model.eval()
        with torch.no_grad():
            for data, target in valid_dl1:
                valid_loss = criterion(DLinear_model(data), target.unsqueeze(-1)).item()
                valid_loss_list.append(valid_loss)

            for data, target in test_dl1:
                test_output = DLinear_model(data)
                test_loss = criterion(test_output, target.unsqueeze(-1)).item()
                test_loss_list.append(test_loss)

        # Checkpoint whenever the validation loss improves.
        if valid_loss < max_loss:
            torch.save(DLinear_model, 'DLinear_model.pth')
            max_loss = valid_loss
            dlinear_best_epoch = epoch_idx
            dlinear_best_train_loss = np.mean(loss_list)
            dlinear_best_valid_loss = valid_loss
            dlinear_best_test_loss = test_loss

    # test_output holds the last epoch's predictions for every test window
    # (the test loader yields one batch). Consecutive windows overlap, so every
    # forcast_size-th window is taken to tile the test period without overlap.
    # NOTE(review): these are last-epoch predictions; the checkpoint saved at the
    # best validation loss is never reloaded - confirm this is intended.
    final_output = test_output[::forcast_size].reshape(-1)
    final = pd.DataFrame(final_output.detach().cpu().numpy(), columns=[targets])
    ex2 = pd.concat([ex2, final], axis=1)

100%|██████████| 30/30 [00:01<00:00, 16.36it/s]
100%|██████████| 30/30 [00:02<00:00, 11.07it/s]
100%|██████████| 30/30 [00:01<00:00, 18.96it/s]
100%|██████████| 30/30 [00:01<00:00, 16.31it/s]
100%|██████████| 30/30 [00:01<00:00, 19.05it/s]
100%|██████████| 30/30 [00:01<00:00, 16.52it/s]
100%|██████████| 30/30 [00:01<00:00, 18.93it/s]
100%|██████████| 30/30 [00:02<00:00, 12.10it/s]
100%|██████████| 30/30 [00:01<00:00, 16.53it/s]
100%|██████████| 30/30 [00:01<00:00, 16.24it/s]
100%|██████████| 30/30 [00:01<00:00, 19.30it/s]
100%|██████████| 30/30 [00:01<00:00, 16.19it/s]
100%|██████████| 30/30 [00:01<00:00, 18.95it/s]
100%|██████████| 30/30 [00:02<00:00, 14.38it/s]
100%|██████████| 30/30 [00:02<00:00, 13.62it/s]
100%|██████████| 30/30 [00:01<00:00, 16.45it/s]
100%|██████████| 30/30 [00:01<00:00, 19.04it/s]
100%|██████████| 30/30 [00:01<00:00, 16.12it/s]
100%|██████████| 30/30 [00:01<00:00, 18.62it/s]
100%|██████████| 30/30 [00:01<00:00, 16.04it/s]
100%|██████████| 30/30 [00:02<00:00, 13.

In [None]:
# Preview the first rows of the part-3 forecasts.
ex2.head()

Unnamed: 0,Date,000500_y,377740_y,097520_y,249420_y,357120_y,003960_y,003300_y,126560_y,108670_y,...,012205,011155,004415,000227,014915,002787,002785,001527,001525,002995
0,1635,9.992087,8.570997,10.159151,9.7013,8.574469,10.266736,9.338449,8.016565,10.862996,...,8.261745,9.923352,7.526543,9.243686,8.652568,9.322583,8.320328,9.258341,8.63101,9.569148
1,1636,9.96313,8.605833,10.242762,9.687303,8.579151,10.297446,9.328001,8.022245,10.872506,...,8.270052,9.908205,7.475145,9.181864,8.646002,9.325887,8.324165,9.244159,8.633807,9.647989
2,1637,9.96736,8.562182,10.149048,9.677042,8.58486,10.280785,9.321897,8.022642,10.874975,...,8.24659,9.907648,7.5025,9.218321,8.667654,9.305037,8.321079,9.253056,8.608589,9.569782
3,1638,9.963386,8.571014,10.198954,9.66685,8.582177,10.3095,9.324792,8.03097,10.879111,...,8.252639,9.867992,7.531542,9.23509,8.646689,9.295206,8.342647,9.264004,8.589811,9.629152
4,1639,9.997019,8.598892,10.197044,9.656306,8.591508,10.302402,9.336605,8.038826,10.859241,...,8.196548,9.866273,7.480431,9.213776,8.670781,9.300656,8.305517,9.281704,8.610929,9.613576


In [None]:
# Save the part-3 forecasts; `path` is the directory set in the CSV cache cells.
ex2.to_csv(path+'/res_3.csv')

## 결과 도출

In [None]:
# Join the three partial results side by side. ex1 and ex2 drop their first
# column (Date) because ex already carries it.
result = pd.concat([ex,ex1.iloc[:,1:],ex2.iloc[:,1:]],axis=1)
result.head()

Unnamed: 0,Date,005930,000660,373220,005380,207940,005935,000270,068270,005490,...,012205,011155,004415,000227,014915,002787,002785,001527,001525,002995
0,1635,11.100691,11.642915,13.192471,12.147408,13.553034,10.903432,11.260262,11.864115,13.203036,...,8.261745,9.923352,7.526543,9.243686,8.652568,9.322583,8.320328,9.258341,8.63101,9.569148
1,1636,11.091111,11.646854,13.196395,12.128482,13.554383,10.884483,11.241193,11.874046,13.203304,...,8.270052,9.908205,7.475145,9.181864,8.646002,9.325887,8.324165,9.244159,8.633807,9.647989
2,1637,11.12406,11.640551,13.195735,12.13238,13.575973,10.89159,11.260768,11.841694,13.243601,...,8.24659,9.907648,7.5025,9.218321,8.667654,9.305037,8.321079,9.253056,8.608589,9.569782
3,1638,11.104518,11.653893,13.193955,12.124678,13.573547,10.887918,11.272438,11.845758,13.227606,...,8.252639,9.867992,7.531542,9.23509,8.646689,9.295206,8.342647,9.264004,8.589811,9.629152
4,1639,11.105541,11.674006,13.194737,12.145208,13.579659,10.910362,11.312941,11.873485,13.2695,...,8.196548,9.866273,7.480431,9.213776,8.670781,9.300656,8.305517,9.281704,8.610929,9.613576


In [None]:
# Show the last five forecast rows.
result.iloc[-5:,:]

Unnamed: 0,Date,005930,000660,373220,005380,207940,005935,000270,068270,005490,...,012205,011155,004415,000227,014915,002787,002785,001527,001525,002995
175,1810,11.253556,12.129617,12.86342,12.421836,13.573939,11.070396,11.626419,12.15531,12.895407,...,8.350849,10.06358,7.501264,9.336016,8.728863,9.442623,8.32725,9.319905,8.61215,9.324968
176,1811,11.243,12.120481,12.864588,12.39725,13.575138,11.058414,11.626575,12.158775,12.892396,...,8.35177,10.047602,7.43642,9.280805,8.713924,9.440975,8.330853,9.29759,8.617273,9.415953
177,1812,11.273633,12.124565,12.867356,12.398986,13.594639,11.064571,11.627872,12.12633,12.904298,...,8.328597,10.043464,7.465173,9.315573,8.728234,9.425884,8.327009,9.295703,8.602128,9.324926
178,1813,11.25863,12.129949,12.874798,12.400831,13.58997,11.058378,11.647644,12.12903,12.906766,...,8.330538,10.00383,7.499149,9.333435,8.716496,9.416591,8.352286,9.30114,8.6041,9.39399
179,1814,11.257821,12.143148,12.871525,12.41828,13.594515,11.07445,11.672716,12.154096,12.934448,...,8.28079,9.997736,7.445031,9.313881,8.736869,9.415907,8.314452,9.315875,8.633033,9.371143


In [None]:
# Save the combined forecasts; `path` is the directory set in the CSV cache cells.
result.to_csv(path+'/앞단 결과.csv')

In [None]:
# Per stock (the Date column is skipped): forecast in the last row minus the
# forecast five rows from the end, sorted descending; show the six largest.
((result.iloc[-1,1:] - result.iloc[-5,1:])).sort_values(ascending=False)[:6]

002870      0.190820
267260      0.169233
002710      0.158987
001470_y    0.143580
011000      0.139071
002690      0.138226
dtype: float64