In [71]:
from tqdm import tqdm
from statsmodels.tsa.arima.model import ARIMA
import pmdarima as pm

import warnings
warnings.filterwarnings("ignore")

In [75]:
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import adfuller

def stationarity(timeseries):
    #1) Seasonality 
    for _ in range(5): #maximum differencing 5 times
        try:
            exception = 0
            result = adfuller(timeseries)
        except:
            exception = 1 # when input closing price data is the same, error occurs -> break
            break
        if result[1] < 0.05: # pvalue < 0.05
            break
            
        # differencing
        timeseries = timeseries.diff().dropna() #timeseries.diff() = timeseries - timeseries.shift()     
    
    #2) Trend
    if exception < 1:
        if result[1] > 0.05: #If Still not stationary, apply log transformation
            timeseries = timeseries.apply(np.log)
            logtrans = 1
    return timeseries

In [76]:
train = pd.read_csv("./open/train.csv")

In [78]:
# 추론 결과를 저장하기 위한 dataframe 생성
results_df = pd.DataFrame(columns=['종목코드', 'final_return'])

# train 데이터에 존재하는 독립적인 종목코드 추출
unique_codes = train['종목코드'].unique()

# 각 종목코드에 대해서 모델 학습 및 추론 반복
for code in tqdm(unique_codes):
    logtrans = 0
    # 학습 데이터 생성
    train_close = train[train['종목코드'] == code][['일자', '종가']]
    train_close['일자'] = pd.to_datetime(train_close['일자'], format='%Y%m%d')
    train_close['종가'] = stationarity(train_close['종가'])
    train_close = train_close.dropna()
    train_close.set_index('일자', inplace=True)
    tc = train_close['종가']
    
    # 모델 선언, 학습 및 추론
    auto_arima_model = pm.auto_arima(tc, seasonal=False,m=1)
    predictions = auto_arima_model.predict(15)
    
#    model = ARIMA(tc, order=(2, 1, 2))
#    model_fit = model.fit()
#    predictions = model_fit.forecast(steps=15) # 향후 15개의 거래일에 대해서 예측
    
    if logtrans == 1:
        predictions = np.exp(predictions)
    # 최종 수익률 계산
    if predictions.iloc[0] == 0:
        final_return = 0
    else:
        final_return = (predictions.iloc[-1] - predictions.iloc[0]) / predictions.iloc[0]
    
    # 결과 저장
    results_df = pd.concat([results_df, pd.DataFrame({'종목코드': [code], 'final_return': [final_return]})])

100%|███████████████████████████████████████| 2000/2000 [40:06<00:00,  1.20s/it]


In [91]:
results_df['final_return'] = results_df['final_return'].fillna(0)

In [92]:
results_df

Unnamed: 0,종목코드,final_return
0,A060310,0.000000
0,A095570,0.000000
0,A006840,-0.000807
0,A054620,0.000000
0,A265520,0.000000
...,...,...
0,A189980,-0.364562
0,A000540,-1.000000
0,A003280,-1.000000
0,A037440,0.000000


In [93]:
results_df['순위'] = results_df['final_return'].rank(method='first', ascending=False).astype('int') # 각 순위를 중복없이 생성
results_df

Unnamed: 0,종목코드,final_return,순위
0,A060310,0.000000,147
0,A095570,0.000000,148
0,A006840,-0.000807,876
0,A054620,0.000000,149
0,A265520,0.000000,150
...,...,...,...
0,A189980,-0.364562,1019
0,A000540,-1.000000,1711
0,A003280,-1.000000,1712
0,A037440,0.000000,866


In [97]:
submission = results_df.drop('final_return',axis=1)

In [98]:
submission.to_csv("submission_stationarity_autoarima.csv")