## Import

In [1]:
import pandas as pd
import numpy as np
import random
import os

from tqdm import tqdm
from statsmodels.tsa.arima.model import ARIMA

import warnings
warnings.filterwarnings("ignore")

In [2]:
def seed_everything(seed):
    """Seed the random number sources this notebook relies on.

    Seeds Python's ``random`` module and NumPy's global RNG with ``seed`` and
    stores ``str(seed)`` in the ``PYTHONHASHSEED`` environment variable.

    NOTE(review): PYTHONHASHSEED is presumably only honoured by interpreters
    started after this call (e.g. subprocesses), not by the running kernel -
    confirm if hash determinism matters here.

    Args:
        seed: Value passed to ``random.seed`` and ``np.random.seed``.
    """
    random.seed(seed)
    hash_seed = str(seed)
    os.environ['PYTHONHASHSEED'] = hash_seed
    np.random.seed(seed)

seed_everything(seed=42)  # fix the seed for the whole notebook

## Load Data

In [3]:
# Load the training data from the working directory. Later cells read the
# '종목코드', '일자', '종가' and '거래량' columns from this frame.
train = pd.read_csv('./train.csv')

## Model Definition, Training, and Inference

In [12]:
# Forecast settings.
ARIMA_ORDER = (2, 1, 2)      # (p, d, q): AR lags, differencing order, MA lags
FORECAST_STEPS = 15          # forecast the next 15 trading days
FLAT_FORECAST_RETURN = -1    # stored when the first and last forecasts are equal

# Distinct stock codes present in the training data.
unique_codes = train['종목코드'].unique()

# One {code, final_return} record per stock. The result frame is built once
# after the loop instead of being re-concatenated on every iteration.
records = []

# Fit one model per stock code and forecast from it.
for code in tqdm(unique_codes):

    # Build the training frame for this stock. .copy() makes it independent of
    # `train`, so the column assignments below act on this frame itself.
    train_close = train.loc[train['종목코드'] == code, ['일자', '종가', '거래량']].copy()
    # Replace zero volume with 1. This has to be a single .loc assignment: the
    # chained form `df[mask]['col'] = 1` writes to a temporary copy and leaves
    # the frame unchanged.
    train_close.loc[train_close['거래량'] == 0, '거래량'] = 1
    # Modelled quantity: close price times volume.
    train_close['노출도'] = train_close['종가'] * train_close['거래량']
    train_close['일자'] = pd.to_datetime(train_close['일자'], format='%Y%m%d')
    # Index by date. The stable sort is a no-op when rows are already in date
    # order and guarantees chronological input to the model otherwise.
    train_close = train_close.set_index('일자').sort_index(kind='stable')
    tc = train_close['노출도']

    # Declare, fit and forecast.
    model = ARIMA(tc, order=ARIMA_ORDER)
    model_fit = model.fit()
    predictions = model_fit.forecast(steps=FORECAST_STEPS)

    # Return over the forecast horizon: (last - first) / first.
    first_forecast = predictions.iloc[0]
    final_return = predictions.iloc[-1] - first_forecast
    if final_return == 0:
        # A completely flat forecast is stored as FLAT_FORECAST_RETURN; this
        # also avoids 0 / 0 when every forecast is zero.
        final_return = FLAT_FORECAST_RETURN
    else:
        # NOTE(review): if first_forecast is 0 here the result is +/-inf, and a
        # negative first_forecast flips the sign of the return - confirm
        # whether such forecasts can occur and how they should be ranked.
        final_return /= first_forecast

    records.append({'종목코드': code, 'final_return': final_return})

# Frame holding the inference result for every stock code.
results_df = pd.DataFrame(records, columns=['종목코드', 'final_return'])

100%|██████████| 2000/2000 [09:54<00:00,  3.37it/s]


In [13]:
# Rank stocks by forecast return, highest return first. method='first' breaks
# ties by order of appearance, so every stock gets a distinct rank.
return_rank = results_df['final_return'].rank(method='first', ascending=False)
results_df['순위'] = return_rank.astype('int')
results_df


Unnamed: 0,종목코드,final_return,순위
0,A060310,-0.979439,1973
1,A095570,0.149555,853
2,A006840,-0.038906,1562
3,A054620,0.612859,203
4,A265520,-0.082208,1672
...,...,...,...
1995,A189980,0.084865,1061
1996,A000540,0.013831,1355
1997,A003280,0.014230,1351
1998,A037440,0.486948,301


## Submit

In [14]:
# Load the sample submission from the working directory. Its '종목코드' column
# is used by a later cell as the left side of the merge that builds the
# submission, so the submission keeps this file's row order.
sample_submission = pd.read_csv('./sample_submission.csv')
sample_submission

Unnamed: 0,종목코드,순위
0,A000020,1
1,A000040,2
2,A000050,3
3,A000070,4
4,A000080,5
...,...,...
1995,A375500,1996
1996,A378850,1997
1997,A383220,1998
1998,A383310,1999


In [15]:
# Attach each stock's rank to the codes listed in the sample submission.
# The left join keeps every row of the sample file, in its order.
submission_codes = sample_submission[['종목코드']]
rank_by_code = results_df[['종목코드', '순위']]
submission = submission_codes.merge(rank_by_code, how='left', on='종목코드')
submission

Unnamed: 0,종목코드,순위
0,A000020,1903
1,A000040,529
2,A000050,1646
3,A000070,606
4,A000080,1680
...,...,...
1995,A375500,1458
1996,A378850,395
1997,A383220,1549
1998,A383310,377


In [16]:
# Write the submission to CSV in the working directory; index=False leaves the
# DataFrame index out of the file.
submission.to_csv('submission0712.csv', index=False)