In [11]:
import pandas as pd
import numpy as np
import os
import FinanceDataReader as fdr
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from tqdm import tqdm
from sklearn.model_selection import GridSearchCV

In [12]:
path = './open'
list_name = 'Stock_List.csv'
sample_name = 'sample_submission.csv'

stock_list = pd.read_csv(os.path.join(path,list_name))
stock_list['종목코드'] = stock_list['종목코드'].apply(lambda x : str(x).zfill(6))
stock_list

Unnamed: 0,종목명,종목코드,상장시장
0,삼성전자,005930,KOSPI
1,SK하이닉스,000660,KOSPI
2,NAVER,035420,KOSPI
3,카카오,035720,KOSPI
4,삼성바이오로직스,207940,KOSPI
...,...,...,...
365,맘스터치,220630,KOSDAQ
366,다날,064260,KOSDAQ
367,제이시스메디칼,287410,KOSDAQ
368,크리스에프앤씨,110790,KOSDAQ


In [13]:
# select start, end data

start_date = '20210308'
end_date = '20211105'

start_weekday = pd.to_datetime(start_date).weekday()
max_weeknum = pd.to_datetime(end_date).strftime('%V')
Business_days = pd.DataFrame(pd.date_range(start_date,end_date,freq='B'), columns = ['Date'])

print(f'WEEKDAY of "start_date" : {start_weekday}')
print(f'NUM of WEEKS to "end_date" : {max_weeknum}')
print(f'HOW MANY "Business_days" : {Business_days.shape}', )
display(Business_days.head())

WEEKDAY of "start_date" : 0
NUM of WEEKS to "end_date" : 44
HOW MANY "Business_days" : (175, 1)


Unnamed: 0,Date
0,2021-03-08
1,2021-03-09
2,2021-03-10
3,2021-03-11
4,2021-03-12


In [14]:
# training, predict method
def get_prediction(x_close, y_close, x_close_public):
    
    param_grid = {
        'max_depth': [None, 50, 80, 110],
        'criterion': ["mse", "mae"],
        'n_estimators': [25, 50, 100, 150, 200]
    }

    prediction_close = 0.0
    for i, model in enumerate(models):
        if "max_depth" in dir(model):  # randomforest
            grid_search = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
            grid_search.fit(x_close, y_close)
            best_grid = grid_search.best_estimator_
            prediction_close += best_grid.predict(np.expand_dims(x_close_public, 0))*models_rate[i]
        else:            
            model.fit(x_close, y_close)
            prediction_close += model.predict(np.expand_dims(x_close_public, 0))*models_rate[i]
    
    return prediction_close

sample_name = 'sample_submission.csv'
sample_submission = pd.read_csv(os.path.join(path,sample_name))

In [15]:
model1 = LinearRegression()
model2 = RandomForestRegressor(criterion="mae", n_estimators=200)
models = [model1,model2]


model1_rate = 0.7
model2_rate = 0.3
models_rate = [model1_rate,model2_rate]

for code in tqdm(stock_list['종목코드'].values):
    data = fdr.DataReader(code, start = start_date, end = end_date)[['Close']].reset_index()
    data = pd.merge(Business_days, data, how = 'outer')
    data['weekday'] = data.Date.apply(lambda x : x.weekday())
    data['weeknum'] = data.Date.apply(lambda x : x.strftime('%V'))
    data.Close = data.Close.ffill()
    data.Close = data.Close.bfill()

    data_close = pd.pivot_table(data = data, values = 'Close', columns = 'weekday', index = 'weeknum')

    # data flatten  # data flatten for reshape
    data_close_flatten = np.ravel(data_close.to_numpy())

    predictions = []

    
    # predict day 1

    # close
    data_close_day1 = data_close_flatten.reshape((-1,1))

    x_close = data_close_day1[:-6]  # training x
    y_close = data_close_day1[1:-5]  # training y
    x_close_public = data_close_day1[-6]  # predict x

    prediction_close = get_prediction(x_close, y_close, x_close_public)
    predictions.append(prediction_close)
    
    # predict day 1 finish
    
    
    # predict day 2
    
    # close
    data_close_day2 = np.append(data_close_flatten, 0)  # padding
    data_close_day2 = data_close_day2.reshape((-1,2))

    x_close = data_close_day2[:-4]
    y_close = data_close_day2[1:-3][:, 1]  # 2일차만 훈련, 예측함
    x_close_public = data_close_day2[-4]   

    prediction_close = get_prediction(x_close, y_close, x_close_public)
    predictions.append(prediction_close)
    
    # predict day 2 finish
    
    
    # predict day 3
    
    # close
    data_close_day3 = np.append(data_close_flatten, 0)
    data_close_day3 = np.insert(data_close_day3, 0, data_close_day3[0])
    data_close_day3 = data_close_day3.reshape((-1, 3))

    x_close = data_close_day3[:-3]
    y_close = data_close_day3[1:-2][:, 2]
    x_close_public = data_close_day3[-3]
        
    prediction_close = get_prediction(x_close, y_close, x_close_public)
    predictions.append(prediction_close)
    
    # predict day 3 finish

    
    # predict day 4
    
    # close
    data_close_day4 = np.insert(data_close_flatten, 0, data_close_flatten[0])
    data_close_day4 = np.insert(data_close_day4, 0, data_close_day4[0])
    data_close_day4 = np.append(data_close_day4, 0)
    data_close_day4 = np.append(data_close_day4, 0)
    data_close_day4 = np.append(data_close_day4, 0)
    data_close_day4 = data_close_day4.reshape((-1, 4))

    x_close = data_close_day4[:-3]
    y_close = data_close_day4[1:-2][:, 3]
    x_close_public = data_close_day4[-3]

    prediction_close = get_prediction(x_close, y_close, x_close_public)
    predictions.append(prediction_close)
    
    # predict day 4 finish
    
    # predict day 5
    
    # close
    data_close_day5 = data_close_flatten.reshape((-1, 5))
    
    x_close = data_close_day5[:-2]
    y_close = data_close_day5[1:-1][:, 4]
    x_close_public = data_close_day5[-2]

    prediction_close = get_prediction(x_close, y_close, x_close_public)
    predictions.append(prediction_close)
    
    # predict day 5 finish
        
    #sample_submission.loc[:,code] = predictions*2
    sample_submission.loc[:4,code] = predictions
sample_submission.isna().sum().sum()

  self.best_estimator_.fit(X, y, **fit_params)
  return asarray(a).ndim
  self.best_estimator_.fit(X, y, **fit_params)
  0%|          | 1/370 [02:46<17:05:09, 166.69s/it]


KeyboardInterrupt: 

In [None]:
sample_submission.to_csv('BASELINE_Linear_rf.csv',index=False)

In [None]:
sample_submission

Unnamed: 0,Day,000060,000080,000100,000120,000150,000240,000250,000270,000660,...,330860,336260,336370,347860,348150,348210,352820,357780,363280,950130
0,2021-11-01,27718.619929,34817.223899,60181.92668,142031.959156,102982.138534,16735.475278,47625.246917,85049.587685,103041.550598,...,48842.034973,51966.16468,82466.256215,35918.044871,26140.372177,53195.756792,333693.002002,262196.473946,27210.375679,17249.844578
1,2021-11-02,27959.569456,34889.673582,60696.443723,142569.972492,103013.565581,16785.117326,48074.292897,85194.705297,103107.140382,...,48490.166705,51844.049225,82403.166088,35565.267742,26249.029191,53432.614236,332105.044849,263129.949367,27315.851946,17476.908483
2,2021-11-03,28424.28617,35012.162499,61015.747669,142151.992911,103174.094933,17151.754414,48447.380356,84943.440973,102657.598377,...,48173.595122,51534.217306,82872.706697,35367.31984,26014.676549,53590.570772,333687.222768,263860.367387,27394.852843,17610.817877
3,2021-11-04,29842.920969,34981.436303,61223.889325,141853.049875,104311.069133,17183.61036,50621.68182,84961.180503,104201.529416,...,47261.287097,50450.427984,82711.381148,39547.748873,26464.214935,53660.339345,334429.241626,263551.020502,27622.730987,17830.759475
4,2021-11-05,28287.744658,34539.402594,60806.166769,141346.730119,108929.787446,16688.94133,46315.997584,83908.334092,102618.325832,...,48754.01436,49155.496428,86376.33706,34032.882585,25772.376175,55128.9085,327714.260242,259875.009434,27914.059034,17821.168983
5,2021-11-29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,2021-11-30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,2021-12-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,2021-12-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,2021-12-03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
