In [2]:
!pip install finance-datareader

Collecting finance-datareader
  Downloading finance_datareader-0.9.31-py3-none-any.whl (17 kB)
Collecting requests-file
  Downloading requests_file-1.5.1-py2.py3-none-any.whl (3.7 kB)
Installing collected packages: requests-file, finance-datareader
Successfully installed finance-datareader-0.9.31 requests-file-1.5.1


In [65]:
import FinanceDataReader as fdr
from tqdm import tqdm
import pandas as pd

In [6]:
!pip install pycaret

Collecting pycaret
  Downloading pycaret-2.3.5-py3-none-any.whl (288 kB)
[K     |████████████████████████████████| 288 kB 5.3 MB/s 
Collecting mlxtend>=0.17.0
  Downloading mlxtend-0.19.0-py2.py3-none-any.whl (1.3 MB)
[K     |████████████████████████████████| 1.3 MB 42.3 MB/s 
Collecting Boruta
  Downloading Boruta-0.3-py3-none-any.whl (56 kB)
[K     |████████████████████████████████| 56 kB 4.5 MB/s 
[?25hCollecting mlflow
  Downloading mlflow-1.21.0-py3-none-any.whl (16.9 MB)
[K     |████████████████████████████████| 16.9 MB 215 kB/s 
[?25hCollecting pyLDAvis
  Downloading pyLDAvis-3.3.1.tar.gz (1.7 MB)
[K     |████████████████████████████████| 1.7 MB 36.5 MB/s 
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Collecting scikit-learn==0.23.2
  Downloading scikit_learn-0.23.2-cp37-cp37m-manylinux1_x86_64.whl

In [7]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.0.3-cp37-none-manylinux1_x86_64.whl (76.3 MB)
[K     |████████████████████████████████| 76.3 MB 1.4 MB/s 
Installing collected packages: catboost
Successfully installed catboost-1.0.3


# Pycaret 이용

## 모든 종목 예측

* 종가와 상관계수 높은 특성 : KOSDAQ(0.87), KOSPI(0.79), DOW(0.58), NASDAQ(0.50), S&P500(0.56)
* 예측 순서(종가와 상관계수 높은 특성 먼저 예측)
    * weekday, weeknumdm로 KOSDAQ 예측(종가와 상관계수 0.87)
    * weekday, weeknum, KOSDAQ으로 NASDAQ 예측(KOSDAQ과 상관계수 0.68)
    * weekday, weeknum, KOSDAQ, NASDAQ으로 S&P500 예측(NASDAQ과 상관계수 0.93)
    * weekday, weeknum, KOSDAQ, NASDAQ, S&P500으로 DOW 예측(S&P500과 상관계수 0.95)
    * weekday, weeknum, KOSDAQ, NASDAQ, S&P500, DOW로 KOSPI 예측
    * weekday, weeknum, KOSDAQ, NASDAQ, S&P500, DOW, KOSPI로 다른 종가 예측 

In [4]:
# 제출 점수 :
# 자체 평가 점수 : 140.8549

In [1]:
from pycaret.regression import *

# 데이터 불러오기(첫번째 주)
* 훈련 : 10.04 ~ 10.29
* 예측 : 11.01 ~ 11.05

In [None]:
start_date = '2021-10-04'
end_date = '2021-10-29'

In [None]:
Business_days = pd.DataFrame(pd.date_range(start_date,end_date,freq='B'), columns = ['Date'])

### train set

In [29]:
all_train1 = pd.read_csv('20211004_29_all_train.csv')

In [30]:
# 20일 * 370종목 = 7400
all_train1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7400 entries, 0 to 7399
Data columns (total 10 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Date     7400 non-null   object 
 1   weekday  7400 non-null   int64  
 2   weeknum  7400 non-null   int64  
 3   kosdaq   7400 non-null   float64
 4   nasdaq   7400 non-null   float64
 5   sp500    7400 non-null   float64
 6   dow      7400 non-null   float64
 7   kospi    7400 non-null   float64
 8   code     7400 non-null   int64  
 9   Close    6660 non-null   float64
dtypes: float64(6), int64(3), object(1)
memory usage: 578.2+ KB


In [31]:
all_train1.head()

Unnamed: 0,Date,weekday,weeknum,kosdaq,nasdaq,sp500,dow,kospi,code,Close
0,2021-10-04,0,40,969.285,14255.5,4300.46,34003.58,2990.675,5930,
1,2021-10-05,1,40,955.37,14433.8,4345.72,34315.99,2962.17,5930,72200.0
2,2021-10-06,2,40,922.36,14501.9,4363.55,34417.98,2908.31,5930,71300.0
3,2021-10-07,3,40,953.43,14654.0,4399.76,34754.15,2959.46,5930,71600.0
4,2021-10-08,4,40,953.11,14579.5,4391.36,34746.71,2956.3,5930,71500.0


In [32]:
# 종목 코드 6자리로 맞추기
all_train1['code'] = all_train1['code'].astype(str).str.zfill(6)

In [33]:
all_train1.head()

Unnamed: 0,Date,weekday,weeknum,kosdaq,nasdaq,sp500,dow,kospi,code,Close
0,2021-10-04,0,40,969.285,14255.5,4300.46,34003.58,2990.675,5930,
1,2021-10-05,1,40,955.37,14433.8,4345.72,34315.99,2962.17,5930,72200.0
2,2021-10-06,2,40,922.36,14501.9,4363.55,34417.98,2908.31,5930,71300.0
3,2021-10-07,3,40,953.43,14654.0,4399.76,34754.15,2959.46,5930,71600.0
4,2021-10-08,4,40,953.11,14579.5,4391.36,34746.71,2956.3,5930,71500.0


### test set

In [34]:
all_test1 = pd.read_csv('20210104_1029_pred_1101_05_all_test.csv')

In [35]:
# 5일 * 370 종목 = 1850
all_test1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1850 entries, 0 to 1849
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  1850 non-null   int64  
 1   Date        1850 non-null   object 
 2   weekday     1850 non-null   int64  
 3   weeknum     1850 non-null   int64  
 4   kosdaq      1850 non-null   float64
 5   nasdaq      1850 non-null   float64
 6   sp500       1850 non-null   float64
 7   dow         1850 non-null   float64
 8   kospi       1850 non-null   float64
 9   code        1850 non-null   int64  
dtypes: float64(5), int64(4), object(1)
memory usage: 144.7+ KB


In [36]:
all_test1.head()

Unnamed: 0.1,Unnamed: 0,Date,weekday,weeknum,kosdaq,nasdaq,sp500,dow,kospi,code
0,0,2021-11-01,0,44,972.0994,14971.576678,4483.7295,35186.918,2974.401,5930
1,1,2021-11-02,1,44,972.51996,14975.816364,4484.8213,35193.47,2974.6333,5930
2,2,2021-11-03,2,44,971.2291,14958.857623,4480.517,35167.76,2972.5112,5930
3,3,2021-11-04,3,44,973.40875,14980.056049,4485.9893,35200.984,2977.359,5930
4,4,2021-11-05,4,44,973.42645,14980.056049,4486.3213,35205.234,2974.0273,5930


In [37]:
all_test1['code'] = all_test1['code'].astype(str).str.zfill(6)

In [38]:
all_test1.head()

Unnamed: 0.1,Unnamed: 0,Date,weekday,weeknum,kosdaq,nasdaq,sp500,dow,kospi,code
0,0,2021-11-01,0,44,972.0994,14971.576678,4483.7295,35186.918,2974.401,5930
1,1,2021-11-02,1,44,972.51996,14975.816364,4484.8213,35193.47,2974.6333,5930
2,2,2021-11-03,2,44,971.2291,14958.857623,4480.517,35167.76,2972.5112,5930
3,3,2021-11-04,3,44,973.40875,14980.056049,4485.9893,35200.984,2977.359,5930
4,4,2021-11-05,4,44,973.42645,14980.056049,4486.3213,35205.234,2974.0273,5930


## 8가지 특성으로 종목별 첫번째 주 종가 예측

In [39]:
model1=setup(
    all_train1, target = 'Close',ignore_features=['weeknum'], silent=False,
    categorical_features=['code'], fold=4, fold_shuffle=True, use_gpu=True
    )

tuned_model1 = tune_model(
    compare_models(sort='MAPE', n_select=1, exclude=['knn','huber','llar','omp','par'])
)

final_model1 = finalize_model(tuned_model1)

pred1 = predict_model(final_model1, all_test1.drop('weeknum',axis=1))

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,49630.8086,6895561000.0,83039.5121,0.6357,1.0756,1.5094
1,55259.9513,9205546000.0,95945.5374,0.5054,1.0686,1.2758
2,50725.6417,7689041000.0,87687.175,0.6572,1.0553,1.3737
3,51853.9754,7930925000.0,89055.7431,0.5879,1.0561,1.3171
Mean,51867.5943,7930268000.0,88931.9919,0.5965,1.0639,1.369
SD,2110.4228,829931500.0,4622.6622,0.0583,0.0086,0.0882


In [40]:
save_model(final_model1, 'pred_model1')

Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=None,
          steps=[('dtypes',
                  DataTypes_Auto_infer(categorical_features=['code'],
                                       display_types=True,
                                       features_todrop=['weeknum'],
                                       id_columns=[], ml_usecase='regression',
                                       numerical_features=[], target='Close',
                                       time_features=[])),
                 ('imputer',
                  Simple_Imputer(categorical_strategy='not_available',
                                 fill_value_categorical=None,
                                 fill_value_numerical=None,
                                 num...
                  RandomForestRegressor(bootstrap=True, ccp_alpha=0.0,
                                        criterion='mae', max_depth=10,
                                        max_features=1.0, max_leaf_nodes=None,
                                        max_samples=N

In [41]:
pred1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1850 entries, 0 to 1849
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  1850 non-null   int64  
 1   Date        1850 non-null   object 
 2   weekday     1850 non-null   int64  
 3   kosdaq      1850 non-null   float64
 4   nasdaq      1850 non-null   float64
 5   sp500       1850 non-null   float64
 6   dow         1850 non-null   float64
 7   kospi       1850 non-null   float64
 8   code        1850 non-null   object 
 9   Label       1850 non-null   float64
dtypes: float64(6), int64(2), object(2)
memory usage: 144.7+ KB


In [42]:
pred1.tail(20)

Unnamed: 0.1,Unnamed: 0,Date,weekday,kosdaq,nasdaq,sp500,dow,kospi,code,Label
1830,0,2021-11-01,0,972.0994,14971.576678,4483.7295,35186.918,2974.401,64260,43205.113636
1831,1,2021-11-02,1,972.51996,14975.816364,4484.8213,35193.47,2974.6333,64260,43095.2
1832,2,2021-11-03,2,971.2291,14958.857623,4480.517,35167.76,2972.5112,64260,43060.268182
1833,3,2021-11-04,3,973.40875,14980.056049,4485.9893,35200.984,2977.359,64260,43282.131818
1834,4,2021-11-05,4,973.42645,14980.056049,4486.3213,35205.234,2974.0273,64260,42908.545455
1835,0,2021-11-01,0,972.0994,14971.576678,4483.7295,35186.918,2974.401,287410,43205.113636
1836,1,2021-11-02,1,972.51996,14975.816364,4484.8213,35193.47,2974.6333,287410,43095.2
1837,2,2021-11-03,2,971.2291,14958.857623,4480.517,35167.76,2972.5112,287410,43060.268182
1838,3,2021-11-04,3,973.40875,14980.056049,4485.9893,35200.984,2977.359,287410,43282.131818
1839,4,2021-11-05,4,973.42645,14980.056049,4486.3213,35205.234,2974.0273,287410,42908.545455


# 데이터 불러오기(두번째 주)
* 훈련 : 10.25 ~ 11.19
* 예측 : 11.29 ~ 12.03

### train set2

In [46]:
all_train2 = pd.read_csv('20211025_1119_all_train.csv')

In [47]:
# 20일 * 370종목 = 7400
all_train2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7400 entries, 0 to 7399
Data columns (total 10 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Date     7400 non-null   object 
 1   weekday  7400 non-null   int64  
 2   weeknum  7400 non-null   int64  
 3   kosdaq   7400 non-null   float64
 4   nasdaq   7400 non-null   float64
 5   sp500    7400 non-null   float64
 6   dow      7400 non-null   float64
 7   kospi    7400 non-null   float64
 8   code     7400 non-null   int64  
 9   Close    7397 non-null   float64
dtypes: float64(6), int64(3), object(1)
memory usage: 578.2+ KB


In [48]:
all_train2.head()

Unnamed: 0,Date,weekday,weeknum,kosdaq,nasdaq,sp500,dow,kospi,code,Close
0,2021-10-25,0,43,994.31,15226.7,4566.48,35743.78,3020.54,5930,70200.0
1,2021-10-26,1,43,1011.76,15235.7,4574.79,35755.83,3049.08,5930,71100.0
2,2021-10-27,2,43,1008.95,15235.8,4551.68,35491.48,3025.49,5930,70100.0
3,2021-10-28,3,43,1000.13,15448.1,4596.42,35729.89,3009.55,5930,70700.0
4,2021-10-29,4,43,992.33,15498.4,4605.38,35819.59,2970.68,5930,69800.0


In [49]:
all_train2['code'] = all_train2['code'].astype(str).str.zfill(6)

In [50]:
all_train2.head()

Unnamed: 0,Date,weekday,weeknum,kosdaq,nasdaq,sp500,dow,kospi,code,Close
0,2021-10-25,0,43,994.31,15226.7,4566.48,35743.78,3020.54,5930,70200.0
1,2021-10-26,1,43,1011.76,15235.7,4574.79,35755.83,3049.08,5930,71100.0
2,2021-10-27,2,43,1008.95,15235.8,4551.68,35491.48,3025.49,5930,70100.0
3,2021-10-28,3,43,1000.13,15448.1,4596.42,35729.89,3009.55,5930,70700.0
4,2021-10-29,4,43,992.33,15498.4,4605.38,35819.59,2970.68,5930,69800.0


### test set2

In [51]:
all_test2 = pd.read_csv('20211025_1119_pred_1129_1203_all_test.csv')

In [52]:
# 5일 * 370종목 = 1850
all_test2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1850 entries, 0 to 1849
Data columns (total 9 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Date     1850 non-null   object 
 1   weekday  1850 non-null   int64  
 2   weeknum  1850 non-null   int64  
 3   kosdaq   1850 non-null   float64
 4   nasdaq   1850 non-null   float64
 5   sp500    1850 non-null   float64
 6   dow      1850 non-null   float64
 7   kospi    1850 non-null   float64
 8   code     1850 non-null   int64  
dtypes: float64(5), int64(3), object(1)
memory usage: 130.2+ KB


In [53]:
all_test2.head()

Unnamed: 0,Date,weekday,weeknum,kosdaq,nasdaq,sp500,dow,kospi,code
0,2021-11-29,0,48,994.300008,15516.016764,4613.382896,35954.46,2971.095891,5930
1,2021-11-30,1,48,1009.960002,15730.499891,4652.258954,36125.67,3003.252271,5930
2,2021-12-01,2,48,997.817456,15516.016764,4613.382896,35941.957,2971.845081,5930
3,2021-12-02,3,48,997.817456,15516.016764,4613.382896,35830.055,2977.736577,5930
4,2021-12-03,4,48,1000.700012,15516.016764,4613.382896,35812.945,2977.736577,5930


In [54]:
all_test2['code'] = all_test2['code'].astype(str).str.zfill(6)

In [55]:
all_test2.head()

Unnamed: 0,Date,weekday,weeknum,kosdaq,nasdaq,sp500,dow,kospi,code
0,2021-11-29,0,48,994.300008,15516.016764,4613.382896,35954.46,2971.095891,5930
1,2021-11-30,1,48,1009.960002,15730.499891,4652.258954,36125.67,3003.252271,5930
2,2021-12-01,2,48,997.817456,15516.016764,4613.382896,35941.957,2971.845081,5930
3,2021-12-02,3,48,997.817456,15516.016764,4613.382896,35830.055,2977.736577,5930
4,2021-12-03,4,48,1000.700012,15516.016764,4613.382896,35812.945,2977.736577,5930


### 2. 8가지 특성으로 종목별 두번째 주 종가 예측

In [56]:
model2 = setup(
    all_train2, target = 'Close',ignore_features=['weeknum'], silent=False,
     categorical_features=['code'], fold=4, fold_shuffle=True, use_gpu=True
     )

tuned_model2 = tune_model(
    compare_models(sort='MAPE', n_select=1, exclude=['knn','huber','llar','omp','par'])
)

final_model2 = finalize_model(tuned_model2)

pred2 = predict_model(final_model2, all_test2.drop('weeknum',axis=1))

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,56079.0564,6326679000.0,79540.4226,0.6438,1.2394,2.5563
1,56164.91,6926200000.0,83223.7967,0.6278,1.1823,2.2456
2,56282.0915,6962635000.0,83442.4034,0.6503,1.226,2.5019
3,55789.8165,6506909000.0,80665.4117,0.6738,1.2427,2.625
Mean,56078.9686,6680606000.0,81718.0086,0.6489,1.2226,2.4822
SD,181.8335,271703900.0,1665.1408,0.0165,0.0241,0.1434


In [57]:
save_model(final_model2, 'pred_model2')

Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=None,
          steps=[('dtypes',
                  DataTypes_Auto_infer(categorical_features=['code'],
                                       display_types=True,
                                       features_todrop=['weeknum'],
                                       id_columns=[], ml_usecase='regression',
                                       numerical_features=[], target='Close',
                                       time_features=[])),
                 ('imputer',
                  Simple_Imputer(categorical_strategy='not_available',
                                 fill_value_categorical=None,
                                 fill_value_numerical=None,
                                 num...
                  RandomForestRegressor(bootstrap=False, ccp_alpha=0.0,
                                        criterion='mse', max_depth=10,
                                        max_features=1.0, max_leaf_nodes=None,
                                        max_samples=

In [58]:
pred2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1850 entries, 0 to 1849
Data columns (total 9 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Date     1850 non-null   object 
 1   weekday  1850 non-null   int64  
 2   kosdaq   1850 non-null   float64
 3   nasdaq   1850 non-null   float64
 4   sp500    1850 non-null   float64
 5   dow      1850 non-null   float64
 6   kospi    1850 non-null   float64
 7   code     1850 non-null   object 
 8   Label    1850 non-null   float64
dtypes: float64(6), int64(1), object(2)
memory usage: 130.2+ KB


In [59]:
pred2.head(20)

Unnamed: 0,Date,weekday,kosdaq,nasdaq,sp500,dow,kospi,code,Label
0,2021-11-29,0,994.300008,15516.016764,4613.382896,35954.46,2971.095891,5930,88129.581694
1,2021-11-30,1,1009.960002,15730.499891,4652.258954,36125.67,3003.252271,5930,89028.107124
2,2021-12-01,2,997.817456,15516.016764,4613.382896,35941.957,2971.845081,5930,88143.734759
3,2021-12-02,3,997.817456,15516.016764,4613.382896,35830.055,2977.736577,5930,88106.85279
4,2021-12-03,4,1000.700012,15516.016764,4613.382896,35812.945,2977.736577,5930,88107.890193
5,2021-11-29,0,994.300008,15516.016764,4613.382896,35954.46,2971.095891,660,88129.581694
6,2021-11-30,1,1009.960002,15730.499891,4652.258954,36125.67,3003.252271,660,89028.107124
7,2021-12-01,2,997.817456,15516.016764,4613.382896,35941.957,2971.845081,660,88143.734759
8,2021-12-02,3,997.817456,15516.016764,4613.382896,35830.055,2977.736577,660,88106.85279
9,2021-12-03,4,1000.700012,15516.016764,4613.382896,35812.945,2977.736577,660,88107.890193


# 제출 파일 만들기

In [61]:
sub = pd.read_csv('sample_submission.csv')
sub

Unnamed: 0,Day,000060,000080,000100,000120,000150,000240,000250,000270,000660,000670,000720,000810,000880,000990,001230,001440,001450,001740,002380,002790,003000,003090,003380,003410,003490,003670,003800,004000,004020,004170,004370,004490,004800,004990,005250,005290,005300,005380,005385,...,272290,273130,278280,278530,282330,285130,287410,290510,290650,292150,293490,293780,294090,294870,298000,298020,298050,298380,299030,299660,299900,307950,314130,316140,319400,319660,321550,323990,326030,330590,330860,336260,336370,347860,348150,348210,352820,357780,363280,950130
0,2021-11-01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,2021-11-02,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2021-11-03,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,2021-11-04,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,2021-11-05,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,2021-11-29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,2021-11-30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,2021-12-01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,2021-12-02,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,2021-12-03,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [62]:
all_pred = pd.concat([pred1, pred2])

In [63]:
all_pred = all_pred.reset_index()
all_pred = all_pred.drop(['index','Unnamed: 0'], axis=1)
all_pred

Unnamed: 0,Date,weekday,kosdaq,nasdaq,sp500,dow,kospi,code,Label
0,2021-11-01,0,972.099400,14971.576678,4483.729500,35186.918,2974.401000,005930,43205.113636
1,2021-11-02,1,972.519960,14975.816364,4484.821300,35193.470,2974.633300,005930,43095.200000
2,2021-11-03,2,971.229100,14958.857623,4480.517000,35167.760,2972.511200,005930,43060.268182
3,2021-11-04,3,973.408750,14980.056049,4485.989300,35200.984,2977.359000,005930,43282.131818
4,2021-11-05,4,973.426450,14980.056049,4486.321300,35205.234,2974.027300,005930,42908.545455
...,...,...,...,...,...,...,...,...,...
3695,2021-11-29,0,994.300008,15516.016764,4613.382896,35954.460,2971.095891,099320,88129.581694
3696,2021-11-30,1,1009.960002,15730.499891,4652.258954,36125.670,3003.252271,099320,89028.107124
3697,2021-12-01,2,997.817456,15516.016764,4613.382896,35941.957,2971.845081,099320,88143.734759
3698,2021-12-02,3,997.817456,15516.016764,4613.382896,35830.055,2977.736577,099320,88106.852790


In [66]:
for code in tqdm(sub.columns.values):
  temp = all_pred[all_pred['code']==code]['Label']
  temp = temp.reset_index()
  sub[code] = temp['Label']

100%|██████████| 371/371 [00:00<00:00, 404.60it/s]


In [67]:
date1 = pd.DataFrame(pd.date_range(start='2021-11-01', end='2021-11-05'), columns=['Day'])
date1

Unnamed: 0,Day
0,2021-11-01
1,2021-11-02
2,2021-11-03
3,2021-11-04
4,2021-11-05


In [68]:
date2 = pd.DataFrame(pd.date_range(start='2021-11-29', end='2021-12-03'), columns=['Day'])
date2

Unnamed: 0,Day
0,2021-11-29
1,2021-11-30
2,2021-12-01
3,2021-12-02
4,2021-12-03


In [69]:
day = pd.concat([date1, date2])
day = day.reset_index()
day = day.drop('index', axis=1)
day

Unnamed: 0,Day
0,2021-11-01
1,2021-11-02
2,2021-11-03
3,2021-11-04
4,2021-11-05
5,2021-11-29
6,2021-11-30
7,2021-12-01
8,2021-12-02
9,2021-12-03


In [70]:
sub['Day'] = day['Day']
sub

Unnamed: 0,Day,000060,000080,000100,000120,000150,000240,000250,000270,000660,000670,000720,000810,000880,000990,001230,001440,001450,001740,002380,002790,003000,003090,003380,003410,003490,003670,003800,004000,004020,004170,004370,004490,004800,004990,005250,005290,005300,005380,005385,...,272290,273130,278280,278530,282330,285130,287410,290510,290650,292150,293490,293780,294090,294870,298000,298020,298050,298380,299030,299660,299900,307950,314130,316140,319400,319660,321550,323990,326030,330590,330860,336260,336370,347860,348150,348210,352820,357780,363280,950130
0,2021-11-01,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,...,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636,43205.113636
1,2021-11-02,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,...,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2,43095.2
2,2021-11-03,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,...,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182,43060.268182
3,2021-11-04,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,...,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818,43282.131818
4,2021-11-05,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,...,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455,42908.545455
5,2021-11-29,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,...,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694,88129.581694
6,2021-11-30,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,...,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124,89028.107124
7,2021-12-01,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,...,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759,88143.734759
8,2021-12-02,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,...,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279,88106.85279
9,2021-12-03,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,...,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193,88107.890193


In [71]:
sub.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Columns: 371 entries, Day to 950130
dtypes: datetime64[ns](1), float64(370)
memory usage: 29.1 KB


In [72]:
sub.to_csv('No_sub03_pycaret02_2.csv', index=False)