## 모델생성

- 사용할 분류모델 종류
    - Logistic Regression
    - Support Vector Machine (SVM)
    - Random Forest
    - Gradient Boosting Tree (GBT)
    - AdaBoost
    - LightGBM
    - XGBoost
    - ExtraTreesClassifier

In [None]:
# Load the preprocessed inputs: `data` from weighted_data.csv and `target`
# from y_data.csv.
import pandas as pd

# Both files are read the same way: CP949-encoded, first column as the index.
data, target = (
    pd.read_csv(csv_path, encoding='cp949', index_col=0)
    for csv_path in (
        '../data/preprocessed_data/weighted_data.csv',
        '../data/preprocessed_data/y_data.csv',
    )
)

In [None]:
# import models

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, ExtraTreesClassifier
from xgboost import XGBClassifier

# One shared seed for every estimator. Several of these models are randomized
# (bootstrap sampling, feature subsampling, SAGA shuffling), so without a seed
# the scores change on every run of the notebook.
SEED = 42

# Candidate classifiers, otherwise left at their library defaults.
# Order matters: results are collected by position in this list.
models_li = [
    LogisticRegression(solver='saga', random_state=SEED),
    SVC(random_state=SEED),
    RandomForestClassifier(random_state=SEED),
    GradientBoostingClassifier(random_state=SEED),
    AdaBoostClassifier(random_state=SEED),
    ExtraTreesClassifier(random_state=SEED),
    XGBClassifier(random_state=SEED),
]

In [None]:
import numpy as np

In [None]:
import warnings

# Install a blanket "ignore" filter: from here on, warnings of every category
# are suppressed for the rest of the session.
warnings.filterwarnings('ignore')

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize the feature table in two explicit steps: fit the scaler on
# `data`, then transform those same rows with it.
ssc = StandardScaler()
ssc.fit(data)
data_ssc = ssc.transform(data)

In [None]:
from sklearn.model_selection import cross_val_score, train_test_split, RepeatedStratifiedKFold
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Fixed seed: every cross_val_score call below sees the same 3-fold x 5-repeat
# splits, so models are compared on identical folds and reruns are repeatable.
rskfold = RepeatedStratifiedKFold(n_splits=3, n_repeats=5, random_state=42)

# score_total[h][m] is the rounded mean cross-validation score of models_li[m]
# on the target column for horizon h+1 (columns '1일 뒤 종가' .. '10일 뒤 종가').
score_total = []
for i in range(10):
    target_col = f'{i+1}일 뒤 종가'
    temp_list = []
    for model in models_li:
        # Scale inside the pipeline so the scaler is fitted on each training
        # fold only; fitting it on the full data set beforehand would leak
        # test-fold statistics into training.
        result = cross_val_score(make_pipeline(StandardScaler(), model),
                                 data,
                                 target[target_col],
                                 cv=rskfold)
        mean_score = round(np.mean(result), 3)
        print(str(model).split('(')[0])
        print(f'{target_col} : {mean_score} \n')
        temp_list.append(mean_score)
    score_total.append(temp_list)

In [None]:
# plot
import matplotlib.pyplot as plt

# Korean-capable fonts in preference order (typically available on macOS,
# Windows and Linux respectively). Matplotlib uses the first family it can
# find, so the Korean labels are not tied to macOS-only 'AppleGothic'.
plt.rc('font', family=['AppleGothic', 'Malgun Gothic', 'NanumGothic'])

# Score table: one row per prediction horizon, one column per model class.
# The row count follows score_total instead of being hard-coded.
df = pd.DataFrame(
    score_total,
    columns=[str(m).split('(')[0] for m in models_li],
    index=[f'{day}일 뒤 종가 예측' for day in range(1, len(score_total) + 1)],
)

# Stacked bar chart: one bar per horizon, one segment per model.
df.plot(kind='bar', stacked=True, figsize=(14, 7))
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# Line chart of the same score table: one line per model across the horizons.
df.plot(
    figsize=(14, 7),
    linestyle='-',
    linewidth=10,
    alpha=0.9,
)
plt.title('모델별 일자별 종가 예측 결과값', fontsize=20)
plt.xticks(rotation=15, fontsize=15)
# 'upper left' is the named form of legend location code 2.
plt.legend(loc='upper left', fontsize=10)
plt.tight_layout()
plt.show()