In [80]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score # 정확도 함수
from catboost import CatBoostClassifier
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
import csv
from sklearn.metrics import roc_auc_score
from datetime import datetime
import sys
import os


In [81]:
# Experiment configuration: the tier and the minute mark select which CSV is loaded.
tier = 'GRANDMASTER'
# NOTE(review): `min` shadows the Python builtin; the name is kept because
# later cells read it when printing and when logging results.
min = '10'
dataset_path = f'../Dataset/perMinuteDataset/{min}min/{tier}.csv'
df = pd.read_csv(dataset_path)

In [82]:
def mergeCol(win_df, lose_df, colName, positions=('ad', 'top', 'jgl', 'mid', 'sup')):
    """Collapse the per-position columns of one stat into a single total column.

    For each frame, the columns ``f'{colName}_{pos}'`` (one per entry of
    ``positions``) are added together into a column named ``colName`` and the
    per-position columns are dropped.

    The frames passed in are not modified; new frames are returned.

    Args:
        win_df: DataFrame holding the ``{colName}_{pos}`` columns.
        lose_df: DataFrame holding the same columns.
        colName: Stat name used both as the column prefix and as the name of
            the resulting total column.
        positions: Column suffixes to add up, in summation order.

    Returns:
        Tuple ``(win_df, lose_df)`` of new frames with the total column
        appended and the per-position columns removed.

    Raises:
        ValueError: If ``positions`` is empty.
        KeyError: If one of the expected columns is missing.
    """
    parts = [f'{colName}_{pos}' for pos in positions]
    if not parts:
        raise ValueError('positions must contain at least one suffix')

    def _merge(frame):
        # Explicit `+` (not DataFrame.sum) so NaN propagates exactly as before.
        total = frame[parts[0]]
        for part in parts[1:]:
            total = total + frame[part]
        # assign() works on a copy, so the caller's frame keeps its columns.
        return frame.assign(**{colName: total}).drop(columns=parts)

    return _merge(win_df), _merge(lose_df)

def mulMinus1(win_df, lose_df, colName):
    """Replace a stat column by the win-minus-lose difference.

    ``Diff_{colName}`` is ``win_df[colName] - lose_df[colName]`` in the first
    returned frame and the same values multiplied by -1 in the second one.
    The original ``colName`` column is dropped from both results.

    The frames passed in are not modified; new frames are returned.

    Args:
        win_df: DataFrame containing ``colName``.
        lose_df: DataFrame containing ``colName``; rows are matched to
            ``win_df`` by index label (pandas alignment).
        colName: Name of the column to turn into a difference.

    Returns:
        Tuple ``(win_df, lose_df)`` of new frames with ``Diff_{colName}``
        appended and ``colName`` removed.

    Raises:
        KeyError: If ``colName`` is missing from either frame.
    """
    diff_col = f'Diff_{colName}'
    diff = win_df[colName] - lose_df[colName]
    # assign() works on a copy, so neither input frame gains the Diff column.
    new_win = win_df.assign(**{diff_col: diff}).drop(columns=[colName])
    new_lose = lose_df.assign(**{diff_col: diff * -1}).drop(columns=[colName])
    return new_win, new_lose

def remove_outlier(input_data, iqr_factor=1.5):
    """Keep only values strictly inside the IQR fences.

    The fences are ``q1 - iqr_factor * iqr`` and ``q3 + iqr_factor * iqr``,
    where ``q1``/``q3`` are the 25 % / 75 % quantiles and ``iqr = q3 - q1``.

    For a DataFrame the quantiles are taken per column and the boolean-frame
    indexing masks out-of-range *cells* to NaN (the shape is unchanged), so
    callers normally follow up with ``dropna``. For a Series the out-of-range
    entries are removed.

    Caveat: both comparisons are strict, so values lying exactly on a fence
    are treated as outliers. In particular a column whose IQR is 0 has every
    value masked.

    Args:
        input_data: pandas DataFrame or Series of numeric values.
        iqr_factor: Multiplier applied to the IQR to build the fences.

    Returns:
        The same kind of object as ``input_data`` with outliers masked
        (DataFrame) or removed (Series).
    """
    q1 = input_data.quantile(0.25)  # first quartile
    q3 = input_data.quantile(0.75)  # third quartile
    iqr = q3 - q1  # interquartile range
    lower_fence = q1 - (iqr * iqr_factor)
    upper_fence = q3 + (iqr * iqr_factor)
    # Values outside the open interval (lower_fence, upper_fence) are outliers.
    within_fences = (lower_fence < input_data) & (input_data < upper_fence)
    return input_data[within_fences]

In [83]:
# Both team frames share three columns and otherwise differ only in the
# WIN_/LOSE_ prefix, so the column lists are generated instead of spelled out.
_shared_cols = ['Diff_FirstBLOOD', 'Diff_FirstDRAGON', 'dragonType']
_stat_order = ['Kill', 'Death', 'Asisst', 'LV', 'CS', 'jglCS']
_position_order = ['top', 'jgl', 'mid', 'ad', 'sup']


def _team_columns(prefix):
    """Return the ordered column list for one team prefix ('WIN' or 'LOSE')."""
    ward_cols = [f'{prefix}_controlWARDPlaced', f'{prefix}_WARDplaced']
    stat_cols = [
        f'{prefix}_{stat}_{pos}'
        for stat in _stat_order
        for pos in _position_order
    ]
    return _shared_cols + ward_cols + stat_cols + [f'{prefix}_WARDkill']


win_df = df[_team_columns('WIN')]
lose_df = df[_team_columns('LOSE')]

In [84]:
# Drop the WIN_/LOSE_ prefix so that both frames end up with the same column
# names. Only the columns listed here are renamed; anything else is untouched.
_unprefixed_names = ['controlWARDPlaced', 'WARDplaced', 'WARDkill'] + [
    f'{stat}_{pos}'
    for stat in ('Kill', 'Death', 'Asisst', 'LV', 'CS', 'jglCS')
    for pos in ('top', 'jgl', 'mid', 'ad', 'sup')
]

colName = 'WIN'
win_df = win_df.rename(
    columns={f'{colName}_{name}': name for name in _unprefixed_names}
)

colName = 'LOSE'
lose_df = lose_df.rename(
    columns={f'{colName}_{name}': name for name in _unprefixed_names}
)

In [85]:
# Step 1: fold the five per-position columns of each stat into one total.
for stat in ('Kill', 'Asisst', 'LV', 'CS', 'Death', 'jglCS'):
    win_df, lose_df = mergeCol(win_df, lose_df, stat)

# Step 2: turn every stat into a win-minus-lose difference (sign-flipped in
# lose_df). The order below fixes the order of the Diff_* columns.
for stat in ('Kill', 'Asisst', 'LV', 'CS', 'Death',
             'WARDplaced', 'WARDkill', 'controlWARDPlaced', 'jglCS'):
    win_df, lose_df = mulMinus1(win_df, lose_df, stat)

# Step 3: the first-blood / first-dragon indicators are sign-flipped for
# lose_df as well.
for flag in ('Diff_FirstBLOOD', 'Diff_FirstDRAGON'):
    lose_df[flag] = win_df[flag] * -1

In [86]:
# remove_outlier() masks out-of-range cells to NaN; dropna() then discards
# every row that contains at least one masked cell.
win_prep = remove_outlier(win_df).dropna(axis=0, how='any')
lose_prep = remove_outlier(lose_df).dropna(axis=0, how='any')

In [87]:
# One indicator column per dragon type: 1 when Diff_FirstDRAGON == 1 and
# dragonType equals the code, otherwise 0.
# NOTE(review): the code -> name mapping is taken from the column names used
# here; confirm it against the dataset's dragonType encoding.
_dragon_by_code = {
    1: 'AIR',
    2: 'EARTH',
    3: 'FIRE',
    4: 'WATER',
    5: 'HEXTECH',
    6: 'CHEMTECH',
}
for team_frame in (win_prep, lose_prep):
    for code, dragon in _dragon_by_code.items():
        is_match = (team_frame['Diff_FirstDRAGON'] == 1) & (team_frame['dragonType'] == code)
        team_frame[f'FirstDragon_{dragon}_DRAGON'] = np.where(is_match, 1, 0)

# Target label: 1 for rows from the winning side, 0 for the losing side.
win_prep['result'] = 1
lose_prep['result'] = 0

In [88]:
# Stack both sides into one table; dragonType is dropped after the
# FirstDragon_* indicator columns have been built from it.
data = pd.concat([win_prep, lose_prep], axis=0).drop(columns=['dragonType'])
print(f'Total data size of {tier} =', len(data))

Total data size of GRANDMASTER = 17422


In [97]:
# Sanity check: display the column names of the assembled dataset.
data.columns

Index(['Diff_FirstBLOOD', 'Diff_FirstDRAGON', 'Diff_Kill', 'Diff_Asisst',
       'Diff_LV', 'Diff_CS', 'Diff_Death', 'Diff_WARDplaced', 'Diff_WARDkill',
       'Diff_controlWARDPlaced', 'Diff_jglCS', 'FirstDragon_AIR_DRAGON',
       'FirstDragon_EARTH_DRAGON', 'FirstDragon_FIRE_DRAGON',
       'FirstDragon_WATER_DRAGON', 'FirstDragon_HEXTECH_DRAGON',
       'FirstDragon_CHEMTECH_DRAGON', 'result'],
      dtype='object')

In [89]:
# Tuned classifiers compared in the evaluation cell; all share one seed.
_model_seed = 10

# Random forest
rf = RandomForestClassifier(
    max_features='sqrt',
    max_leaf_nodes=200,
    random_state=_model_seed,
)
# LightGBM
lgbm = LGBMClassifier(
    n_estimators=100,
    max_depth=12,
    num_leaves=25,
    verbosity=0,
    min_child_samples=30,
    random_state=_model_seed,
)
# CatBoost
cat = CatBoostClassifier(
    iterations=200,
    depth=7,
    learning_rate=0.1,
    l2_leaf_reg=40,
    verbose=0,
    random_state=_model_seed,
)
# Extra trees
et = ExtraTreesClassifier(
    max_depth=7,
    max_features=None,
    random_state=_model_seed,
)

# Untuned baseline (library defaults), kept for reference:
# rf = RandomForestClassifier()
# lgbm = LGBMClassifier(verbosity=0)
# cat = CatBoostClassifier(verbose=0)
# et = ExtraTreesClassifier()

In [90]:
# Number of columns, label included.
colCnt = len(data.columns)
print(colCnt)

# Features are every column except the label. Index.difference returns the
# names sorted, and that sorted order is the feature order the models see.
feature_cols = data.columns.difference(['result'])
X = data[feature_cols]
y = data.loc[:, 'result']

18


##### 일정 기준 이상의 상관계수 column 추출

In [91]:
# Disabled experiment: keep only the columns whose absolute correlation with
# `result` is at least 0.05, then rebuild X / y from the reduced table.
# NOTE(review): the positional iloc split at the end assumes `result` is the
# last column of the filtered frame - confirm before re-enabling.
# correlation_matrix = data.corr().round(2)
# Pass_Fail_corr = correlation_matrix['result']
# PFC=pd.DataFrame(Pass_Fail_corr)

# PFC_R = PFC.loc[abs(PFC.result) >= 0.05]
# print(PFC_R)

# corrData = data[PFC_R.index]
# data = corrData
# colCnt = data.shape[1]
# print(colCnt)
# X = data.iloc[:, :colCnt-1]
# y = data.iloc[:, colCnt-1:]

##### 히트맵 그리기

In [92]:
# Disabled experiment: annotated correlation heatmap of every column in `data`.
# NOTE(review): the first line would rebind `df` (the raw CSV frame) to `data`;
# use a different name if this cell is re-enabled.
# df = data
# columns = np.array(df.columns)
# df_small = df[columns]
# df_corr = df_small.corr()
# plt.figure(figsize=(50, 50))
# sns.heatmap(df_corr, annot=True, fmt=".2f", cmap="Blues")

##### Kfold

In [93]:
# Disabled experiment: 5-fold cross-validation of a single model, printing
# accuracy, F1 and the confusion-matrix counts for each fold.
# NOTE(review): `column_names` does not match the current `data` columns (the
# per-position columns were merged into Diff_* columns, and the stat is spelled
# 'Asisst' elsewhere in this notebook). The list also contains the bare
# identifier `colName` twice where 'LV_mid' / 'LV_ad' would be expected.
# Rebuild the list before re-enabling this cell.
# column_names = ['Diff_FirstBLOOD', 'Diff_FirstDRAGON', 'Diff_FirstHERALD',
#        'Diff_Firsttower', 'dragonType', 'invadeKill', 'invadeDeath',
#        'controlWARDPlaced', 'Kill_top', 'Kill_jgl', 'Kill_mid', 'Kill_ad',
#        'Kill_sup', 'Death_top', 'Death_jgl', 'Death_mid', 'Death_ad',
#        'Death_sup', 'Assist_top', 'Assist_jgl', 'Assist_mid', 'Assist_ad',
#        'Assist_sup', 'LV_top', 'LV_jgl', colName, colName, 'LV_sup', 'CS_top',
#        'CS_jgl', 'CS_mid', 'CS_ad', 'CS_sup', 'jglCS_top', 'jglCS_jgl',
#        'jglCS_mid', 'jglCS_ad', 'jglCS_sup', 'GOLD_top', 'GOLD_jgl',
#        'GOLD_mid', 'GOLD_ad', 'GOLD_sup', 'WARDkill', 'Inhibitor', 'TOWERkill',
#        'WARDplaced']
# model = rf
# kf = KFold(n_splits=5, shuffle=True, random_state=42)
# for train_index, test_index in kf.split(X):
#     X_train, X_test = data.iloc[train_index][column_names], data.iloc[test_index][column_names]
#     y_train, y_test = data.iloc[train_index]['result'], data.iloc[test_index]['result']
#     model.fit(X_train, y_train)
#     pre = model.predict(X_test)

#     # score = accuracy_score(y_test, pre)
#     print('accuracy_score : ', accuracy_score(y_test, pre))
#     print('f1_score : ', f1_score(y_test, pre))
#     tn, fp, fn, tp = confusion_matrix(y_test, pre).ravel()
#     print('tn:', tn, ' fp:', fp, ' fn:', fn, ' tp:', tp)
#     print()
    

# # scores = cross_val_score(model, data[column_names], data['result'], cv=kf)
# # mean_score = np.mean(scores)
# # print(f'평균 평가 점수: {mean_score}')

In [94]:
# Train every model on one shared 80/20 split, print train/test accuracy and
# append one row per model to the per-tier result log.
print(f'{tier}의 {min}분 데이터')
modelList = [rf, lgbm, cat, et]
# modelList = [knn, svc, nusvc, dct]
modelNameList = ["RandomForest", "LightGBM", "CatBoost", "ExtraTree"]
# modelNameList = ["KNN", "SVC", "NuSVC", "DecisionTree"]
fieldnames = ["Tier", "Min","TIME", "Model", "Train_Accuracy", "Test_Accuracy", "F1_score", "ROC_AUC", "TN", "FP", "FN", "TP", "Note"]
result_path = f'../Dataset/perMinuteDataset/result/currentAccuracy/{tier}.csv'

# The split is seeded, so it is identical for every model: do it once instead
# of once per loop iteration.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=10) # 80 % train / 20 % test
y_train = y_train.values.ravel()

for model_name, model in zip(modelNameList, modelList):
    model.fit(X_train, y_train)
    pre_test = model.predict(X_test)
    pre_train = model.predict(X_train)
    tn, fp, fn, tp = confusion_matrix(y_test, pre_test).ravel()

    # ROC AUC is a ranking metric: score it with the predicted probability of
    # the positive class (column 1 of predict_proba for labels 0/1), not with
    # the hard 0/1 predictions.
    proba_test = model.predict_proba(X_test)[:, 1]

    train_accuracy = round(accuracy_score(y_train, pre_train)*100, 2)
    test_accuracy = round(accuracy_score(y_test, pre_test)*100, 2)

    # Identity check: we only want to know whether this is the CatBoost
    # instance, not invoke any estimator-defined equality.
    if model is cat:
        printModel = f"CatBoost{model.get_params()}"
    else:
        printModel = model
    print(f"{model_name} Train Accuracy : ", train_accuracy, "%")
    print(f"{model_name} Test Accuracy : ", test_accuracy, "%")

    # Optional overfitting check:
    # scores = cross_val_score(model, X, y, cv=3)
    # print("cross-validation mean accuracy:", scores.mean())

    # Append the result row to the CSV log.
    result = {"Tier": tier,
              "Min": min,
              "TIME": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
              "Model": printModel,
              "Train_Accuracy" : train_accuracy,
              "Test_Accuracy" : test_accuracy,
              "F1_score" : round(f1_score(y_test, pre_test)*100, 2),
              "ROC_AUC" : round(roc_auc_score(y_test, proba_test)*100, 2),
              "TN": tn,
              "FP": fp,
              "FN": fn,
              "TP": tp,
              "Note": 'solve ExtraTrees overfittng'}
    # The file is opened in append mode, so the header must only be written
    # when the log is new or empty; otherwise every run would insert another
    # header line in the middle of the data.
    write_header = (not os.path.exists(result_path)) or os.path.getsize(result_path) == 0
    with open(result_path, 'a', newline='') as f:
        w = csv.DictWriter(f, fieldnames=fieldnames)
        if write_header:
            w.writeheader()
        w.writerow(result)

GRANDMASTER의 10분 데이터


RandomForest Train Accuracy :  79.71 %
RandomForest Test Accuracy :  74.38 %
LightGBM Train Accuracy :  78.68 %
LightGBM Test Accuracy :  74.06 %
CatBoost Train Accuracy :  77.28 %
CatBoost Test Accuracy :  74.18 %
ExtraTree Train Accuracy :  74.66 %
ExtraTree Test Accuracy :  73.74 %


##### GridSearch

In [95]:
# Disabled experiment: 3-fold grid search over `max_leaf_nodes`, followed by
# a test-set accuracy check of the best estimator.
# NOTE(review): the estimator is a RandomForestClassifier although the
# variable is called `lgbm_clf`; the import also belongs in the top import
# cell if this is re-enabled.
# from sklearn.model_selection import GridSearchCV
# params = {'max_leaf_nodes':[80, 100, 120, 150]
#             }
# lgbm_clf = RandomForestClassifier(max_features='sqrt')
# grid_cv = GridSearchCV(lgbm_clf, param_grid = params, cv = 3, n_jobs = -1)
# grid_cv.fit(X_train, y_train)

# print('최적 하이퍼 파라미터: ', grid_cv.best_params_)
# print('최고 예측 정확도: {:.4f}'.format(grid_cv.best_score_))

# estimator = grid_cv.best_estimator_
# pred = estimator.predict(X_test)
# print('테스트 데이터 세트 정확도: {0:.4f}'.format(accuracy_score(y_test,pred)))

##### 청파소나타 게임 10판으로 예측

In [96]:
# Disabled experiment: fit CatBoost on the training split and predict the
# games stored in cps.csv.
# Actual results = win, win, loss, loss, loss, win, loss, loss, loss
# NOTE(review): nine results are listed although the section title mentions
# ten games - confirm which is right.
# cps = pd.read_csv('cps.csv')
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=10) # 학습데이터와 평가데이터의 비율을 8:2 로 분할|
# cat.fit(X_train, y_train)
# cat.predict(cps)