In [66]:
from imblearn.over_sampling import SMOTE
import numpy as np
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split, KFold, cross_val_score
import pandas as pd

### load csv data

In [67]:
X_train = np.loadtxt('../data/csvs/csv_3/train_x_mv3_22_3.csv', delimiter=',', skiprows=1)
y_train = np.loadtxt('../data/csvs/csv_3/train_y_mv3_22_3.csv', delimiter=',', skiprows=1)
X_test = np.loadtxt('../data/csvs/csv_3/test_x_mv3_22_3.csv', delimiter=',', skiprows=1)
y_test = np.loadtxt('../data/csvs/csv_3/test_y_mv3_22_3.csv', delimiter=',', skiprows=1)

### keypoint noralization

In [68]:
X_train = np.array(X_train)
X_train[:, ::2] /= 1280
X_train[:, 1::2] /= 720
X_train

array([[0.45546875, 0.63055556, 0.46328125, ..., 0.18333333, 0.        ,
        0.        ],
       [0.41640625, 0.50416667, 0.43125   , ..., 0.        , 0.        ,
        0.        ],
       [0.35078125, 0.50694444, 0.36484375, ..., 0.03472222, 0.        ,
        0.        ],
       ...,
       [0.48359375, 0.55416667, 0.51640625, ..., 0.        , 0.26796875,
        0.56111111],
       [0.47421875, 0.44444444, 0.51484375, ..., 0.45416667, 0.215625  ,
        0.39166667],
       [0.4875    , 0.52638889, 0.52421875, ..., 0.65277778, 0.21875   ,
        0.58472222]])

In [69]:
X_test = np.array(X_test)
X_test[:, ::2] /= 1280
X_test[:, 1::2] /= 720


### over sampling using SMOTE

In [70]:
smote = SMOTE(random_state=11)
X_train_over, y_train_over = smote.fit_resample(X_train, y_train)

In [32]:
sample = np.concatenate((X_train_over, y_train_over), axis=1)
sample = pd.DataFrame(sample)
sample.to_csv("train_sampling.csv", index=False)

### split train, validation data

In [71]:
X_train, X_val, y_train, y_val = train_test_split(X_train_over, y_train_over, test_size=0.10)

### train with XGBClassifier

In [84]:
model = XGBClassifier(n_estimators=500, learning_rate=0.18, max_depth=4, random_state=32)
xgb_model = model.fit(X_train, y_train, eval_metric="logloss", 
                      early_stopping_rounds=200, eval_set=([X_val, y_val],), verbose=2)
print(xgb_model)

[0]	validation_0-logloss:0.55273
[2]	validation_0-logloss:0.37907
[4]	validation_0-logloss:0.27838
[6]	validation_0-logloss:0.21329
[8]	validation_0-logloss:0.16875
[10]	validation_0-logloss:0.13630
[12]	validation_0-logloss:0.11494
[14]	validation_0-logloss:0.09817
[16]	validation_0-logloss:0.08628
[18]	validation_0-logloss:0.07677
[20]	validation_0-logloss:0.06998
[22]	validation_0-logloss:0.06435




[24]	validation_0-logloss:0.05994
[26]	validation_0-logloss:0.05622
[28]	validation_0-logloss:0.05297
[30]	validation_0-logloss:0.05000
[32]	validation_0-logloss:0.04725
[34]	validation_0-logloss:0.04546
[36]	validation_0-logloss:0.04416
[38]	validation_0-logloss:0.04266
[40]	validation_0-logloss:0.04169
[42]	validation_0-logloss:0.04050
[44]	validation_0-logloss:0.03970
[46]	validation_0-logloss:0.03888
[48]	validation_0-logloss:0.03812
[50]	validation_0-logloss:0.03760
[52]	validation_0-logloss:0.03721
[54]	validation_0-logloss:0.03636
[56]	validation_0-logloss:0.03596
[58]	validation_0-logloss:0.03573
[60]	validation_0-logloss:0.03552
[62]	validation_0-logloss:0.03503
[64]	validation_0-logloss:0.03504
[66]	validation_0-logloss:0.03468
[68]	validation_0-logloss:0.03435
[70]	validation_0-logloss:0.03405
[72]	validation_0-logloss:0.03386
[74]	validation_0-logloss:0.03368
[76]	validation_0-logloss:0.03355
[78]	validation_0-logloss:0.03353
[80]	validation_0-logloss:0.03358
[82]	validatio

### predict test

In [85]:
expected_y = y_test
pred_y = xgb_model.predict(X_test)

### scores

In [86]:
print(classification_report(expected_y, pred_y))
accuracy = accuracy_score(expected_y, pred_y)
print("Accuracy: %.2f%%" % (accuracy * 100))

              precision    recall  f1-score   support

           0       1.00      0.91      0.95        44
           1       1.00      0.80      0.89        45
           2       0.94      0.92      0.93        49
           3       1.00      0.96      0.98        53
           4       0.98      0.85      0.91        47
           5       0.97      0.85      0.90        33
           6       0.93      0.73      0.82        37
           7       1.00      0.89      0.94        36
           8       0.93      0.90      0.92        63
           9       1.00      0.89      0.94        38
          10       1.00      0.91      0.95        34
          11       1.00      0.81      0.90        32
          12       1.00      0.93      0.97        45

   micro avg       0.98      0.88      0.93       556
   macro avg       0.98      0.87      0.92       556
weighted avg       0.98      0.88      0.93       556
 samples avg       0.88      0.88      0.88       556

Accuracy: 87.41%


  _warn_prf(average, modifier, msg_start, len(result))


### cross validation

In [87]:
# cross validation - kfold
kfold = KFold(n_splits=5, shuffle=True, random_state=0)
scores = cross_val_score(xgb_model, X_train_over, y_train_over, cv=kfold)

# validation score
print("교차 검증별 정확도: ", (np.round(scores, 4)* 100))
print("평균 검증 정확도: ", (np.round(np.mean(scores), 4)*100))

교차 검증별 정확도:  [87.08 88.62 88.   83.69 85.23]
평균 검증 정확도:  86.52


### save model

In [88]:
xgb_model.save_model('xgb_mv3_nor.json')