In [8]:
from imblearn.over_sampling import SMOTE
import numpy as np
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split, KFold, cross_val_score

### load csv data

In [9]:
X_train = np.loadtxt('../data/csvs/csv_4/train_x_mv4_22_nor.csv', delimiter=',', skiprows=1)
y_train = np.loadtxt('../data/csvs/csv_4/train_y_mv4_22_nor.csv', delimiter=',', skiprows=1)
X_test = np.loadtxt('../data/csvs/csv_4/test_x_mv4_22_nor.csv', delimiter=',', skiprows=1)
y_test = np.loadtxt('../data/csvs/csv_4/test_y_mv4_22_nor.csv', delimiter=',', skiprows=1)

### keypoint noralization

In [10]:
X_train = np.array(X_train)
X_train[:, ::2] /= 1280
X_train[:, 1::2] /= 720
X_train

array([[0.45546875, 0.62916667, 0.46484375, ..., 0.18333333, 0.        ,
        0.        ],
       [0.375     , 0.4       , 0.3734375 , ..., 0.06111111, 0.        ,
        0.        ],
       [0.37734375, 0.40416667, 0.3796875 , ..., 0.075     , 0.        ,
        0.        ],
       ...,
       [0.46484375, 0.51805556, 0.50078125, ..., 0.        , 0.246875  ,
        0.51527778],
       [0.4828125 , 0.55694444, 0.515625  , ..., 0.        , 0.        ,
        0.        ],
       [0.47109375, 0.44583333, 0.51171875, ..., 0.45      , 0.21953125,
        0.39861111]])

In [11]:
X_test = np.array(X_test)
X_test[:, ::2] /= 1280
X_test[:, 1::2] /= 720


### over sampling using SMOTE

In [42]:
smote = SMOTE(random_state=11)
X_train_over, y_train_over = smote.fit_resample(X_train, y_train)

### split train, validation data

In [43]:
X_train, X_val, y_train, y_val = train_test_split(X_train_over, y_train_over, test_size=0.10)

### train with XGBClassifier

In [17]:
model = XGBClassifier(n_estimators=500, learning_rate=0.18, max_depth=4, random_state=32)
xgb_model = model.fit(X_train, y_train, eval_metric="logloss", 
                      early_stopping_rounds=100, eval_set=([X_test, y_test],), verbose=2)
print(xgb_model)

[0]	validation_0-logloss:0.55281
[2]	validation_0-logloss:0.37790
[4]	validation_0-logloss:0.27492
[6]	validation_0-logloss:0.20955
[8]	validation_0-logloss:0.16613
[10]	validation_0-logloss:0.13694
[12]	validation_0-logloss:0.11690
[14]	validation_0-logloss:0.10333
[16]	validation_0-logloss:0.09284
[18]	validation_0-logloss:0.08582
[20]	validation_0-logloss:0.07983
[22]	validation_0-logloss:0.07566
[24]	validation_0-logloss:0.07249
[26]	validation_0-logloss:0.06957
[28]	validation_0-logloss:0.06801
[30]	validation_0-logloss:0.06582
[32]	validation_0-logloss:0.06415
[34]	validation_0-logloss:0.06340
[36]	validation_0-logloss:0.06245
[38]	validation_0-logloss:0.06195
[40]	validation_0-logloss:0.06127
[42]	validation_0-logloss:0.06058
[44]	validation_0-logloss:0.06005
[46]	validation_0-logloss:0.06010
[48]	validation_0-logloss:0.06004
[50]	validation_0-logloss:0.05990
[52]	validation_0-logloss:0.05969
[54]	validation_0-logloss:0.05979
[56]	validation_0-logloss:0.05964
[58]	validation_0-l

### predict test

In [18]:
expected_y = y_test
pred_y = xgb_model.predict(X_test)

### scores

In [19]:
print(classification_report(expected_y, pred_y))
accuracy = accuracy_score(expected_y, pred_y)
print("Accuracy: %.2f%%" % (accuracy * 100))

              precision    recall  f1-score   support

           0       1.00      0.91      0.95        22
           1       1.00      0.81      0.89        21
           2       1.00      0.71      0.83        21
           3       0.88      0.88      0.88        26
           4       0.94      0.77      0.85        22
           5       1.00      0.60      0.75        15
           6       0.82      0.64      0.72        14
           7       1.00      0.71      0.83        17
           8       0.91      0.74      0.82        27
           9       1.00      0.72      0.84        18
          10       1.00      0.55      0.71        11
          11       1.00      0.93      0.97        15
          12       1.00      0.60      0.75        20

   micro avg       0.96      0.75      0.84       249
   macro avg       0.97      0.74      0.83       249
weighted avg       0.96      0.75      0.84       249
 samples avg       0.75      0.75      0.75       249

Accuracy: 74.30%


  _warn_prf(average, modifier, msg_start, len(result))


### cross validation

In [21]:
# cross validation - kfold
kfold = KFold(n_splits=5, shuffle=True, random_state=0)
scores = cross_val_score(xgb_model, X_train, y_train, cv=kfold)

# validation score
print("교차 검증별 정확도: ", (np.round(scores, 4)* 100))
print("평균 검증 정확도: ", (np.round(np.mean(scores), 4)*100))

교차 검증별 정확도:  [78.54 78.05 80.49 78.92 78.43]
평균 검증 정확도:  78.89


### save model

In [55]:
xgb_model.save_model('xgb_mv4.json')