In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder

from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.callbacks import *

In [8]:
# Read the three competition CSVs from the current working directory:
# labelled training rows, unlabelled test rows, and the submission template.
train, test, submission = map(
    pd.read_csv,
    ('train.csv', 'test.csv', 'sample_submission.csv'),
)

In [3]:
# Feature columns: everything except the first column and, for the training
# frame, the last column (presumably an id and the label -- confirm against
# the CSV headers).
X = train.iloc[:, 1:-1]
target = test.iloc[:, 1:]  # NOTE: `target` holds the *test features*, not labels

# Lay each row out as an 8 x 4 single-channel "image" for the Conv2D layers.
X = X.to_numpy().reshape(-1, 8, 4, 1)
target = target.to_numpy().reshape(-1, 8, 4, 1)

# One-hot encode the labels. The `sparse=` keyword was deprecated in
# scikit-learn 1.2 and removed in 1.4, so densify the default sparse output
# explicitly; this works on every scikit-learn version and yields the same
# float64 array. Column j of `y` corresponds to `ohe.categories_[0][j]`.
ohe = OneHotEncoder()
y = ohe.fit_transform(train[['target']]).toarray()

# Stratified 10-fold splitter with a fixed seed so the folds are repeatable.
skf = StratifiedKFold(n_splits = 10, random_state = 1, shuffle = True)

In [4]:
# Both callbacks track validation accuracy, where larger is better.
watch_val_acc = dict(monitor = 'val_acc', mode = 'max')

# Stop a fold once validation accuracy has not improved for 50 epochs.
es = EarlyStopping(patience = 50, verbose = 0, **watch_val_acc)
# Scale the learning rate by 0.2 after 5 epochs without improvement.
rlrp = ReduceLROnPlateau(factor = 0.2, patience = 5, **watch_val_acc)

In [5]:
def _build_cnn(input_shape = (8, 4, 1), n_classes = 4):
    """Build and compile a fresh, untrained CNN classifier.

    The network is four same-padded Conv2D + BatchNormalization stages
    (64, 32, 16 and 32 filters, ELU activations), global average pooling,
    a 16-unit ReLU layer and a softmax output.

    Args:
        input_shape: Shape of a single sample, without the batch dimension.
        n_classes: Number of units in the softmax output layer.

    Returns:
        A compiled ``Sequential`` model using RMSprop, categorical
        cross-entropy loss and the ``'acc'`` metric.
    """
    model = Sequential()
    # Rectangular kernels gave better results than square ones.
    model.add(Conv2D(64, (2, 4), padding = "same", activation = 'elu', input_shape = input_shape))
    model.add(BatchNormalization())

    for filters, kernel in ((32, (2, 4)), (16, (3, 4)), (32, (3, 4))):
        model.add(Conv2D(filters, kernel, padding = "same", activation = 'elu'))
        model.add(BatchNormalization())

    model.add(GlobalAveragePooling2D())
    model.add(Dense(16, activation = 'relu'))
    model.add(Dense(n_classes, activation = 'softmax'))

    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and is ignored or rejected by newer Keras releases.
    model.compile(optimizer = RMSprop(learning_rate = 0.04), loss = 'categorical_crossentropy', metrics = ['acc'])
    return model


n_classes = y.shape[1]  # one column per class in the one-hot labels

cnn_acc = []                                          # validation accuracy of each fold
cnn_pred = np.zeros((target.shape[0], n_classes))     # fold-averaged test probabilities
for i, (tr_idx, val_idx) in enumerate(skf.split(X, train.target)) :
    print(f'{i + 1} Fold Training.....')
    tr_x, tr_y = X[tr_idx], y[tr_idx]
    val_x, val_y = X[val_idx], y[val_idx]

    # A brand-new model per fold so no weights leak between folds.
    cnn = _build_cnn(input_shape = X.shape[1:], n_classes = n_classes)

    # Per-fold checkpoint: keep only the epoch with the best validation accuracy.
    mc = ModelCheckpoint(f'model_{i + 1}.h5', save_best_only = True, monitor = 'val_acc', mode = 'max', verbose = 0)

    cnn.fit(tr_x, tr_y, validation_data = (val_x, val_y), epochs = 100, batch_size = 32, callbacks = [es, mc, rlrp], verbose = 0)

    # Reload the best checkpoint rather than using the last-epoch weights.
    best = load_model(f'model_{i + 1}.h5')
    # Predicted class probabilities for the held-out fold.
    val_pred = best.predict(val_x)
    # The most probable column is the predicted class.
    val_cls = np.argmax(val_pred, axis = 1)
    # Accuracy of this fold on its validation split.
    fold_cnn_acc = accuracy_score(np.argmax(val_y, axis = 1), val_cls)
    cnn_acc.append(fold_cnn_acc)
    print(f'{i + 1} Fold ACC of CNN = {fold_cnn_acc}\n')

    # Ensemble: each fold contributes 1/n_splits of its test probabilities,
    # so `cnn_pred` ends up as the mean over all folds.
    fold_pred = best.predict(target) / skf.n_splits
    cnn_pred += fold_pred

1 Fold Training.....
1 Fold ACC of CNN = 0.8547008547008547

2 Fold Training.....
2 Fold ACC of CNN = 0.905982905982906

3 Fold Training.....
3 Fold ACC of CNN = 0.9188034188034188

4 Fold Training.....
4 Fold ACC of CNN = 0.8803418803418803

5 Fold Training.....
5 Fold ACC of CNN = 0.8803418803418803

6 Fold Training.....
6 Fold ACC of CNN = 0.8884120171673819

7 Fold Training.....
7 Fold ACC of CNN = 0.8841201716738197

8 Fold Training.....
8 Fold ACC of CNN = 0.8798283261802575

9 Fold Training.....
9 Fold ACC of CNN = 0.9098712446351931

10 Fold Training.....
10 Fold ACC of CNN = 0.8497854077253219



In [6]:
# Mean validation accuracy across all cross-validation folds.
np.asarray(cnn_acc).mean()

0.8852188107552914

In [10]:
# Pick the most probable class per test row from the fold-averaged
# probabilities, then map that column index back to the original label via
# the encoder's category order. When the training labels are already
# 0..n-1 this is identical to using the argmax index directly.
pred_idx = np.argmax(cnn_pred, axis = 1)
submission['target'] = ohe.categories_[0][pred_idx]

submission.to_csv('submission.csv',index=False)

# Only the last expression of a cell is rendered, so preview the result here
# (a few rows rather than the whole frame).
submission.head()