In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline 

from tensorflow.keras import callbacks,optimizers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout,Conv2D, MaxPooling2D, Flatten,BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn.model_selection import KFold

In [10]:
# Read the raw CSVs. The first training column is used as the label and the
# remaining columns as pixel values; the test file is used as pixels only.
train = pd.read_csv("../dataset/train.csv")
test = pd.read_csv("../dataset/test.csv")

# Labels are kept as an int32 pandas Series.
y = train.iloc[:, 0].astype('int32')

# Scale pixels by 1/255 (assumes 8-bit intensities -- confirm against the
# dataset) and reshape every row into a single-channel 28x28 image.
# `astype` already returns a new object, so no explicit copy is needed.
X = (train.iloc[:, 1:].astype('float32') / 255.0).to_numpy().reshape(-1, 28, 28, 1)
X_test = (test.astype('float32') / 255.0).to_numpy().reshape(-1, 28, 28, 1)

In [11]:
def build_model(input_shape, learning_rate):
    """Create and compile the CNN classifier.

    The network has two convolutional stages (32 then 64 filters). Each stage
    is two 3x3 ReLU convolutions, each followed by batch normalization, then
    2x2 max pooling and 25% dropout. A 256-unit dense head with batch
    normalization and 50% dropout feeds a 10-way softmax output.

    Args:
        input_shape: Shape of one input sample, given to the first layer.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        The compiled ``Sequential`` model (sparse categorical cross-entropy
        loss, accuracy metric).
    """
    model = Sequential()

    for stage, filters in enumerate((32, 64)):
        # Only the very first layer of the network declares the input shape.
        first_layer_kwargs = {'input_shape': input_shape} if stage == 0 else {}
        model.add(Conv2D(filters, (3, 3), activation='relu', **first_layer_kwargs))
        model.add(BatchNormalization())
        model.add(Conv2D(filters, (3, 3), activation='relu'))
        model.add(BatchNormalization())
        model.add(MaxPooling2D((2, 2)))
        model.add(Dropout(0.25))

    # Classification head.
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax'))

    adam = optimizers.Adam(learning_rate=learning_rate)
    model.compile(
        optimizer=adam,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

In [12]:
def early_stopping_set(patience, min_delta):
    """Build the early-stopping callback used for each fold.

    Args:
        patience: Forwarded to ``EarlyStopping(patience=...)``.
        min_delta: Forwarded to ``EarlyStopping(min_delta=...)``.

    Returns:
        A ``callbacks.EarlyStopping`` instance created with
        ``restore_best_weights=True``. The monitored quantity is not set
        here, so the Keras default applies.
    """
    return callbacks.EarlyStopping(
        min_delta=min_delta,
        patience=patience,
        restore_best_weights=True,
    )

In [13]:
def data_generator_set(rotation_range, zoom_range, width_shift_range, height_shift_range):
    """Build the image augmentation generator.

    Each argument is forwarded unchanged to the ``ImageDataGenerator``
    keyword of the same name; no other generator option is set.

    Returns:
        The configured ``ImageDataGenerator``.
    """
    augmentation = {
        'rotation_range': rotation_range,
        'zoom_range': zoom_range,
        'width_shift_range': width_shift_range,
        'height_shift_range': height_shift_range,
    }
    return ImageDataGenerator(**augmentation)

In [20]:
def train_onefold(model, X_train, y_train, X_val, y_val, epoches, batch_size, early_stop_params, data_gen_params):
    """Train ``model`` on one fold with augmented batches and early stopping.

    Args:
        model: Compiled Keras model. It is trained in place and returned.
        X_train, y_train: Training images and labels for this fold.
        X_val, y_val: Held-out images and labels, passed as ``validation_data``.
        epoches: Maximum number of epochs (forwarded to ``fit`` as ``epochs``).
        batch_size: Size of the augmented training batches; also used as the
            validation batch size.
        early_stop_params: Keyword arguments for ``early_stopping_set``.
        data_gen_params: Keyword arguments for ``data_generator_set``.

    Returns:
        The same ``model`` object after training.
    """
    early_stopping = early_stopping_set(**early_stop_params)

    datagen = data_generator_set(**data_gen_params)
    # NOTE(review): Keras documents fit() as needed only for the featurewise /
    # ZCA options, none of which data_generator_set enables. Kept so behavior
    # is unchanged if such options are added later -- confirm before removing.
    datagen.fit(X_train)

    # With generator input the batch size is decided by flow(), not by
    # model.fit(). The caller's value therefore has to be passed here;
    # previously a hard-coded 64 silently overrode it.
    train_batches = datagen.flow(X_train, y_train, batch_size=batch_size)

    model.fit(
        train_batches,
        epochs=epoches,
        validation_data=(X_val, y_val),
        # `batch_size` is not a valid/used fit() argument for generator input,
        # so only the validation arrays get an explicit batch size.
        validation_batch_size=batch_size,
        callbacks=[early_stopping],
    )
    return model

In [15]:
def train_k_fold(X, y, n_splits, shuffle, random_state, learning_rates, input_shape, model_train_params, test_data=None):
    """Train one model per K-fold split and collect test-set predictions.

    For every split a fresh model is built with ``build_model``, trained with
    ``train_onefold``, evaluated on the held-out part, and used to predict on
    the test data. Per-fold and mean validation accuracy are printed.

    Args:
        X: Training samples, indexable along the first axis.
        y: Training labels. Converted to a NumPy array so that rows are
            always selected by position (a pandas Series would otherwise be
            indexed by label).
        n_splits: Number of folds.
        shuffle: Whether ``KFold`` shuffles before splitting.
        random_state: Seed for the shuffle; ignored when ``shuffle`` is false.
        learning_rates: One learning rate per fold (sequence of length
            >= ``n_splits``), or a single number used for every fold.
        input_shape: Per-sample input shape given to ``build_model``.
        model_train_params: Extra keyword arguments for ``train_onefold``.
        test_data: Samples to predict on after each fold. When omitted, the
            notebook-level ``X_test`` variable is used.

    Returns:
        A list with one prediction array per fold.

    Raises:
        ValueError: If fewer learning rates than folds are supplied.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    if test_data is None:
        # Backward-compatible fallback to the global defined in the data cell.
        test_data = X_test

    if np.isscalar(learning_rates):
        learning_rates = [learning_rates] * n_splits
    if len(learning_rates) < n_splits:
        # Fail before any training instead of after several folds have run.
        raise ValueError(
            f"need at least {n_splits} learning rates, got {len(learning_rates)}"
        )

    # scikit-learn rejects a random_state when shuffling is disabled.
    kf = KFold(
        n_splits=n_splits,
        shuffle=shuffle,
        random_state=random_state if shuffle else None,
    )

    accuracies = []
    test_predictions = []
    for fold, (train_idx, val_idx) in enumerate(kf.split(X), start=1):
        print(f"training {fold} flod")
        X_train, X_val = X[train_idx], X[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]

        model = build_model(input_shape, learning_rates[fold - 1])
        trained_model = train_onefold(model, X_train, y_train, X_val, y_val, **model_train_params)

        test_predictions.append(trained_model.predict(test_data))

        # evaluate() returns [loss, accuracy] given the metrics set in build_model.
        scores = trained_model.evaluate(X_val, y_val, verbose=0)
        print(f'fold{fold} accuracy: {scores[1]:.4f}')
        accuracies.append(scores[1])

    print(f'{len(accuracies)} folds mean accuracy: {np.mean(accuracies):.4f}')
    return test_predictions

In [17]:
# Keyword arguments unpacked into train_k_fold(...).
# - learning_rates: indexed by fold number inside train_k_fold, so it needs
#   one entry per fold.
# - model_train_params: unpacked into train_onefold(...); the key "epoches"
#   is spelled to match that function's parameter name.
params = dict(
    learning_rates=[0.0008, 0.0004, 0.0008, 0.0006, 0.0003],
    input_shape=(28, 28, 1),
    model_train_params=dict(
        epoches=80,
        batch_size=256,
        # Forwarded to early_stopping_set(...).
        early_stop_params=dict(
            patience=20,
            min_delta=0.001,
        ),
        # Forwarded to data_generator_set(...).
        data_gen_params=dict(
            rotation_range=10,
            zoom_range=0.2,
            width_shift_range=0.1,
            height_shift_range=0.1,
        ),
    ),
)

In [None]:
# Train one model per fold (5 shuffled folds, seed 42) and collect each
# model's predictions on the test set.
test_predictions = train_k_fold(X, y, n_splits=5, shuffle=True, random_state=42, **params)

# Ensemble: average the per-fold prediction arrays element-wise, then take
# the highest-scoring class for every test sample.
final_prediction = np.asarray(test_predictions).mean(axis=0)
final_labels = final_prediction.argmax(axis=1)

In [19]:
# Build the submission table: a 1-based ImageId column followed by the
# predicted label, then write it without the DataFrame index.
submission = pd.DataFrame({'Label': final_labels})
submission.insert(0, 'ImageId', range(1, len(submission) + 1))

submission.to_csv('submission_kfold_ensemble.csv', index=False)