In [None]:
## Upload File ##
from google.colab import files

# files.upload() returns a dict mapping each uploaded file name to its raw
# bytes. Binding it to a name keeps the cell from echoing the whole archive's
# byte contents as its output; only the uploaded file names are shown.
uploaded = files.upload()
print(list(uploaded))

In [None]:
!unzip data.zip -d data

In [7]:
## import library ##
import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import os
import warnings
warnings.filterwarnings("ignore")

import tensorflow as tf
from keras.models import Model
from keras.layers import Input, GlobalAveragePooling2D, Dropout, Dense, Flatten, Conv2D, BatchNormalization, MaxPooling2D

In [8]:
def seed_everything(seed: int = 42):
    """Seed Python's `random`, NumPy and TensorFlow, and set PYTHONHASHSEED.

    Args:
        seed: Value applied to every seeded source. Defaults to 42.
    """
    # Note: Python reads PYTHONHASHSEED at interpreter start-up, so setting it
    # here affects child processes rather than the already-running kernel.
    os.environ["PYTHONHASHSEED"] = str(seed)
    for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
        seed_fn(seed)

seed_everything()

In [9]:
## Load Data ##
# Read the train, test and submission CSV files from ./data in one pass.
train, test, submission = (
    pd.read_csv(csv_path)
    for csv_path in ('./data/train.csv', './data/test.csv', './data/submission.csv')
)

In [None]:
# (rows, columns) of the training frame, followed by a three-row preview.
print(train.shape)
train.head(n=3)

In [None]:
# (rows, columns) of the test frame, followed by a three-row preview.
print(test.shape)
test.head(n=3)

In [12]:
# print(train['digit'].value_counts())

In [13]:
# group1 = train.groupby('digit').get_group(0).reset_index(drop=True)
# group2 = train.groupby('digit').get_group(1).reset_index(drop=True)
# group3 = train.groupby('digit').get_group(2).reset_index(drop=True)
# group4= train.groupby('digit').get_group(3).reset_index(drop=True)
# group5= train.groupby('digit').get_group(4).reset_index(drop=True)
# group6= train.groupby('digit').get_group(5).reset_index(drop=True)
# group7= train.groupby('digit').get_group(6).reset_index(drop=True)
# group8= train.groupby('digit').get_group(7).reset_index(drop=True)
# group9= train.groupby('digit').get_group(8).reset_index(drop=True)
# group10= train.groupby('digit').get_group(9).reset_index(drop=True)
# groups =[group1, group2, group3, group4, group5, group6, group7, group8, group9, group10]

In [14]:
# ## Image example of each group ##
# fig1 = plt.figure(figsize=(10,10))

# for i in range(len(groups)):
    
#     digit = groups[i].loc[0, 'digit']
#     letter = groups[i].loc[0, 'letter']
#     img = groups[i].loc[0, '0':].values.reshape(28,28).astype(int)
    
#     ax = fig1.add_subplot(2,5,i+1)
#     ax.imshow(img)
#     title = f"digit: {digit}, letter: {letter}"
#     plt.title(title)
    
#     fig1.tight_layout()

# plt.savefig('fig1.png')

In [18]:
## Preprocess data ##
# Number of target classes. Fixed instead of derived from the labels that
# happen to be present, so the one-hot width always matches the model's
# 10-unit softmax output even if some digit is missing from `train`.
NUM_CLASSES = 10

# Every column except the id / label columns is treated as pixel data and
# reshaped to (samples, 28, 28, 1).
x_train = train.drop(['id', 'digit', 'letter'], axis=1).values
x_train = x_train.reshape(-1, 28, 28, 1)
# presumably pixels are 8-bit intensities (0-255), so this scales to [0, 1] -- TODO confirm
x_train = x_train/255

y = train['digit']
# One-hot encode the labels: row i gets a 1 in column y[i].
y_train = np.zeros((len(y), NUM_CLASSES))
y_train[np.arange(len(y)), y.to_numpy()] = 1

In [45]:
## Model ##
def custom_model():
  """Build the (uncompiled) CNN classifier for 28x28x1 inputs.

  Three convolutional stages (128, 256 and 512 filters), each closed by a
  2x2 max-pool, are followed by Flatten, two ReLU dense layers (1024, 512)
  and a 10-way softmax.

  Returns:
    A Keras `Model` mapping the input tensor to the softmax output.
  """
  def conv3x3(tensor, filters, batch_norm=True):
    # 3x3 'same'-padded ReLU convolution, optionally followed by BatchNormalization.
    tensor = Conv2D(filters, kernel_size=3, padding='same', activation='relu')(tensor)
    if batch_norm:
      tensor = BatchNormalization()(tensor)
    return tensor

  inputs = Input(shape= (28, 28, 1))

  # One inner list per stage; each entry is (filters, followed_by_batch_norm).
  # NOTE(review): the last conv of stage 3 has no BatchNormalization, unlike
  # every other conv -- confirm this is intentional.
  stages = [
    [(128, True), (128, True)],
    [(256, True), (256, True)],
    [(512, True), (512, True), (512, False)],
  ]

  features = inputs
  for stage in stages:
    for filters, batch_norm in stage:
      features = conv3x3(features, filters, batch_norm)
    features = MaxPooling2D((2,2))(features)

  # Classifier head.
  features = Flatten()(features)
  for units in (1024, 512):
    features = Dense(units, activation = 'relu')(features)
  probabilities = Dense(10, activation='softmax')(features)

  return Model(inputs, probabilities)

In [None]:
# Build an instance of the network and print its layer-by-layer summary.
model = custom_model()
model.summary()

In [None]:
## Training Model ##
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

# Start from a freshly initialised network so re-running this cell does not
# continue training a previously fitted model.
model = custom_model()

# NOTE(review): `decay` is a legacy Adam argument; newer Keras optimizer
# releases may reject it -- confirm against the installed version.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, decay = 0.01)

# Both callbacks watch the training 'accuracy' metric (fit() is given no
# validation data). Accuracy is higher-is-better, so the mode must be 'max':
# with mode='min' the plateau detector counts a rising accuracy as "no
# improvement" and halves the learning rate while the model is still improving.
rlrop = ReduceLROnPlateau(monitor='accuracy', mode='max', patience=10, factor=0.5, min_lr=1e-6, verbose=0)
# Keeps only the weights of the best-accuracy epoch.
# NOTE(review): the path is under /content/drive, which presumably needs Google
# Drive to be mounted first; no mount call is visible in this notebook -- confirm.
cp = ModelCheckpoint(monitor='accuracy', mode='max', save_best_only=True, filepath='/content/drive/MyDrive/computer_vision_competition/model.ckpt', save_weights_only=True, verbose=0)
callback_list = [rlrop, cp]

model.compile(optimizer = optimizer, loss = 'categorical_crossentropy', metrics = ['accuracy'])
model.fit(x_train, y_train, epochs=30, callbacks= callback_list)

In [None]:
## Eval ##
# Build the test tensor: drop the non-pixel columns, reshape to
# (samples, 28, 28, 1) and divide by 255, mirroring the training preprocessing.
pixel_frame = test.drop(['id', 'letter'], axis=1)
x_test = pixel_frame.values.reshape(-1, 28, 28, 1) / 255

# Re-read the submission template so this cell can be re-run on its own,
# then fill 'digit' with the highest-probability class for each row.
submission = pd.read_csv('./data/submission.csv')
class_probs = model.predict(x_test)
submission['digit'] = class_probs.argmax(axis=1)
submission.head()

In [53]:
# Write the filled-in submission; index=False keeps the DataFrame index out of the CSV.
SUBMISSION_PATH = '/content/drive/MyDrive/computer_vision_competition/sub.csv'
submission.to_csv(SUBMISSION_PATH, index=False)