### Multiclass classification models using neural networks
Data:<br>https://www.kaggle.com/competitions/Kannada-MNIST

### CNN

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
random_state = 42
np.random.seed(random_state)
tf.random.set_seed(random_state)
import time
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning) 

In [2]:
# load data
train = pd.read_csv('drive/MyDrive/input/Kannada-MNIST/train.csv')
test = pd.read_csv('drive/MyDrive/input/Kannada-MNIST/test.csv', index_col='id')
sample_submission = pd.read_csv('drive/MyDrive/input/Kannada-MNIST/sample_submission.csv')

# define X_train, y_train, and X_test
X_train = train.drop('label', axis=1)
y_train = train['label']
X_test = test

# normalize data from 0 to 1
X_train = X_train.astype(float)/255.
X_test = X_test.astype(float)/255.

# reshape data
X_train = X_train.to_numpy().reshape(-1, 28, 28, 1)
X_test = X_test.to_numpy().reshape(-1, 28, 28, 1)
y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)
print(f'X_train: {X_train.shape}, y_train: {y_train.shape}, X_test: {X_test.shape}')

X_train: (60000, 28, 28, 1), y_train: (60000, 10), X_test: (5000, 28, 28, 1)


In [3]:
# construct model
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(16, (3,3), padding='same', activation='relu', input_shape=(28, 28, 1)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(32, (3,3), padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Dropout(0.2),
    
    tf.keras.layers.Conv2D(64, (3,3), padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(128, (3,3), padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Dropout(0.2),
    
    tf.keras.layers.Conv2D(64, (3,3), padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(32, (3,3), padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Dropout(0.2),
    
    tf.keras.layers.Flatten(),    
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(10, activation='softmax')
])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 28, 28, 16)        160       
_________________________________________________________________
batch_normalization (BatchNo (None, 28, 28, 16)        64        
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 28, 28, 32)        4640      
_________________________________________________________________
batch_normalization_1 (Batch (None, 28, 28, 32)        128       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 32)        0         
_________________________________________________________________
dropout (Dropout)            (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 14, 14, 64)        1

In [4]:
# train val split
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size = 0.2, random_state=random_state) 
print(f'X_train: {X_train.shape}, y_train: {y_train.shape}, X_val: {X_val.shape}, y_val: {y_val.shape}')

X_train: (48000, 28, 28, 1), y_train: (48000, 10), X_val: (12000, 28, 28, 1), y_val: (12000, 10)


In [5]:
# early stopping
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=50, verbose=1, restore_best_weights=True)
# reduce learning rate on plateau
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, verbose=1, min_lr=0.0001)

# optimizer
# optimizer = tf.keras.optimizers.Adam(learning_rate=0.002,)
# optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001,)

# compile model
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

In [6]:
# image augmentation, generate batches of image data
batch_size = 1024
datagen_train = tf.keras.preprocessing.image.ImageDataGenerator(rotation_range=10,
                                                                width_shift_range=0.2,
                                                                height_shift_range=0.2,
                                                                zoom_range=0.2,)
datagen_val = tf.keras.preprocessing.image.ImageDataGenerator() 

In [7]:
# fit model, get fit time
start_time = time.time()
history = model.fit(datagen_train.flow(X_train, y_train, batch_size=batch_size),
                    steps_per_epoch=len(X_train)//batch_size,
                    epochs=500,
                    validation_data=(X_val, y_val),
                    validation_steps=50,
                    callbacks=[reduce_lr, es],
                    verbose=2)
print(f'fit time: {time.time()-start_time:.2f} seconds')

Epoch 1/500
46/46 - 42s - loss: 0.6237 - accuracy: 0.7953 - val_loss: 2.7448 - val_accuracy: 0.1083
Epoch 2/500
46/46 - 10s - loss: 0.1235 - accuracy: 0.9606 - val_loss: 3.9531 - val_accuracy: 0.1018
Epoch 3/500
46/46 - 10s - loss: 0.0780 - accuracy: 0.9752 - val_loss: 3.5160 - val_accuracy: 0.1048
Epoch 4/500
46/46 - 10s - loss: 0.0609 - accuracy: 0.9813 - val_loss: 2.9129 - val_accuracy: 0.2848
Epoch 5/500
46/46 - 10s - loss: 0.0501 - accuracy: 0.9845 - val_loss: 2.8222 - val_accuracy: 0.3063
Epoch 6/500
46/46 - 10s - loss: 0.0445 - accuracy: 0.9858 - val_loss: 2.1309 - val_accuracy: 0.5008
Epoch 7/500
46/46 - 10s - loss: 0.0386 - accuracy: 0.9879 - val_loss: 2.1520 - val_accuracy: 0.4408
Epoch 8/500
46/46 - 10s - loss: 0.0361 - accuracy: 0.9888 - val_loss: 1.8984 - val_accuracy: 0.4930
Epoch 9/500
46/46 - 10s - loss: 0.0349 - accuracy: 0.9890 - val_loss: 1.1057 - val_accuracy: 0.7426
Epoch 10/500
46/46 - 10s - loss: 0.0305 - accuracy: 0.9909 - val_loss: 0.4296 - val_accuracy: 0.8743

In [8]:
# evaluate predictions
y_pred = np.argmax(model.predict(X_test), axis=1)
submission = sample_submission.copy()
submission['label'] = y_pred
submission.head()

Unnamed: 0,id,label
0,0,3
1,1,0
2,2,2
3,3,6
4,4,7


In [9]:
submission.to_csv('drive/MyDrive/submission.csv', index=False)

In [10]:
model.save('drive/MyDrive/model_conv_imaug_v1')

INFO:tensorflow:Assets written to: drive/MyDrive/model_conv_imaug_v1/assets
