### Multiclass classification models using neural networks
Data:<br>https://www.kaggle.com/competitions/Kannada-MNIST

### Densely connected network

In [1]:
# standard library
import time
import warnings

# third-party
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

# single seed shared by NumPy, TensorFlow and the train/validation split
random_state = 42
np.random.seed(random_state)
tf.random.set_seed(random_state)

# silence DeprecationWarning noise in the cell outputs
warnings.filterwarnings('ignore', category=DeprecationWarning)

In [2]:
# read the competition files (test is indexed by its 'id' column)
train = pd.read_csv('drive/MyDrive/input/Kannada-MNIST/train.csv')
test = pd.read_csv('drive/MyDrive/input/Kannada-MNIST/test.csv', index_col='id')
sample_submission = pd.read_csv('drive/MyDrive/input/Kannada-MNIST/sample_submission.csv')


def _to_image_array(pixels):
    """Scale a frame of pixel columns by 1/255 and reshape it to (-1, 28, 28, 1)."""
    scaled = pixels.astype(float) / 255.
    return scaled.to_numpy().reshape(-1, 28, 28, 1)


# features: every column except 'label', normalized from 0 to 1 and reshaped
X_train = _to_image_array(train.drop(columns='label'))
X_test = _to_image_array(test)

# targets: one-hot encode the 10 classes
y_train = tf.keras.utils.to_categorical(train['label'], num_classes=10)

print(f'X_train: {X_train.shape}, y_train: {y_train.shape}, X_test: {X_test.shape}')

X_train: (60000, 28, 28, 1), y_train: (60000, 10), X_test: (5000, 28, 28, 1)


In [3]:
# construct model: flatten each 28x28x1 image, pass it through a stack of ReLU
# dense layers of decreasing width, and finish with a 10-way softmax.
# (Dropout between the dense layers was tried and is currently disabled.)
hidden_units = (2500, 2000, 1500, 1000, 500)

layers = [tf.keras.layers.Flatten(input_shape=(28, 28, 1))]
layers += [tf.keras.layers.Dense(units, activation='relu') for units in hidden_units]
layers.append(tf.keras.layers.Dense(10, activation='softmax'))

model = tf.keras.models.Sequential(layers)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 2500)              1962500   
_________________________________________________________________
dense_1 (Dense)              (None, 2000)              5002000   
_________________________________________________________________
dense_2 (Dense)              (None, 1500)              3001500   
_________________________________________________________________
dense_3 (Dense)              (None, 1000)              1501000   
_________________________________________________________________
dense_4 (Dense)              (None, 500)               500500    
_________________________________________________________________
dense_5 (Dense)              (None, 10)                5

In [4]:
# hold out 20% of the training data for validation.
# NOTE: this cell overwrites X_train / y_train with the reduced training part,
# so re-running it on its own would split the already-split data again;
# re-run the data-loading cell first.
split_parts = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=random_state,
)
X_train, X_val, y_train, y_val = split_parts
print(f'X_train: {X_train.shape}, y_train: {y_train.shape}, X_val: {X_val.shape}, y_val: {y_val.shape}')

X_train: (48000, 28, 28, 1), y_train: (48000, 10), X_val: (12000, 28, 28, 1), y_val: (12000, 10)


In [5]:
# stop once val_loss has not improved for 30 epochs and roll back to the best weights
es = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=30,
    restore_best_weights=True,
    verbose=1,
)

# halve the learning rate after 5 epochs without val_loss improvement (floor: 1e-4)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=0.0001,
    verbose=1,
)

# optimizer alternatives that were tried and are currently disabled:
# optimizer = tf.keras.optimizers.Adam(learning_rate=0.002,)
# optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001,)

# compile with the default-configured RMSprop optimizer
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [6]:
# number of images per generated batch
batch_size = 1024

# random transforms applied to the training images
augmentation = dict(
    rotation_range=10,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
)
datagen_train = tf.keras.preprocessing.image.ImageDataGenerator(**augmentation)

# validation generator without any augmentation
datagen_val = tf.keras.preprocessing.image.ImageDataGenerator()

In [7]:
# fit model on augmented batches, get fit time
start_time = time.time()
history = model.fit(
    # augmented training batches generated on the fly
    datagen_train.flow(X_train, y_train, batch_size=batch_size),
    # whole batches only; the trailing partial batch is not counted
    steps_per_epoch=len(X_train)//batch_size,
    # upper bound; EarlyStopping (es) normally ends training sooner
    epochs=500,
    # validation data is a pair of in-memory arrays, so no `validation_steps`
    # is passed: Keras documents that argument as only relevant for
    # dataset-style validation input, and on arrays it can cap the number of
    # validation batches. val_loss drives both callbacks below, so it must be
    # computed on the full validation split.
    validation_data=(X_val, y_val),
    callbacks=[reduce_lr, es],
    verbose=2,
)
print(f'fit time: {time.time()-start_time:.2f} seconds')

Epoch 1/500
46/46 - 13s - loss: 2.2026 - accuracy: 0.2895 - val_loss: 1.0990 - val_accuracy: 0.6785
Epoch 2/500
46/46 - 10s - loss: 0.9147 - accuracy: 0.6806 - val_loss: 0.5019 - val_accuracy: 0.8266
Epoch 3/500
46/46 - 10s - loss: 0.4609 - accuracy: 0.8457 - val_loss: 0.2179 - val_accuracy: 0.9231
Epoch 4/500
46/46 - 10s - loss: 0.3097 - accuracy: 0.9011 - val_loss: 0.1215 - val_accuracy: 0.9632
Epoch 5/500
46/46 - 10s - loss: 0.2308 - accuracy: 0.9280 - val_loss: 0.0777 - val_accuracy: 0.9758
Epoch 6/500
46/46 - 10s - loss: 0.2070 - accuracy: 0.9363 - val_loss: 0.0768 - val_accuracy: 0.9749
Epoch 7/500
46/46 - 9s - loss: 0.1711 - accuracy: 0.9465 - val_loss: 0.1312 - val_accuracy: 0.9605
Epoch 8/500
46/46 - 9s - loss: 0.1582 - accuracy: 0.9510 - val_loss: 0.0896 - val_accuracy: 0.9712
Epoch 9/500
46/46 - 9s - loss: 0.1420 - accuracy: 0.9557 - val_loss: 0.0707 - val_accuracy: 0.9808
Epoch 10/500
46/46 - 10s - loss: 0.1367 - accuracy: 0.9588 - val_loss: 0.0539 - val_accuracy: 0.9852
Ep

In [8]:
# predict class probabilities for the test images and keep the most likely class
test_probabilities = model.predict(X_test)
y_pred = test_probabilities.argmax(axis=1)

# fill the sample submission's 'label' column with the predictions
# (relies on sample_submission rows being in the same order as the test rows)
submission = sample_submission.assign(label=y_pred)
submission.head()

Unnamed: 0,id,label
0,0,3
1,1,0
2,2,2
3,3,6
4,4,7


In [9]:
# write the submission file without the DataFrame index column
submission.to_csv(path_or_buf='drive/MyDrive/submission.csv', index=False)

In [10]:
# persist the trained model to Google Drive
model.save(filepath='drive/MyDrive/model_grad_imaug_v2')

INFO:tensorflow:Assets written to: drive/MyDrive/model_grad_imaug_v2/assets
