In [53]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
# from local_utils import detect_lp
from os.path import splitext,basename
from keras.models import model_from_json
import glob
from PIL import Image
import pandas as pd

from tensorflow import keras
from tensorflow.keras import layers

In [54]:
# Model / data parameters.
num_classes = 10            # length of each one-hot label vector / softmax output
input_shape = (28, 28, 1)   # shape of a single sample handed to the network

# Read the labelled CSV and split it by row order: the first 85% of the rows
# become `train`, everything after that becomes `test`.
path = '/content/trainset.csv'
df = pd.read_csv(path)
n = len(df)

split_at = int(n * 0.85)
train = df.iloc[:split_at]
test = df.iloc[split_at:]

# Displayed as the cell result: number of training rows.
len(train)

23800

In [55]:
# Separate the targets from the inputs. `pop` hands back the 'label' column
# and removes it from the frame in place, so the columns that remain in
# `train` / `test` afterwards are the model inputs.
y_train = train.pop('label').values
x_train = train.values

y_test = test.pop('label').values
x_test = test.values


In [56]:
# Turn each flat row into a 28x28 single-channel image: (samples, 28, 28, 1).
#
# Both arrays get exactly one explicit 4-D reshape. Previously x_test was
# reshaped to (n, 28, 28, 1) and then passed through np.expand_dims as well,
# which left it 5-D -- (n, 28, 28, 1, 1) -- and inconsistent with x_train.
# Using the array's own leading dimension as the sample count also makes this
# cell safe to re-run: a second execution leaves the shapes unchanged instead
# of appending yet another axis.
x_train = x_train.reshape(len(x_train), 28, 28, 1)
x_test = x_test.reshape(len(x_test), 28, 28, 1)

print("x_train shape:", x_train.shape)
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

x_train shape: (23800, 28, 28, 1)
23800 train samples
4200 test samples


In [57]:
# One-hot encode the label vectors: every label becomes a row of length
# num_classes (binary class matrix).
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)
print(y_train.shape)

(23800, 10)


In [61]:
# Layer stack, listed in the order the data flows through it.
cnn_layers = [
    keras.Input(shape=input_shape),
    # Two convolution + max-pooling stages (32 filters, then 64 filters).
    layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),
    # Classifier head: flatten, dropout (rate 0.5), softmax over num_classes.
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation="softmax"),
]

model = keras.Sequential(cnn_layers)

model.summary()


Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 11, 11, 64)        18496     
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 5, 5, 64)          0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 1600)              0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 1600)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)               

In [62]:
# Training hyper-parameters.
batch_size = 128
epochs = 25

# Categorical cross-entropy pairs with the one-hot targets and softmax output.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# validation_split=0.1 sets aside a tenth of the training arrays for validation.
# The call stays the last expression so the returned History object is shown
# as the cell result.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.1,
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<tensorflow.python.keras.callbacks.History at 0x7fc7d7f80208>

In [63]:
# Evaluate on the held-out split; the code below reads the loss from
# score[0] and the accuracy from score[1].
score = model.evaluate(x_test, y_test, verbose=0)

test_loss, test_accuracy = score[0], score[1]
print("Test loss:", test_loss)
print("Test accuracy:", test_accuracy)

Test loss: 0.11045809835195541
Test accuracy: 0.9669047594070435


In [64]:
# Load the submission set and shape it like the training inputs:
# (samples, 28, 28) with a trailing axis of size 1 appended.
test_path = '/content/testset.csv'
raw_test_data = pd.read_csv(test_path)

test_data = raw_test_data.values.reshape(len(raw_test_data), 28, 28)[..., np.newaxis]
print(test_data.shape)

(14000, 28, 28, 1)


In [65]:
# Predicted class index for every test sample.
# `Sequential.predict_classes` was deprecated and later removed from
# TensorFlow/Keras; for a softmax output the documented replacement is the
# argmax over the last axis of `predict`, which yields the same class indices.
test_values = np.argmax(model.predict(test_data), axis=-1)

In [67]:
# Build the submission table: a 1-based ImageID next to each predicted Label.
# The row count is taken from the predictions themselves rather than a
# hard-coded 14000, so the cell keeps working if the test set size changes.
n_predictions = len(test_values)

test_values = pd.DataFrame(test_values, columns=['Label'])

# IDs run 1..n_predictions; the default 0-based index lines up with
# test_values for the join below.
imageID = pd.DataFrame({'ImageID': range(1, n_predictions + 1)})

test_submission = imageID.join(test_values)
test_submission.head()

Unnamed: 0,ImageID,Label
0,1,3
1,2,1
2,3,3
3,4,7
4,5,0


In [68]:
# Write the submission file; index=False keeps the row index out of the CSV.
test_submission.to_csv(path_or_buf='test_values.csv', index=False)