In [None]:
import cv2
import numpy as np
import pandas as pd
import seaborn as sn
#import visualkeras as vk
from keras.utils import np_utils
from keras.datasets import mnist
from sklearn.utils import shuffle
from matplotlib import pyplot as plt
from keras.optimizers import SGD, Adam
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.callbacks import ModelCheckpoint
from sklearn.metrics import confusion_matrix
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Conv2D, Activation, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization

In [None]:
# Mount Google Drive into the Colab filesystem; the CSV read in the next cell
# lives under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
# Read the A-Z handwritten CSV as float32 and rename column '0' to "label".
# The rename is chained (no inplace mutation) so re-running the cell always
# starts from the file on disk.
dataset = (
    pd.read_csv('/content/drive/MyDrive/Colab Notebooks/A_Z Handwritten Data.csv')
    .astype('float32')
    .rename(columns={'0': "label"})
)
print(dataset.head())

# Data Preparation

In [None]:
# Separate the letter targets from the remaining (feature) columns.
letter_y = dataset["label"]
letter_x = dataset.drop(columns="label")

# MNIST digits, already split into train and test by Keras.
(digit_train_x, digit_train_y), (digit_test_x, digit_test_y) = mnist.load_data()

In [None]:
# Convert the feature frame to a plain ndarray. np.asarray accepts both a
# DataFrame and an ndarray, so re-running this cell is harmless (the previous
# `.values` access raised AttributeError on a second run).
letter_x = np.asarray(letter_x)

In [None]:
# Report (features, targets) shapes for the letters and both MNIST splits.
for features, targets in (
    (letter_x, letter_y),
    (digit_train_x, digit_train_y),
    (digit_test_x, digit_test_y),
):
  print(features.shape, targets.shape)

In [None]:
# Merge the MNIST train and test splits into one pool; the notebook makes its
# own train/test split later on the combined letters + digits data.
digit_data = np.concatenate([digit_train_x, digit_test_x], axis=0)
digit_target = np.concatenate([digit_train_y, digit_test_y], axis=0)

print(digit_data.shape, digit_target.shape)

In [None]:
# Shift digit labels by 26 so that they follow the letter labels in the
# combined label space (digits become 26..35).
# The labels are rebuilt from the raw MNIST targets instead of using `+=`, so
# running this cell more than once cannot apply the offset twice.
digit_target = np.concatenate((digit_train_y, digit_test_y)) + 26

In [None]:
# Turn every flat row of letter pixels into a (28, 28, 1) float32 image.
# A single vectorised reshape replaces the per-row Python loop and the
# intermediate list of arrays; the resulting shape and dtype are the same.
letter_data = np.asarray(letter_x, dtype=np.float32).reshape(-1, 28, 28, 1)
letter_target = letter_y

In [None]:
# Give the digit images an explicit trailing channel axis of size 1.
digit_data = digit_data.reshape(digit_data.shape[:3] + (1,))

In [None]:
# Sanity check: both image sets should now share the same per-sample shape.
for images, targets in ((letter_data, letter_target), (digit_data, digit_target)):
  print(images.shape, targets.shape)

In [None]:
# Preview a random 5x5 grid of letter images.
rows, cols = 5, 5

# n_samples limits shuffle to the images that are actually drawn, instead of
# copying and permuting the entire array just to look at 25 samples.
shuffled_data = shuffle(letter_data, n_samples=rows * cols)

plt.figure(figsize=(10, 10))

for i in range(rows * cols):
  # plt.subplot expects (nrows, ncols, index).
  plt.subplot(rows, cols, i+1)
  plt.imshow(shuffled_data[i].reshape(28, 28), interpolation="nearest", cmap="Greys")

plt.show()

In [None]:
# Preview a random 5x5 grid of digit images.
rows, cols = 5, 5

# n_samples limits shuffle to the images that are actually drawn, instead of
# copying and permuting the entire array just to look at 25 samples.
shuffled_data = shuffle(digit_data, n_samples=rows * cols)

plt.figure(figsize=(10, 10))

for i in range(rows * cols):
  # plt.subplot expects (nrows, ncols, index).
  plt.subplot(rows, cols, i+1)
  plt.imshow(shuffled_data[i].reshape(28, 28), interpolation="nearest", cmap="Greys")

plt.show()

In [None]:
# Stack digits and letters into one dataset. Digits come first in both
# arrays, so images and labels stay aligned row by row.
data = np.concatenate([digit_data, letter_data], axis=0)
target = np.concatenate([digit_target, letter_target], axis=0)

print(data.shape, target.shape)

In [None]:
# Preview a random 5x5 grid drawn from the combined letters + digits data.
rows, cols = 5, 5

# n_samples limits shuffle to the images that are actually drawn, instead of
# copying and permuting the entire array just to look at 25 samples.
shuffled_data = shuffle(data, n_samples=rows * cols)

plt.figure(figsize=(10, 10))

for i in range(rows * cols):
  # plt.subplot expects (nrows, ncols, index).
  plt.subplot(rows, cols, i+1)
  plt.imshow(shuffled_data[i].reshape(28, 28), interpolation="nearest", cmap="Greys")

plt.show()

In [None]:
# Hold out 20% of the combined data for testing.
# random_state makes the split reproducible across kernel restarts, and
# stratify keeps the per-class proportions the same in both partitions.
# NOTE: `test_lebels` (sic) is kept as-is because later cells use that name.
train_data, test_data, train_labels, test_lebels = train_test_split(
    data, target, test_size=0.2, random_state=42, stratify=target
)
print(train_data.shape, train_labels.shape)
print(test_data.shape, test_lebels.shape)

In [None]:
# Scale the pixel values by 1/255.
# NOTE: this cell rebinds its own inputs, so run it exactly once per split;
# a second run would divide the images by 255 again and re-encode the labels.
train_data = train_data / 255.0
test_data = test_data / 255.0

# One-hot encode the labels. num_classes is fixed at 36 (26 letters + 10
# digits) so both partitions always get 36 columns; without it the width is
# inferred from the largest label present in each array.
train_labels = np_utils.to_categorical(train_labels, num_classes=36)
test_lebels = np_utils.to_categorical(test_lebels, num_classes=36)

# Keep the explicit (samples, height, width, 1) layout expected by the model.
train_data = np.reshape(train_data, (train_data.shape[0], train_data.shape[1], train_data.shape[2], 1))
test_data = np.reshape(test_data, (test_data.shape[0], test_data.shape[1], test_data.shape[2], 1))

print(train_data.shape, test_data.shape)
print(train_labels.shape, test_lebels.shape)

In [None]:
# Number of samples per class in each partition, as plain lists of length 36.
# argmax over the one-hot axis recovers the class index of every sample and
# bincount tallies them in one pass, replacing the per-sample Python loop.
# minlength keeps classes with zero samples in the result.
train_label_counts = np.bincount(np.argmax(train_labels, axis=1), minlength=36).tolist()
test_label_counts = np.bincount(np.argmax(test_lebels, axis=1), minlength=36).tolist()

In [None]:
# Grouped bar chart comparing per-class sample counts in train vs. test.
frequency = [
    train_label_counts, test_label_counts
]

# Tick labels in class-index order: 0-25 are letters, 26-35 are digits.
x = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
     'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '0', '1', '2',
     '3', '4', '5', '6', '7', '8', '9']

positions = np.arange(len(frequency[0]))
bar_width = 0.4

fig, ax = plt.subplots(figsize=(8, 6))

# Place the two bars of each class side by side around the tick position so
# they do not overlap and the tick label sits under the middle of the pair.
ax.bar(positions - bar_width / 2, frequency[0], color='b', width=bar_width, label='train')
ax.bar(positions + bar_width / 2, frequency[1], color='r', width=bar_width, label='test')

ax.set_xticks(positions)
ax.set_xticklabels(x)
ax.set_title('Train vs Test Data Distribution')
ax.set_xlabel('Characters')
ax.set_ylabel('Frequency')
ax.legend()

In [None]:
# CNN classifier: two 5x5 convolutions with batch normalisation, one pooling
# stage with dropout, then a dense head ending in a 36-way softmax.
model = Sequential([
    Conv2D(32, (5, 5), input_shape=(28, 28, 1), activation='relu'),
    BatchNormalization(),

    Conv2D(32, (5, 5), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2, 2),
    Dropout(0.25),

    BatchNormalization(),
    Flatten(),

    Dense(256, activation='relu'),
    Dense(36, activation='softmax'),
])

# Categorical cross-entropy matches the one-hot encoded labels.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Checkpoint callback that writes weights only, and only when the training
# loss reaches a new minimum.
best_loss_checkpoint = ModelCheckpoint(
    filepath="./best_loss_model.h5",
    monitor="loss",
    mode="min",
    save_best_only=True,
    save_weights_only=True,
)

In [None]:
# Checkpoint callback that writes weights only, and only when the validation
# loss reaches a new minimum.
best_val_loss_checkpoint = ModelCheckpoint(
    filepath="./best_val_loss_model.h5",
    monitor="val_loss",
    mode="min",
    save_best_only=True,
    save_weights_only=True,
)

In [None]:
# Train for 30 epochs with batches of 300; the held-out split doubles as the
# validation set, so val_* metrics are recorded in `history` every epoch.
# The checkpoint callbacks are intentionally not attached here:
#   callbacks=[best_loss_checkpoint, best_val_loss_checkpoint]
history = model.fit(
    x=train_data,
    y=train_labels,
    batch_size=300,
    epochs=30,
    validation_data=(test_data, test_lebels),
)

In [None]:
# Training vs. validation loss per epoch.
plt.plot(history.history['loss'], 'b', label='loss')
plt.plot(history.history['val_loss'], 'r', label='val_loss')
plt.xlabel('epoch')
# The y axis shows loss values; it was previously mislabelled 'frequency'.
plt.ylabel('loss')
plt.legend()
plt.show()

In [None]:
# Training vs. validation accuracy per epoch.
plt.plot(history.history['accuracy'], 'b', label='accuracy')
plt.plot(history.history['val_accuracy'], 'r', label='val_accuracy')
plt.xlabel('epoch')
# The y axis shows accuracy values; it was previously mislabelled 'frequency'.
plt.ylabel('accuracy')
plt.legend()
plt.show()

In [None]:
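# Optional: restore the weights with the lowest validation loss before evaluating.
# Only usable if the checkpoint callbacks were passed to model.fit above.
#model.load_weights("./best_val_loss_model.h5")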

In [None]:
# Final loss and accuracy on the held-out split.
loss, acc = model.evaluate(x=test_data, y=test_lebels)
print(loss, acc)

In [None]:
# Per-class softmax scores for every held-out sample.
predictions = model.predict(x=test_data)

In [None]:
# Confusion matrix with true classes on the rows and predicted classes on the
# columns. Passing labels explicitly pins the matrix to 36x36 in class-index
# order, even if some class never appears among the true or predicted labels.
confusion = confusion_matrix(
    np.argmax(test_lebels, axis=1),
    np.argmax(predictions, axis=1),
    labels=list(range(36))
)
print(confusion)

In [None]:
# Character for each class index: 0-25 are the letters, 26-35 are the digits.
labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

# Use `labels` in its own order. np.unique() sorts its input, which puts the
# digits before the letters and would attach every axis label to the wrong
# row/column of the confusion matrix.
df_cm = pd.DataFrame(confusion, columns=labels, index=labels)
df_cm.index.name = 'actual'
df_cm.columns.name = 'predicted'
plt.figure(figsize = (15,15))
sn.set(font_scale=1.4)
sn.heatmap(df_cm, cmap="Reds", annot=True, annot_kws={"size": 12}, fmt="d")