In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

In [None]:
# Load the digits dataset from CSV into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; consider moving it
# to a configurable data directory so the notebook runs on other machines.
train_data = pd.read_csv('/Users/bajajvbh/Desktop/Vaibhav/Datasets/digitsnpixels.csv')

# Every later cell refers to this frame as `A`. Bind that name here so the
# notebook survives "Restart Kernel -> Run All" instead of raising NameError.
A = train_data

In [None]:
## Exploratory Data Analysis
# Report the dimensions of the frame, then preview its first five rows
# (the preview is the cell's displayed output).
print(f"There are {A.shape[0]} rows and {A.shape[1]} columns")
A.head(n=5)

In [None]:
# Split the frame into the feature matrix X (every column after the first,
# the independent variables) and the target vector y (the first column, the
# dependent variable). The frame is converted to a NumPy array only once.
_values = A.to_numpy()
X, y = _values[:, 1:], _values[:, 0]

In [None]:
# Smallest and largest value found anywhere in X, shown as a (min, max) tuple
np.min(X), np.max(X)

In [None]:
# Shape of a single sample (one flattened image) and the side length of the
# square image it would form. A notebook cell only displays its last
# expression, so both values are returned together as one tuple; the pixel
# count is read from the data instead of being hard-coded.
n_pixels = X[0].size
X[0].shape, np.sqrt(n_pixels)

In [None]:
# Fold the flat pixel vector of sample 8 back into a 28x28 grid and draw it
# with the binary colormap
plt.imshow(np.reshape(X[8], (28, 28)), cmap=plt.cm.binary)

In [None]:
# Target value (label) of sample 8 -- the same sample index whose image is
# plotted with plt.imshow in the preceding cell.
y[8]

In [None]:
# count and show the number of samples for each label/target y
# The labels are passed by keyword: a bare positional vector is deprecated in
# seaborn 0.11 and is interpreted as `data` (not `x`) from seaborn 0.12 on.
sns.countplot(x=y)

In [None]:
# ## Data Preprocessing
#
# X and y are rebuilt from the raw frame `A` (same column split as the
# feature/target cell above) rather than from their own previous values, so
# re-running this cell can neither scale the pixels twice nor one-hot encode
# labels that are already encoded.
raw_values = A.to_numpy()

# normalizing features (pixels)
# assumes pixel intensities lie in 0-255 -- confirm with the X.min()/X.max() cell
X = raw_values[:, 1:] / 255.0

# one-hot-encoding target (digit 0-9)
# num_classes is pinned to 10 so the encoded width always matches the 10-unit
# softmax output layer, even if some digit never occurs in the data.
y = tf.keras.utils.to_categorical(raw_values[:, 0], num_classes=10)

# to_categorical returns a matrix of binary values (either '1' or '0').
# It has one row per input label and one column per class.

In [None]:
# create train and validation data
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for validation. random_state is fixed so the
# split (and therefore every metric computed on it) is the same on each run.
xtrain, xval, ytrain, yval = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Give each sample the rank-4 layout Keras convolution layers expect:
# (rows, pixel, pixel, channel), with a single channel.

xtrain = np.reshape(xtrain, (xtrain.shape[0], 28, 28, 1))
xval = np.reshape(xval, (xval.shape[0], 28, 28, 1))

In [None]:
# Build Keras Model
#
# Two convolution blocks (conv -> max-pool -> batch-norm -> dropout) followed
# by a small fully connected classifier. The input shape is declared once via
# an explicit Input; the second convolution previously repeated
# input_shape=(28,28,1), which has no meaning on a non-first layer and has
# been dropped.
model = tf.keras.Sequential([
    # one 28x28 single-channel image per sample
    tf.keras.Input(shape=(28, 28, 1)),

    # first convolution layer
    tf.keras.layers.Conv2D(filters=10, kernel_size=(3, 3), activation="relu"),
    # first pooling layer: keep the maximum of every 2x2 window
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    # regularization
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.4),

    # second convolution layer
    tf.keras.layers.Conv2D(filters=10, kernel_size=(3, 3), activation="relu"),
    # second pooling layer
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    # regularization
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.4),

    # flatten the feature maps into a 1D vector per sample
    tf.keras.layers.Flatten(),

    # first fully connected layer
    tf.keras.layers.Dense(units=30, activation="relu"),
    # second fully connected layer
    tf.keras.layers.Dense(units=20, activation="relu"),
    # output layer (0-9 classes)
    tf.keras.layers.Dense(units=10, activation="softmax"),
])

# compile model: one-hot targets -> categorical cross-entropy
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# show the network architecture
model.summary()


In [None]:
#Train Model
# define early stopping: stop once val_loss has not improved for 3 epochs.
# restore_best_weights=True rolls the model back to the epoch with the lowest
# val_loss; without it the model keeps the weights of the last (worse) epoch,
# and the later evaluation/prediction cells would use those.
callback = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=3,
    restore_best_weights=True,
)

# fit the model and save the per-epoch metrics in history
history = model.fit(
    xtrain,
    ytrain,
    batch_size=64,
    epochs=50,
    validation_data=(xval, yval),
    callbacks=[callback],
)

In [None]:
# Plot the training curves: loss on the left panel, accuracy on the right,
# each with one line for the training data and one for the validation data.
fig, ax = plt.subplots(1, 2, figsize=(18,6))
panels = (
    (ax[0], 'loss', 'val_loss', 'Loss'),
    (ax[1], 'accuracy', 'val_accuracy', 'Accuracy'),
)
for panel, train_key, valid_key, title in panels:
    panel.plot(history.history[train_key], label='train')
    panel.plot(history.history[valid_key], label='valid')
    panel.set_title(title)
    panel.legend()

In [None]:
## Evaluation on Validation Data

# Score the trained model on the held-out validation split, 50 samples per batch
results = model.evaluate(x=xval, y=yval, batch_size=50)

In [None]:
# Report the values returned by model.evaluate on the validation data:
# results[0] is the loss, results[1] is the "accuracy" metric set in model.compile.
print(f"val loss: {results[0]} and val acc: {results[1]}")

In [None]:
# Class probabilities from the softmax output, one row per validation sample
probabilities = model.predict(xval)

# Predicted class = column index holding the highest probability in each row
ypred = probabilities.argmax(axis=1)