In [1]:

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
from sklearn.preprocessing import LabelEncoder , StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.linear_model import Perceptron    # Used for simple linear classification tasks.

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

from tensorflow.keras.models import Sequential     # Sequential lets you build a neural network layer-by-layer in Keras.

from tensorflow.keras.layers import Dense     #Dense makes the final predictions
from tensorflow.keras.layers import Conv2D     # Conv2D extracts features
from tensorflow.keras.layers import Flatten    # Flatten reshapes them

from tensorflow.keras.layers import MaxPooling2D     # MaxPooling2D reduces size
from tensorflow.keras.layers import Dropout          # Dropout prevents overfitting

from tensorflow.keras.utils import to_categorical     # converts numeric class labels into one-hot encoded format for training classification models
     

In [2]:
df= pd.read_csv("train.csv")
df_test= pd.read_csv("test.csv")

In [3]:
df.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
df.shape

(42000, 785)

In [5]:
df.columns

Index(['label', 'pixel0', 'pixel1', 'pixel2', 'pixel3', 'pixel4', 'pixel5',
       'pixel6', 'pixel7', 'pixel8',
       ...
       'pixel774', 'pixel775', 'pixel776', 'pixel777', 'pixel778', 'pixel779',
       'pixel780', 'pixel781', 'pixel782', 'pixel783'],
      dtype='object', length=785)

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42000 entries, 0 to 41999
Columns: 785 entries, label to pixel783
dtypes: int64(785)
memory usage: 251.5 MB


In [7]:
df.isnull().sum()

label       0
pixel0      0
pixel1      0
pixel2      0
pixel3      0
           ..
pixel779    0
pixel780    0
pixel781    0
pixel782    0
pixel783    0
Length: 785, dtype: int64

In [9]:
df_test.columns

Index(['pixel0', 'pixel1', 'pixel2', 'pixel3', 'pixel4', 'pixel5', 'pixel6',
       'pixel7', 'pixel8', 'pixel9',
       ...
       'pixel774', 'pixel775', 'pixel776', 'pixel777', 'pixel778', 'pixel779',
       'pixel780', 'pixel781', 'pixel782', 'pixel783'],
      dtype='object', length=784)

In [None]:
#  HERE WR ARE DOING DATA PREPROCESSING
X_train = df.drop("label",axis=1).values
y_train = df["label"].values

# THIS FOR TEST DATASET (no labels in test data)
X_test = df_test.drop("label", axis=1).values
y_test = df_test["label"].values

KeyError: "['label'] not found in axis"

In [None]:
#  THIS IS USED TO CONVERT THE PIXEL VALUES FROM 0-255 TO 0-1
X_train = X_train.astype("float32") / 255.0
X_test = X_test.astype("float32") / 255.0

In [None]:

# -1 → automatically figures out the number of images
#  HERE WE USE -1 BECAUSE IN THE DATASET WE HAVE LABELS ON 1ST COL AND THE 0 AND 1 FROM NEXT COL FROM LABELS
#  SO THATS WHY WE CHOOSE -1 SO WE CAN START FRESHLY FROM 0 TO 784
# 28, 28 → each image is 28×28 pixels
X_train_img = X_train.reshape(-1, 28, 28)
X_test_img = X_test.reshape(-1, 28, 28)

In [None]:
#  HERE NUM_CLASSES GIVES THE LENGTH OF ONE-HOT ENCODING
y_train_cat = to_categorical(y_train, num_classes=10)
y_test_cat = to_categorical(y_test, num_classes=10)

In [None]:
#  FROM NOW HERE WE START FROM FIRST TYPE OF DL PREDICTION I.E IS PERCEPTRON
perceptron = Sequential([
    #  FLATTEEN IS USED FOR CONVERTING 2D TO 1D ARRAY
    Flatten(input_shape=(28,28)),
    #  BECAUSE WE ARE USING CATEGORICAL VARIABLES WE USE SOFTMAX 
    #  AND HERE 10 IS HOW MANY NEURON LOCATED FROM FLATTEN TO ANN(FIRST LAYER(INPUT))
    #  BECAUSE WE DIVIDED IT INTO 10 CATEGORIES SO WE USED 10 HERE 
    Dense(10, activation='softmax')
])

In [None]:
perceptron.compile(optimizer="sgd", loss="categorical_crossentropy", metrics=["accuracy"])


In [None]:
history_percp = perceptron.fit(X_train_img,y_train_cat,epochs=5, batch_size=32,validation_data=(X_test_img,y_test_cat), verbose=1)

In [None]:
acc_percp = perceptron.evaluate(X_test_img,y_test_cat, verbose=0)[1]
acc_percp

ANN IMPLEMENTATION

In [None]:
# IN ANN WE HAVE MULTIPLE HIDDEN LAYERS SO WE USED RELU ACTIVATION FUNCTION
ann = Sequential([
    Flatten(input_shape=(28,28)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax')
])

In [None]:
ann.complile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

In [None]:
history_ann = ann.fit(X_train_img,y_train_cat, epochs=5, batch_size=32, validation_data = (X_test_img,y_test_cat), verbose=1)

In [None]:
acc_ann = ann.evaluate(X_test_img,y_test_cat, verbose=0)[1]
acc_ann

NOW AT FINALLY WE USE CNN AND DO PREDICTON ON TEST DATA

In [None]:
X_train_cnn = X_train.reshape(-1, 28, 28,1)
X_test_cnn = X_test.reshape(-1, 28, 28, 1)
     

In [None]:
cnn = Sequential([
    #  HERE WE APPLY CONVLUTIONAL LAYER WITH NEURON SIZE 32 AND KERNEL_SIZE MEANS CHOOSE THE ARRAY IN THE FORM OF 3 BY 3 MATRICS ONLY
    # INPUT_SIZE MEANS 28 BY 28 MATRIX SIZE AND 1 IS STARTING COLS INDEX 
    Conv2D(32, kernel_size=(3,3), activation="relu", input_size=(28,28,1)),
    # HERE WE ARE APPLYING POOLING AND FROM 3 BY 3 MATRIX FROM ABOVE WE CHOOSE 2 BY 2 IMPORTANT NUMBERS AND WHICH IS LARGEST
    MaxPooling2D(pool_size=(2,2)),
    #  INCREASE THE NEURON AND WEIGHT SIZE TO 64
    Conv2D(64, kernel_size=(3,3), activation="relu"),
    MaxPooling2D(pool_size=(2,2)),
    # HERE FLATTEN MEANS CALUCLATING OR EXTRACTING ALL WEIGHTS FROM THR KERNEL_SIZE WE WANT TO CONVERTING IT INTO 1D 
    Flatten(),
    # AFTER FLATTEN WE USED NORMAL ANN 
    Dense(128, activation="relu")
    #  HERE DROPOUT IS USED FOR PREVENTING OVERFITTING
    Dropout(0.5),
    #  BECAUSE IT IS CATAGORICAL IT IS SOFTMAX 
    Dense(10, activation="softmax")
])

In [None]:
cnn.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

In [None]:
history_cnn = cnn.fit(X_train_cnn,y_train_cat, epochs=5, batch_size=32, validation_data=(X_test_cnn,y_test_cat), verbose =1)

In [None]:
acc_ann = cnn.evaluate(X_test_cnn,y_test_cat, verbose=0)[1]
acc_ann

AFTER CALCULATING ALL WE ARE COMPARING AND DRAWING PLOTS AND COMPARES

In [None]:
# HERE WE MAKE A FUNCTION
def plot_training(history, title):
    plt.figure(figsize=(12,4))
    plt.subplot(1,2,1)
    plt.plot(history.history['accuracy'], label="Train")
    plt.plot(history.history['val_accuracy'], label="Val")
    plt.title(f"{title} Accuracy")
    plt.legend()

    plt.subplot(1,2,2)
    plt.plot(history.history['loss'], label="Train")
    plt.plot(history.history['val_loss'], label="Val")
    plt.title(f"{title} Loss")
    plt.legend()
    plt.show()

In [None]:


plot_training(history_percp, "Perceptron")

In [None]:


plot_training(history_ann, "ANN")

In [None]:

plot_training(history_cnn, "CNN")
     

In [None]:


plt.figure(figsize=(10,6))
plt.plot(history_percp.history['val_accuracy'], label="Perceptron")
plt.plot(history_ann.history['val_accuracy'], label="ANN")
plt.plot(history_cnn.history['val_accuracy'], label="CNN")
plt.title("Validation Accuracy Comparison")
plt.xlabel("Epochs")
plt.ylabel("Val Accuracy")
plt.legend()
plt.show()

In [None]:


def show_side_by_side(models, model_names, X, X_cnn, y_true, n=5):
    idxs = np.random.choice(len(X), n, replace=False)
    plt.figure(figsize=(15, 6))
    for i, idx in enumerate(idxs):
        plt.subplot(2, n, i+1)
        plt.imshow(X[idx].reshape(28, 28), cmap="gray")
        plt.axis("off")
        plt.title(f"True: {y_true[idx]}")
        preds = [np.argmax(model.predict(X_cnn[idx].reshape(1, 28, 28, 1) if name == "CNN" else X[idx].reshape(1, 28, 28)))
                 for model, name in zip(models, model_names)]
        plt.subplot(2, n, n+i+1)
        plt.axis("off")
        plt.title("\n".join(f"{n}: {p}" for n, p in zip(model_names, preds)))
    plt.tight_layout()
    plt.show()

In [None]:
y_pred_cnn = np.argmax(cnn.predict(X_test_cnn), axis=1)
cm = confusion_matrix(y_test, y_pred_cnn)
plt.figure(figsize=(8,6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
plt.title("CNN Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.show()

In [None]:


final_accs = [acc_percp*100, acc_ann*100, acc_cnn*100]
models = ["Perceptron", "ANN", "CNN"]

plt.figure(figsize=(8,6))
bars = plt.bar(models, final_accs, color=['#ff9999','#66b3ff','#99ff99'])
plt.title("Final Test Accuracy Comparison")
plt.ylabel("Accuracy (%)")
for bar, acc in zip(bars, final_accs):
    plt.text(bar.get_x()+bar.get_width()/2, bar.get_height()-1, f"{acc:.2f}%",
             ha='center', va='bottom', fontsize=12, fontweight='bold')
plt.ylim(80, 100)
plt.show()