In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from sklearn.model_selection import train_test_split
import nbtest

## Step 1: Load the dataset and prepare the data

In [2]:
# Load the raw training table; expected to contain a "label" column, the
# bookkeeping columns listed below, and one column per pixel.
df = pd.read_csv("./input/train.csv")

# Drop unnecessary columns for the task.
# Reassign instead of using inplace=True so the statement does not mutate a
# frame that other code may still be holding a reference to.
columns_to_drop = ["source", "extra_note", "flag", "id"]
df = df.drop(columns=columns_to_drop)

# Split original DataFrame into train/test (fixed random_state => same split every run)
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Verify that none of the unnecessary columns survived into the training split.
leftover_columns = sorted(set(columns_to_drop) & set(train_df.columns))
assert not leftover_columns, (
    f"Columns were not dropped from train_df: {leftover_columns}"
)

X_train = train_df.drop("label", axis=1) / 255.0  # normalized value
y_train = train_df["label"]

X_test = test_df.drop("label", axis=1) / 255.0
y_test = test_df["label"]

# reshape(-1, 28, 28, 1) only fails when the TOTAL element count is not a
# multiple of 784; a wrong column count could otherwise be silently folded
# across rows. Check the per-row width explicitly first.
expected_pixels = 28 * 28
assert X_train.shape[1] == expected_pixels, (
    f"Expected {expected_pixels} pixel columns, found {X_train.shape[1]}"
)

# Convert each flat row into a 28x28 single-channel image.
X_train = X_train.values.reshape(-1, 28, 28, 1)
X_test = X_test.values.reshape(-1, 28, 28, 1)

## Step 2: Define a convolutional neural network

In [3]:
# Hyperparameters for the network and the training run.
conv_filters = 16        # number of filters used by each Conv2D layer
kernel_size = (3, 3)     # convolution window
dropout_rate = 0.5       # fraction of units dropped before the output layer
dense_units = 32         # width of the hidden fully connected layer
epochs = 2               # passes over the training data
batch_size = 32          # samples per gradient update

# Weight initialisation, dropout and batch shuffling are all random. Seed the
# Python, NumPy and TensorFlow generators in one call so repeated
# "Restart & Run All" executions give comparable metrics (exact bit-for-bit
# repeatability can still depend on the hardware/backend).
seed = 42
tf.keras.utils.set_random_seed(seed)

In [None]:
# Small CNN: two conv/pool stages followed by a dense classifier head.
model = Sequential([
    Conv2D(conv_filters, kernel_size, activation='relu', input_shape=(28, 28, 1)), # Convolutional layer with ReLU activation
    MaxPooling2D(pool_size=(2, 2)), # Max pooling layer
    Conv2D(conv_filters, kernel_size, activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(), # Flatten the 2D output into 1D feature vector for the dense layers
    Dense(dense_units, activation='relu'), # Fully connected layer with ReLU activation
    Dropout(dropout_rate), # Dropout layer to reduce overfitting by randomly dropping units
    Dense(10, activation='softmax') # Output layer with 10 units (for 10 classes) using softmax activation for classification
])

# Guard against accidental edits to the architecture: the stack above is
# 2x Conv2D, 2x MaxPooling2D, Flatten, Dense, Dropout, Dense = 8 layers.
expected_layer_count = 8
assert len(model.layers) == expected_layer_count, (
    f"Expected {expected_layer_count} layers, found {len(model.layers)}"
)

# Labels are integer class ids, hence the sparse categorical loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

## Step 3: Train the model

In [None]:
# Fit the network on the training split; 10% of it is held out by Keras
# for per-epoch validation.
model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.1,
)

## Step 4: Evaluate the model

In [None]:

loss, accuracy = model.evaluate(X_test, y_test, verbose=1) # Evaluate the model performance
print(f"Test Accuracy: {accuracy:.4f}")

# Check that the accuracy lies in the expected range. The lower bound is the
# accuracy of uniform random guessing (1 / number of output classes); a
# trained model must beat it. The upper bound is the maximum possible value.
# NOTE(review): tighten the lower bound once a baseline accuracy for this
# dataset has been recorded.
chance_accuracy = 1.0 / model.output_shape[-1]
assert chance_accuracy < accuracy <= 1.0, (
    f"Test accuracy {accuracy:.4f} is outside the expected range "
    f"({chance_accuracy:.4f}, 1.0]"
)
