In [4]:
import pandas as pd

# Load the MNIST dataset
df = pd.read_csv('/content/sample_data/mnist_train_small.csv', header=None) # No header in MNIST dataset

# Separate features and labels
X = df.drop(0, axis=1)  # Features (pixels) - drop the first column (index 0)
y = df[0]               # Labels (digit classes) - select the first column (index 0)

# Check for missing data
df.isnull().sum()  # No missing data in this case

Unnamed: 0,0
0,0
1,0
2,0
3,0
4,0
...,...
780,0
781,0
782,0
783,0


In [5]:
from sklearn.preprocessing import MinMaxScaler

# Normalize the pixel values
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

In [6]:
from sklearn.decomposition import PCA

# Apply PCA for dimensionality reduction (optional)
pca = PCA(n_components=100)  # Reduce dimensions to 100 principal components
X_pca = pca.fit_transform(X_scaled)

In [7]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Train Random Forest
model_rf = RandomForestClassifier()
model_rf.fit(X_train, y_train)

# Make predictions
y_pred_rf = model_rf.predict(X_test)

# Evaluate accuracy
accuracy_rf = accuracy_score(y_test, y_pred_rf)
print(f"Random Forest Accuracy: {accuracy_rf * 100:.2f}%")


Random Forest Accuracy: 95.20%


In [8]:
from sklearn.neural_network import MLPClassifier

# Train Neural Network
model_mlp = MLPClassifier(hidden_layer_sizes=(128, 64), max_iter=300)
model_mlp.fit(X_train, y_train)

# Make predictions
y_pred_mlp = model_mlp.predict(X_test)

# Evaluate accuracy
accuracy_mlp = accuracy_score(y_test, y_pred_mlp)
print(f"Neural Network Accuracy: {accuracy_mlp * 100:.2f}%")

Neural Network Accuracy: 96.53%


In [11]:
from sklearn.metrics import classification_report, confusion_matrix

# Confusion matrix for Random Forest
cm_rf = confusion_matrix(y_test, y_pred_rf)
print(classification_report(y_test, y_pred_rf))

# Confusion matrix for Neural Network
cm_mlp = confusion_matrix(y_test, y_pred_mlp)
print(classification_report(y_test, y_pred_mlp)) # Added y_pred_mlp as an argument

              precision    recall  f1-score   support

           0       0.97      0.98      0.97       409
           1       0.98      0.97      0.98       471
           2       0.93      0.95      0.94       387
           3       0.97      0.94      0.95       425
           4       0.94      0.95      0.95       390
           5       0.94      0.94      0.94       334
           6       0.98      0.98      0.98       401
           7       0.95      0.97      0.96       441
           8       0.93      0.93      0.93       351
           9       0.92      0.90      0.91       391

    accuracy                           0.95      4000
   macro avg       0.95      0.95      0.95      4000
weighted avg       0.95      0.95      0.95      4000

              precision    recall  f1-score   support

           0       0.98      0.99      0.98       409
           1       0.98      0.99      0.98       471
           2       0.96      0.94      0.95       387
           3       0.98 