<a href="https://colab.research.google.com/github/kayleepho/MATH-5750-Project-3/blob/main/project3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Math 5750/6880: Mathematics of Data Science \
Project 3

# 1. Fashion-MNIST image classification using sklearn
In this exercise, you will build a classifier for the Fashion-MNIST image dataset using the sklearn `MLPClassifier`.

- Use the provided code to import and preprocess the Fashion-MNIST image dataset.
- Build a dense neural network using `MLPClassifier`.
- Start with a simple model architecture and train your model.
- Then experiment with your model/training method by changing:
  - the number of hidden layers and neurons
  - the activation functions
  - the optimization method and the learning rate
  - regularization or early stopping settings.
- Observe how each change affects convergence speed and accuracy.

In [8]:
from tensorflow.keras.datasets import fashion_mnist
from sklearn.preprocessing import StandardScaler

# Fetch Fashion-MNIST as (images, labels) pairs for the train and test splits.
# Label ids 0-9 map to: T-shirt/top, Trouser, Pullover, Dress, Coat, Sandal,
# Shirt, Sneaker, Bag, Ankle boot.
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# Flatten every image into one feature row (one row per sample).
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# Standardize features: the scaler learns its statistics from the training
# split only, and the same fitted transform is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [9]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Baseline dense network: one hidden layer of 100 ReLU units trained with Adam.
# random_state is pinned so repeated runs give the same result.
mlp = MLPClassifier(
    hidden_layer_sizes=(100,),
    activation='relu',
    solver='adam',
    random_state=42,
)

# Fit on the training split.
mlp.fit(X_train, y_train)

# Predict the test split and report the fraction of correct labels.
y_pred = mlp.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.8836


In [None]:
# Architecture experiment: two hidden layers (100 units, then 50) instead of
# the single 100-unit layer; activation, solver and seed are left unchanged.
mlp1 = MLPClassifier(
    hidden_layer_sizes=(100, 50),
    activation='relu',
    solver='adam',
    random_state=42,
).fit(X_train, y_train)  # fit() returns the fitted estimator itself

# Evaluate on the test split.
y_pred1 = mlp1.predict(X_test)
accuracy1 = accuracy_score(y_test, y_pred1)
print(f"Accuracy: {accuracy1}")

In [None]:
# Activation experiment: swap ReLU for tanh in the single 100-unit hidden
# layer; solver and seed are left unchanged.
mlp2 = MLPClassifier(
    hidden_layer_sizes=(100,),
    activation='tanh',
    solver='adam',
    random_state=42,
).fit(X_train, y_train)  # fit() returns the fitted estimator itself

# Evaluate on the test split.
y_pred2 = mlp2.predict(X_test)
accuracy2 = accuracy_score(y_test, y_pred2)
print(f"Accuracy: {accuracy2}")

In [None]:
# Optimizer experiment: train with plain SGD instead of Adam and set the
# initial learning rate explicitly.
# learning_rate_init must be strictly positive: a value of 0.0 is rejected by
# scikit-learn's parameter validation when fit() is called (and a zero step
# size could never update the weights anyway). 0.01 matches the value used for
# the "SGD optimizer" configuration in the comparison cell further down.
mlp3 = MLPClassifier(
    hidden_layer_sizes=(100,),
    activation='relu',
    solver='sgd',
    learning_rate_init=0.01,
    random_state=42,
)
mlp3.fit(X_train, y_train)

# Evaluate on the test split.
y_pred3 = mlp3.predict(X_test)
accuracy3 = accuracy_score(y_test, y_pred3)
print(f"Accuracy: {accuracy3}")

In [None]:
# Early-stopping experiment: same single 100-unit ReLU layer and Adam solver,
# but with early_stopping switched on.
mlp4 = MLPClassifier(
    hidden_layer_sizes=(100,),
    activation='relu',
    solver='adam',
    early_stopping=True,
    random_state=42,
).fit(X_train, y_train)  # fit() returns the fitted estimator itself

# Evaluate on the test split.
y_pred4 = mlp4.predict(X_test)
accuracy4 = accuracy_score(y_test, y_pred4)
print(f"Accuracy: {accuracy4}")

In [None]:
# Observe how each change affects convergence speed and accuracy.
import time

# Model configurations to compare. Each entry is
# (display name, keyword arguments forwarded to MLPClassifier).
# max_iter is set explicitly per configuration to bound how long each fit runs.
configs = [
    ("Baseline", dict(hidden_layer_sizes=(64,), activation='relu', solver='adam', max_iter=20)),
    ("Deeper network", dict(hidden_layer_sizes=(256,128,64), activation='relu', solver='adam', max_iter=20)),
    ("Tanh activation", dict(hidden_layer_sizes=(128,64), activation='tanh', solver='adam', max_iter=20)),
    ("SGD optimizer", dict(hidden_layer_sizes=(128,64), activation='relu', solver='sgd', learning_rate_init=0.01, momentum=0.9, max_iter=50)),
    ("Regularized + EarlyStop", dict(hidden_layer_sizes=(128,64), activation='relu', solver='adam', alpha=0.001, early_stopping=True, validation_fraction=0.1, max_iter=100))
]

# Run experiments: fit every configuration with the same seed and collect
# (name, test accuracy, fit time in seconds, iterations run) for the summary.
results = []
for name, params in configs:
    print(f"\nTraining model: {name}")
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring intervals; time.time() follows the system clock and can jump.
    start = time.perf_counter()
    # NOTE: this rebinds the notebook-level name `mlp` on every iteration, so
    # after this cell `mlp` refers to the last configuration trained here.
    mlp = MLPClassifier(random_state=42, **params)
    mlp.fit(X_train, y_train)
    # Only the fit() call is timed; prediction happens after the clock stops.
    train_time = time.perf_counter() - start
    acc = accuracy_score(y_test, mlp.predict(X_test))
    results.append((name, acc, train_time, mlp.n_iter_))

# Display summary: one fixed-width row per configuration.
print("\n===== RESULTS SUMMARY =====")
print(f"{'Model':<25}{'Accuracy':<12}{'Epochs':<10}{'Train Time (s)':<15}")
for name, acc, t, n_iter in results:
    print(f"{name:<25}{acc:<12.4f}{n_iter:<10}{t:<15.1f}")

# 3. Fashion-MNIST image classification using PyTorch

In [None]:
import numpy as np
from tensorflow.keras.datasets import fashion_mnist
import torch
from torch.utils.data import TensorDataset, DataLoader

# Fetch Fashion-MNIST again as raw arrays (this rebinds X_train / y_train /
# X_test / y_test). Label ids 0-9 map to: T-shirt/top, Trouser, Pullover,
# Dress, Coat, Sandal, Shirt, Sneaker, Bag, Ankle boot.
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# Pixels -> float32 in [0, 1]; a singleton channel axis is inserted after the
# sample axis, giving (N, 1, 28, 28).
X_train = (X_train.astype(np.float32) / 255.0)[:, np.newaxis, :, :]
X_test = (X_test.astype(np.float32) / 255.0)[:, np.newaxis, :, :]

# Labels as int64.
y_train = y_train.astype("int64")
y_test = y_test.astype("int64")

# Hold out everything after the first 50,000 training samples (the last 10k)
# as the validation split.
X_tr, y_tr = X_train[:50000], y_train[:50000]
X_val, y_val = X_train[50000:], y_train[50000:]

# Pair images with labels as TensorDatasets (from_numpy wraps the NumPy arrays).
train_ds = TensorDataset(torch.from_numpy(X_tr), torch.from_numpy(y_tr))
val_ds = TensorDataset(torch.from_numpy(X_val), torch.from_numpy(y_val))
test_ds = TensorDataset(torch.from_numpy(X_test), torch.from_numpy(y_test))

# Mini-batch iterators: only the training loader shuffles; validation and test
# keep dataset order and use a larger batch size.
train_loader = DataLoader(train_ds, shuffle=True, batch_size=128)
val_loader = DataLoader(val_ds, shuffle=False, batch_size=256)
test_loader = DataLoader(test_ds, shuffle=False, batch_size=256)

In [None]:
import torch.nn as nn
import torch.optim as optim

# Pick the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
# (In Colab, "change runtime type" to GPU to make one available.)
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# your code here