<a href="https://colab.research.google.com/github/kayleepho/MATH-5750-Project-3/blob/main/project3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Math 5750/6880: Mathematics of Data Science \
Project 3

# 1. Fashion-MNIST image classification using sklearn
In this exercise, you will build a classifier for the Fashion-MNIST image dataset using the sklearn `MLPClassifier`.

- Use the provided code to import and preprocess the Fashion-MNIST image dataset.
- Build a dense neural network using `MLPClassifier`.
- Start with a simple model architecture and train your model.
- Then experiment with your model/training method by changing:
  - the number of hidden layers and neurons
  - the activation functions
  - the optimization method and the learning rate
  - regularization or early stopping settings.
- Observe how each change affects convergence speed and accuracy.

In [1]:
from tensorflow.keras.datasets import fashion_mnist
from sklearn.preprocessing import StandardScaler

# Fetch Fashion-MNIST as (train, test) pairs of image and label arrays.
# Label ids 0-9: T-shirt/top, Trouser, Pullover, Dress, Coat, Sandal, Shirt,
# Sneaker, Bag, Ankle boot
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# Flatten each image into one feature row (one row per sample).
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# Scale features: the scaler is fitted on the training split only and the
# same fitted transform is then applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [2]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Baseline dense network: one hidden layer of 100 ReLU units, Adam solver.
# fit() hands back the fitted estimator, so construction and training chain.
mlp = MLPClassifier(
    hidden_layer_sizes=(100,),
    activation='relu',
    solver='adam',
    random_state=42,
).fit(X_train, y_train)

# Predict the test split and report the fraction of correct labels.
y_pred = mlp.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.8836


In [3]:
# Experiment: architecture. Two hidden layers (100 then 50 units) instead of
# the single 100-unit layer; every other setting matches the baseline.
mlp1 = MLPClassifier(
    hidden_layer_sizes=(100, 50),
    activation='relu',
    solver='adam',
    random_state=42,
).fit(X_train, y_train)

y_pred1 = mlp1.predict(X_test)
accuracy1 = accuracy_score(y_true=y_test, y_pred=y_pred1)
print(f"Accuracy: {accuracy1}")

Accuracy: 0.8802


In [4]:
# Experiment: activation function. tanh replaces ReLU in the hidden layer;
# every other setting matches the baseline.
mlp2 = MLPClassifier(
    hidden_layer_sizes=(100,),
    activation='tanh',
    solver='adam',
    random_state=42,
).fit(X_train, y_train)

y_pred2 = mlp2.predict(X_test)
accuracy2 = accuracy_score(y_true=y_test, y_pred=y_pred2)
print(f"Accuracy: {accuracy2}")

Accuracy: 0.8741


In [5]:
# Experiment: optimizer and learning rate. The solver is switched from Adam
# to SGD and the initial learning rate is set explicitly to 0.01.
mlp3 = MLPClassifier(
    hidden_layer_sizes=(100,),
    activation='relu',
    solver='sgd',
    learning_rate_init=0.01,
    random_state=42,
).fit(X_train, y_train)

y_pred3 = mlp3.predict(X_test)
accuracy3 = accuracy_score(y_true=y_test, y_pred=y_pred3)
print(f"Accuracy: {accuracy3}")

Accuracy: 0.8801


In [6]:
# Experiment: early stopping. Only early_stopping is switched on here; no
# regularization argument is passed, so the rest matches the baseline.
mlp4 = MLPClassifier(
    hidden_layer_sizes=(100,),
    activation='relu',
    solver='adam',
    early_stopping=True,
    random_state=42,
).fit(X_train, y_train)

y_pred4 = mlp4.predict(X_test)
accuracy4 = accuracy_score(y_true=y_test, y_pred=y_pred4)
print(f"Accuracy: {accuracy4}")

Accuracy: 0.8832


In [14]:
import time
import pandas as pd
from tensorflow.keras.datasets import fashion_mnist
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# ----------------------------
# 1. Load and preprocess data
# ----------------------------
# The raw arrays are loaded again here, so this cell does not rely on whatever
# an earlier cell left bound to X_train / X_test.
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# Flatten images: one feature row per sample
X_train = X_train.reshape(len(X_train), -1)
X_test = X_test.reshape(len(X_test), -1)

# Sanity-check the flattened shapes
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)

# Scale features: fit on the training split only, then apply the same fitted
# transform to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# ----------------------------
# 2. Define model configurations
# ----------------------------
# Each experiment is the baseline with a single aspect changed, so every entry
# is expressed as the baseline keyword arguments plus its overrides.
_baseline = dict(hidden_layer_sizes=(100,), activation='relu', solver='adam', random_state=42)

configs = [
    ("Baseline", dict(_baseline)),
    ("Deeper network", {**_baseline, "hidden_layer_sizes": (100, 50)}),
    ("Tanh activation", {**_baseline, "activation": 'tanh'}),
    ("SGD optimizer", {**_baseline, "solver": 'sgd', "learning_rate_init": 0.01}),
    # NOTE(review): despite its label, this entry only enables early stopping;
    # no regularization argument differs from the baseline.
    ("Regularized + EarlyStop", {**_baseline, "early_stopping": True}),
]

# ----------------------------
# 3. Train models and record results
# ----------------------------
# For every (name, params) pair in `configs`: build an MLPClassifier from the
# keyword arguments, fit it on the training split, score it on the test split,
# and append one summary record to `results`.
results = []

for name, params in configs:
    print(f"\nTraining model: {name}")

    mlp = MLPClassifier(**params)

    # time.perf_counter() is a monotonic, high-resolution clock intended for
    # measuring elapsed intervals; time.time() follows the wall clock and can
    # jump if the system time is adjusted mid-run. Only fit() is timed, so the
    # reported figure is pure training time.
    start_time = time.perf_counter()
    mlp.fit(X_train, y_train)
    train_time = time.perf_counter() - start_time

    accuracy = accuracy_score(y_test, mlp.predict(X_test))

    results.append({
        "Model": name,
        "Accuracy": accuracy,
        "Training Time (s)": train_time,
        "Iterations": mlp.n_iter_
    })

    print(f"Accuracy: {accuracy:.4f}, Training Time: {train_time:.2f}s, Iterations: {mlp.n_iter_}")

# ----------------------------
# 4. Display results
# ----------------------------
# Collect the per-experiment records into a table (one row per record, in the
# order they were appended) and print it under a heading.
df_results = pd.DataFrame(results)
print("\nSummary of experiments:", df_results, sep="\n")


X_train shape: (60000, 784)
X_test shape: (10000, 784)

Training model: Baseline
Accuracy: 0.8836, Training Time: 268.70s, Iterations: 124

Training model: Deeper network
Accuracy: 0.8802, Training Time: 208.99s, Iterations: 93

Training model: Tanh activation
Accuracy: 0.8741, Training Time: 243.50s, Iterations: 104

Training model: SGD optimizer
Accuracy: 0.8801, Training Time: 319.21s, Iterations: 184

Training model: Regularized + EarlyStop
Accuracy: 0.8832, Training Time: 53.07s, Iterations: 30

Summary of experiments:
                     Model  Accuracy  Training Time (s)  Iterations
0                 Baseline    0.8836         268.700726         124
1           Deeper network    0.8802         208.994771          93
2          Tanh activation    0.8741         243.498878         104
3            SGD optimizer    0.8801         319.212848         184
4  Regularized + EarlyStop    0.8832          53.067917          30


# 3. Fashion-MNIST image classification using PyTorch
In this exercise, you will repeat Exercise 1 using PyTorch.
- Use the provided code to import and preprocess the Fashion-MNIST image dataset.
- Again, start with a simple model architecture and train your model.
- As above, experiment with the model/training method.
- With PyTorch, there are many more options, so read about them and experiment!
- Try to further improve your model by using convolutional neural network (CNN) layers and MaxPool2d layers.
- For a challenge, use transfer learning to import a pretrained model and fine-tune it on the Fashion-MNIST image dataset.

In [7]:
import numpy as np
from tensorflow.keras.datasets import fashion_mnist
import torch
from torch.utils.data import TensorDataset, DataLoader

# Load Fashion-MNIST
# Classes (0-9): T-shirt/top, Trouser, Pullover, Dress, Coat, Sandal, Shirt, Sneaker, Bag, Ankle boot
# NOTE: this rebinds X_train / X_test. The sklearn section bound those names
# to flattened, standardized arrays; from here on they are 4-D image arrays.
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# scale to [0,1] and insert a channel axis after the batch axis -> (N, 1, 28, 28)
X_train = np.expand_dims(X_train.astype("float32") / 255.0, axis=1)
X_test = np.expand_dims(X_test.astype("float32") / 255.0, axis=1)

# labels as 64-bit integers
y_train = y_train.astype("int64")
y_test = y_test.astype("int64")

# train/val split: first 50k samples for training, the remainder for validation
X_tr, y_tr = X_train[:50000], y_train[:50000]
X_val, y_val = X_train[50000:], y_train[50000:]

# wrap each split in a TensorDataset ...
train_ds = TensorDataset(torch.from_numpy(X_tr), torch.from_numpy(y_tr))
val_ds = TensorDataset(torch.from_numpy(X_val), torch.from_numpy(y_val))
test_ds = TensorDataset(torch.from_numpy(X_test), torch.from_numpy(y_test))

# ... and a DataLoader; only the training loader shuffles
train_loader = DataLoader(train_ds, batch_size=128, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=256, shuffle=False)
test_loader = DataLoader(test_ds, batch_size=256, shuffle=False)

In [8]:
import torch.nn as nn
import torch.optim as optim

# Use the GPU when PyTorch reports one is available, otherwise the CPU.
# In Colab, use "Change runtime type" to select a GPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# start with a simple model architecture and train your model


Using device: cpu


In [None]:
# Start with a simple model architecture and train your model.