# Imports

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torch.nn.functional as F
from torchvision.datasets import MNIST
from sklearn.model_selection import train_test_split

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# 1. Data preparation

### 1.1. Read data from file

In [2]:
# Fetch both official MNIST partitions (downloaded into MNIST_dataset/ on first run).
# They are merged below because this notebook performs its own train/val/test split.
data1 = MNIST(root='MNIST_dataset', train=True, download=True)
data2 = MNIST(root='MNIST_dataset', train=False, download=True)

# Stack the two partitions along the sample axis (axis 0 is torch.cat's default).
all_images = torch.cat([data1.data, data2.data])
all_labels = torch.cat([data1.targets, data2.targets])

Now we have 70,000 images in total.

In [3]:
# Switch to NumPy arrays so scikit-learn's splitter can be used later on.
x, y = all_images.numpy(), all_labels.numpy()

print("Total images:", len(x))

Total images: 70000


### 1.2. Normalizing the data

In [4]:
# Normalize pixel values to the [0, 1] range.
# The result is derived from the raw `all_images` tensor rather than from `x`
# itself, so re-running this cell cannot divide by 255 a second time.
x = all_images.numpy() / 255.0

# Guard against accidental double scaling or unexpected raw values.
assert x.min() >= 0.0 and x.max() <= 1.0, "pixel values are outside [0, 1]"

print("min pixel value:", x.min(), ", max pixel value:", x.max())

min pixel value: 0.0 , max pixel value: 1.0


### 1.3. Splitting into train, validation and test sets

In [5]:
# Stage 1: keep 60% for training; the remaining 40% is split again below.
# Stratifying on the labels keeps the digit proportions equal in every subset.
x_train, x_rest, y_train, y_rest = train_test_split(
    x,
    y,
    train_size=0.6,
    random_state=42,
    stratify=y,
)

# Stage 2: halve the remainder -> 20% validation and 20% test of the full data.
x_val, x_test, y_val, y_test = train_test_split(
    x_rest,
    y_rest,
    train_size=0.5,
    random_state=42,
    stratify=y_rest,
)

print("Training set = ", len(x_train))
print("Validation set = ", len(x_val))
print("Test set = ", len(x_test))

Training set =  42000
Validation set =  14000
Test set =  14000


In [13]:
# Collapse every image into a single row vector; the sample axis is preserved
# and -1 lets NumPy infer the number of pixels per image.
x_train_flat, x_val_flat, x_test_flat = (
    images.reshape(len(images), -1) for images in (x_train, x_val, x_test)
)

# Sanity check: compare the shapes before and after flattening
print(f"Original shape: {x_train.shape}")
print(f"Flattened shape: {x_train_flat.shape}")

Original shape: (42000, 28, 28)
Flattened shape: (42000, 784)


### 1.4. PyTorch DataLoaders

In [None]:
def _to_nn_tensors(features, labels):
    """Turn a flattened feature array and its label array into torch tensors.

    Features become float32 with an extra axis inserted at position 1;
    labels become int64 (`long`).
    """
    # NOTE(review): unsqueeze(1) turns (N, 784) into (N, 1, 784) -- confirm the
    # downstream network really expects that extra singleton axis.
    feature_tensor = torch.from_numpy(features).unsqueeze(1).float()
    label_tensor = torch.from_numpy(labels).long()
    return feature_tensor, label_tensor


# NumPy arrays -> tensors, one (features, labels) pair per split
x_train_tensor, y_train_tensor = _to_nn_tensors(x_train_flat, y_train)
x_val_tensor, y_val_tensor = _to_nn_tensors(x_val_flat, y_val)
x_test_tensor, y_test_tensor = _to_nn_tensors(x_test_flat, y_test)

# Pair every sample with its label
train_dataset = torch.utils.data.TensorDataset(x_train_tensor, y_train_tensor)
val_dataset = torch.utils.data.TensorDataset(x_val_tensor, y_val_tensor)
test_dataset = torch.utils.data.TensorDataset(x_test_tensor, y_test_tensor)

# Mini-batches of 64; only the training data is reshuffled every epoch, while
# validation/test keep a fixed order.
train_NN_loader = DataLoader(train_dataset, shuffle=True, batch_size=64)
val_NN_loader = DataLoader(val_dataset, shuffle=False, batch_size=64)
test_NN_loader = DataLoader(test_dataset, shuffle=False, batch_size=64)

# 2. Linear Classification Models

## 2.1. Logistic Regression

Logistic regression is a binary classifier, so we restrict the dataset to two digit classes.

### 2.1.1. Data preparation For Binary Classification

Boolean masking is used to filter the dataset for two specific digits.

In [None]:
def filter_digits(x, y, digit1, digit2):
    """Keep only the samples labelled `digit1` or `digit2` and binarise the labels.

    Args:
        x: Array of samples, indexed along its first axis in step with `y`.
        y: Array of labels, one per sample.
        digit1: Label that is mapped to class 0.
        digit2: Label that is mapped to class 1.

    Returns:
        A tuple `(x_filtered, y_filtered)`: the selected rows of `x` and a label
        array holding 0 where the original label was `digit1` and 1 otherwise.
    """
    is_first = y == digit1
    keep = is_first | (y == digit2)

    x_filtered = x[keep]
    # Among the kept samples, anything that is not digit1 must be digit2.
    y_filtered = np.where(is_first[keep], 0, 1)
    return x_filtered, y_filtered

Filter the data for only 2 digits.

In [None]:
# The two digits that form the binary task: DIGIT_A -> class 0, DIGIT_B -> class 1
DIGIT_A, DIGIT_B = 0, 1

# Apply the same filter to each split so that all three share one label mapping.
x_binary_train_flat, y_binary_train = filter_digits(
    x_train_flat, y_train, digit1=DIGIT_A, digit2=DIGIT_B
)
x_binary_val_flat, y_binary_val = filter_digits(
    x_val_flat, y_val, digit1=DIGIT_A, digit2=DIGIT_B
)
x_binary_test_flat, y_binary_test = filter_digits(
    x_test_flat, y_test, digit1=DIGIT_A, digit2=DIGIT_B
)

In [21]:
# Report (features shape, labels shape) for the train, validation and test subsets.
for _split_x, _split_y in (
    (x_binary_train_flat, y_binary_train),
    (x_binary_val_flat, y_binary_val),
    (x_binary_test_flat, y_binary_test),
):
    print(_split_x.shape, _split_y.shape)

(8478, 784) (8478,)
(2826, 784) (2826,)
(2827, 784) (2827,)


In [None]:
def _to_binary_tensors(features, labels):
    """Convert one binary split to float32 tensors; labels become an (N, 1) column."""
    feature_tensor = torch.from_numpy(features).float()
    # Column shape so the labels line up with the model's (N, 1) output.
    label_tensor = torch.from_numpy(labels).float().view(-1, 1)
    return feature_tensor, label_tensor


# NumPy arrays -> float tensors, one (features, labels) pair per split
x_train_binary_tensor, y_train_binary_tensor = _to_binary_tensors(x_binary_train_flat, y_binary_train)
x_val_binary_tensor, y_val_binary_tensor = _to_binary_tensors(x_binary_val_flat, y_binary_val)
x_test_binary_tensor, y_test_binary_tensor = _to_binary_tensors(x_binary_test_flat, y_binary_test)

# Pair every sample with its label
train_binary_dataset = torch.utils.data.TensorDataset(x_train_binary_tensor, y_train_binary_tensor)
val_binary_dataset = torch.utils.data.TensorDataset(x_val_binary_tensor, y_val_binary_tensor)
test_binary_dataset = torch.utils.data.TensorDataset(x_test_binary_tensor, y_test_binary_tensor)

# Mini-batches of 64; only the training loader reshuffles its data each epoch.
train_lr_loader = DataLoader(train_binary_dataset, shuffle=True, batch_size=64)
val_lr_loader = DataLoader(val_binary_dataset, shuffle=False, batch_size=64)
test_lr_loader = DataLoader(test_binary_dataset, shuffle=False, batch_size=64)

### 2.1.2. Defining The Model

In [None]:
INPUT_FEATURES = 784 # Number of pixels/inputs (28x28)
OUTPUT_FEATURES = 1  # The output is either 0 or 1
WEIGHT_INIT_SEED = 42  # Same seed value as the train/val/test split

# A dedicated generator makes the initial weights identical on every
# Restart & Run All without touching PyTorch's global random state.
_weight_init_rng = torch.Generator().manual_seed(WEIGHT_INIT_SEED)

# Weights: standard-normal random values of size (784, 1), tracked by autograd
weights = torch.randn(
    INPUT_FEATURES,
    OUTPUT_FEATURES,
    dtype=torch.float,
    generator=_weight_init_rng,
    requires_grad=True,
)

# Bias: a single zero-initialised element, also tracked by autograd
bias = torch.zeros(OUTPUT_FEATURES, dtype=torch.float, requires_grad=True)

Define the sigmoid activation function and the binary cross-entropy loss function.

In [25]:
def sigmoid(z):
    """Numerically stable logistic sigmoid, 1 / (1 + exp(-z)), applied element-wise.

    The textbook formula evaluates exp(-z) directly, which overflows to inf for
    very negative z (about z < -88 in float32) and produces NaN gradients under
    autograd. Here the exponential is only ever taken of -|z|, which lies in
    (0, 1], and the algebraically equivalent form exp(z) / (1 + exp(z)) is used
    for negative inputs.

    Args:
        z: Tensor of logits (any shape).

    Returns:
        Tensor of the same shape as `z` with values in [0, 1].
    """
    non_negative = z >= 0
    # Select the exponent BEFORE calling exp so that neither branch can overflow
    # (an overflow in the unselected branch would still poison the gradient).
    decay = torch.exp(torch.where(non_negative, -z, z))  # exp(-|z|)
    return torch.where(non_negative, 1.0 / (1.0 + decay), decay / (1.0 + decay))

# y_true are the labels (0 or 1) and y_pred_logits are the model outputs (logits)
def binary_cross_entropy_loss(y_true, y_pred_logits):
    """Mean binary cross-entropy computed directly from logits.

    With p = sigmoid(z), the per-item loss -(y*log(p) + (1-y)*log(1-p))
    simplifies to softplus(z) - y*z, where softplus(z) = log(1 + exp(z)).
    Working with the logits avoids taking log of a saturated probability:
    the log(p + epsilon) form caps the loss near -log(1e-7) ~= 16.1 and gives
    vanishing (or NaN) gradients for confidently wrong predictions.

    Args:
        y_true: Tensor of target labels (0.0 or 1.0), broadcastable against
            `y_pred_logits`.
        y_pred_logits: Tensor of raw model outputs before the sigmoid.

    Returns:
        Scalar tensor holding the loss averaged over all items.
    """
    z = y_pred_logits
    non_negative = z >= 0

    # Stable softplus: log(1 + exp(z)) = max(z, 0) + log(1 + exp(-|z|)).
    # exp is only applied to -|z|, so it cannot overflow.
    decay = torch.exp(torch.where(non_negative, -z, z))
    softplus = torch.where(non_negative, z, torch.zeros_like(z)) + torch.log1p(decay)

    loss_per_item = softplus - y_true * z

    # Return average
    return torch.mean(loss_per_item)

### 2.1.3 Training The Model

**TODO:** Add more sections before this one and renumber the headings if needed.