# Imports

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import TensorDataset, DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F


# Part A

- Normalization of data

In [None]:
# Load the two MNIST CSV files and pool them, so the train/val/test split
# below is drawn from every available sample rather than the files' own split.
data = pd.read_csv("dataset/mnist_test.csv")
data2 = pd.read_csv("dataset/mnist_train.csv")
# NOTE(review): pd.read_csv already consumes the first line of each file as
# the header by default, so these drops remove the first *parsed* row of each
# frame. Confirm that row is not a real sample (kept as-is so the seeded
# splits below stay reproducible).
data2 = data2.drop(data2.index[0])
data = data.drop(data.index[0])
# Order matters: the seeded splits below depend on the row order.
data = pd.concat([data, data2])

data_np = data.to_numpy()

# Separate labels (first column) and features (remaining columns)
y = data_np[:, 0].astype(int)
X = data_np[:, 1:].astype(float)

# Normalize pixel values to [0, 1]
X = X / 255.0

# 60% train / 40% held out, stratified so each split keeps the label balance.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.4, stratify=y, random_state=42
)

# Halve the held-out part: 20% validation / 20% test of the full data.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42
)

# Reshape for neural networks: (N, 784) -> (N, 1, 28, 28)
X_train_nn = X_train.reshape(-1, 1, 28, 28)
X_val_nn = X_val.reshape(-1, 1, 28, 28)
X_test_nn = X_test.reshape(-1, 1, 28, 28)

# Convert to PyTorch tensors (float32 features, int64 class labels)
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)

X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# For neural networks (keep image shape).
# These are views of the flat tensors above rather than second copies of the
# pixel data, so each split is stored once. Values, dtype and shape match a
# fresh torch.tensor(X_*_nn) conversion; in-place edits to one are visible
# through the other.
X_train_nn_tensor = X_train_tensor.view(-1, 1, 28, 28)
X_val_nn_tensor = X_val_tensor.view(-1, 1, 28, 28)
X_test_nn_tensor = X_test_tensor.view(-1, 1, 28, 28)

# Create TensorDatasets
train_dataset_flat = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset_flat = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset_flat = TensorDataset(X_test_tensor, y_test_tensor)

train_dataset_nn = TensorDataset(X_train_nn_tensor, y_train_tensor)
val_dataset_nn = TensorDataset(X_val_nn_tensor, y_val_tensor)
test_dataset_nn = TensorDataset(X_test_nn_tensor, y_test_tensor)

# Create DataLoaders: only the training loaders shuffle, so validation and
# test batches come out in a fixed order.
batch_size = 64

train_loader_flat = DataLoader(train_dataset_flat, batch_size=batch_size, shuffle=True)
val_loader_flat = DataLoader(val_dataset_flat, batch_size=batch_size, shuffle=False)
test_loader_flat = DataLoader(test_dataset_flat, batch_size=batch_size, shuffle=False)

train_loader_nn = DataLoader(train_dataset_nn, batch_size=batch_size, shuffle=True)
val_loader_nn = DataLoader(val_dataset_nn, batch_size=batch_size, shuffle=False)
test_loader_nn = DataLoader(test_dataset_nn, batch_size=batch_size, shuffle=False)

- Binary logistic regression 

- Softmax Regression Implementation

# Part B

- Custom Neural Network Architecture

In [None]:
class FullyConnectedNN(nn.Module):
    """Fully connected classifier with two ReLU hidden layers.

    Args:
        input_size: Number of features per sample after flattening.
        hidden_sizes: Widths of the hidden layers. Only the first two
            entries are used; fewer than two raises ``IndexError``.
        num_classes: Number of output logits.
    """

    # The default is a tuple rather than a list so it cannot be mutated and
    # shared between instances; callers may still pass a list.
    def __init__(self, input_size=784, hidden_sizes=(128, 64), num_classes=10):
        super().__init__()

        self.fc1 = nn.Linear(input_size, hidden_sizes[0])
        self.fc2 = nn.Linear(hidden_sizes[0], hidden_sizes[1])
        self.fc3 = nn.Linear(hidden_sizes[1], num_classes)

        # Kaiming init for the layers followed by ReLU; the gain for "relu"
        # equals the default ("leaky_relu" with slope 0), so values are the
        # same as with the implicit default. Xavier for the linear output.
        nn.init.kaiming_normal_(self.fc1.weight, nonlinearity="relu")
        nn.init.kaiming_normal_(self.fc2.weight, nonlinearity="relu")
        nn.init.xavier_normal_(self.fc3.weight)

    def forward(self, x):
        """Return raw class logits (no softmax applied) for a batch.

        A 4-D input is flattened to ``(x.size(0), -1)`` first; inputs of any
        other rank are passed to the first layer unchanged.
        """
        if x.ndim == 4:
            # flatten() also accepts non-contiguous tensors (e.g. transposed
            # images), where view() would raise a RuntimeError.
            x = x.flatten(1)

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)
        

- Training Infrastructure

# Part C

- Performance Visualization

- Hyperparameter Analysis

- Model Comparison