- Your architecture should consist of a single hidden layer with up to k nodes.
- You can use any activation function (e.g., sigmoid, tanh, etc.) in the hidden nodes.
- Your model must use a bias term at the input and hidden layers. It can be a standalone term or
be incorporated in the weight matrices.
- You should use gradient descent to train your FFNN.
- You may find it helpful to use random number seeds for reproducibility when debugging.
- You do not need to use a GPU for this assignment, and your models should train in less than one
minute each.
- You are responsible for selecting hyperparameters (e.g., number of hidden nodes, learning rate,
training epochs, batch sizes, early stopping criteria, lambda, etc.). The goal is to get “good”
performance from your model, but an exhaustive hyper-parameter search is unnecessary.
- All code, exhibits and answers to free-response questions must be in a single Jupyter notebook.
- Your code should use parameters to control all functionality needed to complete specific tasks
(see below).

In [53]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SingleLayerFFNN(nn.Module):
    """Feed-forward network with one hidden layer for classification.

    Both linear layers carry a bias term. ``forward`` returns raw logits
    (no softmax is applied), which is the input ``nn.CrossEntropyLoss``
    expects.

    Args:
        hidden_size: Number of nodes in the hidden layer.
        activation_fn: Name of the hidden activation; one of ``"sigmoid"``,
            ``"tanh"`` or ``"relu"``.
        input_size: Number of input features (defaults to 2).
        output_size: Number of output classes (defaults to 2).

    Raises:
        ValueError: If ``activation_fn`` is not a supported name.
    """

    # Supported hidden-layer activations, keyed by their public name.
    _ACTIVATIONS = {
        "sigmoid": torch.sigmoid,
        "tanh": torch.tanh,
        "relu": torch.relu,
    }

    def __init__(self, hidden_size, activation_fn="sigmoid",
                 input_size=2, output_size=2):
        super().__init__()

        # Fail fast: an unknown name used to leave ``self.activation`` unset
        # and only surfaced as an AttributeError on the first forward pass.
        if activation_fn not in self._ACTIVATIONS:
            supported = ", ".join(sorted(self._ACTIVATIONS))
            raise ValueError(
                f"Unsupported activation_fn {activation_fn!r}; "
                f"expected one of: {supported}"
            )

        self.input_layer = nn.Linear(input_size, hidden_size, bias=True)
        self.output_layer = nn.Linear(hidden_size, output_size, bias=True)
        self.activation = self._ACTIVATIONS[activation_fn]

    def forward(self, x):
        """Return the class logits for a batch of inputs ``x``."""
        hidden = self.activation(self.input_layer(x))
        return self.output_layer(hidden)

In [54]:
import torch.optim as optim

def train_model(model, train_loader, num_epochs, learning_rate):
    """Train ``model`` in place with SGD on a cross-entropy objective.

    Prints the mean per-batch loss after every epoch.

    Args:
        model: Module mapping a batch of inputs to class logits.
        train_loader: Iterable yielding ``(data, target)`` batches; it is
            iterated once per epoch.
        num_epochs: Number of passes over ``train_loader``.
        learning_rate: Step size for the SGD optimizer.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    # nn.CrossEntropyLoss for multiclass cross entropy
    # nn.MSELoss for mean squared error
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    # The model may have been left in eval mode by an earlier evaluation;
    # make sure it is in training mode before updating the weights.
    model.train()

    for epoch in range(num_epochs):
        epoch_loss = 0.0
        # Count batches ourselves so loaders without __len__ also work.
        num_batches = 0
        for data, target in train_loader:
            optimizer.zero_grad()
            loss = criterion(model(data), target)

            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            raise ValueError("train_loader yielded no batches; nothing to train on")

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss/num_batches:.4f}")

In [55]:
import csv

import torch
from torch.utils.data import DataLoader, TensorDataset

def load_data(file_path, batch_size, shuffle=False):
    """Read a labelled CSV file and wrap it in a ``DataLoader``.

    The first row is treated as a header and skipped. Every following
    non-empty row is read as ``label, feature_1, feature_2``.

    Args:
        file_path: Path of the CSV file to read.
        batch_size: Number of samples per batch.
        shuffle: Whether the loader reshuffles the samples every epoch.
            Defaults to ``False``, which keeps the file order.

    Returns:
        A ``DataLoader`` yielding ``(inputs, labels)`` batches, with
        ``float32`` inputs and ``long`` labels.
    """
    inputs = []
    labels = []

    # newline="" is what the csv module asks for when handed a file object.
    with open(file_path, "r", newline="") as csvfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # skip header row; tolerate a completely empty file
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing newline).
                continue
            labels.append(float(row[0]))
            inputs.append([float(row[1]), float(row[2])])

    inputs_tensor = torch.tensor(inputs, dtype=torch.float32)
    labels_tensor = torch.tensor(labels, dtype=torch.long)

    dataset = TensorDataset(inputs_tensor, labels_tensor)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)


In [56]:
# Fit the network on the training file.
# Tunable hyperparameters: hidden_size, batch_size, num_epochs, learning_rate
# (plus the choice of hidden activation).

# --- data ---
file_path = "xor_train.csv"
batch_size = 2

# --- architecture ---
# hidden_size is picked from [2, 3, 5, 7, 9];
# activation_fn is one of "sigmoid", "relu", "tanh".
hidden_size = 9
activation_fn = "sigmoid"

# --- optimisation ---
num_epochs = 20
learning_rate = 0.1

train_loader = load_data(file_path=file_path, batch_size=batch_size)
model = SingleLayerFFNN(hidden_size=hidden_size, activation_fn=activation_fn)

train_model(
    model=model,
    train_loader=train_loader,
    num_epochs=num_epochs,
    learning_rate=learning_rate,
)

Epoch 1/20, Loss: 0.7024
Epoch 2/20, Loss: 0.6872
Epoch 3/20, Loss: 0.6868
Epoch 4/20, Loss: 0.6862
Epoch 5/20, Loss: 0.6857
Epoch 6/20, Loss: 0.6853
Epoch 7/20, Loss: 0.6849
Epoch 8/20, Loss: 0.6845
Epoch 9/20, Loss: 0.6842
Epoch 10/20, Loss: 0.6839
Epoch 11/20, Loss: 0.6836
Epoch 12/20, Loss: 0.6833
Epoch 13/20, Loss: 0.6831
Epoch 14/20, Loss: 0.6829
Epoch 15/20, Loss: 0.6827
Epoch 16/20, Loss: 0.6825
Epoch 17/20, Loss: 0.6823
Epoch 18/20, Loss: 0.6821
Epoch 19/20, Loss: 0.6820
Epoch 20/20, Loss: 0.6818


In [57]:
def evaluate_model(model, test_loader):
    """Compute and print the classification accuracy of ``model``.

    The model is switched to eval mode for the duration of the pass and
    returned to whatever mode it was in beforehand, so a later training
    run is not silently executed in eval mode.

    Args:
        model: Module mapping a batch of inputs to class logits.
        test_loader: Iterable yielding ``(data, target)`` batches.

    Returns:
        The fraction of samples whose arg-max prediction equals the target.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    was_training = model.training
    model.eval()

    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for data, target in test_loader:
                preds = torch.argmax(model(data), dim=1)
                # Count on the tensors directly; no NumPy round-trip needed.
                correct += (preds == target).sum().item()
                total += target.size(0)
    finally:
        # Restore the caller's train/eval mode even if the loop raised.
        model.train(was_training)

    if total == 0:
        raise ValueError("test_loader yielded no samples; accuracy is undefined")

    accuracy = correct / total

    print(f"Accuracy: {accuracy:.4f}")
    return accuracy

In [58]:
# Score the trained model on the validation file.
eval_file_path = "xor_valid.csv"
eval_batch_size = 4

eval_loader = load_data(file_path=eval_file_path, batch_size=eval_batch_size)
evaluate_model(model=model, test_loader=eval_loader)

Accuracy: 0.4550


0.455

In [None]:
# Grid Search: candidate values for each hyperparameter.
learning_rates = [
    0.001,
    0.01,
    0.1,
]
batch_sizes = [
    16,
    32,
]
num_epochs_list = [
    10,
    20,
]

