In [1]:
import classifierAgents
from sklearn.model_selection import train_test_split
import numpy as np

# Path to the data file handed to classifierAgents.loadData.
# NOTE(review): absolute, machine-specific path — this cell fails on any other
# machine until the path is changed or made configurable.
file = '/Users/youssefawad/Documents/Kings/term_2/6CCS3ML1_Machine_Learning/coursework/cw1_pacman/good-moves.txt'

# presumably returns (feature rows, class labels) — confirm against classifierAgents.loadData
data, target = classifierAgents.loadData(file)

# Hold out 20% of the samples for evaluation; random_state pins the shuffle so
# the same rows land in each split on every run.
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)

In [2]:
# https://docs.pytorch.org/tutorials/beginner/basics/buildmodel_tutorial.html
# https://docs.pytorch.org/tutorials/beginner/basics/optimization_tutorial.html
import os
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
from torchvision import datasets, transforms

  Referenced from: <367D4265-B20F-34BD-94EB-4F3EE47C385B> /opt/anaconda3/envs/pacman/lib/python3.12/site-packages/torchvision/image.so
  warn(


In [3]:
# Convert both splits to tensors, paired by dtype: features become float32,
# labels become int64 (torch.long) class indices.
X_train, X_test = (
    torch.tensor(features, dtype=torch.float32) for features in (X_train, X_test)
)
y_train, y_test = (
    torch.tensor(labels, dtype=torch.long) for labels in (y_train, y_test)
)

In [5]:
class NeuralNetwork(nn.Module):
    """Small fully connected classifier: Linear -> ReLU -> Linear.

    Args:
        in_features: Length of each input feature vector (default 25).
        hidden_features: Width of the single hidden layer (default 8).
        num_classes: Number of output logits, one per class (default 4).

    The defaults reproduce the fixed 25 -> 8 -> 4 architecture, so
    ``NeuralNetwork()`` builds exactly the same layers as before. The
    ``linear_relu_stack`` attribute name and layer order are kept so that
    parameter names (``linear_relu_stack.0.weight`` ...) do not change.
    """

    def __init__(self, in_features=25, hidden_features=8, num_classes=4):
        super().__init__()
        self.linear_relu_stack = nn.Sequential(
            # Small NN model. Any bigger was massively overfitting
            # Will add dropout if still overfitting
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes)
        )

    def forward(self, x):
        """Return the raw, unnormalised class scores (logits) for ``x``."""
        return self.linear_relu_stack(x)

In [6]:
# Seed PyTorch's RNG before building the model so the random weight
# initialisation is identical on every Restart & Run All (the data split is
# already pinned with the same seed value).
torch.manual_seed(42)
model = NeuralNetwork()
print(model)

NeuralNetwork(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=25, out_features=8, bias=True)
    (1): ReLU()
    (2): Linear(in_features=8, out_features=4, bias=True)
  )
)


In [7]:
# Forward pass over the whole training set:
# logits -> per-class probabilities -> most probable class for each row.
logits = model(X_train)
pred_probab = torch.softmax(logits, dim=1)
y_pred = torch.argmax(pred_probab, dim=1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([1, 3, 1, 2, 1, 2, 2, 1, 2, 3, 1, 2, 1, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 1,
        3, 2, 2, 1, 3, 1, 2, 3, 2, 1, 2, 2, 2, 2, 2, 1, 2, 1, 3, 1, 1, 1, 1, 1,
        1, 2, 2, 1, 2, 1, 1, 1, 2, 3, 2, 2, 1, 2, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1,
        2, 3, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 2, 1, 1, 2, 2, 1, 1,
        2, 1, 1, 2])


In [8]:
# Fraction of training rows whose predicted class matches the label.
# y_pred and y_train are both torch tensors, so they are compared directly
# instead of mixing a NumPy array with a tensor; .item() yields a plain float.
accuracy = (y_pred == y_train).float().mean().item()
print(f"Training accuracy: {accuracy * 100:.2f}%")

Training accuracy: 33.00%


In [9]:
# Print the module layout, then a short preview of every learnable parameter.
print(f"Model structure: {model}\n\n")

for name, param in model.named_parameters():
    # param[:2] keeps only the first two entries along dim 0 so the dump stays short.
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Model structure: NeuralNetwork(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=25, out_features=8, bias=True)
    (1): ReLU()
    (2): Linear(in_features=8, out_features=4, bias=True)
  )
)


Layer: linear_relu_stack.0.weight | Size: torch.Size([8, 25]) | Values : tensor([[-0.0084, -0.1133, -0.1421,  0.1943,  0.0063,  0.1874,  0.1412,  0.0718,
         -0.1438, -0.0659,  0.1427, -0.1291,  0.0885,  0.0075, -0.1912, -0.1724,
          0.1471,  0.1784, -0.1605,  0.1435, -0.1647, -0.0308,  0.0628,  0.1191,
          0.0714],
        [-0.0846, -0.0820,  0.1497, -0.1685, -0.1309, -0.0285, -0.0940,  0.0250,
          0.1214, -0.0235,  0.0738,  0.0877,  0.0963, -0.1175,  0.1507,  0.1245,
         -0.0636,  0.0927, -0.1076,  0.1600, -0.1993,  0.0529,  0.1127, -0.0464,
         -0.1606]], grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([8]) | Values : tensor([-0.1300, -0.1961], grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: to

In [10]:
# Pair each feature tensor with its label tensor so a DataLoader can yield (X, y) batches.
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

In [11]:
# Batch both splits 64 samples at a time. No `shuffle` argument is passed, so
# batches are drawn in dataset order on every epoch (the DataLoader default).
# NOTE(review): the batch size is fixed here; a `batch_size` variable assigned
# in a later cell does not resize these loaders.
train_dataloader = DataLoader(train_dataset, batch_size=64)
test_dataloader = DataLoader(test_dataset, batch_size=64)

In [12]:
# https://docs.pytorch.org/tutorials/beginner/basics/optimization_tutorial.html#full-implementation
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one optimisation epoch over ``dataloader``, then report accuracy on it.

    Args:
        dataloader: Iterable of ``(X, y)`` batches; ``len(dataloader.dataset)``
            is used as the total sample count.
        model: Module being trained; its parameters are updated in place.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar loss tensor.
        optimizer: Optimizer that steps ``model``'s parameters.

    Prints the loss of every 100th batch (starting with the first) together
    with the number of samples processed so far, then prints the accuracy over
    the same data measured in eval mode. The model is left in eval mode on
    return. Returns ``None``.
    """
    size = len(dataloader.dataset)
    # Running count of samples processed in this epoch. Counting them here keeps
    # the progress figure correct for any DataLoader batch size (including a
    # short final batch) and removes the dependency on a notebook-level
    # `batch_size` variable that is not a parameter of this function.
    seen = 0
    # Set the model to training mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        seen += len(X)
        if batch % 100 == 0:
            print(f"loss: {loss.item():>7f}  [{seen:>5d}/{size:>5d}]")

    # Added train eval to monitor if train acc much higher than test acc (overfitting)
    model.eval()
    correct = 0
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    correct /= size
    print(f"Train Accuracy: {(100*correct):>0.1f}")


def test_loop(dataloader, model, loss_fn):
    # Set the model to evaluation mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
    # also serves to reduce unnecessary gradient computations and memory usage for tensors with requires_grad=True
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [15]:
learning_rate = 0.001
# NOTE(review): not passed to any DataLoader in this cell, so it does not
# change the size of the batches the loops below iterate over.
batch_size = 10
epochs = 100
# Early-stopping state: lowest held-out loss seen so far, how many consecutive
# non-improving epochs are tolerated, and how many have elapsed.
best_loss = float("inf")
patience = 10
counter = 0

# NOTE(review): `model` is not re-created in this cell, so training continues
# from whatever weights it already holds rather than from a fresh initialisation.
# optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=0.01)
# optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.01)
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=0.01)
loss_fn = nn.CrossEntropyLoss()

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loss = test_loop(test_dataloader, model, loss_fn)

    # Early stopping on the held-out loss. If test_loop reports no loss
    # (returns None) there is nothing to compare, so all epochs are run.
    if test_loss is None:
        continue
    if test_loss < best_loss:
        best_loss = test_loss
        counter = 0
    else:
        counter += 1
        if counter >= patience:
            print(f"Early stopping after epoch {t+1}: no improvement for {patience} epochs")
            break
print("Done!")

Epoch 1
-------------------------------
loss: 1.413993  [   64/  100]
Train Accuracy: 33.0
Test Error: 
 Accuracy: 26.9%, Avg loss: 1.403073 

Epoch 2
-------------------------------
loss: 1.410763  [   64/  100]
Train Accuracy: 34.0
Test Error: 
 Accuracy: 26.9%, Avg loss: 1.399617 

Epoch 3
-------------------------------
loss: 1.407810  [   64/  100]
Train Accuracy: 34.0
Test Error: 
 Accuracy: 26.9%, Avg loss: 1.396180 

Epoch 4
-------------------------------
loss: 1.404924  [   64/  100]
Train Accuracy: 34.0
Test Error: 
 Accuracy: 26.9%, Avg loss: 1.392754 

Epoch 5
-------------------------------
loss: 1.401987  [   64/  100]
Train Accuracy: 34.0
Test Error: 
 Accuracy: 26.9%, Avg loss: 1.389331 

Epoch 6
-------------------------------
loss: 1.399046  [   64/  100]
Train Accuracy: 34.0
Test Error: 
 Accuracy: 26.9%, Avg loss: 1.385906 

Epoch 7
-------------------------------
loss: 1.396104  [   64/  100]
Train Accuracy: 34.0
Test Error: 
 Accuracy: 26.9%, Avg loss: 1.382430 


In [17]:
learning_rate = 0.01
# NOTE(review): not passed to any DataLoader in this cell, so it does not
# change the size of the batches the loops below iterate over.
batch_size = 10
epochs = 100
# Early-stopping state: lowest held-out loss seen so far, how many consecutive
# non-improving epochs are tolerated, and how many have elapsed.
best_loss = float("inf")
patience = 10
counter = 0

# NOTE(review): `model` is not re-created in this cell, so training continues
# from whatever weights it already holds rather than from a fresh initialisation.
# optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=0.01)
# optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.01)
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=0.01)
loss_fn = nn.CrossEntropyLoss()

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loss = test_loop(test_dataloader, model, loss_fn)

    # Early stopping on the held-out loss. If test_loop reports no loss
    # (returns None) there is nothing to compare, so all epochs are run.
    if test_loss is None:
        continue
    if test_loss < best_loss:
        best_loss = test_loss
        counter = 0
    else:
        counter += 1
        if counter >= patience:
            print(f"Early stopping after epoch {t+1}: no improvement for {patience} epochs")
            break
print("Done!")

Epoch 1
-------------------------------
loss: 1.046135  [   64/  100]
Train Accuracy: 82.0
Test Error: 
 Accuracy: 61.5%, Avg loss: 1.037200 

Epoch 2
-------------------------------
loss: 1.005297  [   64/  100]
Train Accuracy: 83.0
Test Error: 
 Accuracy: 61.5%, Avg loss: 1.009476 

Epoch 3
-------------------------------
loss: 0.966484  [   64/  100]
Train Accuracy: 85.0
Test Error: 
 Accuracy: 61.5%, Avg loss: 0.982674 

Epoch 4
-------------------------------
loss: 0.928512  [   64/  100]
Train Accuracy: 86.0
Test Error: 
 Accuracy: 61.5%, Avg loss: 0.957343 

Epoch 5
-------------------------------
loss: 0.891526  [   64/  100]
Train Accuracy: 86.0
Test Error: 
 Accuracy: 61.5%, Avg loss: 0.934189 

Epoch 6
-------------------------------
loss: 0.855512  [   64/  100]
Train Accuracy: 86.0
Test Error: 
 Accuracy: 61.5%, Avg loss: 0.912728 

Epoch 7
-------------------------------
loss: 0.820589  [   64/  100]
Train Accuracy: 85.0
Test Error: 
 Accuracy: 69.2%, Avg loss: 0.892341 


In [18]:
learning_rate = 0.1
# NOTE(review): not passed to any DataLoader in this cell, so it does not
# change the size of the batches the loops below iterate over.
batch_size = 10
epochs = 100
# Early-stopping state: lowest held-out loss seen so far, how many consecutive
# non-improving epochs are tolerated, and how many have elapsed.
best_loss = float("inf")
patience = 10
counter = 0

# NOTE(review): `model` is not re-created in this cell, so training continues
# from whatever weights it already holds rather than from a fresh initialisation.
# optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=0.01)
# optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.01)
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=0.01)
loss_fn = nn.CrossEntropyLoss()

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loss = test_loop(test_dataloader, model, loss_fn)

    # Early stopping on the held-out loss. If test_loop reports no loss
    # (returns None) there is nothing to compare, so all epochs are run.
    if test_loss is None:
        continue
    if test_loss < best_loss:
        best_loss = test_loss
        counter = 0
    else:
        counter += 1
        if counter >= patience:
            print(f"Early stopping after epoch {t+1}: no improvement for {patience} epochs")
            break
print("Done!")

Epoch 1
-------------------------------
loss: 0.242553  [   64/  100]
Train Accuracy: 89.0
Test Error: 
 Accuracy: 57.7%, Avg loss: 0.763340 

Epoch 2
-------------------------------
loss: 0.257882  [   64/  100]
Train Accuracy: 89.0
Test Error: 
 Accuracy: 61.5%, Avg loss: 1.074366 

Epoch 3
-------------------------------
loss: 0.385321  [   64/  100]
Train Accuracy: 88.0
Test Error: 
 Accuracy: 73.1%, Avg loss: 0.928158 

Epoch 4
-------------------------------
loss: 0.355700  [   64/  100]
Train Accuracy: 89.0
Test Error: 
 Accuracy: 76.9%, Avg loss: 0.753626 

Epoch 5
-------------------------------
loss: 0.285632  [   64/  100]
Train Accuracy: 89.0
Test Error: 
 Accuracy: 65.4%, Avg loss: 0.763258 

Epoch 6
-------------------------------
loss: 0.251749  [   64/  100]
Train Accuracy: 89.0
Test Error: 
 Accuracy: 57.7%, Avg loss: 0.846703 

Epoch 7
-------------------------------
loss: 0.244999  [   64/  100]
Train Accuracy: 90.0
Test Error: 
 Accuracy: 53.8%, Avg loss: 0.904083 
