In [8]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
import pandas as pd
from torch.utils.data import Dataset
from skimage import io
import matplotlib.pyplot as plt
import torch.nn.functional as F
import optuna
import pytorch_lightning as pl
import torch 
import torch.nn as nn 

from torchmetrics import __version__ as torchmetrics_version
from pkg_resources import parse_version

from torchmetrics import Accuracy

In [9]:
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

Using cpu device


# Lab for Week 5: Fitting a Feed Forward Neural Network

In Milestone 1 you chose a project topic you are interested in and found some datasets you could use. You also already built a [data loader](https://pytorch.org/tutorials/beginner/basics/data_tutorial.html).

Today, we will fit a baseline feed forward neural network to one of the datasets you chose by following the pytorch tutorials.

## Step 1: Define a Data Loader
You are encouraged to use one of the data loaders you built for milestone 1. Otherwise use the one described [here](https://pytorch.org/tutorials/beginner/basics/data_tutorial.html).

Tip: If your targets are labels, you might want to use the lambda transformation described [here](https://pytorch.org/tutorials/beginner/basics/transforms_tutorial.html) to turn your target into one-hot vectors.

In [10]:
import torch
from torchvision import transforms
from PIL import Image
import os


transform = transforms.Compose([
    transforms.Resize((299, 299)),  # Resize to 299x299, this is a better size for the type of images I'm using
    transforms.ToTensor(),          # Converting images to tensor
])


from sklearn.preprocessing import LabelEncoder
# Loding the dataset
#fitzpatrick = 'https://raw.githubusercontent.com/mattgroh/fitzpatrick17k/refs/heads/main/fitzpatrick17k.csv'
#df = pd.read_csv(fitzpatrick)
# To this:
fitzpatrick_url = 'https://raw.githubusercontent.com/mattgroh/fitzpatrick17k/refs/heads/main/fitzpatrick17k.csv'
df = pd.read_csv(fitzpatrick_url)  # Correctly load the dataset from the URL

image_folder = '/Users/laureladams/Documents/School/spring2025/introtoADL/introToAppliedDeepLearning/processed_images'
#Definnig the class
class MultiLayerPerceptron(pl.LightningModule):
    def __init__(self, image_shape=(3, 299, 299), hidden_units=(32, 16)):
        super().__init__()
        # new PL attributes:
        if parse_version(torchmetrics_version) > parse_version("0.8"):
            self.train_acc = Accuracy(task="multiclass", num_classes=10)
            self.valid_acc = Accuracy(task="multiclass", num_classes=10)
            self.test_acc = Accuracy(task="multiclass", num_classes=10)
        else:
            self.train_acc = Accuracy()
            self.valid_acc = Accuracy()
            self.test_acc = Accuracy()
        
        # Model similar to previous section:
        input_size = image_shape[0] * image_shape[1] * image_shape[2] 
        all_layers = [nn.Flatten()]
        for hidden_unit in hidden_units: 
            layer = nn.Linear(input_size, hidden_unit) 
            all_layers.append(layer) 
            all_layers.append(nn.ReLU()) 
            input_size = hidden_unit 
 
        all_layers.append(nn.Linear(hidden_units[-1], 10)) 
        self.model = nn.Sequential(*all_layers)
        self.image_folder = image_folder
        # Filter out rows with missing images
        valid_rows = []
        for idx, row in df.iterrows():
            image_path = os.path.join(image_folder, f"{row['md5hash']}.jpg")
            if os.path.exists(image_path):
                valid_rows.append(idx)
            else:
                print(f"Warning: Image not found: {image_path}")
        self.data = df.loc[valid_rows].reset_index(drop=True)

        self.features = self.data[['md5hash']].values
        self.labels = self.data['label'].values

        # Label encoding that will convert fitzpatrick string labels (skin tone classification) into integers
        self.label_encoder = LabelEncoder()
        self.labels = self.label_encoder.fit_transform(self.labels)
    def forward(self, x):
        x = self.model(x)
        return x

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = nn.functional.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        self.train_acc.update(preds, y)
        self.log("train_loss", loss, prog_bar=True)
        return loss
    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image_path = os.path.join(self.image_folder, f"{self.features[idx][0]}.jpg")
    
        if os.path.exists(image_path):
            img = Image.open(image_path).convert("RGB")  
            img = transform(img)  # Apply transformations to the images (resize + ToTensor)
        else:
            raise FileNotFoundError(f"Image not found: {image_path}") 
            #to check if the images are there or missing, because I was having issues with this originally
    
        label = torch.tensor(self.labels[idx], dtype=torch.long)  # To convert label to tensor
    
        return img, label  # To make sure the image is a tensor before returning, was getting errors from this 
# Conditionally define epoch end methods based on PyTorch Lightning version
    if parse_version(pl.__version__) >= parse_version("2.0"):
        # For PyTorch Lightning 2.0 and above
        def on_training_epoch_end(self):
            self.log("train_acc", self.train_acc.compute())
            self.train_acc.reset()

        def on_validation_epoch_end(self):
            self.log("valid_acc", self.valid_acc.compute())
            self.valid_acc.reset()

        def on_test_epoch_end(self):
            self.log("test_acc", self.test_acc.compute())
            self.test_acc.reset()

    else:
        # For PyTorch Lightning < 2.0
        def training_epoch_end(self, outs):
            self.log("train_acc", self.train_acc.compute())
            self.train_acc.reset()

        def validation_epoch_end(self, outs):
            self.log("valid_acc", self.valid_acc.compute())
            self.valid_acc.reset()

        def test_epoch_end(self, outs):
            self.log("test_acc", self.test_acc.compute())
            self.test_acc.reset()
    
    def test_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = nn.functional.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        self.test_acc.update(preds, y)
        self.log("test_loss", loss, prog_bar=True)
        self.log("test_acc", self.test_acc.compute(), prog_bar=True)
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=0.001)
        return optimizer


# from sklearn.model_selection import train_test_split
# # Making training and testing dataset!
# df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)
# train_dataset = CustomDataset(df_train, image_folder)
# test_dataset = CustomDataset(df_test, image_folder)

# print(len(train_dataset)) # just to make sure this worked
# print(len(test_dataset))



In [11]:
# Used this cell to process the images, resize, and put them in a new folder

# # Define paths
# input_folder = "/Users/laureladams/Documents/School/spring2025/introtoADL/introToAppliedDeepLearning/images"
# output_folder = "/Users/laureladams/Documents/School/spring2025/introtoADL/introToAppliedDeepLearning/processed_images"

# # Ensure output folder exists
# os.makedirs(output_folder, exist_ok=True)

# # Define transformation
# transform = transforms.Compose([
#     transforms.Resize((299, 299))  # Resize to 299x299
# ])

# # Process images
# for filename in os.listdir(input_folder):
#     if filename.endswith(".jpg") or filename.endswith(".png"):  # Add other formats if needed
#         img_path = os.path.join(input_folder, filename)
#         img = Image.open(img_path)
#         img = transform(img)  # Apply resize transform
#         img.save(os.path.join(output_folder, filename))  # Save resized image

# print("Processing complete! All images are resized and saved in 'processed_images'.")



In [12]:
#  DataLoader that I will use for training and testing
# train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
# test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)

# # Example: Print the first batch
# for batch_X, batch_y in train_loader:
#     print(batch_X.shape, batch_y.shape)  
#     break
fitzpatrick = 'https://raw.githubusercontent.com/mattgroh/fitzpatrick17k/refs/heads/main/fitzpatrick17k.csv'
df = pd.read_csv(fitzpatrick)
image_folder = '/Users/laureladams/Documents/School/spring2025/introtoADL/introToAppliedDeepLearning/processed_images'
#Definnig the class
class CustomDataset(Dataset):
    def __init__(self, data_path='./'):
        super().__init__()
        self.data_path = data_path
        self.transform = transforms.Compose([transforms.ToTensor()])
        
    def prepare_data(self):
        fitzpatrick_df = pd.read_csv(self.data_path + "/fitzpatrick.csv")

    def setup(self, stage=None):
        # stage is either 'fit', 'validate', 'test', or 'predict'
        # here note relevant
        Fitzpatrick_all = Fitzpatrick( 
            root=self.data_path,
            train=True,
            transform=self.transform,  
            download=False
        ) 

        self.train, self.val = random_split(
            fitzpatrick_all, [55000, 5000], generator=torch.Generator().manual_seed(1)
        )

        self.test = Fitzpatrick( 
            root=self.data_path,
            train=False,
            transform=self.transform,  
            download=False
        ) 

    def train_dataloader(self):
        return DataLoader(self.train, batch_size=64, num_workers=4)

    def val_dataloader(self):
        return DataLoader(self.val, batch_size=64, num_workers=4)

    def test_dataloader(self):
        return DataLoader(self.test, batch_size=64, num_workers=4)
    
    
torch.manual_seed(1) 


<torch._C.Generator at 0x106f00bb0>

In [13]:
pip install torch torchvision torchaudio


Note: you may need to restart the kernel to use updated packages.


In [26]:
# test

import pytorch_lightning as pl
from torch.utils.data import DataLoader, random_split

class FitzpatrickDataModule(pl.LightningDataModule):
    def __init__(self, df, image_folder, batch_size=64):
        super().__init__()
        self.df = df
        self.image_folder = image_folder
        self.batch_size = batch_size
        self.transform = transforms.Compose([
            transforms.Resize((299, 299)),
            transforms.ToTensor()
        ])

    def setup(self, stage=None):
        # dataset = CustomDataset(self.df, self.image_folder, self.transform)
        dataset = CustomDataset(self.df, self.image_folder)
        train_size = int(0.8 * len(dataset))
        val_size = len(dataset) - train_size
        self.train, self.val = random_split(dataset, [train_size, val_size], generator=torch.Generator().manual_seed(1))
        self.test = CustomDataset(self.df, self.image_folder, self.transform)  # Could split differently

    def train_dataloader(self):
        return DataLoader(self.train, batch_size=self.batch_size, shuffle=True, num_workers=4)

    def val_dataloader(self):
        return DataLoader(self.val, batch_size=self.batch_size, num_workers=4)

    def test_dataloader(self):
        return DataLoader(self.test, batch_size=self.batch_size, num_workers=4)

# Usage
data_module = FitzpatrickDataModule(df, image_folder, batch_size=8)

In [27]:
#  DataLoader that I will use for training and testing
# train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
# test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)

# # Example: Print the first batch
# for batch_X, batch_y in train_loader:
#     print(batch_X.shape, batch_y.shape)
#     break
fitzpatrick = 'https://raw.githubusercontent.com/mattgroh/fitzpatrick17k/refs/heads/main/fitzpatrick17k.csv'
df = pd.read_csv(fitzpatrick)
image_folder = '/Users/laureladams/Documents/School/spring2025/introtoADL/introToAppliedDeepLearning/processed_images'
#Definnig the class
class CustomDataset(Dataset):
    def __init__(self, data_path='./'):
        super().__init__()
        self.data_path = data_path
        self.transform = transforms.Compose([transforms.ToTensor()])

    def prepare_data(self):
        fitzpatrick_df = pd.read_csv(self.data_path + "/fitzpatrick.csv")

    def setup(self, stage=None):
        # stage is either 'fit', 'validate', 'test', or 'predict'
        # here note relevant
        Fitzpatrick_all = Fitzpatrick(
            root=self.data_path,
            train=True,
            transform=self.transform,
            download=False
        )

        self.train, self.val = random_split(
            fitzpatrick_all, [55000, 5000], generator=torch.Generator().manual_seed(1)
        )

        self.test = Fitzpatrick(
            root=self.data_path,
            train=False,
            transform=self.transform,
            download=False
        )

    def train_dataloader(self):
        return DataLoader(self.train, batch_size=64, num_workers=4)

    def val_dataloader(self):
        return DataLoader(self.val, batch_size=64, num_workers=4)

    def test_dataloader(self):
        return DataLoader(self.test, batch_size=64, num_workers=4)


torch.manual_seed(1)


<torch._C.Generator at 0x106f00bb0>

In [None]:
print(Users/)

In [28]:
from pytorch_lightning.callbacks import ModelCheckpoint
import pytorch_lightning as pl
import torch

# Initialize model
fitzpatrickclassifier = MultiLayerPerceptron()

# Define callbacks
callbacks = [ModelCheckpoint(save_top_k=1, mode='max', monitor="valid_acc")]  # save top 1 model

# Initialize DataModule (using the image folder path defined earlier)
fitzpatrick_data = FitzpatrickDataModule(df=df, image_folder='/Users/laureladams/Documents/School/spring2025/introtoADL/introToAppliedDeepLearning/processed_images')

# Initialize Trainer
if torch.cuda.is_available():  # if you have GPUs
    trainer = pl.Trainer(max_epochs=10, callbacks=callbacks, gpus=1)
else:
    trainer = pl.Trainer(max_epochs=10, callbacks=callbacks)

# Fit the model using the DataModule
trainer.fit(model=fitzpatrickclassifier, datamodule=fitzpatrick_data)




GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs




/Users/laureladams/anaconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/configuration_validator.py:68: You passed in a `val_dataloader` but have no `validation_step`. Skipping val loop.


TypeError: CustomDataset.__init__() takes from 1 to 2 positional arguments but 3 were given

In [None]:
# Debugging: check the type of fitzpatrick
print(type(fitzpatrick))  # Check if it's a LightningDataModule or a string

# Ensure that fitzpatrick is a LightningDataModule before passing it
if not isinstance(fitzpatrick, pl.LightningDataModule):
    raise TypeError("Expected a LightningDataModule, but got a string or other type. Check your data loading.")

# Now you can safely call trainer.fit
trainer.fit(model=fitzpatrickclassifier, datamodule=fitzpatrick)


In [None]:
# Ensure that fitzpatrick is a LightningDataModule before passing it
if isinstance(fitzpatrick, str):
    raise TypeError("Expected a LightningDataModule, but got a string. Check your data loading.")

trainer.fit(model=fitzpatrickclassifier, datamodule=fitzpatrick)


In [None]:
/Users/laureladams/Documents/School/spring2025/introtoADL/introToAppliedDeepLearning/images/0a0e21f413499ad85018f7fa0df3efe2.jpg

In [None]:
import os
import numpy as np
import pandas as pd
from PIL import Image
from keras.models import Model
from keras.preprocessing import image
from scipy.spatial import distance_matrix
from keras.layers import Dense, GlobalAveragePooling2D
from keras.applications.inception_v3 import InceptionV3

In [None]:
import os

# Original images folder
working_path = '/Users/laureladams/Documents/School/spring2025/introtoADL/introToAppliedDeepLearning/images'

# New folder for processed images
save_path = '/Users/laureladams/Documents/School/spring2025/introtoADL/introToAppliedDeepLearning/processed_images'

# Create the new folder if it doesn't already exist
os.makedirs(save_path, exist_ok=True)

# Verify the folder was created
print(f"Processed images will be saved to: {save_path}")


In [None]:
print('length of training dataset:')
print(len(train_dataset))
print('length of test dataset:')
print(len(test_dataset))

In [None]:
train_dataloader = DataLoader(train_dataset)
test_dataloader = DataLoader(test_dataset)

In [None]:
for x, y in train_dataloader:
    print(x)
    break

## Step 2: Define a Model
Follow [this tutorial](https://pytorch.org/tutorials/beginner/basics/buildmodel_tutorial.html) to define a model. Make sure that the input dimension fits the dimensionality of your data, and the output dimension fits the dimensionality of your targets.

In [None]:
# class NeuralNetwork(nn.Module):
#     def __init__(self):
#         super(NeuralNetwork, self).__init__()
        
#         # Define the fully connected layers
#         self.fc1 = nn.Linear(299 * 299 * 3, 512)  # Updated the input size to match the flattened image size
#         self.fc2 = nn.Linear(512, 128)
#         self.fc3 = nn.Linear(128, 114)  # There are 114 output classes (skin types/diseases represented)

#     def forward(self, x):
       
#         x = torch.flatten(x, 1)  # Flatten the image

        
#         x = F.relu(self.fc1(x))
#         x = F.relu(self.fc2(x))
#         x = self.fc3(x)  # Output layer (no activation function, logits)
        
#         return x

class MultiLayerPerceptron(pl.LightningModule):
    def __init__(self, image_shape=(1, 28, 28), hidden_units=(32, 16)):
        super().__init__()
        
        # new PL attributes:
        if parse_version(torchmetrics_version) > parse_version("0.8"):
            self.train_acc = Accuracy(task="multiclass", num_classes=10)
            self.valid_acc = Accuracy(task="multiclass", num_classes=10)
            self.test_acc = Accuracy(task="multiclass", num_classes=10)
        else:
            self.train_acc = Accuracy()
            self.valid_acc = Accuracy()
            self.test_acc = Accuracy()
        
        # Model similar to previous section:
        input_size = image_shape[0] * image_shape[1] * image_shape[2] 
        all_layers = [nn.Flatten()]
        for hidden_unit in hidden_units: 
            layer = nn.Linear(input_size, hidden_unit) 
            all_layers.append(layer) 
            all_layers.append(nn.ReLU()) 
            input_size = hidden_unit 
 
        all_layers.append(nn.Linear(hidden_units[-1], 10)) 
        self.model = nn.Sequential(*all_layers)

    def forward(self, x):
        x = self.model(x)
        return x

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = nn.functional.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        self.train_acc.update(preds, y)
        self.log("train_loss", loss, prog_bar=True)
        return loss

    # Conditionally define epoch end methods based on PyTorch Lightning version
    if parse_version(pl.__version__) >= parse_version("2.0"):
        # For PyTorch Lightning 2.0 and above
        def on_training_epoch_end(self):
            self.log("train_acc", self.train_acc.compute())
            self.train_acc.reset()

        def on_validation_epoch_end(self):
            self.log("valid_acc", self.valid_acc.compute())
            self.valid_acc.reset()

        def on_test_epoch_end(self):
            self.log("test_acc", self.test_acc.compute())
            self.test_acc.reset()

    else:
        # For PyTorch Lightning < 2.0
        def training_epoch_end(self, outs):
            self.log("train_acc", self.train_acc.compute())
            self.train_acc.reset()

        def validation_epoch_end(self, outs):
            self.log("valid_acc", self.valid_acc.compute())
            self.valid_acc.reset()

        def test_epoch_end(self, outs):
            self.log("test_acc", self.test_acc.compute())
            self.test_acc.reset()
    
    def test_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = nn.functional.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        self.test_acc.update(preds, y)
        self.log("test_loss", loss, prog_bar=True)
        self.log("test_acc", self.test_acc.compute(), prog_bar=True)
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=0.001)
        return optimizer


In [None]:
# model = NeuralNetwork().to(device)
# print(model)

## Step 3: Train Model

Follow the tutorial [here](https://pytorch.org/tutorials/beginner/basics/optimization_tutorial.html) to train your model.

In [None]:
# Defining hyperparameters for optimization
learning_rate = 1e-3
batch_size = 64
epochs = 5

In [None]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    # Set the model to training mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * batch_size + len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
    
    
    """
    Runs one full training epoch on the given model using the provided dataloader.

    Parameters:
        dataloader (torch.utils.data.DataLoader): DataLoader providing batches of training data (inputs and labels).
        model (torch.nn.Module): The PyTorch model to be trained.
        loss_fn (function): The loss function used to compute the error between predictions and true labels.
        optimizer (torch.optim.Optimizer): The optimizer used to update the model parameters.

    Behavior:
        - Sets the model to training mode (important for layers like dropout and batch normalization).
        - Iterates over the data batches:
            - Computes the model's predictions.
            - Calculates the loss.
            - Performs backpropagation and updates model weights.
        - Prints progress every 100 batches, showing the current loss and the number of samples processed.

    Returns:
        None
    """
    
#    TODO: Implement this function

def test_loop(dataloader, model, loss_fn):
    # Set the model to evaluation mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
    # also serves to reduce unnecessary gradient computations and memory usage for tensors with requires_grad=True
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print("Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

    """
    Evaluates the model's performance on a test dataset.

    Parameters:
        dataloader (torch.utils.data.DataLoader): DataLoader providing batches of test data (inputs and labels).
        model (torch.nn.Module): The PyTorch model to be evaluated.
        loss_fn (function): The loss function used to compute the error between predictions and true labels.

    Behavior:
        - Sets the model to evaluation mode (which disables behaviors like dropout).
        - Iterates over the test data without computing gradients (using torch.no_grad() for efficiency).
        - Accumulates the total loss and counts the number of correct predictions.
        - Computes the average loss and overall accuracy.
        - Prints the test accuracy and average loss.

    Returns:
        None
    """ 
    # TODO: Implement this function
    


In [None]:
# Train the model for 3 different hyper parameter settings (e.g. different learning rates, different loss functions that make sense for your data, etc.)

# loss_fn = nn.CrossEntropyLoss()
# optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# epochs = 10
# for t in range(epochs):
#     print(f"Epoch {t+1}\n-------------------------------")
#     train_loop(train_dataloader, model, loss_fn, optimizer)
#     test_loop(test_dataloader, model, loss_fn)
# print("Done!")

# Assuming `train_loop` and `test_loop` are defined to train and evaluate your model

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

epochs = 5  # reduced to 5 because 10 was taking forever

val_losses = []

for epoch in range(epochs):
    train_loop(train_loader, model, loss_fn, optimizer)
    val_loss = test_loop(test_loader, model, loss_fn)
    
    val_losses.append(val_loss)  # Wanted to store the validation loss so I'd be able to see it

print("Validation losses over epochs:", val_losses)


In [None]:
#opimization

# Define the model, loss function, and optimizer here (using your existing model)
def create_model():
    # Example: Define your model architecture (change this to your actual model)
    model = nn.Sequential(
        nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),
        nn.Flatten(),
        nn.Linear(64*128*128, 114)  # Change dimensions based on your input size
    )
    return model

def objective(trial):
    # Hyperparameter tuning
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-1)
    batch_size = trial.suggest_int('batch_size', 16, 128)
    momentum = trial.suggest_uniform('momentum', 0.5, 0.9)
    
    # Recreate model and dataloaders for each trial
    model = create_model()
    loss_fn = nn.CrossEntropyLoss()
    
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=20)

print("Best trial:")
trial = study.best_trial
print(f"  Value: {trial.value}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

## Step 4: Record results

Results are typically recorded in a table. For inspiration, check out the famous [Attention is All You Need paper](https://arxiv.org/abs/1706.03762). This paper first introduced the transformer architecture we will learn about later this semseter and has been highly influential in deep learnin. Check out how results are reported in Tables 2, 3, and 4.
Note that because here the transformers are used for text generation results are reported using the BLEU score (the higher the better). 

You should create your own result tables to record how different hyperparameter settings affect performance. Make sure to record test accuracy, not traingin accuracy!