In [5]:

import torch
import torch.nn as nn
import os
import warnings
import numpy as np
import torch.utils.data
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
from matplotlib import pyplot as plt
from dataset import ImagesDataset, RotatedImages
from architectures import MyCNN


In [6]:

def evaluate_model(model: torch.nn.Module, loader: torch.utils.data.DataLoader, loss_fn, device: torch.device):
    """Function for evaluation of a model ``model`` on the data in
    ``loader`` on device ``device``, using the specified ``loss_fn`` loss
    function."""
    model.eval()
    loss = 0
    with torch.no_grad():
        for data in tqdm(loader, desc="Evaluating", position=0, leave=False):
            inputs, targets, _ = data
            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs = model(inputs)
            loss += loss_fn(outputs, targets).item()
    loss /= len(loader)
    model.train()
    return loss

In [7]:

# Configuration parameters
results_path = "results"
device = "cuda"
learning_rate = 1e-3
weight_decay = 1e-5
n_updates = 50000
network_config = {
    "n_hidden_layers": 3,
    "n_in_channels": 3,
    "n_kernels": 32,
    "kernel_size": 7
}

# Setup
device = torch.device(device)
if "cuda" in device.type and not torch.cuda.is_available():
    warnings.warn("CUDA not available, falling back to CPU")
    device = torch.device("cpu")

np.random.seed(0)
torch.manual_seed(0)

plot_path = os.path.join(results_path, "plots")
os.makedirs(plot_path, exist_ok=True)


In [12]:

dataset = ImagesDataset(r"C:\Users\omark\OneDrive - Johannes Kepler Universit채t Linz\Johannes Kepler Universit채t\Programming for AI II\Assignment 7 Project\training_data\training_data", 100, 100, int)

n_total = len(dataset)
indices = np.arange(n_total)
np.random.shuffle(indices)

split = int(0.8 * n_total)
train_indices, test_indices = indices[:split], indices[split:]

training_set = torch.utils.data.Subset(dataset, indices=train_indices)
test_set = torch.utils.data.Subset(dataset, indices=test_indices)

train_loader = torch.utils.data.DataLoader(training_set, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=32, shuffle=False)









writer = SummaryWriter(log_dir=os.path.join(results_path, "tensorboard"))


In [13]:


net = MyCNN(**network_config)
net.to(device)

mse = torch.nn.MSELoss()
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate, weight_decay=weight_decay)

write_stats_at = 1000
plot_at = 10000
validate_at = 5000
update = 0
best_test_loss = np.inf
update_progress_bar = tqdm(total=n_updates, desc=f"loss: {np.nan:7.5f}", position=0)

saved_model_file = os.path.join(results_path, "best_model.pt")
torch.save(net, saved_model_file)

while update < n_updates:
    for data in train_loader:
        inputs, targets, *_ = data  # Adjust unpacking if needed
        inputs = inputs.to(device)
        targets = targets.to(device)
        
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = mse(outputs, targets)
        loss.backward()
        optimizer.step()
        
        if (update + 1) % write_stats_at == 0:
            writer.add_scalar(tag="Loss/training", scalar_value=loss.cpu(), global_step=update)
            for i, (name, param) in enumerate(net.named_parameters()):
                writer.add_histogram(tag=f"Parameters/[{i}] {name}", values=param.cpu(), global_step=update)
                writer.add_histogram(tag=f"Gradients/[{i}] {name}", values=param.grad.cpu(), global_step=update)
        
        if (update + 1) % plot_at == 0:
            plt(inputs.detach().cpu().numpy(), targets.detach().cpu().numpy(), outputs.detach().cpu().numpy(), plot_path, update)
        
        if (update + 1) % validate_at == 0:
            test_loss = evaluate_model(net, loader=test_loader, loss_fn=mse, device=device)
            writer.add_scalar(tag="Loss/test", scalar_value=test_loss, global_step=update)
            if test_loss < best_test_loss:
                best_test_loss = test_loss
                torch.save(net, saved_model_file)
        
        update_progress_bar.set_description(f"loss: {loss:7.5f}", refresh=True)
        update_progress_bar.update()
        
        update += 1
        if update >= n_updates:
            break

update_progress_bar.close()
writer.close()
print("Finished Training!")

net = torch.load(saved_model_file)
train_loss = evaluate_model(net, loader=train_loader, loss_fn=mse, device=device)
test_loss = evaluate_model(net, loader=test_loader, loss_fn=mse, device=device)

print(f"Scores:")
print(f"  training loss: {train_loss}")
print(f"      test loss: {test_loss}")

with open(os.path.join(results_path, "results.txt"), "w") as rf:
    print(f"Scores:", file=rf)
    print(f"  training loss: {train_loss}", file=rf)
    print(f"      test loss: {test_loss}", file=rf)

loss:     nan:   0%|                                                                         | 0/50000 [03:42<?, ?it/s]


RuntimeError: Given groups=1, weight of size [32, 3, 7, 7], expected input[32, 1, 100, 100] to have 3 channels, but got 1 channels instead