In [1]:
import os
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import wandb
import yaml
from torch.optim.lr_scheduler import StepLR
from utils.logger import *
from utils.models_init import *
from utils.training import * 
from utils.optimizers import *
from utils.custom_models import *

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
with open('/home/disi/ml/intromlproject/config.yaml', 'r') as file:
    config = yaml.safe_load(file)

config = config['config']
config['data_dir'] = config['data_dir'].format(root=config['root'], img_folder=config['img_folder'])
config['save_dir'] = config['save_dir'].format(root=config['root'], model_name=config['model_name'], img_folder=config['img_folder'])
if config['checkpoint'] is not None:
    config['checkpoint'] = config['checkpoint'].format(root=config['root'])
config['device'] = "cuda" if torch.cuda.is_available() else "cpu"
config['project_name'] = config['project_name'].format(model_name=config['model_name'])
config['dataset_name'] = config['dataset_name'].format(img_folder=config['img_folder'])

In [3]:
train_transform = transforms.Compose([
    transforms.Resize((config['image_size'], config['image_size'])),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),  
    transforms.ToTensor(),
    transforms.Normalize(mean=config['mean'], std=config['std'])
])

val_transform = transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

In [4]:
train_dataset = datasets.ImageFolder(root=os.path.join(config['data_dir'], 'train'), transform=train_transform)
val_dataset = datasets.ImageFolder(root=os.path.join(config['data_dir'], 'val'), transform=train_transform)

In [5]:
train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True, num_workers=8)
val_loader = DataLoader(val_dataset, batch_size=config['batch_size'], shuffle=False, num_workers=8)

In [6]:
# Function to check DataLoader
def check_dataloader(loader):
    try:
        for i, (inputs, labels) in enumerate(loader):
            if i == 0:  # Just check the first batch to keep it simple
                print("First batch of inputs:", inputs.shape)
                print("First batch of labels:", labels.shape)
                break
    except Exception as e:
        print("Error during loading data:", e)
        return False
    return True

# Check DataLoader
if check_dataloader(train_loader):
    print("DataLoader loaded correctly.")
else:
    print("DataLoader has issues.")

First batch of inputs: torch.Size([64, 3, 224, 224])
First batch of labels: torch.Size([64])
DataLoader loaded correctly.


In [7]:
num_classes = len(train_loader.dataset.classes)
model = init_model(config['model_name'], num_classes)



In [8]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()
# Optimize only the parameters that require gradients
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = 3, gamma = 0.1)

In [9]:
from torchvision import datasets, transforms
import os

# Define your data directory
data_dir = '/home/disi/ml/datasets/aircraft'

# Optionally, define transforms
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}

# Load datasets
datasets = {
    x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
    for x in ['train', 'val']
}

In [10]:
# Get the size of each dataset
dataset_sizes = {x: len(datasets[x]) for x in ['train', 'val']}
dataset_sizes

{'train': 3334, 'val': 3333}

In [11]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

for inputs, labels in train_loader:
    inputs = inputs.to(device)
    labels = labels.to(device)

for inputs, labels in val_loader:
    inputs = inputs.to(device)
    labels = labels.to(device)


In [12]:
# Assuming 'model' is your neural network model
first_param_device = next(model.parameters()).device
print(f"The model is loaded on: {first_param_device}")
for name, param in model.named_parameters():
    print(f"{name} is on {param.device}")

The model is loaded on: cuda:0
features.conv0.weight is on cuda:0
features.norm0.weight is on cuda:0
features.norm0.bias is on cuda:0
features.denseblock1.denselayer1.norm1.weight is on cuda:0
features.denseblock1.denselayer1.norm1.bias is on cuda:0
features.denseblock1.denselayer1.conv1.weight is on cuda:0
features.denseblock1.denselayer1.norm2.weight is on cuda:0
features.denseblock1.denselayer1.norm2.bias is on cuda:0
features.denseblock1.denselayer1.conv2.weight is on cuda:0
features.denseblock1.denselayer2.norm1.weight is on cuda:0
features.denseblock1.denselayer2.norm1.bias is on cuda:0
features.denseblock1.denselayer2.conv1.weight is on cuda:0
features.denseblock1.denselayer2.norm2.weight is on cuda:0
features.denseblock1.denselayer2.norm2.bias is on cuda:0
features.denseblock1.denselayer2.conv2.weight is on cuda:0
features.denseblock1.denselayer3.norm1.weight is on cuda:0
features.denseblock1.denselayer3.norm1.bias is on cuda:0
features.denseblock1.denselayer3.conv1.weight is o

In [13]:
wandb.login()  # @edit
wandb.init(project=config['project_name'],
name = config['dataset_name'],
#name=f"{config['model_name']}_{config['dataset_name']}_opt: {config['optimizer']}_batch_size: {config['batch_size']}_lr: {config['learning_rate']}",
config=config)


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mlorenzo-chicco[0m ([33mlorenzochicco99[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [14]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=25):
    counter = 0
    patience = 3
    best_val_loss = float('inf')
    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)


        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
                data_loader = train_loader
                print("Entered train mode")

            else:
                model.eval()   # Set model to evaluate mode
                data_loader = val_loader
                print("Entered val mode")

            running_loss = 0.0
            running_corrects = 0
            total_samples = 0

            # Iterate over data
            for inputs, labels in data_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Zero the parameter gradients
                optimizer.zero_grad()

                # Forward pass
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # Backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                        

                # Statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                total_samples += inputs.size(0)

            epoch_loss = running_loss / total_samples
            epoch_acc = running_corrects.double() / total_samples

            if phase == "train":
                wandb.log({
                "train/loss": epoch_loss,
                "train/accuracy":epoch_acc,
                "epoch": epoch
                })
            
            else:
                wandb.log({
                "val/loss":epoch_loss,
                "val/accuracy":epoch_acc,
                "epoch": epoch})

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

        if epoch_loss < best_val_loss:
            counter = 0
            best_val_loss = epoch_loss

        else:
            counter += 1

        if counter > patience:
            break

        scheduler.step()

    print('Training complete')
    return model

# Execute the training function
model_ft = train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=11)


Epoch 1/11
----------
Entered train mode
train Loss: 3.2182 Acc: 0.1956
Entered val mode
val Loss: 2.9834 Acc: 0.2373
Epoch 2/11
----------
Entered train mode
train Loss: 2.0459 Acc: 0.4049
Entered val mode
val Loss: 2.6404 Acc: 0.3042
Epoch 3/11
----------
Entered train mode
train Loss: 1.4728 Acc: 0.5630
Entered val mode
val Loss: 1.8426 Acc: 0.4584
Epoch 4/11
----------
Entered train mode
train Loss: 0.8998 Acc: 0.7579
Entered val mode
val Loss: 1.0055 Acc: 0.7276
Epoch 5/11
----------
Entered train mode
train Loss: 0.6465 Acc: 0.8326
Entered val mode
val Loss: 0.9072 Acc: 0.7435
Epoch 6/11
----------
Entered train mode
train Loss: 0.5391 Acc: 0.8737
Entered val mode
val Loss: 0.8298 Acc: 0.7672
Epoch 7/11
----------
Entered train mode
train Loss: 0.4479 Acc: 0.9010
Entered val mode
val Loss: 0.8141 Acc: 0.7699
Epoch 8/11
----------
Entered train mode
train Loss: 0.4234 Acc: 0.9115
Entered val mode
val Loss: 0.8038 Acc: 0.7768
Epoch 9/11
----------
Entered train mode
train Loss: 0.4