In [None]:
from __future__ import print_function
from __future__ import division
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import pandas as pd
import time
import os
import copy
import pandas as pd
print("PyTorch Version: ",torch.__version__)
print("Torchvision Version: ",torchvision.__version__)

PyTorch Version:  1.13.1+cu117
Torchvision Version:  0.14.1+cu117


In [None]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs, lr, block):
    """
    Train ``model`` and periodically evaluate it on the validation split.

    The training phase runs every epoch. The validation phase runs only on
    the first epoch, on every 10th epoch (epoch index 9, 19, ...) and on the
    last epoch.

    Side effects (files are written to the current working directory):
      * after every epoch the accumulated log is written to a ``.csv`` file
        and the current weights to a ``-evalend.pt`` file;
      * whenever validation accuracy improves, the weights are also written
        to a ``-best-evalend.pt`` file.
    File names are built from the module-level ``dataset_sz``, a code derived
    from ``lr``, ``block`` and (only when ``num_epochs != 10``) the epoch count.

    Relies on the module-level names ``device`` and ``dataset_sz``.

    Args:
        model: network to train. It is not moved by this function; input
            batches are moved to ``device``.
        dataloaders: mapping with ``'train'`` and ``'val'`` entries, each an
            iterable of ``(inputs, labels)`` batches exposing ``.dataset``.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per training batch.
        num_epochs: number of epochs to run.
        lr: learning rate; used here only to name the output files.
        block: fine-tuned block identifier; used here only to name the
            output files.

    Returns:
        ``(model, log_values, best_model_wts)`` where ``model`` carries the
        weights of the last epoch (the best weights are NOT loaded back),
        ``log_values`` has one row per epoch (``[train_loss, train_acc]``,
        followed by ``[val_loss, val_acc]`` on epochs where validation ran),
        and ``best_model_wts`` is a copy of the state dict with the best
        validation accuracy seen.
    """
    since = time.time()

    log_values = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    # Epoch count is only appended to file names for non-default run lengths.
    app_epoch = f'-{num_epochs}e' if num_epochs != 10 else ''

    # Text after the last '.' of str(lr), e.g. 0.001 -> '001'. Values printed
    # in scientific notation contain no '.', so the whole string is kept
    # (1e-05 -> '1e-05').
    lr_code = str(lr).split('.')[-1]

    for epoch in range(num_epochs):
        print('{}/{}'.format(epoch, num_epochs - 1), end='\t')
        log_value = []

        # Each epoch has a training phase and (sometimes) a validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                # Validate only at the first epoch, every 10th epoch and the last epoch
                at_start = epoch == 0
                at_end = epoch == num_epochs - 1
                # A remainder modulo 10 can never equal 10; compare against 0.
                every_10 = ((epoch + 1) % 10) == 0

                to_eval = at_start or at_end or every_10
                if not to_eval:
                    continue
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track gradient history only in the training phase
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics: weight the batch loss by batch size so the
                # epoch loss below is a per-sample average
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc), end='\t')
            log_value += [epoch_loss, epoch_acc.item()]

            # keep (and persist) a copy of the best-performing weights
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                torch.save(best_model_wts, f'{dataset_sz}-{lr_code}-{block}{app_epoch}-best-evalend.pt')

        # Persist progress after every epoch so an interrupted run keeps its log/weights
        log_values.append(log_value)
        pd.DataFrame(log_values).to_csv(f'{dataset_sz}-{lr_code}-{block}{app_epoch}-evalend.csv', index=False)
        torch.save(model.state_dict(), f'{dataset_sz}-{lr_code}-{block}{app_epoch}-evalend.pt')

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # The best weights are returned separately rather than loaded into the model:
    # model.load_state_dict(best_model_wts)
    return model, log_values, best_model_wts

In [None]:
def only_fine_tune_block(model, block_num):
    """
    Freeze every parameter of ``model`` except those of ``model.layer<block_num>``.

    Args:
        model: module exposing its blocks as attributes named ``layer<N>``.
        block_num: suffix of the ``layer`` attribute to keep trainable.

    Raises:
        AttributeError: if ``model`` has no attribute ``layer<block_num>``.
            The model is left untouched in that case.
    """
    # Resolve the block first (plain attribute lookup, no eval) so that an
    # invalid block_num fails before any parameter has been frozen.
    block = getattr(model, f'layer{block_num}')

    # freeze all params
    for param in model.parameters():
        param.requires_grad = False
    # unfreeze the selected block
    for param in block.parameters():
        param.requires_grad = True

# Target crop size passed to RandomResizedCrop (train) and CenterCrop (val) below.
input_size = 224

In [None]:
from pathlib import Path
# Dataset location -- adjust per machine / experiment.
# Available roots:
#   whole dataset: /mnt/d/dataset/stylized-imagenet
#   subsets:       /home/rs/stylized-imagenet-1k
#                  /home/rs/stylized-imagenet-5k
#                  /home/rs/stylized-imagenet-10k
DATA_DIR = "/home/rs/stylized-imagenet-1k"
data_dir = DATA_DIR

# Size tag = text after the last '-' in the path ("1k" here); it becomes part
# of the output file names written during training.
dataset_sz = DATA_DIR.rpartition('-')[2]

# Batch size -- tune to the machine's memory.
# Observed usage: 128 -> ~14 GB, 192 -> ~19.5 GB, 256 -> ~24 GB.
# Original note: "192 ~= 256" (presumably similar throughput -- TODO confirm).
BATCH_SIZE = 192

def check_valid(path):
    """
    Tell whether ``path`` should be kept as a dataset file.

    Returns False when the file's stem begins with ``'._'`` and True
    otherwise (such files are presumably macOS sidecar files -- TODO confirm).
    """
    stem = Path(path).stem
    return stem[:2] != '._'

# Per-channel mean / std applied to both splits after conversion to tensors.
normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

# Training: random crop + horizontal flip augmentation, then normalization.
# Validation: deterministic resize + center crop, then normalization.
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),  # resize to 256 first, then crop to input_size on the next line
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        normalize,
    ]),
}

print("Initializing Datasets and Dataloaders...")

# Create training and validation datasets from <data_dir>/train and <data_dir>/val,
# skipping files rejected by check_valid.
image_datasets = {
    split: datasets.ImageFolder(
        os.path.join(data_dir, split),
        data_transforms[split],
        is_valid_file=check_valid,
    )
    for split in ['train', 'val']
}

# Create training and validation dataloaders.
# Only the training split is shuffled: validation metrics are aggregated over
# the whole split, so shuffling it adds nothing and makes batch order
# non-reproducible.
dataloaders_dict = {
    split: torch.utils.data.DataLoader(
        image_datasets[split],
        batch_size=BATCH_SIZE,
        shuffle=(split == 'train'),
        num_workers=4,
    )
    for split in ['train', 'val']
}

# Use the first GPU when available, otherwise fall back to the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

Initializing Datasets and Dataloaders...


In [None]:
def finetune_model(LEARNING_RATE, FINE_TUNE_BLOCK, NUM_EPOCHS=10):
    """
    Fine-tune a single block of an ImageNet-pretrained ResNet-50.

    A fresh pretrained model is created on every call, all parameters except
    those of ``layer<FINE_TUNE_BLOCK>`` are frozen, and the remaining ones are
    trained with SGD (momentum 0.9) and cross-entropy loss via ``train_model``.

    Relies on the module-level names ``device`` and ``dataloaders_dict``.

    Args:
        LEARNING_RATE: SGD learning rate.
        FINE_TUNE_BLOCK: number of the ResNet ``layer`` block to fine-tune.
        NUM_EPOCHS: number of training epochs (default 10).

    Returns:
        The ``(model, log_values, best_model_wts)`` tuple produced by
        ``train_model``.
    """
    # Log the parameter itself rather than a global that happens to be named
    # `lr`; `!r` keeps the output identical to the `{lr=}` form.
    print(f"lr={LEARNING_RATE!r}, {FINE_TUNE_BLOCK=}")

    # initialize fresh model from resnet pretrained
    model_ft = models.resnet50(weights='ResNet50_Weights.IMAGENET1K_V1')

    # freeze/unfreeze specific block
    only_fine_tune_block(model_ft, FINE_TUNE_BLOCK)

    model_ft = model_ft.to(device)

    # Hand the optimizer only the parameters left trainable above
    params_to_update = [
        param for param in model_ft.parameters() if param.requires_grad
    ]
    optimizer_ft = optim.SGD(params_to_update, lr=LEARNING_RATE, momentum=0.9)

    # Setup the loss fxn
    criterion = nn.CrossEntropyLoss()

    # Train and evaluate
    model_ft, hist, best_model_wt = train_model(
        model_ft,
        dataloaders_dict,
        criterion,
        optimizer_ft,
        num_epochs=NUM_EPOCHS,
        lr=LEARNING_RATE,
        block=FINE_TUNE_BLOCK
    )

    # Return the results instead of silently discarding them
    return model_ft, hist, best_model_wt


In [None]:
# Hyper-parameter grid for the sweep. Dataset size is not part of the grid;
# it is selected through DATA_DIR.

# Learning rates to run. Other candidates (currently disabled): 0.001, 0.0001
LRS = [1e-05]

# Blocks (model.layer<N>) to fine-tune, in run order.
FINE_TUNE_BLOCKS = [3, 2, 1, 4]

In [None]:
# Run one fine-tuning job per (learning rate, block) combination.
# Each finetune_model call starts from a freshly loaded pretrained model.
for lr in LRS:
    for block in FINE_TUNE_BLOCKS:
        # NUM_EPOCHS defaults to 10; pass a third argument (e.g. 1) for a quick trial run
        finetune_model(lr, block)
        # Release cached GPU memory before the next run
        torch.cuda.empty_cache()

lr=1e-05, FINE_TUNE_BLOCK=3
0/9	train Loss: 5.4997 Acc: 0.1390	val Loss: 4.6562 Acc: 0.2309	
1/9	train Loss: 5.5483 Acc: 0.1420	
2/9	train Loss: 5.3029 Acc: 0.1600	
3/9	train Loss: 5.4090 Acc: 0.1380	
4/9	train Loss: 5.5113 Acc: 0.1410	
5/9	train Loss: 5.5583 Acc: 0.1550	
6/9	train Loss: 5.5341 Acc: 0.1450	
7/9	train Loss: 5.4032 Acc: 0.1560	
8/9	train Loss: 5.4245 Acc: 0.1460	
9/9	train Loss: 5.4020 Acc: 0.1500	val Loss: 4.6541 Acc: 0.2243	
Training complete in 3m 29s
Best val Acc: 0.230863
lr=1e-05, FINE_TUNE_BLOCK=2
0/9	train Loss: 5.6223 Acc: 0.1340	val Loss: 4.6519 Acc: 0.2307	
1/9	train Loss: 5.5447 Acc: 0.1490	
2/9	train Loss: 5.4428 Acc: 0.1540	
3/9	train Loss: 5.4856 Acc: 0.1510	
4/9	train Loss: 5.4867 Acc: 0.1510	
5/9	train Loss: 5.4328 Acc: 0.1740	
6/9	train Loss: 5.4402 Acc: 0.1540	
7/9	train Loss: 5.4658 Acc: 0.1620	
8/9	train Loss: 5.4260 Acc: 0.1500	
9/9	train Loss: 5.4264 Acc: 0.1600	val Loss: 4.6646 Acc: 0.2244	
Training complete in 3m 29s
Best val Acc: 0.230663
lr=1e-

In [None]:
# Long runs: 100 epochs at lr=0.001, one run per fine-tuned block (2, 4, 1).
# Each entry is (learning rate, block, epochs).
# Earlier trial settings, kept for reference:
#   (0.02, 3, 1)    trial throwaway
#   (0.01, 3, 10)
#   (0.1, 3, 10)
ll = [(0.001, blk, 100) for blk in (2, 4, 1)]

for run_cfg in ll:
    lr, block, epochs = run_cfg
    finetune_model(lr, block, epochs)
    torch.cuda.empty_cache()

lr=0.001, FINE_TUNE_BLOCK=2
0/99	train Loss: 5.2342 Acc: 0.1717	

KeyboardInterrupt: 

In [None]:
# Same LRS x FINE_TUNE_BLOCKS sweep as the earlier cell: one fine-tuning job
# per (learning rate, block) combination, with the default number of epochs.
# NOTE(review): the stored output of this cell uses a different log layout and
# ends in "NameError: name 'pd' is not defined", so it appears to come from an
# older version of the training code -- re-run to refresh it.
for lr in LRS:
    for block in FINE_TUNE_BLOCKS:
        finetune_model(lr, block)
        # Release cached GPU memory before the next run
        torch.cuda.empty_cache()



Epoch 0/9
----------
train Loss: 5.3302 Acc: 0.1663
val Loss: 4.4713 Acc: 0.2443

Epoch 1/9
----------
train Loss: 5.1819 Acc: 0.1751
val Loss: 4.3010 Acc: 0.2585

Epoch 2/9
----------
train Loss: 5.0858 Acc: 0.1849
val Loss: 4.2338 Acc: 0.2625

Epoch 3/9
----------
train Loss: 5.0084 Acc: 0.1833
val Loss: 4.1818 Acc: 0.2674

Epoch 4/9
----------
train Loss: 4.9937 Acc: 0.1817
val Loss: 4.1209 Acc: 0.2732

Epoch 5/9
----------
train Loss: 4.9473 Acc: 0.1858
val Loss: 4.0721 Acc: 0.2773

Epoch 6/9
----------
train Loss: 4.9247 Acc: 0.1874
val Loss: 4.0423 Acc: 0.2795

Epoch 7/9
----------
train Loss: 4.8484 Acc: 0.1984
val Loss: 4.0402 Acc: 0.2791

Epoch 8/9
----------
train Loss: 4.8369 Acc: 0.2037
val Loss: 4.0046 Acc: 0.2819

Epoch 9/9
----------
train Loss: 4.8314 Acc: 0.1990
val Loss: 3.9704 Acc: 0.2833

Training complete in 19m 21s
Best val Acc: 0.283288


NameError: name 'pd' is not defined