# Training

In this notebook, the selected models are trained on the datasets generated by the coreset selectors.

### Models

- Resnet18
- Mobilenet
- Inception
- VGGnet

In [1]:
import sys
sys.path.append('../submodules/PyTorch_CIFAR10/cifar10_models/')
sys.path.append('../submodules/PyTorch_CIFAR10')

from resnet import resnet18
from mobilenetv2 import mobilenet_v2
from inception import inception_v3
from vgg import vgg11_bn

### Training Epoch Function

Function that trains and evaluates the model for a given number of epochs.
Switches from optimizer1 to optimizer2 after 10 epochs, for using SWAT technique

[https://arxiv.org/pdf/1712.07628.pdf](https://arxiv.org/pdf/1712.07628.pdf)

In [2]:
from datetime import datetime
from sklearn.metrics import accuracy_score
import torch

def train_model(epochs, net, net_name, criterion, optimizer, train_loader, val_loader,scheduler=None, verbose=True, device='cuda'):
    freq = max(epochs//20,1)
 
    for epoch in range(1, epochs+1):
        net.train()

        losses_train = []
        for X, target in train_loader:
            X, target = X.to(device), target.to(device)
            
            net_output = net(X)
            loss = criterion(net_output, target)
            losses_train.append(float(loss))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            #scheduler is cosine annealing, so its called in the step loop
            if scheduler != None:
                scheduler.step()
        
        if verbose and epoch%freq==0:

            y_pred_val =  []
            y_true_val = []

            net.eval()

            losses_val = []
            for X, target in val_loader:
                X, target = X.to(device), target.to(device)

                # Compute the validation loss
                target_hat_val = net(X)

                loss = criterion(target_hat_val, target)
                losses_val.append(float(loss))

                y_pred_val.extend(target_hat_val.argmax(1).tolist())
                y_true_val.extend(target.tolist())

            mean_val = sum(losses_val)/len(losses_val)
            mean_train = sum(losses_train)/len(losses_train)

            print('Timestamp: ', datetime.now().strftime("%H:%M:%S"), \
                  '\tVal epoch {}'.format(epoch), \
                  '\n\tModel: {}'.format(net_name), \
                  '\n\tLoss Train: {:.3}'.format(mean_train), \
                  ',\n\tLoss Test: {:.3}'.format(mean_val),\
                  ',\n\tAccuracy on test: {:.3}'.format(accuracy_score(y_true_val, y_pred_val)) )

### Training Evaluation Function

Used to evaluate model after training. Function puts the result in a log

In [3]:
from datetime import datetime
from sklearn.metrics import accuracy_score
import torch

def eval_model(net, net_name, coreset_selector_name, percentage_of_dataset, criterion, test_loader, logfile):
    y_pred_val =  []
    y_true_val = []

    net.eval()

    losses_val = []
    for X, target in val_loader:
        X, target = X.to(device), target.to(device)

        # Compute the validation loss
        target_hat_val = net(X)

        loss = criterion(target_hat_val, target)
        losses_val.append(float(loss))

        y_pred_val.extend(target_hat_val.argmax(1).tolist())
        y_true_val.extend(target.tolist())

    mean_val = sum(losses_val)/len(losses_val)
    
    logtext = '\nTimestamp: ' + datetime.now().strftime("%H:%M:%S") + \
            '\n\tModel: {}'.format(net_name) + \
            '\n\tCoreset Selector: {}'.format(coreset_selector_name) + \
            '\n\tPercentage of Dataset: {}'.format(percentage_of_dataset) + \
            '\n\tLoss Test: {:.3}'.format(mean_val) + \
            '\n\tAccuracy on test: {:.3}'.format(accuracy_score(y_true_val, y_pred_val))
    
    f = open(logfile, 'a')
    f.write(logtext)
    f.close()

### Dataset

In [4]:
import torch
import torchvision
import torchvision.transforms as transforms

# fix random seed
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True

#factors selected from torch docs
mean = (0.4914, 0.4822, 0.4465)
std = (0.2471, 0.2435, 0.2616)


#preprocessing
transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# CIFAR-10 dataset
train_dataset = torchvision.datasets.CIFAR10(root='./data/',
                                             train=True, 
                                             transform=transform,
                                             download=False)

test_dataset = torchvision.datasets.CIFAR10(root='./data/',
                                            train=False, 
                                            transform=transform,
                                            download=False)

### Coreset Training Function

Function that trains the model on selected datapoints from the dataset. The argument is given as a list of indices.

Additionally, logs the last trainings and accuracies.

In [8]:
import numpy as np
import torch.nn as nn
from schduler import WarmupCosineLR

def train_and_save_models(train_indices, percentage_of_dataset):
    
    train_datasubset = [train_dataset[i] for i in train_indices]
    
    # Data loader
    train_loader = torch.utils.data.DataLoader(dataset=train_datasubset,
                                               batch_size=128, 
                                               shuffle=True,
                                               drop_last=True
                                              )

    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=128, 
                                              shuffle=False,
                                              drop_last=True
                                             )
    
    # Device configuration
    device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')

    # Training parameters
    num_epochs = 100
    learning_rate = 1e-2
    weight_decay = 1e-3
    total_steps = num_epochs * len(train_loader)

    # Models
    model_resnet = resnet18()
    model_mobilenet = mobilenet_v2()
    model_inception = inception_v3()
    model_vgg = vgg11_bn()

    models = [model_resnet, model_mobilenet, model_inception, model_vgg]
    model_labels = ['resnet', 'mobilenet', 'inception', 'vgg']
    
    #train selected models on subset
    for model, label in zip(models, model_labels):

        model = model.to(device)

        # Loss and optimizer
        criterion = nn.CrossEntropyLoss()
        
        optimizer = torch.optim.SGD(
            model.parameters(),
            lr=learning_rate
            weight_decay=weight_decay,
            momentum=0.9,
            nesterov=True
        )
        
        # Scheduler
        #scheduler = torch.optim.lr_scheduler.StepLR(optimizer2, step_size=1, gamma=1/10)
        
        #'''
        # Scheduler
        scheduler = WarmupCosineLR(
            optimizer,
            warmup_epochs=total_steps * 0.1,
            max_epochs = total_steps)
        #'''

        # Train the model
        train_model(num_epochs, model, label, criterion, optimizer, train_loader, test_loader, scheduler=scheduler, verbose=True, device=device)

        # Save the model checkpoint
        torch.save(model.state_dict(), 'model_weights/{:03}_{}.pt'.format(percentage_of_dataset, label))

### Train for full dataset

In [9]:
train_indices = np.arange(0, len(train_dataset))

train_and_save_models(train_indices, 100)



Timestamp:  20:33:28 	Val epoch 4 
	Model: resnet 
	Loss Train: 0.518 ,
	Loss Test: 0.651 ,
	Accuracy on test: 0.777
Timestamp:  20:35:40 	Val epoch 8 
	Model: resnet 
	Loss Train: 0.441 ,
	Loss Test: 0.651 ,
	Accuracy on test: 0.777


KeyboardInterrupt: 