# Train Model

#### References
* https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import models
import utils_train
import change_dataset_np
import matplotlib.pyplot as plt
print("PyTorch Version: ",torch.__version__)
print("Torchvision Version: ",torchvision.__version__)

# Hyperparameters
num_classes = 3
batch_size = 80
img_size = 224
base_lr = 1e-6

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#device = 'cpu'
print('Device:', device)
num_gpu = torch.cuda.device_count()
batch_size *= num_gpu
base_lr *= num_gpu
print('Number of GPUs Available:', num_gpu)

train_pickle_file = 'change_dataset_train.pkl'
val_pickle_file = 'change_dataset_train.pkl'

PyTorch Version:  1.1.0
Torchvision Version:  0.3.0
Device: cuda:0
Number of GPUs Available: 8


#### Define Transformation

In [2]:
#transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(img_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor()        
    ]),
    'val': transforms.Compose([
        transforms.Resize(img_size),
        transforms.CenterCrop(img_size),
        transforms.ToTensor()        
    ]),
}

#### Load Dataset

In [3]:
# Create training and validation datasets
train_loader = change_dataset_np.ChangeDatasetNumpy(train_pickle_file, data_transforms['train'])
val_loader = change_dataset_np.ChangeDatasetNumpy(val_pickle_file, data_transforms['val'])
image_datasets = {'train': train_loader, 'val': val_loader}
# Create training and validation dataloaders
dataloaders_dict = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size, shuffle=True, num_workers=4) for x in ['train', 'val']}

#### Start Tensorboard Interface

In [4]:
# Default directory "runs"
writer = SummaryWriter()

#### Initialize Model

In [5]:
img_reference_dummy = torch.randn(1,3,img_size,img_size)
img_test_dummy = torch.randn(1,3,img_size,img_size)
change_net = models.ChangeNet(num_classes=num_classes)

# Add on Tensorboard the Model Graph
writer.add_graph(change_net, [img_reference_dummy, img_test_dummy])



#### Send Model to GPUs (If Available)

In [6]:
if num_gpu > 1:
    change_net = nn.DataParallel(change_net)
change_net = change_net.to(device)

#### Initialize Loss Functions and Optimizers

In [7]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(change_net.parameters(), lr=base_lr)    
sc_plt = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=4, verbose=True)    

#### Train Model

In [None]:
utils_train.train_model(change_net, dataloaders_dict, criterion, optimizer, writer, num_epochs=25)

In [8]:
import copy
def train_model(model, dataloaders, criterion, optimizer, writer, num_epochs=25):    
    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for sample in dataloaders[phase]:                
                reference_img = sample['reference'].to(device)
                test_img = sample['test'].to(device)
                labels = sample['label'].squeeze(1).type(torch.LongTensor).to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Get model outputs and calculate loss                    
                    outputs = model([reference_img, test_img])
                    
                    print('labels.shape:', labels.shape)
                    print('outputs.shape:', outputs.shape)
                    loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * reference_img.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
            writer.add_scalar('epoch/loss', epoch_loss, epoch)
            writer.add_scalar('epoch/acc', epoch_loss, epoch)            

            # deep copy the model and save if accuracy is better
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)        
    
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

In [9]:
train_model(change_net, dataloaders_dict, criterion, optimizer, writer, num_epochs=25)

Epoch 0/24
----------
labels.shape: torch.Size([640, 224, 224])
outputs.shape: torch.Size([640, 3, 224, 224])


NameError: name 'inputs' is not defined