# In [0]:
import torch
import torch.nn as nn
from torch.utils import data
import numpy as np
from skimage import io, transform
# import ipdb
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
import torchvision.models as models
from PIL import Image
import time
# Wall-clock timestamp taken at script start; the elapsed time is printed
# after the training loop finishes.
start = time.time()

# Helper functions for loading images.
def pil_loader(path):
    """Load the image stored at ``path`` with PIL and return it as RGB.

    The file is opened explicitly and handed to PIL as a file object so that
    it is closed deterministically, which avoids a ResourceWarning
    (https://github.com/python-pillow/Pillow/issues/835). The conversion to
    RGB happens while the file is still open.
    """
    with open(path, 'rb') as handle:
        return Image.open(handle).convert('RGB')

def accimage_loader(path):
    """Load the image at ``path`` with accimage, falling back to PIL.

    ``accimage`` is imported lazily so the module is only required when this
    loader is actually used. If accimage raises ``IOError`` (potentially a
    decoding problem), the image is loaded through ``pil_loader`` instead.
    """
    import accimage
    try:
        image = accimage.Image(path)
    except IOError:
        # Potentially a decoding problem, fall back to PIL.Image
        image = pil_loader(path)
    return image

def default_loader(path):
    """Load the image at ``path`` using torchvision's configured image backend.

    Dispatches to ``accimage_loader`` when the backend reported by
    ``torchvision.get_image_backend()`` is ``'accimage'``; every other backend
    goes through ``pil_loader``.
    """
    from torchvision import get_image_backend
    loader = accimage_loader if get_image_backend() == 'accimage' else pil_loader
    return loader(path)

# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------
# Flag for whether you're training or not. When False, num_epochs is forced to
# 0 further down so the training loop is skipped.
is_train = True
# Selects which set of label files is used below.
is_key_frame = True  # TODO: set this to False to train on the video frames, instead of the key frames
# This is the model to load during testing, if you want to eval a previously-trained model.
model_to_load = 'model_video.ckpt'

# CUDA for PyTorch: use the first GPU when one is available, otherwise the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
#cudnn.benchmark = True

# Hyperparameters
# NOTE: depending on your optimizer, you may want to tune other hyperparameters as well
batchsize = 32  # TODO: tune the batch size. often, these are things like 32, 64, 128, or 256
num_epochs = 100
learning_rate = 0.0001

# Parameters for data loader (forwarded as keyword arguments to every
# DataLoader). The batch size is taken from `batchsize` so the two values
# cannot drift apart.
params = {
    'batch_size': batchsize,
    'shuffle': True,
    'num_workers': 2,
}

# Datasets
# TODO: put the path to your train, test, validation txt files
if is_key_frame:
    label_file_train = ''
    label_file_val = ''
else:
    label_file_train = ''
    label_file_val = ''
    label_file_test = ''

# Per-channel normalization statistics passed to transforms.Normalize.
# TODO: you should normalize based on the average image in the training set;
# replace these values once you have computed them.
# Until then, default to the ImageNet statistics that torchvision's pretrained
# models are documented to expect, so that the script parses and the
# pretrained weights see inputs in a familiar range.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
# If you want to pad or resize your images, you can put the parameters for that below.

# Generators
# NOTE: if you don't want to pad or resize your images, leave the Pad and
# Resize entries in _make_transform commented out; the same pipeline is used
# for all three datasets.
def _make_transform():
    """Return a fresh preprocessing pipeline: ToTensor followed by Normalize(mean, std)."""
    return transforms.Compose([
        # transforms.Pad(requires_parameters),    # TODO: if you want to pad your images
        # transforms.Resize(requires_parameters), # TODO: if you want to resize your images
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])


def _make_dataset_and_loader(label_file):
    """Build an Mds189 dataset for ``label_file`` and a DataLoader over it.

    Mds189 is defined outside this section; it is constructed with the label
    file, ``default_loader`` as the image loader, and the shared transform.
    The DataLoader receives the module-level ``params`` as keyword arguments.
    """
    dataset = Mds189(label_file, loader=default_loader, transform=_make_transform())
    return dataset, data.DataLoader(dataset, **params)


train_dataset, train_loader = _make_dataset_and_loader(label_file_train)

val_dataset, val_loader = _make_dataset_and_loader(label_file_val)

# A test split only exists for the video-frame configuration.
if not is_key_frame:
    test_dataset, test_loader = _make_dataset_and_loader(label_file_test)

# NOTE: you should not overwrite the models you try whose performance you're keeping track of.
#       one thing you could do is have many different model forward passes in class NeuralNet()
#       and then depending on which model you want to train/evaluate, you call that model's
#       forward pass. this strategy will save you a lot of time in the long run. the last thing
#       you want to do is have to recode the layer structure for a model (whose performance
#       you're reporting) because you forgot to e.g., compute the confusion matrix on its results
#       or visualize the error modes of your (best) model

# Inception v3 with pretrained weights.
model = models.inception_v3(pretrained=True)
# The training loop moves every batch to `device`, so the weights have to live
# there as well; otherwise the forward pass fails with a device mismatch when
# a GPU is in use. This is done before the optimizer is constructed so the
# optimizer holds references to the parameters on their final device.
model = model.to(device)

# if we're only testing, we don't want to train for any epochs, and we want to load a model
if not is_train:
    num_epochs = 0
    # model.load_state_dict(torch.load('model_video.ckpt'))
# model.load_state_dict(torch.load('model_video2.ckpt'))

# Loss and optimizer
# Follow `device` instead of hard-coding CUDA so the same script also runs on
# CPU-only machines.
criterion = torch.nn.CrossEntropyLoss().to(device)
# NOTE: you can use a different optimizer besides Adam, like RMSProp or SGD, if you'd like
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
# Histories collected while training. The names are kept as they were, so note
# what each list actually stores:
val_loss_list_epoch = []    # validation accuracy (%) per epoch
train_loss_list_step = []   # training accuracy (%) per step
train_loss_list = []        # training loss per step
val_loss_list = []          # validation loss summed over batches, per epoch
train_groundtruth_list = []
prev_val_acc = 0            # validation accuracy of the previous epoch
predicted = None
validation_count = 0        # consecutive epochs without an accuracy increase

# Weight applied to the auxiliary-classifier loss when the model returns
# auxiliary logits (the value used in the PyTorch fine-tuning tutorial).
AUX_LOSS_WEIGHT = 0.4


def _loss_and_logits(outputs, labels):
    """Return ``(loss, logits)`` for a forward-pass result.

    In training mode torchvision's Inception v3 returns a
    ``(logits, aux_logits)`` namedtuple instead of a plain tensor, which
    cannot be passed to the criterion directly. In that case the main loss is
    combined with the down-weighted auxiliary loss. A plain tensor result
    (e.g. in eval mode) is scored as-is.
    """
    if isinstance(outputs, tuple):
        logits, aux_logits = outputs
        loss = criterion(logits, labels) + AUX_LOSS_WEIGHT * criterion(aux_logits, labels)
        return loss, logits
    return criterion(outputs, labels), outputs


print('Beginning training..')
total_step = len(train_loader)

# Loop over epochs
for epoch in range(num_epochs):
    # Training
    print('epoch {}'.format(epoch))
    loss_list = []

    # Validation below switches to eval mode, so re-enable training behavior
    # (dropout, batch-norm updates) at the start of every epoch.
    model.train()
    for i, (local_batch, local_labels) in enumerate(train_loader):
        # Transfer to GPU
        local_ims, local_labels = local_batch.to(device), local_labels.to(device)

        # Forward pass (call the module, not .forward, so hooks run)
        outputs = model(local_ims)
        loss, logits = _loss_and_logits(outputs, local_labels)
        _, predicted = torch.max(logits.detach(), 1)

        # NOTE: if you use Google Colab's tensorboard-like feature to visualize
        #       the loss, you do not need to plot it here. just take a screenshot
        #       of the loss curve and include it in your write-up.
        train_loss_list.append(loss.item())

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_total = local_labels.size(0)
        train_correct = (predicted == local_labels).sum().item()
        train_loss_list_step.append(train_correct*100/train_total)

        # Log every 4th step only.
        if (i+1) % 4 == 0:
            print('training accuracy: ' + str(train_correct*100/train_total))
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

    # Validation
    val_correct = 0
    val_total = 0
    predicted_list = []
    val_groundtruth_list = []
    loss_sum = 0

    # Evaluate deterministically (no dropout, running batch-norm statistics)
    # and without building an autograd graph.
    model.eval()
    with torch.no_grad():
        for (val_batch, val_labels) in val_loader:
            # Transfer to GPU
            val_ims, val_labels = val_batch.to(device), val_labels.to(device)
            val_outputs = model(val_ims)
            loss, val_logits = _loss_and_logits(val_outputs, val_labels)
            _, val_predicted = torch.max(val_logits, 1)
            predicted_list.extend(val_predicted)

            val_total += val_labels.size(0)
            val_groundtruth_list.extend(val_labels)
            val_correct += (val_predicted == val_labels).sum().item()
            loss_sum += loss.item()

    # Guard against an empty validation set instead of dividing by zero.
    val_acc = val_correct*100/val_total if val_total else 0.0
    val_loss_list.append(loss_sum)
    print ("Epoch validation accuracy: " + str(val_acc))
    val_loss_list_epoch.append(val_acc)

    # Early stopping: stop after 5 consecutive epochs whose validation
    # accuracy did not exceed that of the epoch before it.
    if val_acc <= prev_val_acc:
        validation_count += 1
        if validation_count >= 5:
            print ("Early termination")
            break
    else:
        validation_count = 0

    prev_val_acc = val_acc


end = time.time()
print('Time: {}'.format(end - start))

