In [1]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import torch
import os
from PIL import Image
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn.functional as F 

In [2]:
import torchvision.models as models
model = models.densenet169(pretrained=True)
model.classifier = nn.Sequential(nn.Linear(1664, 121))


In [3]:
# Setup: initialize the hyperparameters/variables
# Setup: initialize the hyperparameters/variables
num_epochs = 10           # Number of full passes through the dataset
batch_size = 128         # Number of samples in each minibatch
learning_rate = 0.01  
seed = np.random.seed(0) # Seed the random number generator for reproducibility
p_val = 0.1              # Percent of the overall dataset to reserve for validation
p_test = 0.2             # Percent of the overall dataset to reserve for testing


#TODO: Convert to Tensor - you can later add other transformations, such as Scaling here

transform = transforms.Compose([
        #transforms.RandomResizedCrop(224),
        transforms.ToPILImage('L'),
        transforms.RandomHorizontalFlip(),
        transforms.Resize([224,224],interpolation=2),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])


In [4]:

# Check if your system supports CUDA
use_cuda = torch.cuda.is_available()

# Setup GPU optimization if CUDA is supported
if use_cuda:
    computing_device = torch.device("cuda")
    extras = {"num_workers": 1, "pin_memory": True}
    print("CUDA is supported")
else: # Otherwise, train on the CPU
    computing_device = torch.device("cpu")
    extras = False
    print("CUDA NOT supported")

model = model
model = model.to(computing_device)
print("Model on CUDA?", next(model.parameters()).is_cuda)    

CUDA is supported
Model on CUDA? True


In [5]:
### need to modify
# Setup the training, validation, and testing dataloaders
from plankton_dataloader import create_split_loaders
root_dir = "../dataset/data_subset/"
train_loader, val_loader, test_loader = create_split_loaders(root_dir,batch_size, seed, transform=transform, 
                                                             p_val=p_val, p_test=p_test,
                                                             shuffle=True, show_sample=False, 
                                                             extras=extras)


In [6]:
criterion = nn.CrossEntropyLoss().to(computing_device)

optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.0001)

exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [7]:
def validate(val_loader,model,optimizer):
    start = time.time()
    sum_loss = 0.0
    sum_accuracy = 0.0
    list_sum_loss = []
    num = 0
    for mb_count, (val_images, val_labels) in enumerate(val_loader, 0):
        model.eval()
        with torch.no_grad():  
            optimizer.zero_grad()      
            val_images = torch.squeeze(torch.stack([val_images,val_images,val_images], dim=1, out=None))
            val_images, val_labels = val_images.to(computing_device), val_labels.to(computing_device)
            val_labels = val_labels.type(torch.cuda.FloatTensor)
            outputs = model(val_images)
            loss = criterion(outputs,torch.max(val_labels, 1)[1])
            sum_loss += loss
            output_np = outputs.cpu().detach().numpy()
            label_np = val_labels.cpu().detach().numpy()

            accuracy_train = accuracy(label_np, output_np)
            sum_accuracy += accuracy_train
            #print('validation accuracy',accuracy_train)
    print("Vali Loss: {}, Vali Accuracy: {}".format(1.0 * sum_loss / mb_count, sum_accuracy / mb_count))
    print("validation time = ", time.time()-start)    
    return (1.0*sum_loss / mb_count).item(), sum_accuracy / mb_count

In [8]:
def accuracy(out, labels):
    outputs = np.argmax(out, axis=1)
    lab = np.argmax(labels, axis=1)
    return np.sum(outputs==lab)/float(lab.size)

In [9]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=10,resume=False, direct=""):
    since = time.time()
    total_loss = []
    if resume == False:
        avg_minibatch_loss = []
        avg_minibatch_accuracy = []
        total_vali_loss = []
        total_vali_accuracy = []
        epc_save = 0 # MAYBE
    else: 
        print('Resume model: %s' % direct)
        check_point = torch.load(direct)
        model.load_state_dict(check_point['state_dict'])
        optimizer.load_state_dict(check_point['optimizer'])
        avg_minibatch_loss = list(np.load('avg_train_loss.npy'))
        avg_minibatch_accuracy = list(np.load('avg_train_accuracy.npy'))
        total_vali_loss = list(np.load('total_vali_loss.npy'))
        total_vali_accuracy = list(np.load('total_vali_accuracy.npy'))
        epc_save = check_point['epoch'] - 1 
    
    tolerence = 3
    i = 0 
    for epoch in range(epc_save, num_epochs):
        N = 20
        M = 20
        N_minibatch_loss = 0.0    
        best_loss = 100
        early_stop = 0
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train']:
            scheduler.step()
            # Iterate over data.
            for minibatch_count, (inputs, labels) in enumerate(train_loader, 0):
                inputs = torch.squeeze(torch.stack([inputs,inputs,inputs], dim=1, out=None))
                inputs = inputs.to(computing_device)
                labels = labels.to(computing_device)
                labels = labels.long()
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    labels = labels.long()
                    loss = criterion(outputs,torch.max(labels, 1)[1])
                    #loss = criterion(torch.max(outputs,1)[1],torch.max(labels, 1)[1])
                    N_minibatch_loss += loss
                    # backward + optimize only if in training phase
                    loss.backward()
                    optimizer.step()
                
                # statistics
                # training stats
                if minibatch_count % N == 0 and minibatch_count!=0:    

                    # Print the loss averaged over the last N mini-batches    
                    N_minibatch_loss /= N
                    print('Epoch %d, average minibatch %d loss: %.3f' %
                        (epoch + 1, minibatch_count, N_minibatch_loss))

                    # Add the averaged loss over N minibatches and reset the counter
                    avg_minibatch_loss.append(N_minibatch_loss)
                    
                    avg_minibatch_loss_1 = np.array(avg_minibatch_loss)
                    np.save('avg_train_loss', avg_minibatch_loss_1)
                    
                    N_minibatch_loss = 0.0

                    output_np = outputs.cpu().detach().numpy()
                    label_np = labels.cpu().detach().numpy()

                    accuracy_train = accuracy(label_np, output_np)
                    avg_minibatch_accuracy.append(accuracy_train)
                    np.save('avg_train_accuracy', np.array(avg_minibatch_accuracy))
                    
                    print('accuracy',accuracy_train)
                    #print('accuracy, precision, recall', accuracy, precision, recall)
                
                #Validation
                if minibatch_count % M == 0 and minibatch_count!=0: 
                    #model = torch.load('./checkpoint')
                    save_checkpoint({'epoch': epoch + 1,
                                'state_dict': model.state_dict(),
                                'optimizer': optimizer.state_dict(),
                                },
                                filename='./checkpoint/'+'%d_model_epoch%d.pth' % (epoch , minibatch_count))
                    
                    v_loss, v_accuracy = validate(val_loader,model,optimizer)
#                     print(v_loss)
                    
                    # Save validation loss and accuracy
                    
                    total_vali_loss.append(v_loss)
                    total_vali_loss_1 = np.array(total_vali_loss)
                    np.save('total_vali_loss', total_vali_loss_1)                    
                    total_vali_accuracy.append(v_accuracy)
                    np.save('total_vali_accuracy', np.array(total_vali_accuracy))
                    
                    if total_vali_loss[i] > best_loss and i != 0:
                        early_stop += 1
                        if early_stop == tolerence:
                            print('early stop here')
                            break
                    else:
                        best_loss = total_vali_loss[i] 
                        early_stop = 0
                    i = i + 1
            print("Finished", epoch + 1, "epochs of training")
    print("Training complete after", epoch, "epochs")
    
    avg_minibatch_loss = np.array(avg_minibatch_loss)
    np.save('avg_minibatch_loss_new', avg_minibatch_loss)

    total_vali_loss = np.array(total_vali_loss)
    np.save('total_vali_loss_new', total_vali_loss)  
    print("total_vali_loss")
    print(total_vali_loss)
    print("avg_minibatch_loss")
    print(avg_minibatch_loss)
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s '.format(
        time_elapsed // 60, time_elapsed % 60))


In [10]:
def save_checkpoint(state, is_best=0, filename='models/checkpoint.pth.tar'):
    torch.save(state, filename)


In [11]:
model_trained = train_model(model, criterion, optimizer, exp_lr_scheduler, num_epochs=10, resume=True, direct = './checkpoint/7_model_epoch40.pth',)


Resume model: ./checkpoint/7_model_epoch40.pth
Epoch 7/9
----------
Epoch 8, average minibatch 20 loss: 1.137
accuracy 0.8046875
Vali Loss: 0.8354580402374268, Vali Accuracy: 0.8286097032242306
validation time =  22.61225914955139
Epoch 8, average minibatch 40 loss: 0.543
accuracy 0.78125
Vali Loss: 0.5235973596572876, Vali Accuracy: 0.8740191438690768
validation time =  21.87377667427063
Epoch 8, average minibatch 60 loss: 0.507
accuracy 0.828125
Vali Loss: 0.5340747237205505, Vali Accuracy: 0.8669508732291158
validation time =  27.340603351593018
Epoch 8, average minibatch 80 loss: 0.534
accuracy 0.8203125
Vali Loss: 0.5185205340385437, Vali Accuracy: 0.869370572789448
validation time =  25.759816646575928
Epoch 8, average minibatch 100 loss: 0.485
accuracy 0.8671875
Vali Loss: 0.5200827121734619, Vali Accuracy: 0.8676722032242306
validation time =  25.804400205612183
Epoch 8, average minibatch 120 loss: 0.521
accuracy 0.8359375
Vali Loss: 0.5420551300048828, Vali Accuracy: 0.8561652

In [2]:
np.load('./total_vali_accuracy.npy')

array([0.3478986 , 0.51854467, 0.57367871, 0.5944179 , 0.64172035,
       0.67212308, 0.67490153, 0.69511404, 0.71182294, 0.71154815,
       0.71318164, 0.7116741 , 0.70725834, 0.75114115, 0.72534502,
       0.74755358, 0.74910311, 0.7542402 , 0.74322179, 0.73680997,
       0.75525922, 0.74477131, 0.7297188 , 0.74424081, 0.71216262,
       0.80415318, 0.78534135, 0.80238993, 0.8075919 , 0.78880297,
       0.79827186, 0.81170997, 0.82134679, 0.79642465, 0.81349231,
       0.82499924, 0.81423654, 0.81181684, 0.80372573, 0.81627458,
       0.80446996, 0.81468307, 0.8075919 , 0.81393884, 0.81234734,
       0.80986276, 0.80313416, 0.80816439, 0.80175256, 0.81355719,
       0.8286097 , 0.87401914, 0.86695087, 0.86937057, 0.8676722 ,
       0.85616527, 0.85567675, 0.8709201 , 0.8733627 , 0.86389381,
       0.86058103, 0.87883946, 0.86423348, 0.87714109, 0.86593185,
       0.86332132, 0.86053905, 0.85885976, 0.87208796, 0.87038959])