In [0]:
# Mount Google Drive at /content/drive so that files stored on Drive
# can be read and written through the normal filesystem API.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random
import tqdm
import time
from PIL import Image
from google.colab.patches import cv2_imshow

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torch.utils.data.sampler import SubsetRandomSampler
import torchvision.transforms as transforms
import torchvision.models
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.models.detection import FasterRCNN

# Root folder of the project data on the mounted Drive. The CSV index files
# and the 'faster-rcnn-checkpoints/' directory are resolved relative to it.
path = "/content/drive/My Drive/APS360 Project/v6-dataset/"

In [0]:
# Select the compute device once for the whole notebook.
# `use_cuda` and `device` are both defined on every path, and `device` is
# always a torch.device, so later cells can rely on them unconditionally.
use_cuda = torch.cuda.is_available()

if use_cuda:
    print("CUDA activated")
    device = torch.device("cuda")
else:
    print("Using CPU")
    device = torch.device("cpu")

CUDA activated


In [0]:
# Get the type of GPU attached to this runtime.
# `!` is an IPython shell escape: the command runs in a subprocess and its
# output is shown as the cell output.

!nvidia-smi

Tue Mar 31 20:18:06 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.64.00    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   45C    P0    28W / 250W |     10MiB / 16280MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [0]:
# These are the classes we will be training in our model.
# Each class name is mapped to an integer label that follows the listing
# order below (first name -> 0, last name -> 31).
# NOTE(review): torchvision detection models treat label 0 as background;
# confirm that mapping "Apple" to 0 is intended.

classes = {
    name: label
    for label, name in enumerate((
        "Apple",
        "Bagel",
        "Banana",
        "Bread",
        "Broccoli",
        "Burrito",
        "Carrot",
        "Cheese",
        "Coffee",
        "Cookie",
        "Cucumber",
        "Egg (Food)",
        "French fries",
        "Grape",
        "Hamburger",
        "Hot dog",
        "Juice",
        "Lemon",
        "Lobster",
        "Muffin",
        "Orange",
        "Pancake",
        "Pasta",
        "Pear",
        "Pizza",
        "Potato",
        "Salad",
        "Sandwich",
        "Strawberry",
        "Taco",
        "Tomato",
        "Waffle",
    ))
}

In [0]:
# Peek at the training index before writing the data loaders.
# CSV columns: filePath | Class_Description | XMin | XMax | YMin | YMax

# Read the index, shuffle all rows, and keep five of them for inspection.
headers = (
    pd.read_csv(os.path.join(path, 'clean-train-id.csv'))
    .sample(frac=1)
    .head(5)
)
len(headers.filePath)

5

In [0]:
def dataloader(filePath='clean-train-id.csv', batch_size=8, normalize=True):
    """Yield shuffled mini-batches of images and detection targets.

    The CSV at ``os.path.join(path, filePath)`` must provide the columns
    ``filePath``, ``Class_Description``, ``XMin``, ``XMax``, ``YMin`` and
    ``YMax``. Box coordinates are multiplied by the resized image width and
    height, so they are presumably stored as fractions of the image size
    (TODO confirm against the CSV).

    Args:
        filePath: CSV file name, relative to the module-level ``path``.
        batch_size: Maximum number of images per batch; the final batch may
            be smaller.
        normalize: When true, ``Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))``
            is applied after ``ToTensor``.

    Yields:
        ``(imgs, label)``: ``imgs`` is a list of CUDA image tensors and
        ``label`` is a list with one dict per image holding ``'boxes'``
        (a 1x4 tensor ordered xmin, ymin, xmax, ymax in resized-image pixels)
        and ``'labels'`` (a 1-element tensor looked up in ``classes``).
    """
    # Every image is resized to this width; the height keeps the aspect ratio.
    widthScale = 300

    # Load csv and shuffle. The index must be reset after shuffling: the
    # lookups below (headers.col[k]) are label-based, so without the reset
    # they would walk the rows in their original, unshuffled order.
    headers = pd.read_csv(os.path.join(path, filePath))
    headers = headers.sample(frac=1).reset_index(drop=True)

    transform = transforms.ToTensor()
    if normalize:
        transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    num_rows = len(headers.filePath)

    # Make tensor of images, (labels and bbox)
    for start in range(0, num_rows, batch_size):
        imgs, label = [], []

        for k in range(start, min(start + batch_size, num_rows)):
            # Original image; the context manager closes the file handle
            # as soon as the pixel data has been converted.
            with Image.open(headers.filePath[k]) as raw:
                imgOG = raw.convert('RGB')

            wpercent = (widthScale/float(imgOG.size[0]))
            hsize = max(1, int((float(imgOG.size[1])*wpercent)))

            # Resized image. Image.LANCZOS is the same filter as the old
            # Image.ANTIALIAS alias, which newer Pillow releases removed.
            img = imgOG.resize((widthScale, hsize), Image.LANCZOS)
            w, h = img.size
            img = transform(img).cuda()
            imgs.append(img)

            # Add labels to the dictionary
            boxes_dict = {}
            boxes_dict['boxes'] = torch.tensor([[headers.XMin[k] * w,
                                                 headers.YMin[k] * h,
                                                 headers.XMax[k] * w,
                                                 headers.YMax[k] * h]]).cuda()
            boxes_dict['labels'] = torch.tensor([classes[headers.Class_Description[k]]]).cuda()

            label.append(boxes_dict)

        yield imgs, label

In [0]:
def sampleDataSet(filePath='clean-train-id.csv', batch_size=8, normalize=True):
    """Yield mini-batches drawn from a random 5% sample of a CSV index.

    Works like a data loader over ``os.path.join(path, filePath)`` but only
    the first ``int(0.05 * number_of_rows)`` rows of the shuffled table are
    used, and images are resized to a width of 250 pixels. The subset is
    re-drawn on every call because the shuffle is not seeded here.

    Args:
        filePath: CSV file name, relative to the module-level ``path``.
        batch_size: Maximum number of images per batch; the final batch may
            be smaller.
        normalize: When true, ``Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))``
            is applied after ``ToTensor``.

    Yields:
        ``(imgs, label)``: ``imgs`` is a list of CUDA image tensors and
        ``label`` is a list with one dict per image holding ``'boxes'``
        (a 1x4 tensor ordered xmin, ymin, xmax, ymax in resized-image pixels)
        and ``'labels'`` (a 1-element tensor looked up in ``classes``).
    """
    # Every image is resized to this width; the height keeps the aspect ratio.
    widthScale = 250

    # Load csv and shuffle. The index must be reset after shuffling: the
    # lookups below (headers.col[k]) are label-based, so without the reset
    # the "sample" would always be the first 5% of the file in file order.
    headers = pd.read_csv(os.path.join(path, filePath))
    headers = headers.sample(frac=1).reset_index(drop=True)
    length = int(0.05 * len(headers.filePath))
    print(f"LENGTH: {length}")

    transform = transforms.ToTensor()
    if normalize:
        transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    for start in range(0, length, batch_size):
        imgs, label = [], []

        for k in range(start, min(start + batch_size, length)):
            # Original image; the context manager closes the file handle
            # as soon as the pixel data has been converted.
            with Image.open(headers.filePath[k]) as raw:
                imgOG = raw.convert('RGB')

            wpercent = (widthScale/float(imgOG.size[0]))
            hsize = max(1, int((float(imgOG.size[1])*wpercent)))

            # Resized image. Image.LANCZOS is the same filter as the old
            # Image.ANTIALIAS alias, which newer Pillow releases removed.
            img = imgOG.resize((widthScale, hsize), Image.LANCZOS)
            w, h = img.size
            img = transform(img).cuda()
            imgs.append(img)

            # Add labels to the dictionary
            boxes_dict = {}
            boxes_dict['boxes'] = torch.tensor([[headers.XMin[k] * w,
                                                 headers.YMin[k] * h,
                                                 headers.XMax[k] * w,
                                                 headers.YMax[k] * h]]).cuda()
            boxes_dict['labels'] = torch.tensor([classes[headers.Class_Description[k]]]).cuda()

            label.append(boxes_dict)

        yield imgs, label

In [0]:
def get_train_accuracy(model,box_labels,img):
    """Return the label accuracy of ``model`` on one batch.

    For every image the first returned detection is compared with the first
    ground-truth label; an image with no detections counts as a miss. The
    predicted and expected values are printed for each image.

    Args:
        model: Detection model; called as ``model(img)`` in eval mode and
            expected to return one dict with ``'labels'`` and ``'boxes'``
            per image.
        box_labels: Sequence of target dicts (``'labels'``, ``'boxes'``),
            one per image, in the same order as ``img``.
        img: Batch of images passed straight to the model.

    Returns:
        ``correct / total`` as a float, or ``0.0`` when the model returned
        no per-image outputs.

    Side effects:
        Moves the model to CUDA device 0 and leaves it in training mode.
    """
    # NOTE(review): the origin of this factor is not visible in this file.
    # It only rescales the predicted box that is printed; it does not
    # influence the returned accuracy.
    box_print_scale = 0.375

    model = model.cuda(0)
    model = model.eval()
    total = 0
    correct = 0

    with torch.no_grad():
        # Inference happens inside no_grad so no autograd graph is built.
        out = model(img)

        for i, prediction in enumerate(out):
            expected = box_labels[i]
            predicted_labels = prediction['labels'].tolist()

            if predicted_labels:
                find_box = 0

                if predicted_labels[find_box]==expected['labels'].tolist()[0]:
                    correct += 1

                resize_box = np.array(prediction['boxes'][find_box].tolist())*box_print_scale
                print(('\t\tPredicted {},{} |  expected {}, {}').format(prediction['labels'][find_box],resize_box,expected['labels'][0],expected['boxes'][0].tolist()))
            else:
                print(('\t\tPredicted NONE |  expected {}, {}').format(expected['labels'][0],expected['boxes'][0].tolist()))
            total += 1

    model = model.train()
    # Guard the empty-batch case instead of raising ZeroDivisionError.
    return correct/total if total else 0.0

def get_train_acc_end(model, batch_size=6):
    """Estimate training accuracy on the subset produced by ``sampleDataSet``.

    Only the first image of every batch is scored: it counts as correct when
    any of its predicted labels equals the first ground-truth label. Progress
    and the predicted/expected values are printed for every batch.

    Args:
        model: Detection model; called as ``model(img)`` in eval mode and
            expected to return one dict with ``'labels'`` and ``'boxes'``
            per image.
        batch_size: Batch size forwarded to ``sampleDataSet``.

    Returns:
        ``correct / total`` over the scored batches, or ``0.0`` when the
        sample produced no batches.

    Side effects:
        The model is switched to eval mode while scoring and put back into
        the mode it was in on entry before returning.
    """
    # NOTE(review): the origin of this factor is not visible in this file.
    # It only rescales the predicted box that is printed.
    box_print_scale = 0.375

    tr_idx = 0
    correct = 0
    total = 0

    # Remember the current mode so the caller's training loop is not left
    # in eval mode by accident (nn.Module exposes it as `.training`).
    was_training = getattr(model, 'training', False)
    model = model.eval()

    try:
        with torch.no_grad():
            imported = sampleDataSet(filePath='clean-train-id.csv', batch_size=batch_size, normalize=True)

            for img, box in imported:
                box_cp = box[0].copy()

                # Single forward pass per batch; the result is reused below.
                predict = model(img)[0]
                predicted_labels = predict['labels'].tolist()
                expected_label = box_cp['labels'].tolist()[0]

                if predicted_labels:
                    # Report the first matching detection, or the first
                    # detection overall when nothing matches.
                    find_idx = 0
                    if expected_label in predicted_labels:
                        correct += 1
                        find_idx = predicted_labels.index(expected_label)

                    out = predict['labels'][find_idx]
                    resize_box = np.array(predict['boxes'][find_idx].tolist())*box_print_scale
                    print(('\t\tPredicted {},{} | Expected {}, {}').format(out, resize_box, box_cp['labels'][0], box_cp['boxes'][0].tolist()))
                else:
                    print(('\t\tPredicted NONE |  Expected {}, {}').format(box_cp['labels'][0], box_cp['boxes'][0].tolist()))

                total += 1
                print (('\tProcessing iteration {} | Training Acc: {}').format(tr_idx, correct/total))
                tr_idx += 1
    finally:
        if was_training:
            model = model.train()

    # Guard the empty-sample case instead of raising ZeroDivisionError.
    return correct/total if total else 0.0
    
def get_val_loss_acc(val_model, batch_size=4):
    """Compute mean validation loss and accuracy over 'clean-validation-id.csv'.

    For each batch the model is called twice under ``torch.no_grad()``:
    once in training mode with targets to obtain the loss dict, and once in
    eval mode to obtain predictions. Only the first image of every batch is
    scored for accuracy: it counts as correct when any of its predicted
    labels equals the first ground-truth label.

    NOTE(review): running the model in training mode on validation data may
    update layers that track running statistics (e.g. batch norm) — confirm
    that this is acceptable for the backbone in use.

    Args:
        val_model: Detection model; ``val_model(img, box)`` must return a
            dict of losses in training mode and ``val_model(img)`` a list of
            prediction dicts in eval mode.
        batch_size: Batch size forwarded to ``dataloader``; also the divisor
            applied to each batch loss.

    Returns:
        ``[mean_loss, accuracy]`` as Python floats. When the loader yields
        no batches the result is ``[nan, 0.0]``.

    Side effects:
        Leaves the model in training mode.
    """
    # NOTE(review): the origin of this factor is not visible in this file.
    # It only rescales the predicted box that is printed.
    box_print_scale = 0.375

    s_losses = []
    val_idx = 0
    correct = 0
    total = 0

    with torch.no_grad():
        val_model = val_model.train()
        imported = dataloader(filePath='clean-validation-id.csv', batch_size=batch_size, normalize=True)

        for img, box in imported:
            box_cp = box[0].copy()

            # Loss pass (training mode). The loss is stored as a plain float,
            # matching the training loop, so it can be checkpointed and
            # plotted without holding on to device tensors.
            loss_dict = val_model(img,box)
            losses = sum(loss for loss in loss_dict.values())
            s_losses.append(float(losses)/batch_size)
            del loss_dict

            # Prediction pass (eval mode): one forward call, reused below.
            val_model = val_model.eval()
            predict = val_model(img)[0]
            predicted_labels = predict['labels'].tolist()
            expected_label = box_cp['labels'].tolist()[0]

            if predicted_labels:
                # Report the first matching detection, or the first
                # detection overall when nothing matches.
                find_idx = 0
                if expected_label in predicted_labels:
                    correct += 1
                    find_idx = predicted_labels.index(expected_label)

                out = predict['labels'][find_idx]
                resize_box = np.array(predict['boxes'][find_idx].tolist())*box_print_scale
                print(('\t\tPredicted {},{} | Expected {}, {}').format(out,resize_box,box_cp['labels'][0],box_cp['boxes'][0].tolist()))
            else:
                print(('\t\tPredicted NONE |  Expected {}, {}').format(box_cp['labels'][0],box_cp['boxes'][0].tolist()))

            total += 1
            print (('\tProcessing iteration {}... Val Loss: {} | Val Acc: {}').format(val_idx,s_losses[-1],correct/total))
            val_idx += 1
            val_model = val_model.train()

    # Guard the empty-loader case instead of raising ZeroDivisionError.
    if not s_losses:
        return [float('nan'), 0.0]

    return [sum(s_losses)/len(s_losses), correct/total]

In [0]:
# Training Code (from a checkpoint)
def train_net_continue(model, batch_size=4, num_epochs=50, learning_rate=0.0001,
                      weight_decay=0.0002, lr_decay=4, ep=0, ck=0):
    """Resume training of ``model`` from a saved checkpoint.

    The checkpoint file
    ``<path>/faster-rcnn-checkpoints/bs{batch_size}_lr{learning_rate}_epoch{ep}_checkpoint_{ck}.pth``
    is loaded to restore the model weights, the Adam optimizer state and the
    recorded loss/accuracy history. Training then runs for epochs
    ``ep + 1 .. num_epochs - 1``. Every 1000 iterations within an epoch the
    running training loss, the sampled training accuracy and the validation
    loss/accuracy are appended to the history and a new checkpoint is
    written. Loss and accuracy curves are plotted at the end.

    Args:
        model: Detection model; ``model(images, targets)`` must return a dict
            of losses in training mode.
        batch_size: Batch size for the training loader; also part of the
            checkpoint file name.
        num_epochs: Exclusive upper bound of the epoch range.
        learning_rate: Adam learning rate; also part of the checkpoint name.
        weight_decay: Adam weight decay.
        lr_decay: Unused; kept so existing callers keep working.
        ep: Epoch number encoded in the checkpoint file to resume from.
        ck: Checkpoint number encoded in the checkpoint file to resume from.

    Returns:
        None. Results are reported through prints, plots and checkpoint files.
    """
    # Both loading and saving use the same directory prefix.
    checkpoint_dir = os.path.join(path, 'faster-rcnn-checkpoints/')

    # NOTE(review): presumably the number of rows in the training CSV; it is
    # only used to size the progress bar — confirm.
    train_set_size = 31236

    model_path = f"bs{batch_size}_lr{learning_rate}_epoch{ep}_checkpoint_{ck}"
    checkpoint = torch.load(checkpoint_dir + model_path + '.pth')

    torch.manual_seed(1000)
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.Adam(params, lr=learning_rate, weight_decay=weight_decay)

    # Load states from the checkpoint
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    model.load_state_dict(checkpoint['model_state_dict'])
    checkpoint_num = checkpoint['checkpoint_num']+1
    model = model.cuda(0)
    model = model.train()
    start_time = time.time()

    # Load values from the checkpoint
    saved_x = checkpoint['saved_x']
    saved_train_losses = checkpoint['saved_train_loss']
    saved_val_losses = checkpoint['saved_val_loss']
    train_acc = checkpoint['train_acc']
    val_acc = checkpoint['val_acc']

    # Release the loaded state dicts; everything needed has been copied out.
    del checkpoint

    # The x-axis value of the next history point continues after the last
    # one stored in the checkpoint.
    saved_idx = 0
    if saved_x!=[]:
        saved_idx = saved_x[-1]+1

    iter_loss = []

    # An empty history must not crash the resume banner.
    last_val_acc = val_acc[-1] if val_acc else None
    print(f"\nval_acc: {last_val_acc}\nsaved_idx: {saved_idx}")

    for epoch in range(ep+1, num_epochs):
        print(f"This is epoch: {epoch}")
        bar = tqdm.tqdm(total=train_set_size//batch_size,desc='TRAINING',position=0)
        data = dataloader(batch_size=batch_size)

        for file_idx, (batch_img, batch_box) in enumerate(data):
            bar.update(1)
            loss_dict = model(batch_img, batch_box)
            losses = sum(loss for loss in loss_dict.values())
            optimizer.zero_grad()
            losses.backward()
            optimizer.step()
            iter_loss.append(float(losses)/batch_size)
            i = file_idx + 1
            print (('{} Processing iteration {}... Train Loss: {}').format(file_idx,i,iter_loss[-1]))

            # Evaluate and checkpoint every 1000 iterations of the epoch.
            if i%1000==0:
                saved_x.append(saved_idx)
                saved_idx += 1
                saved_train_losses.append(sum(iter_loss)/len(iter_loss))
                train_acc.append(get_train_acc_end(model, batch_size=6))
                [current_val_loss,current_val_acc] = get_val_loss_acc(model, batch_size=6)
                saved_val_losses.append(current_val_loss)

                # Make sure training continues in training mode regardless
                # of the mode the evaluation helpers left the model in.
                model = model.train()

                print ('-------------------------------------------------------------------------------------------')
                iter_loss = []
                val_acc.append(current_val_acc)  # compute validation accuracy

                print(("Epoch {} | Train Acc: {} | Validation acc: {}").format(epoch + 1, train_acc[-1], val_acc[-1]))
                print ('-------------------------------------------------------------------------------------------')
                model_path = "bs{0}_lr{1}_epoch{2}_checkpoint_{3}".format(batch_size,learning_rate,epoch,checkpoint_num)
                torch.save({
                    'checkpoint_num': checkpoint_num,
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'saved_train_loss': saved_train_losses,
                    'saved_val_loss':saved_val_losses,
                    'saved_x':saved_x,
                    'train_acc':train_acc,
                    'val_acc':val_acc
                    }, checkpoint_dir + model_path + '.pth')

                checkpoint_num += 1

        # Release the progress bar before the next epoch creates a new one.
        bar.close()

    # plotting
    # History entries may be tensors (possibly on the GPU); convert them to
    # plain floats so matplotlib can draw them.
    train_loss_values = [float(v) for v in saved_train_losses]
    val_loss_values = [float(v) for v in saved_val_losses]

    plt.title("Training vs Validation Loss")
    plt.plot(saved_x, train_loss_values, label="Train")
    plt.plot(saved_x, val_loss_values, label='Validation')
    plt.xlabel("Iterations")
    plt.ylabel("Loss")
    plt.legend(loc='best')
    plt.show()

    plt.title("Training vs Validation Accuracy")
    # Draw the training curve announced by the title whenever its history
    # lines up with the x-axis values.
    if len(train_acc) == len(saved_x):
        plt.plot(saved_x, [float(v) for v in train_acc], label="Train")
    plt.plot(saved_x, [float(v) for v in val_acc], label="Validation")
    plt.xlabel("Iterations")
    plt.ylabel("Accuracy")
    plt.legend(loc='best')
    plt.show()

    end_time = time.time()
    duration = end_time - start_time

    if len(val_acc) != 0:
        print("Final Validation Accuracy: {}".format(val_acc[-1]))

    print ("Trained Duration: {} seconds".format(duration))

In [0]:
# Build a Faster R-CNN detector on top of a pretrained MobileNetV2 backbone.

# One feature map, four anchor sizes, three aspect ratios per location.
anchor_generator = AnchorGenerator(
    sizes=((32, 64, 128, 200),),
    aspect_ratios=((0.5, 1.0, 2.0),),
)

# Only the convolutional feature extractor of MobileNetV2 is used.
# The channel count of its output is attached as `out_channels` for FasterRCNN.
backbone = torchvision.models.mobilenet_v2(pretrained=True).features
backbone.out_channels = 1280

roi_pooler = torchvision.ops.MultiScaleRoIAlign(
    featmap_names=['0'],
    output_size=7,
    sampling_ratio=2,
)

# NOTE(review): torchvision counts the background class in `num_classes`
# and uses label 0 for it, while `classes` assigns labels 0..31 to the 32
# food classes — confirm that this is intended.
rcnn1 = FasterRCNN(
    backbone,
    num_classes=32,
    rpn_anchor_generator=anchor_generator,
    box_roi_pool=roi_pooler,
).cuda(0)

In [0]:
# Resume training from the checkpoint saved for epoch 3, checkpoint 17.
train_net_continue(
    rcnn1,
    batch_size=6,
    num_epochs=10,
    learning_rate=0.0001,
    weight_decay=0.0001,
    ep=3,
    ck=17,
)

Output hidden; open in https://colab.research.google.com to view.