In [0]:
# Mount Google Drive into the Colab VM; the dataset root used by this notebook
# lives under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random
import time
from PIL import Image
from google.colab.patches import cv2_imshow

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torch.utils.data.sampler import SubsetRandomSampler
import torchvision.transforms as transforms
import torchvision.models
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.models.detection import FasterRCNN

# Root folder on the mounted Drive. The annotation CSVs and the
# 'faster-rcnn-checkpoints/' directory are resolved relative to this path.
path = "/content/drive/My Drive/APS360 Project/v6-dataset/"

In [0]:
# Pick the compute device once. Both `use_cuda` and `device` are defined on
# every path (previously `use_cuda` only existed when a GPU was present), and
# `device` is always a torch.device rather than sometimes a plain string.
use_cuda = torch.cuda.is_available()
if use_cuda:
    print("CUDA activated")
    device = torch.device("cuda")
else:
    print("Using CPU")
    device = torch.device("cpu")

CUDA activated


In [0]:
# Food categories the detector is trained on (32 entries, alphabetical).
# The position of a name in this list is its index in `classes`.
classes = [
    "Apple", "Bagel", "Banana", "Bread",
    "Broccoli", "Burrito", "Carrot", "Cheese",
    "Coffee", "Cookie", "Cucumber", "Egg (Food)",
    "French fries", "Grape", "Hamburger", "Hot dog",
    "Juice", "Lemon", "Lobster", "Muffin",
    "Orange", "Pancake", "Pasta", "Pear",
    "Pizza", "Potato", "Salad", "Sandwich",
    "Strawberry", "Taco", "Tomato", "Waffle",
]

In [0]:
def dataloader(filePath='clean-train-id.csv', batch_size=8, normalize=True):
    """Yield shuffled mini-batches of images and Faster R-CNN style targets.

    The annotation CSV is read from ``os.path.join(path, filePath)`` and must
    provide the columns ``filePath``, ``XMin``, ``YMin``, ``XMax``, ``YMax``
    and ``Class_Description`` (one bounding box per row).

    Args:
        filePath: CSV file name, relative to the module-level ``path``.
        batch_size: Maximum number of images per yielded batch. The final
            batch is shorter when the row count is not a multiple of it.
        normalize: When True, images are additionally normalized with
            mean 0.5 / std 0.5 per channel after conversion to a tensor.

    Yields:
        ``(imgs, label)`` where ``imgs`` is a list of CUDA image tensors and
        ``label`` is a list (same length) of dicts with keys ``'boxes'``
        (1x4 float32 tensor, ``[xmin, ymin, xmax, ymax]`` scaled by the image
        width/height) and ``'labels'`` (1-element int64 tensor).
    """
    # Load csv and shuffle. The index MUST be reset: rows are addressed below
    # by 0..n-1, and with the original index labels still attached those
    # lookups would return the rows in file order, silently undoing the shuffle.
    headers = pd.read_csv(os.path.join(path, filePath))
    headers = headers.sample(frac=1).reset_index(drop=True)

    if normalize:
        transform = transforms.Compose([transforms.ToTensor(),
                                        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    else:
        transform = transforms.ToTensor()

    num_rows = len(headers)
    for start in range(0, num_rows, batch_size):
        imgs, label = [], []

        # Clamp the last batch so we never index past the end of the frame.
        for idx in range(start, min(start + batch_size, num_rows)):
            img = Image.open(headers.filePath[idx]).convert('RGB')
            w, h = img.size
            imgs.append(transform(img).cuda())

            # Box columns are multiplied by the image size to get pixel
            # coordinates (presumably they are stored normalized to [0, 1]).
            box = [[headers.XMin[idx] * w,
                    headers.YMin[idx] * h,
                    headers.XMax[idx] * w,
                    headers.YMax[idx] * h]]

            # Accept either a class name (looked up in `classes`) or an
            # already-numeric class id; both end up as an integer label.
            # NOTE(review): torchvision detection models reserve label 0 for
            # background -- confirm the id scheme used here matches the one
            # the saved checkpoints were trained with.
            raw_label = headers.Class_Description[idx]
            if isinstance(raw_label, str):
                class_id = classes.index(raw_label)
            else:
                class_id = int(raw_label)

            label.append({
                'boxes': torch.tensor(box, dtype=torch.float32).cuda(),
                'labels': torch.tensor([class_id], dtype=torch.int64).cuda(),
            })

        yield imgs, label

In [0]:
def _count_label_hits(predictions, targets):
    """Count images whose ground-truth label appears among the predicted labels.

    `predictions` and `targets` are parallel lists of dicts; each target's
    'labels' tensor is expected to hold a single ground-truth label.
    """
    hits = 0
    for prediction, target in zip(predictions, targets):
        true_label = target['labels'].tolist()[0]
        if true_label in prediction['labels'].tolist():
            hits += 1
    return hits


def get_train_acc_end(model, batch_size=6):
    """Compute classification accuracy of `model` on the training set.

    An image counts as correct when any of the model's predicted labels for
    that image equals its ground-truth label. Every image of every batch is
    scored, and the model is run once per batch.

    Args:
        model: Detection model; called as ``model(images)`` in eval mode and
            expected to return one prediction dict per image.
        batch_size: Batch size passed to ``dataloader``.

    Returns:
        Fraction of correctly labelled images as a float (0.0 if the loader
        yields no images).
    """
    correct = 0
    total = 0

    # Predictions are only produced in eval mode; remember the caller's mode
    # and put it back afterwards so training can continue undisturbed.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            imported = dataloader(filePath='clean-train-id.csv', batch_size=batch_size, normalize=True)

            for img, box in imported:
                correct += _count_label_hits(model(img), box)
                total += len(box)
    finally:
        model.train(was_training)

    return correct / total if total else 0.0


def get_val_loss_acc(val_model, batch_size=4):
    """Compute mean validation loss and accuracy of `val_model`.

    For every validation batch the model is run once in training mode (to
    obtain the loss dict) and once in eval mode (to obtain predictions); no
    gradients are tracked. Accuracy is counted per image, as in
    ``get_train_acc_end``.

    Args:
        val_model: Detection model returning a dict of losses for
            ``val_model(images, targets)`` in training mode and a list of
            prediction dicts for ``val_model(images)`` in eval mode.
        batch_size: Batch size passed to ``dataloader``; each batch's summed
            loss is divided by this value.

    Returns:
        ``[mean_loss, accuracy]``. ``mean_loss`` is the average of the
        per-batch values (NaN if the loader yields nothing); ``accuracy`` is
        a float. The model is left in training mode on return.
    """
    s_losses = []
    correct = 0
    total = 0

    try:
        with torch.no_grad():
            imported = dataloader(filePath='clean-validation-id.csv', batch_size=batch_size, normalize=True)

            for img, box in imported:
                # Losses are only returned in training mode.
                # NOTE(review): in training mode, layers such as BatchNorm
                # update their running statistics even under no_grad, so they
                # see validation data here -- confirm this is acceptable.
                val_model.train()
                loss_dict = val_model(img, box)
                losses = sum(loss for loss in loss_dict.values())
                s_losses.append(losses / batch_size)
                del loss_dict

                val_model.eval()
                correct += _count_label_hits(val_model(img), box)
                total += len(box)
    finally:
        # Hand the model back in training mode, whatever happened above.
        val_model.train()

    if not s_losses:
        return [float('nan'), 0.0]
    return [sum(s_losses) / len(s_losses), correct / total]

In [0]:
# Training Code (from a checkpoint)
def train_net_continue(model, batch_size=4, num_epochs=50, learning_rate=0.0001,
                      weight_decay=0.0002, lr_decay=4, ep=0, ck=0):
    """Resume training `model` from a saved checkpoint.

    Loads ``bs{batch_size}_lr{learning_rate}_epoch{ep}_checkpoint_{ck}.pth``
    from the ``faster-rcnn-checkpoints`` folder under the module-level
    ``path``, restores model and Adam optimizer state, then trains for epochs
    ``ep + 1`` up to (but not including) ``num_epochs``. A new checkpoint is
    written every 1000 batches, numbered consecutively after the loaded one.

    Args:
        model: Detection model; ``model(images, targets)`` must return a dict
            of losses in training mode.
        batch_size: Batch size for the data loader; also part of the
            checkpoint file name.
        num_epochs: Exclusive upper bound of the epoch counter.
        learning_rate: Adam learning rate; also part of the checkpoint name.
        weight_decay: Adam weight decay.
        lr_decay: Currently unused; kept so existing calls stay valid.
        ep: Epoch number in the name of the checkpoint to load.
        ck: Checkpoint number in the name of the checkpoint to load.

    Returns:
        None. Per-iteration losses are stored in each saved checkpoint under
        the ``'loss'`` key.
    """
    checkpoint_dir = os.path.join(path, 'faster-rcnn-checkpoints')
    model_path = f"bs{batch_size}_lr{learning_rate}_epoch{ep}_checkpoint_{ck}"
    state = torch.load(os.path.join(checkpoint_dir, model_path + '.pth'))

    torch.manual_seed(1000)

    # Restore the weights and move the model to the GPU BEFORE building the
    # optimizer, so the restored optimizer state is placed on the same device
    # as the parameters it belongs to.
    model.load_state_dict(state['model_state_dict'])
    model = model.cuda(0)
    model = model.train()

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.Adam(params, lr=learning_rate, weight_decay=weight_decay)
    optimizer.load_state_dict(state['optimizer_state_dict'])

    # Number for the next checkpoint to write. Kept in its own variable so
    # that releasing the loaded state dict does not discard the counter.
    checkpoint = state['checkpoint'] + 1
    del state

    iter_loss = []
    for epoch in range(ep + 1, num_epochs):
        file_idx = 0
        data = dataloader(batch_size=batch_size)

        for batch_img, batch_box in data:
            loss_dict = model(batch_img, batch_box)
            losses = sum(loss for loss in loss_dict.values())
            optimizer.zero_grad()
            losses.backward()
            optimizer.step()
            iter_loss.append(float(losses) / batch_size)

            file_idx += 1
            if file_idx % 1000 == 0:
                # Note: Compute accuracy and loss again at the checkpoint since the initial training was screwed up
                model_path = "bs{0}_lr{1}_epoch{2}_checkpoint_{3}".format(batch_size, learning_rate, epoch, checkpoint)
                torch.save({
                    'checkpoint': checkpoint,
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': iter_loss,
                    }, os.path.join(checkpoint_dir, model_path + '.pth'))

                checkpoint += 1

In [0]:
# Faster R-CNN assembled from a pretrained MobileNetV2 feature extractor,
# following the torchvision documentation example.
anchor_generator = AnchorGenerator(
    sizes=((32, 64, 128, 200),),
    aspect_ratios=((0.5, 1.0, 2.0),),
)

backbone = torchvision.models.mobilenet_v2(pretrained=True).features
backbone.out_channels = 1280  # channel count of the backbone output, per the documentation

# NOTE(review): newer torchvision releases name the single feature map '0'
# (a string) rather than 0 -- confirm against the installed version.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(
    featmap_names=[0],
    output_size=7,
    sampling_ratio=2,
)

# NOTE(review): torchvision counts the background as one of `num_classes`;
# `classes` lists 32 foods -- confirm 32 is the intended value here.
rcnn1 = FasterRCNN(
    backbone,
    num_classes=32,
    rpn_anchor_generator=anchor_generator,
    box_roi_pool=roi_pooler,
)
rcnn1 = rcnn1.cuda(0)

In [0]:
# Resume from the saved epoch-5 / checkpoint-28 state.
train_net_continue(
    rcnn1,
    batch_size=6,
    num_epochs=10,
    learning_rate=0.0001,
    weight_decay=0.0001,
    ep=5,
    ck=28,
)

# Note: Checkpoints have been deleted since they were taking too much
#       memory. Only the model checkpoint used for testing, and the last epoch
#       were saved