In [None]:
from __future__ import print_function, division
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.utils.data
from torch.utils.data import SubsetRandomSampler
from torch.optim import lr_scheduler
import torchvision
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torchvision import datasets, models
import matplotlib.pyplot as plt
import time
import os
import copy
import argparse
import random
import numpy as np
import torchnet as tnt
import torch.utils.data as data
from PIL import Image
import yaml
from engine import train_one_epoch, evaluate
plt.ion()
import math
import sys
import time
import torch


   
class Args:
    """Plain namespace of run settings, read through an instance (``opt = Args()``).

    In the code visible in this notebook only ``cuda`` and ``outf`` are read;
    the remaining attributes are not referenced there.
    """

    # Directory that receives the log file and the saved checkpoints.
    outf = '.'
    # Device string; the main block only tests it for truthiness.
    cuda = "cuda:0"
    ngpu = 3

    # Optimisation-related settings.
    lr = 0.001
    niter = 25

    # Data-loading settings.
    batchSize = 64
    workers = 4
    
class LabeledDataset(torch.utils.data.Dataset):
    """Detection dataset reading one JPEG image and one YAML label file per sample.

    Sample ``idx`` is loaded from ``<root>/<split>/images/<n>.JPEG`` and
    ``<root>/<split>/labels/<n>.yml`` with ``n = offset + idx``; the offset is
    looked up per split in ``_SPLIT_OFFSETS``.
    """

    # First file number of each split:
    #   the idx of training image is from 1 to 30000
    #   the idx of validation image is from 30001 to 50000
    _SPLIT_OFFSETS = {"training": 1, "validation": 30001}

    def __init__(self, root, split, transforms):
        """
        Args:
            root: Location of the dataset folder, usually it is /labeled
            split: The split you want to use; it should be "training" or "validation".
            transforms: Callable invoked as ``transforms(img, target)`` that returns
                the (possibly modified) pair, or None to skip this step.
        """

        self.split = split
        self.transforms = transforms

        self.image_dir = os.path.join(root, split, "images")
        self.label_dir = os.path.join(root, split, "labels")

        # The dataset length is the number of entries in the image directory.
        self.num_images = len(os.listdir(self.image_dir))

    def __len__(self):
        """Return the number of entries found in the image directory."""
        return self.num_images

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(img, target)``.

        ``target`` is a dict with the keys ``boxes`` (float32, one
        ``xmin, ymin, xmax, ymax`` row per object), ``labels`` (int64 ids from
        ``class_dict``), ``image_id``, ``area`` and ``iscrowd`` (all zeros).

        Raises:
            ValueError: if ``self.split`` is not a key of ``_SPLIT_OFFSETS``.
        """
        # Fail with an explicit message instead of an UnboundLocalError when the
        # split has no known file-number offset.
        try:
            offset = self._SPLIT_OFFSETS[self.split]
        except KeyError:
            raise ValueError(
                f"Unknown split {self.split!r}; expected one of {sorted(self._SPLIT_OFFSETS)}"
            ) from None

        with open(os.path.join(self.image_dir, f"{offset + idx}.JPEG"), "rb") as f:
            img = Image.open(f).convert("RGB")
        with open(os.path.join(self.label_dir, f"{offset + idx}.yml"), "rb") as f:
            # NOTE(review): FullLoader can build more than plain data types; if label
            # files can come from an untrusted source, prefer yaml.safe_load.
            yamlfile = yaml.load(f, Loader=yaml.FullLoader)

        num_objs = len(yamlfile["labels"])
        # xmin, ymin, xmax, ymax
        # reshape(-1, 4) leaves an (N, 4) list untouched but turns an empty box
        # list into a (0, 4) tensor, so the column indexing below cannot fail.
        boxes = torch.as_tensor(yamlfile["bboxes"], dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(
            [class_dict[label] for label in yamlfile["labels"]], dtype=torch.int64
        )
        image_id = torch.tensor([idx])
        # height * width of every box
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": image_id,
            "area": area,
            "iscrowd": iscrowd,
        }

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

def collate_fn(batch):
    """Transpose a batch: ``[(a1, b1), (a2, b2)]`` becomes ``((a1, a2), (b1, b2))``.

    Nothing is stacked; each field is returned as a plain tuple.
    """
    per_field = zip(*batch)
    return tuple(per_field)

def do_training(model, torch_dataset, torch_dataset_test, num_epochs=10,
                batch_size=16, num_workers=4, lr=0.001, output_dir=None):
    """Train ``model``, evaluating and writing a checkpoint after every epoch.

    Args:
        model: Module to train; it is moved to the selected device in place.
        torch_dataset: Dataset for the (shuffled) training loader.
        torch_dataset_test: Dataset for the (unshuffled) evaluation loader.
        num_epochs: Number of passes over the training loader.
        batch_size: Batch size of both loaders.
        num_workers: Worker processes of both loaders.
        lr: Initial Adam learning rate; StepLR multiplies it by 0.1 every 3 epochs.
        output_dir: Directory for the per-epoch checkpoint. When None, the
            module-level ``opt.outf`` is used, which is what this function
            always did before the parameter existed.

    Returns:
        The same ``model`` object after training.
    """
    # define training and validation data loaders
    data_loader = torch.utils.data.DataLoader(
        torch_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers,
        collate_fn=collate_fn)

    data_loader_test = torch.utils.data.DataLoader(
        torch_dataset_test, batch_size=batch_size, shuffle=False, num_workers=num_workers,
        collate_fn=collate_fn)

    # train on the GPU or on the CPU, if a GPU is not available
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    print("Using device %s" % device)

    # move model to the right device
    model.to(device)

    # construct an optimizer over the trainable parameters only
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.Adam(params, lr=lr)
    # Named `scheduler` so the imported `lr_scheduler` module is not shadowed.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=3,
                                                gamma=0.1)

    for epoch in range(num_epochs):
        train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=15)
        scheduler.step()
        evaluate(model, data_loader_test, device=device)
        # Resolved lazily so the global `opt` is only needed when a checkpoint
        # is actually written and no explicit directory was given.
        checkpoint_dir = opt.outf if output_dir is None else output_dir
        # The file name carries no epoch number, so each epoch overwrites the last.
        torch.save(model.state_dict(), '%s/faster_rcnn_epoch.pth' % (checkpoint_dir))
    return model

# Maps every class name found in the label files to an integer id. The ids are
# simply the positions in the tuple below (0 for the first name, 99 for the last).
# NOTE(review): torchvision's detection models reserve label 0 for the
# background, while this table gives 0 to "cup or mug" — confirm that is intended.
class_dict = {
    class_name: class_id
    for class_id, class_name in enumerate((
        "cup or mug", "bird", "hat with a wide brim", "person", "dog",
        "lizard", "sheep", "wine bottle", "bowl", "airplane",
        "domestic cat", "car", "porcupine", "bear", "tape player",
        "ray", "laptop", "zebra", "computer keyboard", "pitcher",
        "artichoke", "tv or monitor", "table", "chair", "helmet",
        "traffic light", "red panda", "sunglasses", "lamp", "bicycle",
        "backpack", "mushroom", "fox", "otter", "guitar",
        "microphone", "strawberry", "stove", "violin", "bookshelf",
        "sofa", "bell pepper", "bagel", "lemon", "orange",
        "bench", "piano", "flower pot", "butterfly", "purse",
        "pomegranate", "train", "drum", "hippopotamus", "ski",
        "ladybug", "banana", "monkey", "bus", "miniskirt",
        "camel", "cream", "lobster", "seal", "horse",
        "cart", "elephant", "snake", "fig", "watercraft",
        "apple", "antelope", "cattle", "whale", "coffee maker",
        "baby bed", "frog", "bathing cap", "crutch", "koala bear",
        "tie", "dumbbell", "tiger", "dragonfly", "goldfish",
        "cucumber", "turtle", "harp", "jellyfish", "swine",
        "pretzel", "motorcycle", "beaker", "rabbit", "nail",
        "axe", "salt or pepper shaker", "croquet ball", "skunk", "starfish",
    ))
}

def get_model(num_classes):
    """Build a Faster R-CNN (MobileNetV3-Large 320 FPN) with a fresh box predictor.

    The detector is created with ``pretrained=False`` and its box-prediction
    head is replaced by a ``FastRCNNPredictor`` producing ``num_classes`` outputs.

    Args:
        num_classes: Number of outputs of the new classification head.

    Returns:
        The detector with the replaced head.
    """
    detector = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_320_fpn(pretrained=False)
    roi_heads = detector.roi_heads
    # The new head must accept the same feature width the existing one consumed.
    head_input_width = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(head_input_width, num_classes)
    return detector

# Script entry point: build both datasets, train for 10 epochs, and write the
# final weights plus a small log file into `opt.outf`.
if __name__ == '__main__':

    VALID_DATASET_PATH = "/scratch/sr6172/DL/labeled_data/labeled_data"

    def _to_tensor_pair(image, target):
        """Convert the image to a tensor; the target is passed through unchanged."""
        return torchvision.transforms.functional.to_tensor(image), target

    train_dataset = LabeledDataset(
        root=VALID_DATASET_PATH,
        split="training",
        transforms=_to_tensor_pair,
    )

    valid_dataset = LabeledDataset(
        root=VALID_DATASET_PATH,
        split="validation",
        transforms=_to_tensor_pair,
    )
    opt = Args()
    # `opt.cuda` is a non-empty string and therefore always truthy, so also check
    # that CUDA is really available before reporting a GPU device in the log.
    device = torch.device("cuda:0" if opt.cuda and torch.cuda.is_available() else "cpu")
    # The context manager closes the log file even when training raises.
    with open("{}/training_logs_faster_rcnn_10_epochs_final.txt".format(opt.outf), "w+") as f:
        f.write("using " + str(device) + "\n")
        f.flush()
        # NOTE(review): torchvision counts the background as one of `num_classes`,
        # while class_dict assigns ids 0..99 to real classes — confirm 100 is intended.
        # NOTE(review): the weights saved below come from the DataParallel wrapper,
        # so their keys presumably carry a "module." prefix — verify against the loader.
        model_ft = nn.DataParallel(get_model(100))
        # Not used by do_training; kept in case cells outside this view read it.
        criterion = nn.CrossEntropyLoss()
        model = do_training(model_ft, train_dataset, valid_dataset, num_epochs=10)
        torch.save(model.state_dict(), '%s/faster_rcnn_10_epochs_final.pth' % (opt.outf))

In [None]:
# Store the whole model object (not only its state_dict) in the output directory.
# Depends on `model` and `opt` created by the training cell above.
torch.save(
    model,
    '%s/MOBILENET.pt' % opt.outf,
)