In [2]:
# !pip install pascal-voc-writer

Collecting pascal-voc-writer
  Downloading pascal_voc_writer-0.1.4-py2.py3-none-any.whl (4.0 kB)
Installing collected packages: pascal-voc-writer
Successfully installed pascal-voc-writer-0.1.4


In [1]:
# Colab-only: mount Google Drive so the paths under /content/drive used by the
# later cells (dataset folder, helper modules, saved model) are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import cv2 as cv
import numpy
import torch
import torchvision
import matplotlib.pyplot as plt
from torch.utils.data import Dataset
import pandas as pd

In [3]:
# Load Dataset 
import collections
from PIL import Image
from torchvision.transforms import transforms as T

class PhoneDataset(Dataset):
    """Single-object detection dataset built from a folder of ``.jpg`` images.

    The folder must also contain a ``labels.txt`` file with one line per image,
    ``<filename> <x> <y>``, where ``x`` and ``y`` are the object's centre given
    as fractions of the image width and height. Each centre is expanded into a
    pixel box reaching ``BOX_HALF_EXTENT`` of the image size in every direction,
    clamped to the image borders.
    """

    # Half-width / half-height of the generated box, as a fraction of the image size.
    BOX_HALF_EXTENT = 0.05

    def __init__ (self, root, transform = None):
        """Index the ``.jpg`` files in ``root`` and pre-compute one box per image.

        Args:
            root: folder holding the images and ``labels.txt``.
            transform: stored on the instance for callers; it is not applied by
                ``__getitem__``, which always converts the image with ``ToTensor``.
        """
        self.root = root
        self.transform = transform
        self.images = sorted(name for name in os.listdir(root) if name.endswith('.jpg'))
        self.annotations = self.generateAnnotFiles(self.root)

    def __getitem__(self, idx):
        """Return ``(image_tensor, target)`` for the ``idx``-th image (sorted by filename).

        ``target`` carries the keys ``boxes``, ``labels``, ``area``, ``iscrowd``
        and ``image_id``. The image id is the integer file stem, so filenames
        must look like ``<int>.jpg``.
        """
        filename = self.images[idx]
        image_path = os.path.join(self.root, filename)
        with Image.open(image_path) as raw_image:
            img = T.ToTensor()(raw_image.convert("RGB"))

        # One box per image -> shape (1, 4) in [x_min, y_min, x_max, y_max] order.
        boxes = torch.as_tensor(self.annotations[filename], dtype=torch.float32)
        boxes = boxes.unsqueeze(0)

        labels = torch.ones((len(boxes),), dtype=torch.int64)
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)
        image_id = int(os.path.splitext(filename)[0])

        target = {}
        target['boxes'] = boxes
        target['labels'] = labels
        target['area'] = area
        target['iscrowd'] = iscrowd
        target['image_id'] = torch.as_tensor(image_id, dtype=torch.int64)
        return img, target

    def __len__(self):
        """Number of ``.jpg`` images found in ``root``."""
        return len(self.images)

    def getDatasetInfo(self, foldername):
        """Read ``labels.txt`` and the pixel size of every ``.jpg`` in ``foldername``.

        Returns:
            dict mapping filename -> ``{'coordinates': (x, y), 'size': (h, w)}``.
            Entries listed in ``labels.txt`` without a matching image file have
            no ``'size'`` key.

        Raises:
            KeyError: an image in the folder has no line in ``labels.txt``.
            IOError: an image could not be decoded.
        """
        obj_coordinates = {}
        with open(os.path.join(foldername, 'labels.txt'), 'r') as label_file:
            for line in label_file:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline at end of file).
                    continue
                obj_coordinates[fields[0]] = {'coordinates' : (float(fields[1]), float(fields[2]))}

        for filename in os.listdir(foldername):
            if not filename.endswith('.jpg'):
                continue
            if filename not in obj_coordinates:
                raise KeyError("no entry for image '%s' in labels.txt" % filename)
            img = cv.imread(os.path.join(foldername, filename))
            if img is None:
                # cv.imread signals failure by returning None instead of raising.
                raise IOError("could not read image '%s'" % os.path.join(foldername, filename))
            h, w = img.shape[:2]
            obj_coordinates[filename]['size'] = (h, w)
        return obj_coordinates

    def getPixelfromCoordsinX(self, img_w, x):
        """Convert a fractional x position into a pixel column (rounded)."""
        pix_x = img_w * x
        return round(pix_x)

    def getPixelfromCoordsinY(self, img_h, y):
        """Convert a fractional y position into a pixel row (rounded)."""
        pix_y = img_h * y
        return round(pix_y)

    def _boxFromCenter(self, img_w, img_h, cx, cy):
        """Build ``[x_min, y_min, x_max, y_max]`` in pixels around a fractional centre.

        The box is clamped to ``[0, img_w] x [0, img_h]`` so that centres close
        to an image border do not yield coordinates outside the image.
        """
        half = self.BOX_HALF_EXTENT
        pix_x_min = max(0, self.getPixelfromCoordsinX(img_w, cx - half))
        pix_y_min = max(0, self.getPixelfromCoordsinY(img_h, cy - half))
        pix_x_max = min(img_w, self.getPixelfromCoordsinX(img_w, cx + half))
        pix_y_max = min(img_h, self.getPixelfromCoordsinY(img_h, cy + half))
        return [pix_x_min, pix_y_min, pix_x_max, pix_y_max]

    def generateAnnotFiles(self, foldername):
        """Return a dict mapping each image filename to its pixel bounding box."""
        dataset_info = self.getDatasetInfo(foldername)
        annotations = {}
        for filename, info in dataset_info.items():
            if 'size' not in info:
                # Labelled in labels.txt but no image file present in the folder.
                continue
            h, w = info['size']
            c1, c2 = info['coordinates']
            annotations[filename] = self._boxFromCenter(w, h, c1, c2)

        return annotations

In [None]:
# %cd ..

/Users/krmayank/Documents/Internship/Companies/Brain Corp/find_phone_task_4


In [4]:
from sklearn.model_selection import train_test_split
# PATH = os.path.join(os.getcwd(), "find_phone/")
# Folder on the mounted Drive holding the .jpg images and labels.txt.
PATH = '/content/drive/MyDrive/University of Washington - Seattle/Internship/BrainCorp/find_phone_task_4/find_phone'
complete_dataset = PhoneDataset(PATH)
# 80/20 split with a fixed seed. NOTE(review): train_dataset / val_dataset are not
# referenced by the other visible cells; train() repeats this same split internally.
train_dataset, val_dataset = train_test_split(complete_dataset, test_size=0.2, shuffle=True, random_state=43)

In [5]:
# Sanity check: display the (image tensor, target dict) pair of the first sample.
complete_dataset.__getitem__(0)

(tensor([[[0.4784, 0.4824, 0.5059,  ..., 0.6824, 0.6863, 0.6863],
          [0.4745, 0.4824, 0.5020,  ..., 0.6824, 0.6745, 0.6549],
          [0.4863, 0.4980, 0.5137,  ..., 0.6745, 0.6941, 0.6902],
          ...,
          [0.4745, 0.4863, 0.5059,  ..., 0.6824, 0.6627, 0.6549],
          [0.4667, 0.4784, 0.5059,  ..., 0.6824, 0.6667, 0.6627],
          [0.4588, 0.4588, 0.4824,  ..., 0.6706, 0.6863, 0.6824]],
 
         [[0.4627, 0.4667, 0.4902,  ..., 0.6667, 0.6745, 0.6745],
          [0.4588, 0.4667, 0.4863,  ..., 0.6667, 0.6627, 0.6431],
          [0.4706, 0.4824, 0.4980,  ..., 0.6588, 0.6824, 0.6784],
          ...,
          [0.4549, 0.4667, 0.4863,  ..., 0.6667, 0.6471, 0.6392],
          [0.4471, 0.4588, 0.4863,  ..., 0.6667, 0.6510, 0.6471],
          [0.4392, 0.4392, 0.4627,  ..., 0.6549, 0.6706, 0.6667]],
 
         [[0.4510, 0.4549, 0.4784,  ..., 0.6549, 0.6549, 0.6549],
          [0.4471, 0.4549, 0.4745,  ..., 0.6549, 0.6431, 0.6235],
          [0.4588, 0.4706, 0.4863,  ...,

In [6]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# load a model pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# replace the classifier with a new one, that has num_classes which is user-defined
num_classes = 2  # 1 class (phone) + background
# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth


  0%|          | 0.00/160M [00:00<?, ?B/s]

In [None]:
# !pip install pycocotools

Collecting pycocotools
  Downloading pycocotools-2.0.4.tar.gz (106 kB)
[K     |████████████████████████████████| 106 kB 4.2 MB/s eta 0:00:01
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h    Preparing wheel metadata ... [?25ldone
Building wheels for collected packages: pycocotools
  Building wheel for pycocotools (PEP 517) ... [?25ldone
[?25h  Created wheel for pycocotools: filename=pycocotools-2.0.4-cp39-cp39-macosx_10_9_x86_64.whl size=85974 sha256=772697e896356d403dc2c4015abebd2df53dcf43882326ad8eb28644e3836318
  Stored in directory: /Users/krmayank/Library/Caches/pip/wheels/7e/b0/8e/f2c3593944ead79f5146d057d1310ee6d7b60d30b826779846
Successfully built pycocotools
Installing collected packages: pycocotools
Successfully installed pycocotools-2.0.4


In [9]:
# !git clone https://github.com/pytorch/vision.git
# !cd vision
# !git checkout v0.8.2

Cloning into 'vision'...
remote: Enumerating objects: 118542, done.[K
remote: Counting objects: 100% (11248/11248), done.[K
remote: Compressing objects: 100% (980/980), done.[K
remote: Total 118542 (delta 10347), reused 11007 (delta 10212), pack-reused 107294[K
Receiving objects: 100% (118542/118542), 232.41 MiB | 32.73 MiB/s, done.
Resolving deltas: 100% (102991/102991), done.
fatal: not a git repository (or any of the parent directories): .git


In [None]:
# !ls
# %cd ./vision
# !ls
# !cp references/detection/utils.py ../
# !cp references/detection/transforms.py ../
# !cp references/detection/coco_eval.py ../
# !cp references/detection/engine.py ../
# !cp references/detection/coco_utils.py ../

CMakeLists.txt     [34mcmake[m[m              [34mpackaging[m[m          [34mtorchvision[m[m
CODE_OF_CONDUCT.md [34mdocs[m[m               [34mreferences[m[m         tox.ini
LICENSE            [34mexamples[m[m           setup.cfg          [34mtravis-scripts[m[m
MANIFEST.in        hubconf.py         setup.py           [34mvision[m[m
README.rst         mypy.ini           [34mtest[m[m
/Users/krmayank/Documents/Internship/Companies/Brain Corp/find_phone_task_4/vision/vision
CMakeLists.txt         [34mdocs[m[m                   [34mreferences[m[m
CODE_OF_CONDUCT.md     [34mexamples[m[m               [34mscripts[m[m
CONTRIBUTING.md        [34mgallery[m[m                setup.cfg
CONTRIBUTING_MODELS.md hubconf.py             setup.py
LICENSE                [34mios[m[m                    [34mtest[m[m
MANIFEST.in            mypy.ini               [34mtorchvision[m[m
README.rst             [34mpackaging[m[m              version.txt
[34mandro

In [10]:
### this is from pytorch tutorial that includes MASK RCNN as well.
import sys
sys.path.append('/content/drive/MyDrive/University of Washington - Seattle/Internship/BrainCorp/find_phone_task_4')
from engine import train_one_epoch, evaluate
import utils
from tqdm import tqdm

def train(model, full_dataset, num_epochs = 10):
    """Fine-tune ``model`` on an 80/20 split of ``full_dataset``.

    The split is seeded (``random_state=43``). After every training epoch the
    learning-rate schedule is advanced and the model is evaluated on the
    held-out part. The model is moved to the GPU when one is available.

    Args:
        model: detection model accepted by ``train_one_epoch`` / ``evaluate``.
        full_dataset: dataset yielding ``(image, target)`` pairs.
        num_epochs: number of train + evaluate rounds.
    """
    gpu_available = torch.cuda.is_available()
    device = torch.device('cuda') if gpu_available else torch.device('cpu')

    train_part, held_out_part = train_test_split(
        full_dataset, test_size=0.2, shuffle=True, random_state=43
    )

    # Both loaders share the worker count and the detection collate function.
    shared_loader_args = {'num_workers': 4, 'collate_fn': utils.collate_fn}
    train_loader = torch.utils.data.DataLoader(
        train_part, batch_size=2, shuffle=True, **shared_loader_args
    )
    held_out_loader = torch.utils.data.DataLoader(
        held_out_part, batch_size=1, shuffle=False, **shared_loader_args
    )

    # Move the model to the target device before collecting parameters for the optimizer.
    model.to(device)

    # Optimizer: SGD over the parameters that are not frozen.
    trainable_params = [param for param in model.parameters() if param.requires_grad]
    optimizer = torch.optim.SGD(
        trainable_params, lr=0.005, momentum=0.9, weight_decay=0.0005
    )

    # Scheduler: multiply the learning rate by 0.1 every 3 epochs.
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

    for epoch in tqdm(range(num_epochs)):
        # One pass over the training data, logging every 10 iterations.
        train_one_epoch(model, optimizer, train_loader, device, epoch, print_freq=10)
        lr_scheduler.step()
        # Report metrics on the held-out part.
        evaluate(model, held_out_loader, device=device)

    print("Training  and Evaluation Completed!!")

In [11]:
# Fine-tune the model in place for 10 epochs; the per-epoch metrics are printed below.
train(model, complete_dataset,num_epochs = 10)

  cpuset_checked))
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Epoch: [0]  [ 0/52]  eta: 0:01:45  lr: 0.000103  loss: 0.5883 (0.5883)  loss_classifier: 0.4120 (0.4120)  loss_box_reg: 0.1385 (0.1385)  loss_objectness: 0.0358 (0.0358)  loss_rpn_box_reg: 0.0020 (0.0020)  time: 2.0236  data: 0.1886  max mem: 2335
Epoch: [0]  [10/52]  eta: 0:01:02  lr: 0.001082  loss: 0.3729 (0.4082)  loss_classifier: 0.2167 (0.2478)  loss_box_reg: 0.1343 (0.1411)  loss_objectness: 0.0170 (0.0173)  loss_rpn_box_reg: 0.0020 (0.0020)  time: 1.4858  data: 0.0244  max mem: 2601
Epoch: [0]  [20/52]  eta: 0:00:47  lr: 0.002062  loss: 0.2869 (0.3437)  loss_classifier: 0.1026 (0.1689)  loss_box_reg: 0.1652 (0.1618)  loss_objectness: 0.0044 (0.0108)  loss_rpn_box_reg: 0.0021 (0.0022)  time: 1.4594  data: 0.0092  max mem: 2601
Epoch: [0]  [30/52]  eta: 0:00:32  lr: 0.003041  loss: 0.2357 (0.3025)  loss_classifier: 0.0527 (0.1287)  loss_box_reg: 0.1701 (0.1639)  loss_objectness: 0.0015 (0.0077)  loss_rpn_box_reg: 0.0021 (0.0022)  time: 1.4533  data: 0.0092  max mem: 2601
Epoch: [

 10%|█         | 1/10 [01:24<12:41, 84.59s/it]

Test:  [25/26]  eta: 0:00:00  model_time: 0.3446 (0.3517)  evaluator_time: 0.0012 (0.0014)  time: 0.3618  data: 0.0041  max mem: 2601
Test: Total time: 0:00:09 (0.3678 s / it)
Averaged stats: model_time: 0.3446 (0.3517)  evaluator_time: 0.0012 (0.0014)
Accumulating evaluation results...
DONE (t=0.01s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.570
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 1.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.542
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.570
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.619
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.619
 Average Recall     (AR) @[ IoU=0.50:

 20%|██        | 2/10 [02:48<11:15, 84.40s/it]

Test:  [25/26]  eta: 0:00:00  model_time: 0.3407 (0.3423)  evaluator_time: 0.0011 (0.0012)  time: 0.3484  data: 0.0040  max mem: 2601
Test: Total time: 0:00:09 (0.3582 s / it)
Averaged stats: model_time: 0.3407 (0.3423)  evaluator_time: 0.0011 (0.0012)
Accumulating evaluation results...
DONE (t=0.01s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.597
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 1.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.674
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.597
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.658
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.658
 Average Recall     (AR) @[ IoU=0.50:

 30%|███       | 3/10 [04:12<09:49, 84.24s/it]

Test:  [25/26]  eta: 0:00:00  model_time: 0.3436 (0.3435)  evaluator_time: 0.0011 (0.0012)  time: 0.3501  data: 0.0040  max mem: 2601
Test: Total time: 0:00:09 (0.3594 s / it)
Averaged stats: model_time: 0.3436 (0.3435)  evaluator_time: 0.0011 (0.0012)
Accumulating evaluation results...
DONE (t=0.01s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.754
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 1.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.754
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.792
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.792
 Average Recall     (AR) @[ IoU=0.50:

 40%|████      | 4/10 [05:37<08:25, 84.24s/it]

Test:  [25/26]  eta: 0:00:00  model_time: 0.3429 (0.3431)  evaluator_time: 0.0011 (0.0012)  time: 0.3499  data: 0.0049  max mem: 2601
Test: Total time: 0:00:09 (0.3608 s / it)
Averaged stats: model_time: 0.3429 (0.3431)  evaluator_time: 0.0011 (0.0012)
Accumulating evaluation results...
DONE (t=0.01s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.778
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 1.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.940
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.778
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.819
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.819
 Average Recall     (AR) @[ IoU=0.50:

 50%|█████     | 5/10 [07:01<07:00, 84.15s/it]

Test:  [25/26]  eta: 0:00:00  model_time: 0.3439 (0.3430)  evaluator_time: 0.0010 (0.0011)  time: 0.3498  data: 0.0040  max mem: 2601
Test: Total time: 0:00:09 (0.3585 s / it)
Averaged stats: model_time: 0.3439 (0.3430)  evaluator_time: 0.0010 (0.0011)
Accumulating evaluation results...
DONE (t=0.01s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.793
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 1.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.793
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.831
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.831
 Average Recall     (AR) @[ IoU=0.50:

 60%|██████    | 6/10 [08:25<05:36, 84.12s/it]

Test:  [25/26]  eta: 0:00:00  model_time: 0.3410 (0.3424)  evaluator_time: 0.0010 (0.0011)  time: 0.3484  data: 0.0038  max mem: 2601
Test: Total time: 0:00:09 (0.3586 s / it)
Averaged stats: model_time: 0.3410 (0.3424)  evaluator_time: 0.0010 (0.0011)
Accumulating evaluation results...
DONE (t=0.01s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.812
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 1.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.941
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.812
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.842
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.842
 Average Recall     (AR) @[ IoU=0.50:

 70%|███████   | 7/10 [09:49<04:12, 84.08s/it]

Test:  [25/26]  eta: 0:00:00  model_time: 0.3429 (0.3424)  evaluator_time: 0.0011 (0.0012)  time: 0.3494  data: 0.0039  max mem: 2601
Test: Total time: 0:00:09 (0.3588 s / it)
Averaged stats: model_time: 0.3429 (0.3424)  evaluator_time: 0.0011 (0.0012)
Accumulating evaluation results...
DONE (t=0.01s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.808
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 1.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.941
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.808
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.838
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.838
 Average Recall     (AR) @[ IoU=0.50:

 80%|████████  | 8/10 [11:13<02:48, 84.18s/it]

Test:  [25/26]  eta: 0:00:00  model_time: 0.3464 (0.3464)  evaluator_time: 0.0010 (0.0012)  time: 0.3535  data: 0.0040  max mem: 2601
Test: Total time: 0:00:09 (0.3623 s / it)
Averaged stats: model_time: 0.3464 (0.3464)  evaluator_time: 0.0010 (0.0012)
Accumulating evaluation results...
DONE (t=0.01s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.812
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 1.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.946
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.812
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.842
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.842
 Average Recall     (AR) @[ IoU=0.50:

 90%|█████████ | 9/10 [12:38<01:24, 84.42s/it]

Test:  [25/26]  eta: 0:00:00  model_time: 0.3460 (0.3450)  evaluator_time: 0.0010 (0.0012)  time: 0.3521  data: 0.0040  max mem: 2601
Test: Total time: 0:00:09 (0.3613 s / it)
Averaged stats: model_time: 0.3460 (0.3450)  evaluator_time: 0.0010 (0.0012)
Accumulating evaluation results...
DONE (t=0.01s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.809
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 1.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.947
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.809
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.838
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.838
 Average Recall     (AR) @[ IoU=0.50:

100%|██████████| 10/10 [14:03<00:00, 84.34s/it]

Test:  [25/26]  eta: 0:00:00  model_time: 0.3479 (0.3467)  evaluator_time: 0.0011 (0.0012)  time: 0.3534  data: 0.0039  max mem: 2601
Test: Total time: 0:00:09 (0.3629 s / it)
Averaged stats: model_time: 0.3479 (0.3467)  evaluator_time: 0.0011 (0.0012)
Accumulating evaluation results...
DONE (t=0.01s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.809
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 1.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.947
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.809
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.838
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.838
 Average Recall     (AR) @[ IoU=0.50:




In [14]:
## save model
# Only the state_dict (weights) is written, not the model object itself.
# NOTE(review): this rebinds PATH, which an earlier cell used for the dataset folder.
PATH = "/content/drive/MyDrive/University of Washington - Seattle/Internship/BrainCorp/find_phone_task_4/trained_model/FasterRCNN_10EPOCHS.pth"
torch.save(model.state_dict(), PATH)

In [17]:
# Same seeded split that train() uses, so dataset_test[0] is a held-out image.
dataset, dataset_test = train_test_split(complete_dataset, test_size=0.2, shuffle=True, random_state=43)
img, _ = dataset_test[0]
# `device` defined here is also read as a global by getFullPrediction() further down.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# put the model in evaluation mode
model.eval()
with torch.no_grad():
    prediction = model([img.to(device)])

In [19]:
# Raw model output: one dict per input image with 'boxes', 'labels' and 'scores'.
prediction

[{'boxes': tensor([[407.9182, 125.2831, 456.6463, 158.6404]], device='cuda:0'),
  'labels': tensor([1], device='cuda:0'),
  'scores': tensor([0.9943], device='cuda:0')}]

In [65]:
# First box of the first (only) image as a plain list of four floats.
# Raises IndexError if the model returned no box for the image.
predictionBox = prediction[0]['boxes'].tolist()[0]
predictionBox

[407.91815185546875, 125.28311920166016, 456.6462707519531, 158.64035034179688]

In [101]:
# (channels, height, width); the 326 x 490 size shown below is what getCenter() divides by.
img.shape

torch.Size([3, 326, 490])

In [85]:
def getCenter(boxes, img_w=490, img_h=326):
  """Return the centre of a pixel box as fractions of the image size.

  Args:
    boxes: sequence ``[x_min, y_min, x_max, y_max]`` in pixels.
    img_w: image width in pixels used for normalisation (default 490).
    img_h: image height in pixels used for normalisation (default 326).

  Returns:
    ``(centerX, centerY)``, each rounded to 4 decimals.
  """
  # Corners are rounded to whole pixels before averaging.
  centerX = ( round(boxes[0]) + round(boxes[2]) ) /(2*img_w)
  centerY = ( round(boxes[1]) + round(boxes[3]) ) /(2*img_h)
  return round(centerX,4), round(centerY,4)
getCenter(predictionBox)

(0.8827, 0.4356)

In [35]:
# Display the first sample again (image tensor and target dict).
complete_dataset[0]

(tensor([[[0.4784, 0.4824, 0.5059,  ..., 0.6824, 0.6863, 0.6863],
          [0.4745, 0.4824, 0.5020,  ..., 0.6824, 0.6745, 0.6549],
          [0.4863, 0.4980, 0.5137,  ..., 0.6745, 0.6941, 0.6902],
          ...,
          [0.4745, 0.4863, 0.5059,  ..., 0.6824, 0.6627, 0.6549],
          [0.4667, 0.4784, 0.5059,  ..., 0.6824, 0.6667, 0.6627],
          [0.4588, 0.4588, 0.4824,  ..., 0.6706, 0.6863, 0.6824]],
 
         [[0.4627, 0.4667, 0.4902,  ..., 0.6667, 0.6745, 0.6745],
          [0.4588, 0.4667, 0.4863,  ..., 0.6667, 0.6627, 0.6431],
          [0.4706, 0.4824, 0.4980,  ..., 0.6588, 0.6824, 0.6784],
          ...,
          [0.4549, 0.4667, 0.4863,  ..., 0.6667, 0.6471, 0.6392],
          [0.4471, 0.4588, 0.4863,  ..., 0.6667, 0.6510, 0.6471],
          [0.4392, 0.4392, 0.4627,  ..., 0.6549, 0.6706, 0.6667]],
 
         [[0.4510, 0.4549, 0.4784,  ..., 0.6549, 0.6549, 0.6549],
          [0.4471, 0.4549, 0.4745,  ..., 0.6549, 0.6431, 0.6235],
          [0.4588, 0.4706, 0.4863,  ...,

In [86]:
# evaluate in complete dataset
def ReadCordinates(foldername='/content/drive/MyDrive/University of Washington - Seattle/Internship/BrainCorp/find_phone_task_4/find_phone'):
    """Parse ``labels.txt`` in ``foldername`` into a dict keyed by integer image id.

    Each non-blank line is expected to be ``<id>.jpg <x> <y>``.

    Args:
        foldername: folder containing ``labels.txt``; defaults to the dataset
            folder on the mounted Drive.

    Returns:
        dict mapping ``int(id)`` -> ``{'coordinates': (x, y)}`` with float x, y.
    """
    obj_coordinates = {}
    with open(os.path.join(foldername, 'labels.txt'), 'r') as label_file:
        for line in label_file:
            label = line.split()
            if not label:
                # Skip blank lines such as a trailing newline at end of file.
                continue
            image_id = int(label[0].replace('.jpg', ""))
            obj_coordinates[image_id] = {'coordinates' : (float(label[1]), float(label[2]))}
    return obj_coordinates

def getFullPrediction():
  """Run the model on every image of ``complete_dataset`` and tabulate the centres.

  Reads the notebook globals ``complete_dataset``, ``model`` and ``device`` and
  expects the single-argument ``getCenter(boxes)`` helper to be the one in scope.

  Returns:
    DataFrame with columns ``file_index``, ``ActualX``, ``ActualY``,
    ``PredictX``, ``PredictY`` (one row per image). Images for which the model
    returns no box get NaN in the two prediction columns.
  """
  columns = ['file_index','ActualX','ActualY','PredictX', 'PredictY']
  allCordinates = ReadCordinates()
  rows = []

  # Switching to eval mode once is enough; nothing in the loop changes it back.
  model.eval()
  with torch.no_grad():
    for i in range(len(complete_dataset)):
      img, other = complete_dataset[i]
      image_id = other['image_id'].tolist()
      tempActualX, tempActualY = allCordinates[image_id]['coordinates']

      predicted_boxes = model([img.to(device)])[0]['boxes'].tolist()
      if predicted_boxes:
        # Only the first returned box is used.
        tempX, tempY = getCenter(predicted_boxes[0])
      else:
        # No detection: keep the row so the image still counts in later statistics.
        tempX, tempY = float('nan'), float('nan')

      rows.append({
        'file_index': image_id,
        'ActualX': tempActualX,
        'ActualY': tempActualY,
        'PredictX': tempX,
        'PredictY': tempY,
      })

  return pd.DataFrame(rows, columns=columns)

In [87]:
final_prediction = getFullPrediction()
final_prediction.head(10)
# Label values kept for a manual spot check; they match the ActualX / ActualY
# columns of the corresponding rows in the output below.
# 1.jpg 0.8714 0.1718
# 100.jpg 0.8204 0.8558

Unnamed: 0,file_index,ActualX,ActualY,PredictX,PredictY
0,0,0.8306,0.135,0.8316,0.1334
1,1,0.8714,0.1718,0.8684,0.1687
2,10,0.4918,0.4356,0.4888,0.4233
3,100,0.8204,0.8558,0.8163,0.8558
4,101,0.2,0.6503,0.2,0.6518
5,102,0.4551,0.6258,0.4531,0.6258
6,103,0.249,0.7699,0.2459,0.7715
7,104,0.7388,0.2546,0.7398,0.2531
8,105,0.5776,0.8436,0.5765,0.8436
9,106,0.6694,0.3804,0.6673,0.3773


In [89]:
import numpy as np
# Euclidean distance between labelled and predicted centres, in normalised image
# coordinates. Adds the column to final_prediction in place.
final_prediction['diffDistance'] = np.sqrt((final_prediction['ActualX']-final_prediction['PredictX'])**2 +
                                   (final_prediction['ActualY']-final_prediction['PredictY'])**2)

In [94]:
# A prediction counts as a hit when it lies within 0.05 (normalised units) of the label.
final_prediction['Outcome'] = final_prediction['diffDistance'] <= 0.05

In [96]:
# Number of images whose predicted centre is within the threshold.
final_prediction['Outcome'].sum()

129

In [100]:
# Persist the per-image results; to_csv also writes the row index as the first column.
outcomePATH = '/content/drive/MyDrive/University of Washington - Seattle/Internship/BrainCorp/find_phone_task_4/outcomes/Outcome_all_Images.csv'
final_prediction.to_csv(outcomePATH)

In [116]:
# Load one image with OpenCV to inspect its array shape: (height, width, channels).
# NOTE(review): this rebinds `img`, which earlier cells used for a torch tensor.
imgPATH = '/content/drive/MyDrive/University of Washington - Seattle/Internship/BrainCorp/find_phone_task_4/find_phone/0.jpg'
img = cv.imread(imgPATH)
img.shape, img.shape[0]

((326, 490, 3), 326)

In [114]:
# Manual (H, W, C) -> (C, H, W) conversion of the array loaded with cv.imread.
# NOTE(review): unlike T.ToTensor() this keeps OpenCV's raw values and channel
# order (presumably uint8 BGR — confirm), so it is not equivalent to the tensors
# produced by PhoneDataset. The cell is also not re-runnable: `img` is rebound.
img = torch.from_numpy(img)
img = img.permute(2,0,1)
img.shape

torch.Size([3, 326, 490])

In [131]:
def ImageToTensor(imgPATH):
    """Open the image at ``imgPATH``, force RGB mode and convert it with ``T.ToTensor``."""
    rgb_image = Image.open(imgPATH).convert("RGB")
    to_tensor = T.ToTensor()
    return to_tensor(rgb_image)

def getCenter(prediction, XLen, YLen):
    """Normalised centre of the first box in a detection result.

    Args:
        prediction: list whose first element is a dict with a ``'boxes'`` tensor
            of ``[x_min, y_min, x_max, y_max]`` rows.
        XLen: image width in pixels.
        YLen: image height in pixels.

    Returns:
        ``(centerX, centerY)`` as fractions of the image size, rounded to 4 decimals.
    """
    first_box = prediction[0]['boxes'].tolist()[0]
    # (offset, extent): x uses entries 0 and 2 with the width, y uses 1 and 3 with the height.
    axes = ((0, XLen), (1, YLen))
    return tuple(
        round((first_box[low] + first_box[low + 2]) / (2 * extent), 4)
        for low, extent in axes
    )

def getPrediction(imgPATH, model):
    """Predict the normalised object centre for the image stored at ``imgPATH``.

    The model is switched to evaluation mode for the forward pass and put back
    into its previous mode afterwards, so the call works no matter which mode
    the caller left it in. The image is sent to the device the model's
    parameters live on.

    Args:
        imgPATH: path of the image file.
        model: detection model returning a list of dicts with a ``'boxes'`` entry.

    Returns:
        ``(centerX, centerY)`` as produced by ``getCenter(prediction, width, height)``.
    """
    # Follow the model's own device; fall back to the CUDA-if-available rule
    # only when the model exposes no parameters.
    first_param = next(iter(model.parameters()), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    img = ImageToTensor(imgPATH)

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            prediction = model([img.to(device)])
    finally:
        # Restore the caller's train/eval mode.
        model.train(was_training)

    # img is (channels, height, width): shape[2] is the width, shape[1] the height.
    return getCenter(prediction, img.shape[2], img.shape[1])
    

In [132]:
# Predicted normalised centre for the single image at imgPATH.
getPrediction(imgPATH, model)

torch.Size([3, 326, 490])


(0.831, 0.1344)

In [133]:
# NOTE(review): this rebinds `model`, discarding the in-memory fine-tuned network.
# The new model still has the stock COCO box predictor, so the 2-class head must be
# re-attached before the saved state_dict can be loaded. Per the output below, a
# ResNet-50 backbone checkpoint is still downloaded even with pretrained=False.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]