### Papers
- [Faster R-CNN](https://arxiv.org/pdf/1506.01497.pdf)
- [FPN](https://arxiv.org/pdf/1612.03144.pdf)

### Some code is taken from
- https://colab.research.google.com/github/pytorch/vision/blob/temp-tutorial/tutorials/torchvision_finetuning_instance_segmentation.ipynb#scrollTo=cFHKCvCTxiff

In [9]:
import unittest
import os
import sys
import pathlib
import urllib
import shutil
import re
import zipfile

import torch
# import torchvision
import torchvision.transforms as T
import matplotlib.pyplot as plt
from torchviz import make_dot, make_dot_from_trace

import numpy as np
import torch.utils.data
from PIL import Image

%load_ext autoreload
%autoreload 2

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device:', device)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Using device: cuda


In [2]:
from torchvision.utils import make_grid
from PIL import Image, ImageDraw

def draw_bbox(tensor, bboxes, fill=None, outline=None, width=1,):
    """Render an image tensor as a PIL image with one rectangle drawn per box.

    Args:
        tensor: image tensor; it is moved to the CPU and converted with
            torchvision's ``ToPILImage``.
        bboxes: tensor of boxes, iterated row by row after being moved to the
            CPU. Each row is handed to ``ImageDraw.rectangle`` as-is
            (presumably ``[x0, y0, x1, y1]`` in pixels -- confirm against the
            dataset's target format).
        fill: forwarded unchanged to ``ImageDraw.rectangle``.
        outline: forwarded unchanged to ``ImageDraw.rectangle``.
        width: forwarded unchanged to ``ImageDraw.rectangle``.

    Returns:
        The PIL image with all rectangles drawn onto it.
    """
    # Imported locally so the function does not depend on the global alias
    # ``T``, which a later cell rebinds to the detection-reference
    # ``transforms`` module.
    from torchvision.transforms import ToPILImage

    im = ToPILImage()(tensor.cpu())
    # A single drawing context is enough for every box on this image.
    draw = ImageDraw.Draw(im)
    for bbox in bboxes.cpu():
        # Pass a plain list of Python numbers rather than a NumPy array, so the
        # coordinates PIL receives do not depend on the array's dtype.
        draw.rectangle(bbox.tolist(), fill=fill, outline=outline, width=width)
    return im

In [3]:

import myutils.download

# Local cache directory for the archive, and the URL it is fetched from.
DATA_DIR = pathlib.Path.home() / '.pytorch-datasets'
DATA_URL = 'https://www.cis.upenn.edu/~jshi/ped_html/PennFudanPed.zip'

# download_data returns a pair; only its second element (used below as the
# dataset directory) is kept.
_, dataset_dir = myutils.download.download_data(
    out_path=DATA_DIR,
    url=DATA_URL,
    extract=True,
    force=False,
)

from dataset import PennFudanDataset
import torchvision.transforms as T

# The only preprocessing step here is the conversion to a tensor.
tf = T.Compose([T.ToTensor()])

ds = PennFudanDataset(dataset_dir, tf)

File C:\Users\Noam\.pytorch-datasets\PennFudanPed.zip exists, skipping download.
Extracting C:\Users\Noam\.pytorch-datasets\PennFudanPed.zip...
Extracted 516 to C:\Users\Noam\.pytorch-datasets\PennFudanPed


In [4]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# load a Faster R-CNN model (ResNet-50 + FPN variant) pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# replace the classifier with a new one that has our own number of classes
num_classes = 2  # 1 class (person) + background
# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Move the model to the target device only AFTER the head has been swapped:
# the freshly constructed predictor is created on the CPU, so moving earlier
# would leave the new head on a different device than the rest of the model.
model = model.to(device)

# last expression of the cell: display the resulting RoI heads
model.roi_heads

RoIHeads(
  (box_roi_pool): MultiScaleRoIAlign()
  (box_head): TwoMLPHead(
    (fc6): Linear(in_features=12544, out_features=1024, bias=True)
    (fc7): Linear(in_features=1024, out_features=1024, bias=True)
  )
  (box_predictor): FastRCNNPredictor(
    (cls_score): Linear(in_features=1024, out_features=2, bias=True)
    (bbox_pred): Linear(in_features=1024, out_features=8, bias=True)
  )
)

In [5]:
# Manual setup instructions: download the TorchVision repo and copy some helper
# files from references/detection next to this notebook.
# NOTE: the triple-quoted text below is only a string expression -- none of the
# commands are executed by this cell (running it just echoes the string).
# Run them by hand in a shell before importing engine / utils / transforms.
"""
git clone https://github.com/pytorch/vision.git
cd vision
git checkout v0.3.0

cp references/detection/utils.py ../
cp references/detection/transforms.py ../
cp references/detection/coco_eval.py ../
cp references/detection/engine.py ../
cp references/detection/coco_utils.py ../

pip install pycocotools-windows
"""

'\ngit clone https://github.com/pytorch/vision.git\ncd vision\ngit checkout v0.3.0\n\ncp references/detection/utils.py ../\ncp references/detection/transforms.py ../\ncp references/detection/coco_eval.py ../\ncp references/detection/engine.py ../\ncp references/detection/coco_utils.py ../\n\npip install pycocotools-windows\n'

In [6]:
from engine import train_one_epoch, evaluate
import utils
import transforms as T

def get_transform(train):
    """Build the transform pipeline applied to each dataset sample.

    Args:
        train: when truthy, a random horizontal flip (probability argument
            0.5) is appended after the tensor conversion as data augmentation.

    Returns:
        A ``T.Compose`` wrapping the selected transforms, in order.
    """
    # The PIL image is always converted into a PyTorch tensor first.
    steps = [T.ToTensor()]
    if train:
        # Training only: randomly flip images together with their ground truth.
        steps.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(steps)

# Two views over the same directory: one built with the training transforms,
# one with the evaluation transforms.
dataset = PennFudanDataset(dataset_dir, get_transform(train=True))
dataset_test = PennFudanDataset(dataset_dir, get_transform(train=False))

# Seed the RNG before shuffling so the permutation is reproducible, then keep
# the last 50 shuffled indices for testing and everything else for training.
torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()
dataset = torch.utils.data.Subset(dataset, indices[:-50])
dataset_test = torch.utils.data.Subset(dataset_test, indices[-50:])

# Training loader: shuffled batches of two samples.
data_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=2,
    shuffle=True,
    num_workers=4,
    collate_fn=utils.collate_fn,
)

# Test loader: one sample at a time, in a fixed order.
data_loader_test = torch.utils.data.DataLoader(
    dataset_test,
    batch_size=1,
    shuffle=False,
    num_workers=4,
    collate_fn=utils.collate_fn,
)

In [7]:

# two classes in total: background and person
num_classes = 2

# make sure the whole model is on the target device
model.to(device)

# hand the optimizer only the parameters that still require gradients
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)

# step schedule: the learning rate is multiplied by 0.1 (a 10x decrease)
# every 3 scheduler steps, i.e. every 3 epochs in the training loop
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1,
)

In [8]:
# total number of passes over the training loader
num_epochs = 10

for epoch in range(num_epochs):
    # one training pass; progress is reported every 10 iterations
    train_one_epoch(
        model, optimizer, data_loader, device, epoch,
        print_freq=10,
    )
    # advance the learning-rate schedule once per epoch
    lr_scheduler.step()
    # evaluate on the held-out test loader after each epoch
    evaluate(model, data_loader_test, device=device)

KeyboardInterrupt: 