In [1]:
!pip uninstall torch -y
!pip uninstall torchvision -y
!pip install torch==2.1.0
!pip install torchvision==0.16.0

import torch
import torchvision
import os
import matplotlib.pyplot as plt

from ochumanApi.ochuman import Poly2Mask
from torch.utils.data import Dataset
from torchvision import tv_tensors
from ochumanApi.ochuman import OCHuman

%matplotlib inline
plt.rcParams['figure.figsize'] = (15, 15)
torch.manual_seed(1)

Found existing installation: torch 2.1.0
Uninstalling torch-2.1.0:
  Successfully uninstalled torch-2.1.0
Found existing installation: torchvision 0.16.0
Uninstalling torchvision-0.16.0:
  Successfully uninstalled torchvision-0.16.0
Collecting torch==2.1.0
  Obtaining dependency information for torch==2.1.0 from https://files.pythonhosted.org/packages/4c/53/f1e58e147df8601c963df4b15045631f7e3d3caa5973bdf4e54a5cf6834e/torch-2.1.0-cp39-none-macosx_11_0_arm64.whl.metadata
  Using cached torch-2.1.0-cp39-none-macosx_11_0_arm64.whl.metadata (24 kB)
Using cached torch-2.1.0-cp39-none-macosx_11_0_arm64.whl (59.5 MB)
Installing collected packages: torch
Successfully installed torch-2.1.0
Collecting torchvision==0.16.0
  Obtaining dependency information for torchvision==0.16.0 from https://files.pythonhosted.org/packages/7d/fd/9c2b3d0200532dc4a6211ef0fcf78c0556a27e3b03800333d4caa32bedc5/torchvision-0.16.0-cp39-cp39-macosx_11_0_arm64.whl.metadata
  Using cached torchvision-0.16.0-c

<torch._C.Generator at 0x108dc01b0>

In [2]:
import numpy as np 

from torchvision.io import read_image, ImageReadMode

class OCHumanDataset(Dataset):
    def __init__(self, image_root: str, oc_human: OCHuman):
        self.root = image_root
        self.data = oc_human.loadImgs(imgIds=oc_human.getImgIds())
        self.images, self.masks, self.bounding_boxes = self.__get_properties(oc_human_data=self.data)
        self.__getitem__(2)
        
    def __getitem__(self, index):
        image = self.images[index]/255
        masks = self.masks[index]
        masks = torch.as_tensor(masks, dtype=torch.uint8)
        
        number_of_objects = len(masks)
        
        labels = torch.ones(number_of_objects, dtype=torch.int64)
        
        bounding_boxes = torch.as_tensor(self.bounding_boxes[index][:])
        area = torch.as_tensor(self.__get_areas(index))
        
        image_id = index
        
        is_crowd = torch.zeros(number_of_objects, dtype=torch.int64)
        
        target = {'boxes': bounding_boxes,
                  'masks': masks, 'labels': labels, 'image_id': image_id, 'area': area,
                  'iscrowd': is_crowd}
        return image, target
    
    def __len__(self):
        return len(self.images)
    
    def __get_properties(self, oc_human_data):
        images = []
        image_masks = []
        bounding_boxes = []
        for file in oc_human_data:
            images.append(read_image(os.path.join(self.root, file['file_name']), mode=ImageReadMode.RGB))
            image_masks.append(self.__get_binary_masks(file=file))
            bounding_boxes.append(self.__get_bounding_boxes(file=file))
        return images, image_masks, bounding_boxes
    
    @staticmethod
    def __get_binary_masks(file):
        masks = []
        for annotation in file['annotations']:
            segmentation = annotation['segms']
            if segmentation is not None:
                masks.append(Poly2Mask(segmentation))
        return masks
        
    @staticmethod
    def __get_bounding_boxes(file):
        bounding_boxes = []
        for annotation in file['annotations']:
            bounding_box = annotation['bbox']
            if bounding_box is not None:
                bounding_boxes.append(bounding_box)
        return bounding_boxes
    
    def __get_areas(self, index):
        bounding_boxes = self.bounding_boxes[index][:][:]
        areas = []
        for box in bounding_boxes:
            areas.append((box[3] - box[1]) * (box[2] - box[0]))
        return np.array(areas)

In [4]:
import utils

from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from engine import train_one_epoch, evaluate
from tqdm import trange
from datetime import datetime

import torch._dynamo
torch._dynamo.config.suppress_errors = True

ochuman = OCHuman(AnnoFile='./ochuman.json', Filter='kpt&segm')

dataset = OCHumanDataset(image_root='./images/', oc_human=ochuman)
dataset_test = OCHumanDataset(image_root='./images/', oc_human=ochuman)

indices = torch.randperm(len(dataset)).tolist()
dataset_train = torch.utils.data.Subset(dataset, indices[:200])
dataset_test = torch.utils.data.Subset(dataset_test, indices[201:251])

data_loader = torch.utils.data.DataLoader(
    dataset_train,
    batch_size=4,
    shuffle=True,
    collate_fn=utils.collate_fn
)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test,
    batch_size=1,
    shuffle=False,
    collate_fn=utils.collate_fn
)

model = torchvision.models.detection.maskrcnn_resnet50_fpn_v2(weights='DEFAULT')

number_of_classes = 2

in_features = model.roi_heads.box_predictor.cls_score.in_features

model.roi_heads.box_predictor = FastRCNNPredictor(in_features, number_of_classes)

in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
hidden_layer = 256
    
model.roi_heads.mask_predictor = MaskRCNNPredictor(
    in_features_mask,
    hidden_layer,
    number_of_classes,
)

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.compile()
model.to(device)

params = [p for p in model.parameters() if p.requires_grad]

num_epochs = 2

optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005
)

# and a learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1
)

def get_current_time():
    now = datetime.now()
    now = now.strftime("%b-%d-%Y %H:%M:%S")
    return now

def save_model(current_model, time, directory):
    model_scripted = torch.jit.script(current_model) 
    model_scripted.save(directory + '/checkpoint ' + time + '.pt')
    
filename = "run " + get_current_time()
run_directory = "runs/" + filename
os.mkdir(run_directory)

for epoch in trange(num_epochs, desc='Training Epoch'):
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    save_model(model, get_current_time(), run_directory)
    lr_scheduler.step()
    evaluate(model, data_loader_test, device=device)

Training Epoch:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch: [0]  [ 0/50]  eta: 2:10:24  lr: 0.000107  loss: 2.6470 (2.6470)  loss_classifier: 0.6061 (0.6061)  loss_box_reg: 0.1605 (0.1605)  loss_mask: 1.8647 (1.8647)  loss_objectness: 0.0112 (0.0112)  loss_rpn_box_reg: 0.0046 (0.0046)  time: 156.4876  data: 0.3110
Epoch: [0]  [10/50]  eta: 0:49:06  lr: 0.001126  loss: 2.0396 (1.8897)  loss_classifier: 0.4635 (0.4190)  loss_box_reg: 0.1315 (0.1390)  loss_mask: 1.3813 (1.2967)  loss_objectness: 0.0225 (0.0207)  loss_rpn_box_reg: 0.0117 (0.0143)  time: 73.6562  data: 0.3222




Epoch: [0]  [20/50]  eta: 0:27:19  lr: 0.002146  loss: 1.0801 (1.4711)  loss_classifier: 0.1262 (0.2714)  loss_box_reg: 0.1358 (0.1380)  loss_mask: 0.7860 (1.0313)  loss_objectness: 0.0187 (0.0193)  loss_rpn_box_reg: 0.0073 (0.0112)  time: 49.5455  data: 0.3152
Epoch: [0]  [30/50]  eta: 0:15:28  lr: 0.003165  loss: 0.9399 (1.2784)  loss_classifier: 0.0975 (0.2095)  loss_box_reg: 0.1319 (0.1323)  loss_mask: 0.6764 (0.9061)  loss_objectness: 0.0149 (0.0176)  loss_rpn_box_reg: 0.0077 (0.0128)  time: 31.4182  data: 0.3002
Epoch: [0]  [40/50]  eta: 0:06:59  lr: 0.004184  loss: 0.7669 (1.1343)  loss_classifier: 0.0612 (0.1706)  loss_box_reg: 0.1196 (0.1292)  loss_mask: 0.5352 (0.8073)  loss_objectness: 0.0111 (0.0156)  loss_rpn_box_reg: 0.0094 (0.0116)  time: 28.6866  data: 0.2936
Epoch: [0]  [49/50]  eta: 0:00:39  lr: 0.005000  loss: 0.6794 (1.0465)  loss_classifier: 0.0447 (0.1471)  loss_box_reg: 0.1215 (0.1279)  loss_mask: 0.4809 (0.7449)  loss_objectness: 0.0095 (0.0146)  loss_rpn_box_re



creating index...
index created!




Test:  [ 0/50]  eta: 0:20:58  model_time: 25.0882 (25.0882)  evaluator_time: 0.0140 (0.0140)  time: 25.1705  data: 0.0683


Training Epoch:  50%|█████     | 1/2 [42:14<42:14, 2534.57s/it]

Test:  [49/50]  eta: 0:00:04  model_time: 2.5637 (4.4011)  evaluator_time: 0.0165 (0.0178)  time: 3.2228  data: 0.0601
Test: Total time: 0:03:44 (4.4813 s / it)
Averaged stats: model_time: 2.5637 (4.4011)  evaluator_time: 0.0165 (0.0178)
Accumulating evaluation results...
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.01s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.338
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.704
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.271
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.338
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.340
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.520
 



creating index...
index created!
Test:  [ 0/50]  eta: 0:02:21  model_time: 2.7487 (2.7487)  evaluator_time: 0.0106 (0.0106)  time: 2.8269  data: 0.0675


Training Epoch: 100%|██████████| 2/2 [1:10:49<00:00, 2124.70s/it]

Test:  [49/50]  eta: 0:00:01  model_time: 1.5203 (1.8587)  evaluator_time: 0.0081 (0.0104)  time: 1.6710  data: 0.0609
Test: Total time: 0:01:36 (1.9314 s / it)
Averaged stats: model_time: 1.5203 (1.8587)  evaluator_time: 0.0081 (0.0104)
Accumulating evaluation results...
DONE (t=0.00s).
Accumulating evaluation results...
DONE (t=0.00s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.354
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.726
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.329
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.354
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.369
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.499
 


