<a href="https://colab.research.google.com/github/pauljohn99/ML-learnings/blob/main/mouthanotation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the COCO evaluation dependency and fetch torchvision's detection reference scripts.
!pip install pycocotools --quiet
!git clone https://github.com/pytorch/vision.git
# NOTE(review): this checkout runs in the notebook's working directory, not inside the
# `vision` clone, so git reports "fatal: not a git repository" and the files copied below
# come from whatever branch the clone checked out by default. Presumably
# `git -C vision checkout v0.3.0` was intended - confirm that tag's helper scripts still
# import under the installed torch before making the checkout effective.
!git checkout v0.3.0
# Copy the helper modules next to the notebook so they can be imported as top-level modules.
!cp vision/references/detection/utils.py ./
!cp vision/references/detection/transforms.py ./
!cp vision/references/detection/coco_eval.py ./
!cp vision/references/detection/engine.py ./
!cp vision/references/detection/coco_utils.py ./

Cloning into 'vision'...
remote: Enumerating objects: 276027, done.[K
remote: Counting objects: 100% (15228/15228), done.[K
remote: Compressing objects: 100% (743/743), done.[K
remote: Total 276027 (delta 14526), reused 15119 (delta 14455), pack-reused 260799[K
Receiving objects: 100% (276027/276027), 552.51 MiB | 16.42 MiB/s, done.
Resolving deltas: 100% (252295/252295), done.
fatal: not a git repository (or any of the parent directories): .git


In [None]:
# Basic python and ML Libraries
import os
import random
import numpy as np
import pandas as pd
# for ignoring warnings
import warnings
warnings.filterwarnings('ignore')

# We will be reading images using OpenCV
import cv2

# xml library for parsing xml files
from xml.etree import ElementTree as et

# matplotlib for visualization
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# torchvision libraries
import torch
import torchvision
from torchvision import transforms as torchtrans  
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# these are the helper libraries imported.
from engine import train_one_epoch, evaluate
import utils
import transforms as T

# for image augmentations
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2


In [None]:
# Mount Google Drive at /content/drive; the dataset paths used below live under it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Dataset locations on the mounted Drive. `files_dir` is the training folder and
# `test_dir` the testing folder; FruitImagesDataset looks for each image's .xml
# annotation in the same folder as the image.
files_dir = '/content/drive/MyDrive/mouth-area-practice/training/images'
test_dir = '/content/drive/MyDrive/mouth-area-practice/testing/images'


class FruitImagesDataset(torch.utils.data.Dataset):
    """Detection dataset over a folder of ``.jpg`` images with Pascal VOC ``.xml`` files.

    Every ``<name>.jpg`` in ``files_dir`` is paired with ``<name>.xml`` from the
    same folder. Images are resized to ``width`` x ``height`` and the annotated
    boxes are rescaled to the resized image.

    Args:
        files_dir: Directory containing the images and their annotation files.
        width: Width, in pixels, that every image is resized to.
        height: Height, in pixels, that every image is resized to.
        transforms: Optional callable invoked as
            ``transforms(image=..., bboxes=..., labels=...)`` and returning a
            mapping with at least ``'image'`` and ``'bboxes'`` entries.
    """

    def __init__(self, files_dir, width, height, transforms=None):
        self.transforms = transforms
        self.files_dir = files_dir
        self.height = height
        self.width = width

        # Sorted so that a given index always refers to the same image.
        self.imgs = [image for image in sorted(os.listdir(files_dir))
                     if image.endswith('.jpg')]

        # Index 0 is reserved for the background class. An explicit placeholder
        # is used because a bare `_` only exists inside an interactive IPython
        # session and is a NameError anywhere else.
        self.classes = ['__background__', 'mouth']

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            A tuple ``(image, target)``. Without transforms ``image`` is a
            float32 RGB array of shape ``(height, width, 3)`` scaled by 1/255;
            with transforms it is whatever the transform returns as ``'image'``.
            ``target`` is a dict with ``boxes`` (N x 4, xmin/ymin/xmax/ymax),
            ``labels``, ``area``, ``iscrowd`` and ``image_id`` tensors.

        Raises:
            FileNotFoundError: If the image file cannot be read.
            ValueError: If an annotation names a class missing from ``self.classes``.
        """
        img_name = self.imgs[idx]
        image_path = os.path.join(self.files_dir, img_name)

        # cv2.imread reports failure by returning None instead of raising.
        img = cv2.imread(image_path)
        if img is None:
            raise FileNotFoundError('could not read image: {}'.format(image_path))

        # OpenCV loads BGR; convert to RGB floats, resize, then divide by 255.
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
        # The third positional parameter of cv2.resize is `dst`, so the
        # interpolation flag must be passed by keyword to have any effect.
        img_res = cv2.resize(img_rgb, (self.width, self.height),
                             interpolation=cv2.INTER_AREA)
        img_res /= 255.0

        # The annotation file shares the image's base name.
        annot_file_path = os.path.join(self.files_dir, img_name[:-4] + '.xml')
        root = et.parse(annot_file_path).getroot()

        # img.shape is (height, width, channels) of the image as stored on disk.
        ht, wt = img.shape[:2]

        boxes = []
        labels = []
        for member in root.findall('object'):
            labels.append(self.classes.index(member.find('name').text))

            bndbox = member.find('bndbox')
            xmin = float(bndbox.find('xmin').text)
            xmax = float(bndbox.find('xmax').text)
            ymin = float(bndbox.find('ymin').text)
            ymax = float(bndbox.find('ymax').text)

            # Rescale from the stored image size to the resized image.
            boxes.append([(xmin / wt) * self.width,
                          (ymin / ht) * self.height,
                          (xmax / wt) * self.width,
                          (ymax / ht) * self.height])

        if self.transforms:
            sample = self.transforms(image=img_res, bboxes=boxes, labels=labels)
            img_res = sample['image']
            boxes = sample['bboxes']
            # Keep labels aligned with the boxes the transform handed back.
            if 'labels' in sample:
                labels = sample['labels']

        # reshape(-1, 4) keeps an (0, 4) shape when the image has no objects,
        # so the column indexing below does not fail.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["area"] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # All instances are treated as non-crowd.
        target["iscrowd"] = torch.zeros((boxes.shape[0],), dtype=torch.int64)
        target["image_id"] = torch.tensor([idx])

        return img_res, target

    def __len__(self):
        """Return the number of ``.jpg`` images found in ``files_dir``."""
        return len(self.imgs)


# Sanity check: build the dataset at 224x224 without transforms and report its size.
dataset = FruitImagesDataset(files_dir, 224, 224)
print('length of dataset = ', len(dataset), '\n')

# Fetch one sample to inspect the image shape and the target dict. Feel free to change the index.
img, target = dataset[1]
print(img.shape, '\n',target)

length of dataset =  100 

(224, 224, 3) 
 {'boxes': tensor([[ 75.7008,  96.2500, 164.7424, 121.9750]]), 'labels': tensor([1]), 'area': tensor([2290.5940]), 'iscrowd': tensor([0]), 'image_id': tensor([1])}


In [None]:
def plot_img_bbox(img, target):
    """Show ``img`` with every box in ``target['boxes']`` outlined in green.

    Args:
        img: Image in a form ``Axes.imshow`` accepts.
        target: Mapping whose ``'boxes'`` entry is an iterable of
            ``[xmin, ymin, xmax, ymax]`` boxes (tensor, array or list).
    """
    fig, a = plt.subplots(1, 1)
    fig.set_size_inches(5, 5)
    a.imshow(img)
    for box in target['boxes']:
        # Rectangle takes the top-left corner plus width and height, whereas
        # the boxes are stored as two opposite corners.
        x, y = float(box[0]), float(box[1])
        width, height = float(box[2] - box[0]), float(box[3] - box[1])
        rect = patches.Rectangle((x, y),
                                 width, height,
                                 linewidth=2,
                                 edgecolor='g',
                                 facecolor='none')

        # Draw the bounding box on top of the image
        a.add_patch(rect)
    plt.show()
    
# # plotting the image with bboxes. Feel free to change the index
# img, target = dataset[0]
# plot_img_bbox(img, target)

In [None]:
# def get_object_detection_model(num_classes):

#     # load a model pre-trained on COCO
#     model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    
#     # get number of input features for the classifier
#     in_features = model.roi_heads.box_predictor.cls_score.in_features
#     # replace the pre-trained head with a new one
#     model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) 

#     return model

In [None]:
# Pass train=True for the training transforms and train=False for the val/test transforms
def get_transform(train):
    """Build the albumentations pipeline for one dataset split.

    Args:
        train: True for the training pipeline, which adds a random horizontal
            flip; False for validation/test, which only converts to a tensor.

    Returns:
        An ``A.Compose`` that takes ``image``, ``bboxes`` (pascal_voc format)
        and ``labels`` keyword arguments.
    """
    steps = []
    if train:
        # The probability is passed by keyword: in albumentations releases whose
        # first positional parameter is `always_apply`, `HorizontalFlip(0.5)`
        # flips every image instead of half of them.
        steps.append(A.HorizontalFlip(p=0.5))

    # ToTensorV2 converts image to pytorch tensor without div by 255
    steps.append(ToTensorV2(p=1.0))

    return A.Compose(steps, bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

In [None]:
# Two views over the same folder: one with training augmentation, one without.
dataset = FruitImagesDataset(files_dir, 480, 480, transforms= get_transform(train=True))
dataset_test = FruitImagesDataset(files_dir, 480, 480, transforms= get_transform(train=False))

# Fixed seed so the shuffled split is the same on every run.
torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()

# train test split
test_split = 0.2
tsize = int(len(dataset)*test_split)
# Slice from an explicit position: `indices[:-tsize]` is empty when tsize is 0,
# which would leave a very small dataset with no training images at all.
split_at = len(indices) - tsize
dataset = torch.utils.data.Subset(dataset, indices[:split_at])
dataset_test = torch.utils.data.Subset(dataset_test, indices[split_at:])
print('train images:', len(dataset), '| held-out images:', len(dataset_test))

# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=5, shuffle=True, num_workers=1,
    collate_fn=utils.collate_fn)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=3, shuffle=False, num_workers=0,
    collate_fn=utils.collate_fn)

NameError: ignored

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


# num_classes = 4

# # get the model using our helper function
# model = get_object_detection_model(num_classes)

# # move model to the right device
# model.to(device)

# # construct an optimizer
# params = [p for p in model.parameters() if p.requires_grad]
# optimizer = torch.optim.SGD(params, lr=0.005,
#                             momentum=0.9, weight_decay=0.0005)

# # and a learning rate scheduler which decreases the learning rate by
# # 10x every 3 epochs
# lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
#                                                step_size=3,
#                                                gamma=0.1)

In [None]:
# num_epochs = 10

# for epoch in range(num_epochs):
#     # training for one epoch
#     train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
#     # update the learning rate
#     lr_scheduler.step()
#     # evaluate on the test dataset
#     evaluate(model, data_loader_test, device=device)

Epoch: [0]  [ 0/16]  eta: 0:22:40  lr: 0.000002  loss: 0.2547 (0.2547)  loss_classifier: 0.0974 (0.0974)  loss_box_reg: 0.1322 (0.1322)  loss_objectness: 0.0200 (0.0200)  loss_rpn_box_reg: 0.0051 (0.0051)  time: 85.0347  data: 0.5771
Epoch: [0]  [10/16]  eta: 0:07:32  lr: 0.000025  loss: 0.2330 (0.2177)  loss_classifier: 0.0952 (0.0930)  loss_box_reg: 0.1133 (0.1060)  loss_objectness: 0.0121 (0.0131)  loss_rpn_box_reg: 0.0054 (0.0056)  time: 75.4859  data: 0.0651
Epoch: [0]  [15/16]  eta: 0:01:15  lr: 0.000034  loss: 0.2330 (0.2218)  loss_classifier: 0.0952 (0.0945)  loss_box_reg: 0.1133 (0.1084)  loss_objectness: 0.0118 (0.0135)  loss_rpn_box_reg: 0.0044 (0.0054)  time: 75.2690  data: 0.0493
Epoch: [0] Total time: 0:20:04 (75.2707 s / it)
creating index...
index created!
Test:  [0/7]  eta: 0:01:59  model_time: 16.9739 (16.9739)  evaluator_time: 0.0016 (0.0016)  time: 17.0596  data: 0.0840
Test:  [6/7]  eta: 0:00:16  model_time: 16.8896 (16.1078)  evaluator_time: 0.0023 (0.0024)  time:

In [None]:
# the function takes the original prediction and the iou threshold.

def apply_nms(orig_prediction, iou_thresh=0.3):
    """Return a filtered copy of a detection result with overlapping boxes removed.

    Args:
        orig_prediction: Mapping with ``'boxes'``, ``'scores'`` and ``'labels'``
            tensors for one image. It is left unmodified.
        iou_thresh: IoU threshold passed to ``torchvision.ops.nms``.

    Returns:
        A new dict with the same keys as ``orig_prediction`` in which
        ``'boxes'``, ``'scores'`` and ``'labels'`` hold only the kept detections.
    """
    # torchvision returns the indices of the bboxes to keep
    keep = torchvision.ops.nms(orig_prediction['boxes'], orig_prediction['scores'], iou_thresh)

    # Copy first: filtering the caller's dict in place would make a second call
    # with a different threshold start from already-suppressed detections.
    final_prediction = dict(orig_prediction)
    for key in ('boxes', 'scores', 'labels'):
        final_prediction[key] = orig_prediction[key][keep]

    return final_prediction

# function to convert a torchtensor back to PIL image
def torch_to_pil(img):
    """Convert an image tensor to a PIL image in RGB mode."""
    to_pil = torchtrans.ToPILImage()
    pil_image = to_pil(img)
    return pil_image.convert('RGB')

In [None]:
# The checkpoint path has to be a string literal. map_location places the model
# on the device the images are sent to.
# NOTE: torch.load unpickles the file, which can run arbitrary code - only load
# checkpoints from a trusted source.
# NOTE(review): recent PyTorch releases default to weights_only=True, which
# rejects fully pickled models; if this load fails there, pass
# weights_only=False for a trusted checkpoint.
model = torch.load('/content/mouth_model.pt', map_location=device)

In [None]:
# Inference mode first, then fetch one held-out sample to run through the model.
model.eval()
img, target = dataset_test[1]

# Gradients are not needed for prediction.
with torch.no_grad():
    prediction = model([img.to(device)])[0]

# Compare how many boxes were predicted with how many are annotated.
print('predicted #boxes: ', len(prediction['labels']))
print('real #boxes: ', len(target['labels']))

In [None]:
# Ground truth for the sample selected above: print the target, then draw its boxes.
print('EXPECTED OUTPUT', target, sep='\n')
plot_img_bbox(torch_to_pil(img), target)

In [None]:
print('MODEL OUTPUT')
# Move the raw predicted boxes to the CPU once and reuse them for printing and plotting.
pred = prediction['boxes'].detach().cpu()
print(pred.numpy())

# plot_img_bbox only reads the 'boxes' entry.
pred1 = {'boxes': pred}
print(pred1)

out = torch_to_pil(img)
plot_img_bbox(out, pred1)

In [None]:
# Suppress overlapping detections, then show what is left.
nms_prediction = apply_nms(prediction, iou_thresh=0.1)
print(nms_prediction)

pred = nms_prediction['boxes'].detach().cpu()
pred1 = {'boxes': pred}
print(pred1)

print('NMS APPLIED MODEL OUTPUT')
plot_img_bbox(torch_to_pil(img), pred1)

In [None]:
nms_prediction = apply_nms(prediction, iou_thresh=0.2)
print(nms_prediction)

# Scores as a NumPy array so the best detection can be picked with argmax.
p = nms_prediction['scores'].detach().cpu().numpy()
print(p)

if p.size == 0:
    # np.argmax raises ValueError on an empty array, so report "nothing detected" instead.
    print('no detections left to plot')
else:
    pr = int(np.argmax(p))
    print(pr)
    # Keep only the top-scoring box, shaped (1, 4) so plot_img_bbox can iterate over it.
    pred = np.reshape(nms_prediction['boxes'][pr].detach().cpu().numpy(), (-1, 4))
    pred1 = {'boxes': pred}
    print(pred1)
    print('NMS APPLIED MODEL OUTPUT')
    plot_img_bbox(torch_to_pil(img), pred1)

In [None]:
# Test images are evaluated as stored, so use the non-augmenting pipeline;
# train=True would randomly mirror them before prediction.
test_dataset = FruitImagesDataset(test_dir, 480, 480, transforms= get_transform(train=False))

# Evaluation mode only needs to be set once, not on every iteration.
model.eval()

y = len(test_dataset)
for i in range(y):
    img, target = test_dataset[i]
    with torch.no_grad():
        prediction = model([img.to(device)])[0]

    print('MODEL OUTPUT\n')
    nms_prediction = apply_nms(prediction, iou_thresh=0.1)
    p = nms_prediction['scores'].detach().cpu().numpy()
    if p.size == 0:
        # argmax/max fail on an empty array; skip images with no detections.
        print('no detections for image', i)
        continue

    # Index and score of the most confident detection.
    pr = int(np.argmax(p))
    x = p[pr]
    print(x)

    # Shape (1, 4) so plot_img_bbox can iterate over the single box.
    pred = np.reshape(nms_prediction['boxes'][pr].detach().cpu().numpy(), (-1, 4))
    pred1 = {'boxes': pred}
    print('NMS APPLIED MODEL OUTPUT')
    plot_img_bbox(torch_to_pil(img), pred1)

In [None]:
import matplotlib.pyplot as plt
import numpy
from sklearn import metrics

# Box coordinates are continuous values, which confusion_matrix rejects, so the
# ground-truth and predicted boxes are compared by IoU and the comparison is
# turned into a match / no-match label before building the matrix.
IOU_MATCH_THRESHOLD = 0.5

# Ground-truth boxes and post-NMS scores of the most recently evaluated image.
gt_boxes = target['boxes'].detach().cpu().reshape(-1, 4)
pred = gt_boxes.numpy()
p = nms_prediction['scores'].detach().cpu().numpy()

if len(pred) == 0 or p.size == 0:
    print('need at least one ground-truth box and one detection to score the prediction')
else:
    # First annotated box.
    pred1 = pred[0]
    print(pred1)

    # Most confident detection.
    pr = int(np.argmax(p))
    print(pr)
    best_box = nms_prediction['boxes'][pr].detach().cpu().reshape(1, 4)
    pred3 = best_box.numpy()[0]
    print(pred3)

    # box_iou returns an (N, M) matrix; here it is 1 x 1.
    iou = float(torchvision.ops.box_iou(gt_boxes[:1], best_box)[0, 0])
    print('IoU:', iou)

    # One annotated object (True) versus whether the detection overlaps it enough.
    y_true = [True]
    y_pred = [iou >= IOU_MATCH_THRESHOLD]
    confusion_matrix = metrics.confusion_matrix(y_true, y_pred, labels=[False, True])

    cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix = confusion_matrix, display_labels = [False, True])

    cm_display.plot()
    plt.show()