<a href="https://colab.research.google.com/github/saba6099/Soccer-robot-perception/blob/master/Cuda_Final_Project-Saba.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:

#import required packages
import os
import cv2
import torch
import random
import numbers
import numpy as np
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torch import randperm
from torch._utils import _accumulate
import torchvision
from torchvision import datasets, transforms, models
import torchvision.transforms.functional as F
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.nn as nn
import time
from skimage import io, transform
import sklearn.metrics as skm
import imutils
from scipy.spatial import distance

In [0]:
def New_bnddtls(bnddtls,scaling):
    """
    Rescale a bounding box in place and refresh its derived rows.

    Args:
    bnddtls: tensor indexed as [row, column]; rows 0 and 1 hold the two box
             corners, row 2 receives the centre and row 3 the extent.
    scaling: divisor applied to both corner rows.
    Returns:
    the same ``bnddtls`` object, modified in place.
    """
    # Bring both corners into the scaled coordinate frame first.
    for corner in (0, 1):
        bnddtls[corner] = bnddtls[corner] / scaling

    # Re-derive centre (row 2) and extent (row 3) from the scaled corners,
    # one coordinate axis at a time.
    for axis in (0, 1):
        first = bnddtls[0, axis]
        second = bnddtls[1, axis]
        bnddtls[2][axis] = (first + second) / 2.0
        bnddtls[3][axis] = torch.abs(first - second)
    return bnddtls

In [0]:
def find_bnddetails(bndvalues):
    """Read one bounding box from its xml element.

    Args:
    bndvalues: xml element exposing integer-valued ``xmin``, ``ymin``,
               ``xmax`` and ``ymax`` children.

    Returns:
    array of shape (4, 2) with rows (xmin, ymin), (xmax, ymax),
    (centre x, centre y) and (width, height).
    """
    xmin, ymin, xmax, ymax = (
        int(bndvalues.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')
    )
    corners = [xmin, ymin, xmax, ymax]
    centre = [(xmin + xmax) / 2, (ymin + ymax) / 2]
    extent = [np.abs(xmax - xmin), np.abs(ymin - ymax)]
    # Flatten first, then fold into (x, y) pairs.
    return np.array(corners + centre + extent).reshape(-1, 2)

In [0]:
def HeatMap(bnddtls,filesize):
    """
    Generate a quarter-resolution heat-map holding one Gaussian patch.

    Args:
    bnddtls: tensor of box details indexed as [row][column]; row 2 is read as
             the (x, y) centre after the box has been rescaled by 4 through
             New_bnddtls (which updates the tensor it is given).
    filesize: (height, width) of the full-resolution image.

    Returns:
    tensor of shape (filesize[0]/4, filesize[1]/4) that is zero except for an
    8x8 Gaussian patch whose top-left corner is placed at the scaled box
    centre, moved inwards when it would otherwise leave the map.
    """
    size = 8

    def _patch_start(centre, limit):
        # Keep the whole size x size patch inside [0, limit): pull it back
        # from the far edge, and never let the start go negative (a negative
        # start would wrap around and make the slice shape mismatch).
        if centre + size > limit - 1:
            start = limit - 1 - size
        else:
            start = int(centre)
        return max(start, 0)

    # Tensor.float() returns the converted tensor instead of converting in
    # place, so its result has to be the value that is used from here on.
    bnddtls = New_bnddtls(bnddtls.float(), 4.0)
    img_heatmap = torch.zeros(int(filesize[0]/4), int(filesize[1]/4))

    # Separable 1-D Gaussian -> 2-D patch via the outer product, scaled by 100.
    kernel = cv2.getGaussianKernel(size, 8)
    kernel = np.dot(kernel, kernel.T)
    kernel *= 100

    y_begin = _patch_start(bnddtls[2][1].item(), img_heatmap.shape[0])
    x_begin = _patch_start(bnddtls[2][0].item(), img_heatmap.shape[1])
    y_end = y_begin + size
    x_end = x_begin + size
    img_heatmap[y_begin : y_end, x_begin : x_end] = torch.from_numpy(kernel)
    return img_heatmap

In [0]:
class ToTensor(object):
    """Turn every entry of a sample dict into a tensor.

    The 'image' entry goes through ``F.to_tensor``; every other entry is
    wrapped in a ``torch.FloatTensor``.
    """

    def __call__(self, sample):
        return {
            name: (F.to_tensor(value) if name == 'image'
                   else torch.FloatTensor(value))
            for name, value in sample.items()
        }

In [0]:
class Rescale(object):
    """
    Resize the image of a sample and scale its bounding boxes to match.

    Args:
    output_size (int or tuple): an int is used as the length of the shorter
    image side (aspect ratio kept); a tuple is taken as (height, width).
    """
    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size

    def __call__(self, sample):
        picture = sample['image']
        w, h = picture.size
        target = self.output_size

        if not isinstance(target, int):
            new_h, new_w = target
        elif h > w:
            new_h, new_w = target * h / w, target
        else:
            new_h, new_w = target, target * w / h
        new_h, new_w = int(new_h), int(new_w)

        resized = dict()
        for key, value in sample.items():
            if key == 'image':
                resized[key] = F.resize(picture, (new_h, new_w))
                continue

            # Every other entry is a collection of boxes, updated in place.
            for box in value:
                factor = np.array([new_w / w, new_h / h])
                box[0] = np.round(box[0] * factor, 0)
                box[1] = np.round(box[1] * factor, 0)
                # Extent first: the centre below is derived from it.
                box[3] = np.abs([box[0, 0] - box[1, 0], box[0, 1] - box[1, 1]])
                box[2] = np.array([box[0, 0] + box[3, 0] / 2,
                                   box[0, 1] + box[3, 1] / 2])
            resized[key] = value

        return resized

In [0]:
class RandomVerticalFlip(object):
    """
    Flip a sample upside down with a given probability.

    The image is flipped with ``F.vflip`` and the y coordinates of every box
    with a strictly positive centre are mirrored in place.

    Args:
    p (float): probability of the sample being flipped. Default value is 0.5
    """

    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, sample):
        picture = sample['image']
        _, height = picture.size

        flip = random.random() < self.p
        if not flip:
            # Untouched samples are handed back as the very same object.
            return sample

        mirror = height - 1
        flipped = dict()
        for key, value in sample.items():
            if key == 'image':
                flipped[key] = F.vflip(picture)
                continue

            for box in value:
                # Boxes whose centre is not strictly positive are skipped.
                if box[2][0] > 0 and box[2][1] > 0:
                    box[0][1] = mirror - box[0][1] - box[3][1]
                    box[1][1] = mirror - box[1][1] + box[3][1]
                    box[2][1] = mirror - box[2][1]
            flipped[key] = value
        return flipped

In [0]:
class RandomHorizontalFlip(object):
    """
    Mirror a sample left-to-right with a given probability.

    The image is flipped with ``F.hflip`` and the x coordinates of every box
    with a strictly positive centre are mirrored in place.

    Args:
    p (float): probability of the sample being flipped. Default value is 0.5
    """

    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, sample):
        picture = sample['image']
        width, _ = picture.size

        flip = random.random() < self.p
        if not flip:
            # Untouched samples are handed back as the very same object.
            return sample

        mirror = width - 1
        flipped = dict()
        for key, value in sample.items():
            if key == 'image':
                flipped[key] = F.hflip(picture)
                continue

            for box in value:
                # Boxes whose centre is not strictly positive are skipped.
                if box[2][0] > 0 and box[2][1] > 0:
                    box[0][0] = mirror - box[0][0] - box[3][0]
                    box[1][0] = mirror - box[1][0] + box[3][0]
                    box[2][0] = mirror - box[2][0]
            flipped[key] = value
        return flipped

In [0]:
#Normalizing the dataset
class Normalize(object):
    """Normalize the image of a sample with the given mean and std.

    Args:
    mean: mean passed to ``F.normalize`` for the image.
    std: standard deviation passed to ``F.normalize`` for the image.
    """
    def __init__(self, mean, std):
        self.mean, self.std = mean, std

    def __call__(self, sample):
        """
        Return a new dict whose 'image' entry is normalized; every other
        entry is carried over unchanged.
        """
        return {
            key: (F.normalize(value, self.mean, self.std) if key == 'image'
                  else value)
            for key, value in sample.items()
        }

In [0]:
class Lambda(object):
    """Wrap an arbitrary callable so it can be used as a transform.

    Args:
    lambd (function): callable invoked with the input when the transform runs.
    """

    def __init__(self, lambd):
        complaint = repr(type(lambd).__name__) + " object is not callable"
        assert callable(lambd), complaint
        self.lambd = lambd

    def __call__(self, img):
        wrapped = self.lambd
        return wrapped(img)

    def __repr__(self):
        class_name = self.__class__.__name__
        return class_name + '()'

In [0]:
class ColorJitter(object):
    """Randomly perturb brightness, contrast, saturation and hue of the image
    stored under the 'image' key of a sample.

    Args:
        brightness (float or tuple of float (min, max)): jitter strength.
            A single number b gives a factor drawn uniformly from
            [max(0, 1 - b), 1 + b]; a pair is used as [min, max] directly.
            Must be non negative.
        contrast (float or tuple of float (min, max)): same convention as
            brightness, applied to contrast.
        saturation (float or tuple of float (min, max)): same convention as
            brightness, applied to saturation.
        hue (float or tuple of float (min, max)): a single number h gives a
            factor drawn uniformly from [-h, h]; a pair is used as [min, max]
            and must satisfy -0.5 <= min <= max <= 0.5.
    """

    def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
        check = self._check_input
        self.brightness = check(brightness, 'brightness')
        self.contrast = check(contrast, 'contrast')
        self.saturation = check(saturation, 'saturation')
        self.hue = check(hue, 'hue', center=0, bound=(-0.5, 0.5),
                         clip_first_on_zero=False)

    def _check_input(self, value, name, center=1, bound=(0, float('inf')), clip_first_on_zero=True):
        """Validate one jitter argument and turn it into a [min, max] range.

        Returns None when the range collapses onto ``center`` (no jitter).
        """
        if isinstance(value, numbers.Number):
            if value < 0:
                raise ValueError(
                    "If {} is a single number, it must be non negative.".format(name))
            low, high = center - value, center + value
            if clip_first_on_zero:
                low = max(low, 0)
            value = [low, high]
        elif isinstance(value, (tuple, list)) and len(value) == 2:
            inside = bound[0] <= value[0] <= value[1] <= bound[1]
            if not inside:
                raise ValueError(
                    "{} values should be between {}".format(name, bound))
        else:
            raise TypeError(
                "{} should be a single number or a list/tuple with lenght 2.".format(name))

        # A range such as 0, (1., 1.) or, for hue, (0., 0.) means "leave the
        # image alone", which is signalled with None.
        if value[0] == value[1] == center:
            return None
        return value

    @staticmethod
    def get_params(brightness, contrast, saturation, hue):
        """Build one randomized colour transform.

        Arguments are the validated ranges stored by __init__ (or None).

        Returns:
            Composition of the enabled adjustments, each with a freshly
            sampled factor, applied in a random order.
        """
        def sampled(bounds, adjust):
            # Draw the factor now; the returned transform reuses it.
            factor = random.uniform(bounds[0], bounds[1])
            return Lambda(lambda img: adjust(img, factor))

        tforms = []
        if brightness is not None:
            tforms.append(sampled(brightness, F.adjust_brightness))
        if contrast is not None:
            tforms.append(sampled(contrast, F.adjust_contrast))
        if saturation is not None:
            tforms.append(sampled(saturation, F.adjust_saturation))
        if hue is not None:
            tforms.append(sampled(hue, F.adjust_hue))

        random.shuffle(tforms)
        return transforms.Compose(tforms)

    def __call__(self, sample):
        """
        Args:
            sample (dict): mapping with the input image under 'image' plus
                any other entries (e.g. bounding boxes).

        Returns:
            dict: colour-jittered image and the other entries unchanged.
        """
        jittered = dict()
        for key, value in sample.items():
            if key != 'image':
                jittered[key] = value
                continue
            recipe = self.get_params(self.brightness, self.contrast,
                                     self.saturation, self.hue)
            jittered[key] = recipe(value)
        return jittered



In [0]:
class RobotDataset(Dataset):
    """Customized dataset used to train the model.

    Every ``.jpg``/``.jpeg`` image in ``root_dir`` is paired with the ``.xml``
    annotation file that has the same base name.

    Args:
      root_dir: path where all the training files are saved.
      transform: a single transform or a list/tuple of transforms, each
                 called with the sample dict and returning a new sample.

    Attributes:
      filenames: sorted names of all image files found in root_dir.
    """
    # Heat-map channel that each annotated label is accumulated into.
    _LABEL_CHANNELS = {'Head': 0, 'Foot': 1, 'Trunk': 2, 'Hand': 3}

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # index -> file mapping stable between runs and machines.
        self.filenames = sorted(
            name for name in os.listdir(root_dir)
            if name.endswith(('.jpg', '.jpeg'))
        )

    def __len__(self):
        """Length of the dataset"""
        return len(self.filenames)

    @staticmethod
    def _annotation_path(image_path):
        """Return the .xml path sharing the base name of ``image_path``."""
        # splitext only touches the final extension, so directory or file
        # names that merely contain '.jpg' are left intact.
        return os.path.splitext(image_path)[0] + '.xml'

    def __getitem__(self, ind):
        """Return {'image': transformed image, 'heatmap': 4-channel target}.

        The heat-map has one channel per label in _LABEL_CHANNELS at a quarter
        of the transformed image resolution. The transform pipeline is
        expected to end with the image as a tensor (its ``.shape`` is read)
        and 'dtls' indexable per object.
        """
        filepath = os.path.join(self.root_dir, self.filenames[ind])
        image = Image.open(filepath)
        xml_data = ET.parse(self._annotation_path(filepath)).getroot()

        # Scan the annotation once so boxes and labels stay index-aligned.
        objects = xml_data.findall('object')
        all_dtls = [find_bnddetails(group.find('bndbox')) for group in objects]

        sample = {'image': image, 'dtls': all_dtls}
        if self.transform:
            # Accept a single callable as well as a list/tuple, without
            # rewriting the attribute the caller configured.
            if isinstance(self.transform, (list, tuple)):
                pipeline = self.transform
            else:
                pipeline = [self.transform]
            for step in pipeline:
                sample = step(sample)

        size = sample['image'].shape
        heatmap_list = torch.zeros([4, int(size[1]/4), int(size[2]/4)])
        for group, dtls in zip(objects, sample['dtls']):
            channel = self._LABEL_CHANNELS.get(group.find('name').text)
            if channel is None:
                # Labels outside the four known parts contribute nothing.
                continue
            heatmap_list[channel] += HeatMap(dtls, (size[1], size[2]))

        dataset = {'image': sample['image'], 'heatmap': heatmap_list}
        return dataset

In [0]:
class TestDataset(Dataset):

    """Customized dataset used for testing the model.

    Every ``.jpg`` image in ``root_dir`` is paired with the ``.xml``
    annotation file that has the same base name. Boxes are returned in
    fixed-size, zero-padded arrays per label.

    Args:
        root_dir: path where all the test files are saved.
        transform: a single transform or a list/tuple of transforms, each
                   called with the sample dict and returning a new sample.

    Attributes:
        filenames: sorted names of all image files found in root_dir.
    """
    # label -> (sample key, maximum number of boxes kept for that label);
    # the order here fixes the key order of the returned sample.
    _SLOTS = {
        'Head': ('head_dtls', 3),
        'Trunk': ('trunk_dtls', 3),
        'Hand': ('hands_dtls', 6),
        'Foot': ('foot_dtls', 6),
    }

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # index -> file mapping stable between runs and machines.
        self.filenames = sorted(
            name for name in os.listdir(root_dir) if name.endswith(".jpg")
        )

    def __len__(self):
        """Length of the dataset"""
        return len(self.filenames)

    @staticmethod
    def _annotation_path(image_path):
        """Return the .xml path sharing the base name of ``image_path``."""
        # Replacing the substring 'jpg' would also rewrite directory or file
        # names that merely contain it; only the final extension may change.
        return os.path.splitext(image_path)[0] + '.xml'

    def __getitem__(self, ind):
        """Return the (optionally transformed) sample for image ``ind``.

        The sample holds 'image' plus one (capacity, 4, 2) array per label;
        unused rows stay zero and boxes beyond the capacity are dropped.
        """
        filepath = os.path.join(self.root_dir, self.filenames[ind])
        image = Image.open(filepath)
        xml_data = ET.parse(self._annotation_path(filepath)).getroot()

        boxes = {key: np.zeros((capacity, 4, 2))
                 for key, capacity in self._SLOTS.values()}
        filled = dict.fromkeys(boxes, 0)
        for group in xml_data.findall('object'):
            slot = self._SLOTS.get(group.find('name').text)
            if slot is None:
                continue
            key, capacity = slot
            if filled[key] < capacity:
                boxes[key][filled[key]] = find_bnddetails(group.find('bndbox'))
                filled[key] += 1

        sample = {'image': image}
        sample.update(boxes)
        if self.transform:
            # Accept a single callable as well as a list/tuple, without
            # rewriting the attribute the caller configured.
            if isinstance(self.transform, (list, tuple)):
                pipeline = self.transform
            else:
                pipeline = [self.transform]
            for step in pipeline:
                sample = step(sample)

        return sample


In [1]:
# Mount Google Drive at /content/drive so the dataset folders referenced by
# the later cells are reachable. Colab-only: the import comes from
# google.colab and mounting asks for an authorization code interactively.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········


ValueError: ignored

In [0]:
# Training pipeline: resize to 480x640, random flips and colour jitter on the
# PIL image, then tensor conversion and per-channel normalization.
transforms1 = [
    Rescale((480,640)),
    RandomHorizontalFlip(),
    RandomVerticalFlip(),
    ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
    ToTensor(),
    Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
]

# Test pipeline: same resize and normalization, no random augmentation.
transforms2 = [
    Rescale((480,640)),
    ToTensor(),
    Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
]

train_dataset = RobotDataset(
    root_dir = '/content/drive/My Drive/dataset/blob/forceTrain',
    transform=transforms1,
)
test_dataset = TestDataset(
    root_dir = '/content/drive/My Drive/dataset/blob/forceTest',
    transform=transforms2,
)

NameError: ignored