# Setup

In [2]:
!pip install "opencv-python-headless<4.3"
!pip install -U git+https://github.com/albumentations-team/albumentations

Collecting opencv-python-headless<4.3
  Downloading opencv_python_headless-4.2.0.34-cp37-cp37m-manylinux1_x86_64.whl (21.6 MB)
[K     |████████████████████████████████| 21.6 MB 1.6 MB/s 
Installing collected packages: opencv-python-headless
Successfully installed opencv-python-headless-4.2.0.34
Collecting git+https://github.com/albumentations-team/albumentations
  Cloning https://github.com/albumentations-team/albumentations to /tmp/pip-req-build-yj1sx9jl
  Running command git clone -q https://github.com/albumentations-team/albumentations /tmp/pip-req-build-yj1sx9jl
Collecting qudida>=0.0.4
  Downloading qudida-0.0.4-py3-none-any.whl (3.5 kB)
Building wheels for collected packages: albumentations
  Building wheel for albumentations (setup.py) ... [?25l[?25hdone
  Created wheel for albumentations: filename=albumentations-1.1.0-py3-none-any.whl size=112723 sha256=cf468a21148da3e933f820adc388a83fb768abd66e85cb88af90f6638dd8aa8b
  Stored in directory: /tmp/pip-ephem-wheel-cache-ip44oa9j

In [3]:
import pandas as pd
import os
import re
import csv
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import albumentations

In [None]:
%pwd
os.mkdir("train")
%cd train
os.mkdir("Images")
os.mkdir("Labels")
%cd ../
%pwd
os.mkdir("test")
%cd test
os.mkdir("Images")
os.mkdir("Labels")

/content/train
/content
/content/test


In [None]:
%cd ../

/content


In [None]:
##Download Zip File
!gdown https://thor.robots.ox.ac.uk/~vgg/data/MoCA/MoCA.zip
##Unzip Zip File
!unzip MoCA.zip

# Dataset Handler Class

In [None]:
class MOCA:
  def __init__(self):
    """
    Intializes video_count and classes dictionary for further use.
    """
    # Define folder path for MoCA Images
    path = "/content/MoCA/JPEGImages/"
    # Store list of directories
    lst = os.listdir(path)
    # Set to store classes
    self.classes = dict()
    # Dictionary to store the count of each video each class and respective folders.
    self.video_count = {}
    for folder in lst:
        # Remove _ & digits from folder names.
        temp = folder.replace("_"," ")
        result = re.sub(r'[0-9]+', '', temp)
        result = result.strip()
        result= result.capitalize()
        # Particular class does not exist yet
        if self.video_count.get(result,-1)== -1:
            self.video_count[result] = {"count":1,"folders":["/"+folder]}
            self.classes[result] = len(self.classes)
        # Class already exists.
        else:
            self.video_count[result]["count"]+=1
            self.video_count[result]["folders"].append("/"+folder)
    print("The MoCA Dataset containts {s} classes.".format(s=len(self.classes)))
  def moca_train_test_split(self,X):
      """
      Train Test split for one particular class.
      X can either be either folders or X can be video frames.
      """
      y = [i for i in range(len(X))]
      X_train, X_test, _, _ = train_test_split( X, y, test_size=0.2, random_state=42)
      return X_train,X_test

  def train_test_split(self):
    """
    Creates train & test split for the entire dataset.
    """
    self.training_images = []
    self.test_images = []
    path = "/content/MoCA/JPEGImages"
    for key,value in self.video_count.items():
      if value["count"] == 1:
        images = [value["folders"][0]+"/"+img for img in os.listdir(path+value["folders"][0])]
        X_train,X_test = self.moca_train_test_split(images)
        self.training_images.extend(X_train) 
        self.test_images.extend(X_test)
      else:
        X_train,X_test = self.moca_train_test_split(value["folders"])
        X_train_img = [ X_train[i]+"/"+img for i in range(len(X_train)) for img in os.listdir(path+X_train[i]) ]
        X_test_img = [ X_test[i]+"/"+img for i in range(len(X_test)) for img in os.listdir(path+X_test[i]) ]
        self.training_images.extend(X_train_img) 
        self.test_images.extend(X_test_img)
    print(len(self.training_images),len(self.test_images))
  def create_annotations(self):
    """
    Create annotations.
    """
    path = "/content/MoCA/JPEGImages"
    self.df = pd.read_csv("/content/MoCA/Annotations/annotations.csv",skiprows=9)
    
    self.df['spatial_coordinates'] = self.df['spatial_coordinates'].str.replace('[','')
    self.df['spatial_coordinates'] = self.df['spatial_coordinates'].str.replace(']','')
    self.df['x_min'] = self.df['spatial_coordinates'].str.split(',').str[1].astype(float)
    self.df['y_min'] = self.df['spatial_coordinates'].str.split(',').str[2].astype(float)
    self.df['width'] = self.df['spatial_coordinates'].str.split(',').str[3].astype(float)
    self.df['height'] = self.df['spatial_coordinates'].str.split(',').str[4].astype(float)
    self.df['x_max'] = self.df["x_min"]+self.df["width"]
    self.df['y_max'] = self.df["y_min"]+self.df["height"]


    data = self.df[['file_list','x_min','y_min','x_max','y_max']].copy()
    values = data.values
    save = []
    for val in values:
        if val[0] in self.training_images:
            subset="Train"
            temp = val[0].replace("_"," ")
            result = re.sub(r'[0-9]+', '', temp)
            result = result.strip()
            s= result.capitalize()
            idx =self.classes[s.split("/")[1].strip().capitalize()]
            img = plt.imread(path+val[0])
            h,w,_ = img.shape
            val[1] = max(0,val[1])
            val[2] = max(0,val[2])
            val[3] = min(w,val[3])
            val[4] = min(h,val[4])
            
            save.append(["MoCA",subset,path+val[0],val[1],val[2],val[3],val[4],idx])

        else:
            subset="Test"
            temp = val[0].replace("_"," ")
            result = re.sub(r'[0-9]+', '', temp)
            result = result.strip()
            s= result.capitalize()
            idx =self.classes[s.split("/")[1].strip().capitalize()]
            img = plt.imread(path+val[0])
            h,w,_ = img.shape
            val[1] = max(0,val[1])
            val[2] = max(0,val[2])
            val[3] = min(w,val[3])
            val[4] = min(h,val[4])
            

            #print(s.split("/")[1].strip().capitalize())
            save.append(["MoCA",subset,path+val[0],val[1],val[2],val[3],val[4],idx])
    print("Number of annotated images {}".format(len(save)))
        
    header = ['Dataset','Subset','Path','x_min','y_min','x_max','y_max','label']
    with open('MoCA.csv', 'w', encoding='UTF8', newline='') as f:
        writer = csv.writer(f)

        # write the header
        writer.writerow(header)

        # write multiple rows
        writer.writerows(save)
  def resize_image(self,img_arr, bboxes, h, w):
    """
    :param img_arr: original image as a numpy array
    :param bboxes: bboxes as numpy array where each row is 'x_min', 'y_min', 'x_max', 'y_max', "class_id"
    :param h: resized height dimension of image
    :param w: resized weight dimension of image
    :return: dictionary containing {image:transformed, bboxes:['x_min', 'y_min', 'x_max', 'y_max', "class_id"]}
    """
    # create resize transform pipeline
    transform = albumentations.Compose(
        [albumentations.Resize(height=h, width=w, always_apply=True)],
        bbox_params=albumentations.BboxParams(format='pascal_voc'))

    transformed = transform(image=img_arr, bboxes=bboxes)

    return transformed
  def resize_all(self):
    data = pd.read_csv("MoCA.csv")
    values = data[["Subset","Path","x_min","y_min","x_max","y_max","label"]].values
    train_image ="/content/train/Images/"
    train_labels ="/content/train/Labels/"
    test_image ="/content/test/Images/"
    test_labels ="/content/test/Labels/"
    for i,val in enumerate(values):
      print("i : {}".format(i))
      sample = Image.open(val[1])
      sample_arr = np.asarray(sample)
      bboxes_og = np.array([val[2:]])
      # bboxes_og[0] = max(0,bboxes_og[0])
      # bboxes_og[1] = max(0,bboxes_og[1])
      # bboxes_og[2] = min(1,bboxes_og[2])
      # bboxes_og[3] = min(1,bboxes_og[3])
      print(bboxes_og)
      transformed_dict = self.resize_image(sample_arr, bboxes_og, 416, 416)

      transformed_arr = transformed_dict["image"]
      transformed_info = np.array(list(map(list, transformed_dict["bboxes"]))).astype(float)
      new_bbox = list(transformed_info[0])
      label = self.pascal_voc_to_yolo(new_bbox)
      if val[0] == "Train":
          img = Image.fromarray(transformed_arr)
          img.save(train_image+"{}.jpg".format(i))
          with open(os.path.join(train_labels+"{}.txt".format(i)),"w") as f:
            f.write(label)
          

      elif val[0] == "Test":
          img = Image.fromarray(transformed_arr)
          img.save(test_image+"{}.jpg".format(i))
          with open(os.path.join(test_labels+"{}.txt".format(i)),"w") as f:
            f.write(label)
  def pascal_voc_to_yolo(self,new_bbox): # xmin, ymin, xmax,ymax,  xmin,ymin,w,h
    x1,y1 = new_bbox[0],new_bbox[1]
    x2,y2 = new_bbox[2],new_bbox[3]
    label = new_bbox[4]
    image_w,image_h = 416,416
    w = x2 - x1
    h = y2 - y1
    b_center_x = (x1+x2) / (2*image_w) 
    b_center_y = (y1+y2) / (2*image_h)
    b_width    = w/image_w
    b_height   = h/image_h
        
    return ("{} {:.3f} {:.3f} {:.3f} {:.3f}".format(label,b_center_x, b_center_y, b_width, b_height))


    


In [None]:
m = MOCA()
m.train_test_split()
m.create_annotations()
m.resize_all()


The MoCA Dataset containts 67 classes.
28210 9040




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
i : 5117
[[191.624 143.718 548.102 507.241 32]]
i : 5118
[[187.397 152.172 552.329 538.239 32]]
i : 5119
[[170.489 145.127 525.558 526.967 32]]
i : 5120
[[115.538 126.81 521.331 522.74 32]]
i : 5121
[[84.54 111.311 511.468 512.877 32]]
i : 5122
[[53.542 104.266 508.65 517.104 32]]
i : 5123
[[69.041 128.219 507.241 511.468 32]]
i : 5124
[[499.081 359.596 807.883 536.1700000000001 32]]
i : 5125
[[500.692 313.639 808.69 509.563 32]]
i : 5126
[[514.399 293.483 813.5260000000001 493.438 32]]
i : 5127
[[514.401 294.289 810.3019999999999 495.857 32]]
i : 5128
[[512.788 290.258 812.721 490.213 32]]
i : 5129
[[512.788 231.4 812.721 462.799 32]]
i : 5130
[[514.401 207.212 811.915 457.962 32]]
i : 5131
[[514.401 215.274 814.334 456.349 32]]
i : 5132
[[513.594 198.343 811.9150000000001 456.34899999999993 32]]
i : 5133
[[503.113 170.123 811.108 460.381 32]]
i : 5134
[[491.825 176.574 809.496 461.187 32]]
i : 5135
[[488.6 180.605 811.1

# Export Dataset

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!zip -r /content/train.zip /content/train
!zip -r /content/test.zip /content/test

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  adding: content/train/Images/6292.jpg (deflated 0%)
  adding: content/train/Images/4743.jpg (deflated 0%)
  adding: content/train/Images/7513.jpg (deflated 0%)
  adding: content/train/Images/3454.jpg (deflated 2%)
  adding: content/train/Images/2876.jpg (deflated 0%)
  adding: content/train/Images/3230.jpg (deflated 2%)
  adding: content/train/Images/863.jpg (deflated 0%)
  adding: content/train/Images/289.jpg (deflated 1%)
  adding: content/train/Images/4327.jpg (deflated 0%)
  adding: content/train/Images/230.jpg (deflated 1%)
  adding: content/train/Images/535.jpg (deflated 0%)
  adding: content/train/Images/6391.jpg (deflated 0%)
  adding: content/train/Images/5419.jpg (deflated 0%)
  adding: content/train/Images/5610.jpg (deflated 3%)
  adding: content/train/Images/4575.jpg (deflated 1%)
  adding: content/train/Images/6904.jpg (deflated 1%)
  adding: content/train/Images/6701.jpg (deflated 0%)
  adding: content/tra

In [None]:
%cp /content/train.zip /content/drive/MyDrive/MoCA
%cp /content/test.zip /content/drive/MyDrive/MoCA