In [None]:
# Mount Google Drive under /content/gdrive so later cells can reach the project folder.
# This must be run within a Google Colab environment
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
# List the current working directory; a `gdrive` entry shows that the mount above succeeded.
!ls

gdrive	sample_data


In [None]:
import sys
import os

# Add the project folder on Drive to the module search path.
sys.path.append('/content/gdrive/My Drive/EE6885/')

# Work from the project folder so relative paths such as "adnet_datasets/OTB/*" resolve.
os.chdir("/content/gdrive/My Drive/EE6885/")

In [None]:
# List the sequence folders available under adnet_datasets/OTB.
!ls adnet_datasets/OTB

Basketball  Bolt  CarDark   Crossing  Dudek	Human6	 Man	       RedTeam
Bird2	    Box   CarScale  Crowds    FaceOcc2	Jumping  Matrix        Skater
BlurCar3    Boy   ClifBar   Deer      Fish	Lemming  MotorRolling  Skating1
Board	    Car4  Coupon    Doll      Human2	Liquor	 Panda


In [None]:
import numpy as np
import tensorflow as tf

import cv2
import glob
import random
import re

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import matplotlib.patches as patches

from typing import Tuple

import scipy.io as sio
import random
from random import choices


# Report how many GPU devices TensorFlow can see in this runtime.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [None]:
class ADNET(tf.keras.Model):
    '''
    Action-prediction network operating on 112x112x3 image patches.

    Three convolution / max-pooling stages feed three convolutional
    "fully connected" layers. The fc2 output is concatenated along the
    channel axis with an externally supplied action-history tensor before
    fc3 produces a softmax over 11 actions.
    '''

    def __init__(self):
        super(ADNET, self).__init__()

        # Placeholder only: build() / setActionHistory() replace it with the
        # tensor that call() actually concatenates.
        self.action_history = tf.keras.layers.Input(shape = (1,1,110))

        self.conv1 = tf.keras.layers.Conv2D(filters = 96, kernel_size = (7, 7), strides = (2, 2), padding = 'VALID', activation = 'relu', name = 'conv_1')
        self.max1  = tf.keras.layers.MaxPooling2D(pool_size=(3,3), strides = (1, 1), padding = 'VALID')
        self.conv2 = tf.keras.layers.Conv2D(filters = 256, kernel_size = (5, 5), strides = (2, 2), padding = 'VALID', activation = 'relu', name = 'conv_2')
        self.max2  = tf.keras.layers.MaxPool2D(pool_size=(3, 3), strides = (2, 2), padding = 'VALID')
        self.conv3 = tf.keras.layers.Conv2D(filters = 512, kernel_size = (3, 3), strides = (2, 2), padding = 'VALID', activation = 'relu', name = 'conv_3')
        self.max3  = tf.keras.layers.MaxPool2D(pool_size=(3, 3), strides = (1, 1), padding = 'VALID')

        # "Fully connected" layers implemented as convolutions.
        self.fc1 = tf.keras.layers.Conv2D(filters = 512, kernel_size = (3, 3), padding = 'VALID', activation = 'relu', name = 'fc1')
        self.fc2 = tf.keras.layers.Conv2D(filters = 512, kernel_size = (1,1), padding = 'VALID', activation = 'relu', name = 'fc2')
        self.fc3 = tf.keras.layers.Conv2D(filters = 11, kernel_size = (1,1), padding = 'VALID', name = 'fc3',activation="softmax")

    def build(self, action_history):
        '''
        Build the layers for a fixed (None, 112, 112, 3) image input and store
        the action history.

        Note: unlike tf.keras.Model.build, the argument is NOT an input shape;
        it is the action-history tensor that call() concatenates with the fc2
        output.
        '''
        super(ADNET, self).build((None, 112, 112, 3))
        self.action_history=action_history

    def setActionHistory(self, action_history):
        '''Replace the action-history tensor used by subsequent call()s.'''
        self.action_history=action_history

    def call(self, input_tensor, training=False):
        '''
        Forward pass.

        Args:
            input_tensor: batch of image patches; build() assumes
                (batch, 112, 112, 3).
            training: accepted for Keras compatibility; not used by any layer
                here.

        Returns:
            The softmax output of fc3 (11 channels).
        '''
        x = self.conv1(input_tensor)
        x = self.max1(x)
        x = self.conv2(x)
        x = self.max2(x)
        x = self.conv3(x)
        x = self.max3(x)

        x = self.fc1(x)
        x = self.fc2(x)
        # The stored action history must be concatenable with x on every axis
        # except the last one (so its batch size has to match the input's).
        x = tf.keras.layers.Concatenate(axis=-1)([x, self.action_history])
        action = self.fc3(x)
        return action

    def compile(self, optimizer):
        '''
        Compile with sparse categorical cross-entropy on probabilities
        (from_logits=False, because fc3 already applies a softmax).
        '''
        super().compile(optimizer, loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False))

    def debugModelSummary(self):
        '''
        call model.debugModelSummary().summary() to get around the inconvenience
        from model.summary() returning 'multiple' for each layer's output shape
        '''
        dummyInput = tf.keras.layers.Input(shape = (112,112,3))
        return tf.keras.Model(inputs=[dummyInput], outputs = self.call(dummyInput))

In [None]:
class ADNET_v2(tf.keras.Model):
    '''
    Variant of ADNET that uses an ImageNet-pretrained ResNet50V2 backbone.

    The max-pooled backbone features pass through two dense layers, are
    concatenated with an externally supplied action-history tensor, and a
    final dense layer produces a softmax over 11 actions.
    '''

    def __init__(self):
        super(ADNET_v2, self).__init__()

        # Placeholder only: build() / setActionHistory() replace it with the
        # tensor that call() actually concatenates. The shape is written as a
        # one-element tuple; the original `(110)` was a bare int.
        self.action_history = tf.keras.layers.Input(shape = (110,))

        # Constructing the backbone loads the "imagenet" weights.
        self.resnet=tf.keras.applications.ResNet50V2(include_top=False,weights="imagenet",input_shape=(112,112,3),pooling="max")

        self.fc1 = tf.keras.layers.Dense(256,kernel_initializer='glorot_uniform',activation = 'relu', name = 'fc1')
        self.fc2 = tf.keras.layers.Dense(128,kernel_initializer='glorot_uniform', activation = 'relu', name = 'fc2')
        self.fc3 = tf.keras.layers.Dense(11,kernel_initializer='glorot_uniform', name = 'fc3',activation="softmax")

    def build(self, action_history):
        '''
        Build the layers for a fixed (None, 112, 112, 3) image input, store
        the action history and freeze the ResNet backbone.

        Note: unlike tf.keras.Model.build, the argument is NOT an input shape;
        it is the action-history tensor that call() concatenates with the fc2
        output.
        '''
        super(ADNET_v2, self).build((None, 112, 112, 3))
        self.action_history=action_history
        self.resnet.trainable = False

    def setActionHistory(self, action_history):
        '''Replace the action-history tensor used by subsequent call()s.'''
        self.action_history=action_history

    def call(self, input_tensor, training=False):
        '''
        Forward pass.

        Args:
            input_tensor: batch of image patches; build() assumes
                (batch, 112, 112, 3).
            training: accepted for Keras compatibility; it is not forwarded to
                the backbone.

        Returns:
            The softmax output of fc3 (11 units).
        '''
        x = self.resnet(input_tensor)
        x = self.fc1(x)
        x = self.fc2(x)
        # The stored action history must be concatenable with x on every axis
        # except the last one (so its batch size has to match the input's).
        x = tf.keras.layers.Concatenate(axis=-1)([x, self.action_history])
        action = self.fc3(x)
        return action

    def compile(self, optimizer):
        '''
        Compile with sparse categorical cross-entropy on probabilities
        (from_logits=False, because fc3 already applies a softmax).
        '''
        super().compile(optimizer, loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False))

    def debugModelSummary(self):
        '''
        call model.debugModelSummary().summary() to get around the inconvenience
        from model.summary() returning 'multiple' for each layer's output shape
        '''
        dummyInput = tf.keras.layers.Input(shape = (112,112,3))
        return tf.keras.Model(inputs=[dummyInput], outputs = self.call(dummyInput))

In [None]:
def get_ground_truths(ground_truth_file: str) -> np.ndarray:
  '''
  Convert a ground-truth annotation file to a numpy array.

  Each non-blank line becomes one row holding the unsigned integers found on
  that line, so comma-, tab- and space-separated files are all accepted.
  Blank lines are skipped so that a trailing newline does not produce an
  empty (ragged) row.

  Args:
    ground_truth_file: path of the text file to parse.

  Returns:
    Integer array with one row per non-blank line.

  Raises:
    OSError: if the file cannot be opened.
  '''
  # Raw string: '\d' in a normal string literal is an invalid escape sequence.
  number_pattern = re.compile(r'\d+')
  with open(ground_truth_file) as f:
    truths = [np.array(number_pattern.findall(line), dtype=int)
              for line in f if line.strip()]
  return np.asarray(truths)

In [None]:
# MOVEMENT helper functions

ALPHA = 0.03 # Fraction of the box width/height used as the move step and scale delta. See p. 4 of the paper
STOP_ACTION_INDEX = 8 # Action index that leaves the bounding box unchanged (see selectAction / isStop)
MIN_WINDOW_SIZE = 10 # Lower bound applied by scale() to the box width and height

def calculate_IOU(bbox1: np.array, bbox2: np.array):
    '''
    Intersection-over-union of two axis-aligned boxes given as (x, y, w, h).

    Returns 0.0 when the boxes do not overlap (touching edges count as no
    overlap); otherwise the overlap area divided by the union area.
    '''
    left1, top1, width1, height1 = bbox1
    left2, top2, width2, height2 = bbox2

    # Corners of the overlapping rectangle, if there is one.
    overlap_left = max(left1, left2)
    overlap_top = max(top1, top2)
    overlap_right = min(left1 + width1, left2 + width2)
    overlap_bottom = min(top1 + height1, top2 + height2)

    no_overlap = overlap_left >= overlap_right or overlap_top >= overlap_bottom
    if no_overlap:
        return 0.0

    overlap_area = (overlap_right - overlap_left) * (overlap_bottom - overlap_top)
    union_area = width1 * height1 + width2 * height2 - overlap_area
    return overlap_area / float(union_area)


def move(img_shape: tuple, bbox: np.array, action: str,
         stride_magnitude: int=1) -> np.array:
  '''
  Returns the new bounding box after taking an action:
  {"left", "right", "up", "down"}. Use stride to indicate the step size.

  Args:
    img_shape: (height, width, ...) of the image the box lives in.
    bbox: box as (x, y, w, h).
    action: direction to move in.
    stride_magnitude: multiplier applied to the base step, which is
      max(1, int(ALPHA * size)) with size the box width (horizontal moves)
      or height (vertical moves).

  Returns:
    np.array([x, y, w, h]) with the moved coordinate clamped to
    [0, image_size - box_size - 1]; w and h are unchanged.

  Raises:
    RuntimeError: if action is not one of the four directions.
  '''
  if action not in ("left", "right", "up", "down"):
    raise RuntimeError("Invalid action taken :(")

  x, y, w, h = bbox
  if action in ("left", "right"):
    direction = -1 if action=="left" else 1
    step = max(1, int(ALPHA * w)) * stride_magnitude * direction
    # Clamp to the upper bound first and to zero last (same order as scale()),
    # so a box as wide as the image cannot end up at a negative x.
    x = max(0, min(int(x + step), int(img_shape[1] - w - 1)))
  else:
    direction = -1 if action=="up" else 1
    step = max(1, int(ALPHA * h)) * stride_magnitude * direction
    y = max(0, min(int(y + step), int(img_shape[0] - h - 1)))

  return np.array([x, y, w, h])

def scale(img_shape: tuple, bbox: np.array, scaleUp: bool):
  '''
  Grow (scaleUp=True) or shrink (scaleUp=False) a (x, y, w, h) box.

  Width and height change by max(2, ALPHA * size), are truncated to int and
  kept within [MIN_WINDOW_SIZE, image size]. The origin is shifted by half of
  the requested change in the opposite direction and then clamped to
  [0, image_size - new_size - 1].
  '''
  x, y, w, h = bbox
  img_height, img_width = img_shape[0], img_shape[1]

  sign = 1 if scaleUp else -1
  delta_w = sign * max(2, ALPHA * w)
  delta_h = sign * max(2, ALPHA * h)

  new_w = min(img_width, max(MIN_WINDOW_SIZE, int(w + delta_w)))
  new_h = min(img_height, max(MIN_WINDOW_SIZE, int(h + delta_h)))

  shifted_x = int(x - delta_w / 2)
  shifted_y = int(y - delta_h / 2)
  new_x = max(0, min(shifted_x, int(img_width - new_w - 1)))
  new_y = max(0, min(shifted_y, int(img_height - new_h - 1)))
  return np.array([new_x, new_y, new_w, new_h])


def selectAction(img_shape: tuple, bbox: np.array, index: int):
  '''
  Apply the action with the given index to a (x, y, w, h) box.

  Indices 0-7 translate the box (left, right, up, down; odd indices use a
  doubled stride), 8 leaves it untouched, 9 scales down and 10 scales up.
  Any other index returns the box unchanged.
  '''
  # (action index, direction, stride multiplier) for the translation actions.
  translations = (
      (0, "left", 1), (1, "left", 2),
      (2, "right", 1), (3, "right", 2),
      (4, "up", 1), (5, "up", 2),
      (6, "down", 1), (7, "down", 2),
  )
  for action_index, direction, stride in translations:
    if index == action_index:
      return move(img_shape, bbox, direction, stride_magnitude=stride)

  if index == 8:
    return bbox
  if index == 9:
    # Scale Down
    return scale(img_shape, bbox, False)
  if index == 10:
    # Scale Up
    return scale(img_shape, bbox, True)
  return bbox
def isStop(action: int):
  '''Tell whether the given action index is the stop action (STOP_ACTION_INDEX).'''
  is_stop_action = action == STOP_ACTION_INDEX
  return is_stop_action

In [None]:
def getPatch(img: np.array, bbox: np.array) -> tf.Tensor:
  '''
  Crop the (x, y, w, h) box out of img and resize the crop to 112x112.

  Returns a tensor reshaped to (112, 112, 3), so img is expected to have
  three channels.
  '''
  x, y, w, h = bbox
  crop = img[y : (y + h), x : (x + w)]
  # The resized patch is returned as produced by tf.image.resize; it is not
  # cast back to uint8.
  resized = tf.image.resize(crop, [112, 112])
  return tf.reshape(resized, (112, 112, 3))

In [None]:
def getFrame(f_path: str) -> np.array:
  '''
  Load an image file and return it in RGB channel order.

  Args:
    f_path: path of the image to read.

  Returns:
    The decoded image converted from OpenCV's BGR order to RGB.

  Raises:
    FileNotFoundError: if OpenCV cannot read the file (cv2.imread returns
      None instead of raising, which would otherwise surface as an obscure
      error inside cvtColor).
  '''
  img = cv2.imread(f_path)
  if img is None:
    raise FileNotFoundError("Could not read image file: %s" % f_path)
  return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

In [None]:
def generateBBox(original_bbox: np.array,number_of_sample: int ,img_shape: tuple,
                 max_attempts: int = None) -> list:
  '''
  Sample noisy copies of a (x, y, w, h) box that lie fully inside the image.

  Zero-mean Gaussian noise with standard deviation 0.05*w / 0.05*h on the
  position and 0.1*w / 0.1*h on the size is truncated to integers and added
  to the original box. Samples that leave the image or have a non-positive
  size are rejected and redrawn.

  Args:
    original_bbox: reference box as (x, y, w, h).
    number_of_sample: number of noisy boxes to generate; values <= 0 generate
      none.
    img_shape: (height, width, ...) of the image.
    max_attempts: maximum number of draws before giving up; defaults to
      1000 per requested sample.

  Returns:
    A list with number_of_sample accepted boxes (each [x, y, w, h]) followed
    by original_bbox itself as the last element.

  Raises:
    RuntimeError: if max_attempts draws did not yield enough valid boxes
      (for example when the original box has zero width or height, so no
      sample can ever be valid). Previously this case looped forever.
  '''
  _, _, base_w, base_h = original_bbox
  cov_matrix = np.diag([pow((0.05 * base_w), 2), pow((0.05 * base_h), 2),
                        pow((0.1 * base_w), 2), pow((0.1 * base_h), 2)])
  if max_attempts is None:
    max_attempts = 1000 * max(1, number_of_sample)

  generated_bboxes = []
  attempts = 0
  # `<` instead of `!=` so that a negative request terminates immediately.
  while len(generated_bboxes) < number_of_sample:
    if attempts >= max_attempts:
      raise RuntimeError(
          "generateBBox: only %d of %d valid boxes found after %d attempts for bbox %s"
          % (len(generated_bboxes), number_of_sample, attempts, list(original_bbox)))
    attempts += 1
    noise = np.random.multivariate_normal([0, 0, 0, 0], cov_matrix,1).astype(np.int64)[0]
    x, y, w, h = original_bbox + noise
    inside_image = (x >= 0 and y >= 0 and w > 0 and h > 0
                    and x + w <= img_shape[1] and y + h <= img_shape[0])
    if inside_image:
      generated_bboxes.append([x, y, w, h])
  generated_bboxes.append(original_bbox)
  return generated_bboxes

In [None]:
def generateActionLabels(original_bbox: np.array, generated_bboxes: np.array, img_shape: tuple) -> list:
  '''
  Pick, for every generated box, the action that best recovers original_bbox.

  Each of the 11 actions is applied to the generated box and scored by the
  IoU between the resulting box and original_bbox. An action other than
  index 8 that leaves the box unchanged scores 0. The label is the argmax
  over the 11 scores.
  '''
  def _score(candidate_bbox, action_index):
    moved_bbox = selectAction(img_shape, candidate_bbox, action_index)
    unchanged = np.sum(moved_bbox == candidate_bbox) == 4
    if unchanged and action_index != 8:
      return 0
    return calculate_IOU(moved_bbox, original_bbox)

  return [
      np.argmax([_score(candidate_bbox, action_index) for action_index in range(11)])
      for candidate_bbox in generated_bboxes
  ]

In [None]:
def training_generator(ALL_DATASETS_LIST: list, number_of_frames: int, K: int):
  '''
  Endless generator of (images, labels) training batches.

  For each batch, number_of_frames frames are drawn at random (with
  replacement) from one sequence. For every drawn frame, K noisy boxes plus
  the ground-truth box are generated, the matching patches are cropped from
  THAT frame and labelled with the best action. The examples are shuffled
  and the first 128 are yielded.

  Args:
    ALL_DATASETS_LIST: currently ignored; the dataset list is re-read from
      "adnet_datasets/OTB/*" on every iteration (see NOTE below).
    number_of_frames: number of frames drawn per batch.
    K: number of noisy boxes generated per frame.

  Yields:
    (images, labels): images reshaped to (-1, 112, 112, 3) and labels reshaped
    to (-1, 1), both truncated to at most 128 entries.
  '''
  # NOTE(review): only index 0 of the dataset listing is ever sampled; the
  # earlier (commented-out) version sampled from every dataset. Confirm
  # whether this restriction is still intended.
  dataset_list = [*range(0,1)]
  while (True):
    random_idxs = np.random.choice(dataset_list,1,replace=False)
    for rand_idx in random_idxs:
      # NOTE(review): this overwrites the argument with a fresh listing.
      ALL_DATASETS_LIST = glob.glob("adnet_datasets/OTB/*")
      d = ALL_DATASETS_LIST[rand_idx]
      gt = get_ground_truths("%s/groundtruth_rect.txt" % d)
      frames = sorted(glob.glob(os.path.join('%s/img' % d, '*.jpg')))
      if len(frames)!= 0:
        img_shape = cv2.imread(frames[0]).shape[:2]
        images = []
        labels = []
        for _ in range(number_of_frames):
          idx = random.randint(0, len(frames)- 1)
          original_bbox = gt[idx]
          # Decode the frame once and crop every generated box from it. The
          # previous code indexed the frame list with the per-box counter, so
          # patches were taken from unrelated frames.
          frame = getFrame(frames[idx])
          generated_bboxes = generateBBox(original_bbox, K, img_shape)
          selected_actions = generateActionLabels(original_bbox, generated_bboxes, img_shape)
          for generated_bbox, selected_action in zip(generated_bboxes, selected_actions):
            images.append(getPatch(frame, generated_bbox))
            labels.append(selected_action)
        dataset = list(zip(images, labels))
        random.shuffle(dataset)
        images, labels = zip(*dataset)
        images = tf.reshape(images, (-1,112,112,3))
        labels = tf.reshape(labels, (-1,1))
        yield images[:128], labels[:128]

In [None]:
# Build the supervised training examples (patch, action label) for one sequence.
ALL_DATASETS_LIST = glob.glob("adnet_datasets/OTB/*")
images = []
labels = []
# NOTE(review): the sequence is selected by its position in the (unsorted)
# glob listing; confirm the printed name is the one you expect.
for random_idx in range(14,15):
  d = ALL_DATASETS_LIST[random_idx]
  print("Generated Dataset {}".format(d))
  gt = get_ground_truths("%s/groundtruth_rect.txt" % d)
  frames = sorted(glob.glob(os.path.join('%s/img' % d, '*.jpg')))
  img_shape = cv2.imread(frames[0]).shape[:2]
  for i in range(len(frames)):
    # Decode each frame once instead of once per generated box.
    frame = getFrame(frames[i])
    generated_bboxes = generateBBox(gt[i],10,img_shape)
    selected_actions = generateActionLabels(gt[i], generated_bboxes, img_shape)
    for generated_bbox, selected_action in zip(generated_bboxes, selected_actions):
      images.append(getPatch(frame, generated_bbox))
      labels.append(selected_action)
# Shuffle patches and labels together so the pairs stay aligned.
temp = list(zip(images, labels))
random.shuffle(temp)
images, labels = zip(*temp)

Generated Dataset adnet_datasets/OTB/Deer


KeyboardInterrupt: ignored

In [None]:
# Each sequence is saved on its own, one at a time; Colab crashes otherwise.
# NOTE(review): the "Deer" file names are hard-coded; presumably they must
# match the sequence generated in the previous cell. Confirm before re-running.
np.save("/content/gdrive/MyDrive/sl-training/Deer",images)
np.save("/content/gdrive/MyDrive/sl-training/Deer-labels",labels)

In [None]:
# Load the saved per-sequence numpy arrays and collect them into two parallel lists.
# Every "<name>-labels.npy" file is paired explicitly with "<name>.npy" instead
# of relying on the position of each file in one sorted listing.
label_suffix = '-labels.npy'
label_paths = sorted(glob.glob(os.path.join('/content/gdrive/MyDrive/sl-training', '*' + label_suffix)))
images = []
labels = []
for label_path in label_paths:
  image_path = label_path[:-len(label_suffix)] + '.npy'
  print(image_path)
  print(label_path)
  images.append(np.load(image_path))
  labels.append(np.load(label_path))

/content/gdrive/MyDrive/sl-training/Basketball.npy
/content/gdrive/MyDrive/sl-training/Basketball-labels.npy
/content/gdrive/MyDrive/sl-training/Bird2.npy
/content/gdrive/MyDrive/sl-training/Bird2-labels.npy
/content/gdrive/MyDrive/sl-training/BlurCar3.npy
/content/gdrive/MyDrive/sl-training/BlurCar3-labels.npy
/content/gdrive/MyDrive/sl-training/Boy.npy
/content/gdrive/MyDrive/sl-training/Boy-labels.npy
/content/gdrive/MyDrive/sl-training/CarDark.npy
/content/gdrive/MyDrive/sl-training/CarDark-labels.npy
/content/gdrive/MyDrive/sl-training/CarScale.npy
/content/gdrive/MyDrive/sl-training/CarScale-labels.npy
/content/gdrive/MyDrive/sl-training/ClifBar.npy
/content/gdrive/MyDrive/sl-training/ClifBar-labels.npy
/content/gdrive/MyDrive/sl-training/Coupon.npy
/content/gdrive/MyDrive/sl-training/Coupon-labels.npy
/content/gdrive/MyDrive/sl-training/Crowds.npy
/content/gdrive/MyDrive/sl-training/Crowds-labels.npy
/content/gdrive/MyDrive/sl-training/Deer.npy
/content/gdrive/MyDrive/sl-trainin

In [None]:
# Merge the per-sequence arrays and keep only a whole number of 128-example batches.
images_np = np.concatenate([np.array(sequence_images) for sequence_images in images])
labels_np = np.concatenate([np.array(sequence_labels) for sequence_labels in labels])
usable_count = (images_np.shape[0] // 128) * 128
images_np = images_np[:usable_count]
labels_np = labels_np[:usable_count]

In [None]:
# Drop the per-sequence lists now that their contents live in images_np / labels_np.
del images, labels

In [None]:
# Shuffle images and labels with the same permutation so pairs stay aligned;
# random_state=0 makes the permutation repeatable.
from sklearn.utils import shuffle
images_np,labels_np = shuffle(images_np, labels_np, random_state=0)

In [None]:
# Wrap the shuffled numpy training arrays in a tf.data pipeline that yields batches of BATCH_SIZE examples.
BATCH_SIZE = 128
train_dataset = tf.data.Dataset.from_tensor_slices((images_np, labels_np)).batch(BATCH_SIZE)

In [None]:
# Train ADNET on the prepared dataset with an all-zero action history.
from tensorflow.keras.optimizers import Adam,SGD
ALL_DATASETS_LIST = glob.glob("adnet_datasets/OTB/*")
adamOptimizer = Adam(learning_rate=0.001)
model = ADNET()
# The leading dimension (128) equals the batch size used for train_dataset,
# because the model concatenates this tensor with each batch's features.
action_hist = np.zeros(shape = (128,1,1,110))
# ADNET.build takes the action-history tensor, not an input shape.
model.build(action_hist)
model.compile(adamOptimizer)
model.fit(train_dataset,epochs=100,verbose=True)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7f3cb003f250>

In [None]:
# Run 5 additional epochs on the same model and dataset.
model.fit(train_dataset,epochs=5,verbose=True)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f3c21717190>

In [None]:
# Save the model weights to the project folder on Drive.
model.save_weights('/content/gdrive/My Drive/EE6885/kaan-weights')

In [None]:
# Show which sequence sits at index 11 of the dataset listing.
[ALL_DATASETS_LIST[11]]

['adnet_datasets/OTB/Coupon']

In [None]:
# Show which sequence sits at index 21 of the dataset listing.
[ALL_DATASETS_LIST[21]]

['adnet_datasets/OTB/Jumping']