#Preparations

<h3><a href="https://cloud.google.com/tpu/"><img valign="middle" src="https://raw.githubusercontent.com/GoogleCloudPlatform/tensorflow-without-a-phd/master/tensorflow-rl-pong/images/tpu-hexagon.png" width="50"></a>  &nbsp;&nbsp;Use a free Cloud TPU</h3>
 
   1. On the main menu, click Runtime and select **Change runtime type**. Set "TPU" as the hardware accelerator.
   2. Click Runtime again and select **Runtime > Run All**. You can also run the cells manually with Shift-ENTER.

## Mounting Our Google Drive
We're ising our Google Drive to store videos and saved models

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Download the source code
Download the source code of the Mask R-CNN model.

In [None]:
!git clone https://github.com/tensorflow/tpu/

fatal: destination path 'tpu' already exists and is not an empty directory.


## Import libraries

In [None]:
from IPython import display
from PIL import Image
from matplotlib import cm
from matplotlib import pyplot as plt
import numpy as np
%tensorflow_version 1.x
import tensorflow as tf
import sys
sys.path.insert(0, 'tpu/models/official')
sys.path.insert(0, 'tpu/models/official/mask_rcnn')
import coco_metric
from mask_rcnn.object_detection import visualization_utils
import cv2
from google.colab.patches import cv2_imshow
import copy
import codecs
import time  # to check frames per second

# DINO
import torch
import torchvision
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import os

## Load the COCO index mapping
This Colab uses a pretrained checkpoint of the Mask R-CNN model that is trained using the COCO dataset. Here is the mapping between the indices that the model predicts and the categories in text.

In [None]:
ID_MAPPING = {
    1: 'person',
    2: 'bicycle',
    3: 'car',
    4: 'motorcycle',
    5: 'airplane',
    6: 'bus',
    7: 'train',
    8: 'truck',
    9: 'boat',
    10: 'traffic light',
    11: 'fire hydrant',
    13: 'stop sign',
    14: 'parking meter',
    15: 'bench',
    16: 'bird',
    17: 'cat',
    18: 'dog',
    19: 'horse',
    20: 'sheep',
    21: 'cow',
    22: 'elephant',
    23: 'bear',
    24: 'zebra',
    25: 'giraffe',
    27: 'backpack',
    28: 'umbrella',
    31: 'handbag',
    32: 'tie',
    33: 'suitcase',
    34: 'frisbee',
    35: 'skis',
    36: 'snowboard',
    37: 'sports ball',
    38: 'kite',
    39: 'baseball bat',
    40: 'baseball glove',
    41: 'skateboard',
    42: 'surfboard',
    43: 'tennis racket',
    44: 'bottle',
    46: 'wine glass',
    47: 'cup',
    48: 'fork',
    49: 'knife',
    50: 'spoon',
    51: 'bowl',
    52: 'banana',
    53: 'apple',
    54: 'sandwich',
    55: 'orange',
    56: 'broccoli',
    57: 'carrot',
    58: 'hot dog',
    59: 'pizza',
    60: 'donut',
    61: 'cake',
    62: 'chair',
    63: 'couch',
    64: 'potted plant',
    65: 'bed',
    67: 'dining table',
    70: 'toilet',
    72: 'tv',
    73: 'laptop',
    74: 'mouse',
    75: 'remote',
    76: 'keyboard',
    77: 'cell phone',
    78: 'microwave',
    79: 'oven',
    80: 'toaster',
    81: 'sink',
    82: 'refrigerator',
    84: 'book',
    85: 'clock',
    86: 'vase',
    87: 'scissors',
    88: 'teddy bear',
    89: 'hair drier',
    90: 'toothbrush',
}
category_index = {k: {'id': k, 'name': ID_MAPPING[k]} for k in ID_MAPPING}

## Break video into frames and resize

In [None]:
# video_name = 'static.webm'
video_name = 'part_zibi.mp4'
use_drive = True

if use_drive: # use videos on our drive
  cap = cv2.VideoCapture('/content/drive/MyDrive/videos/{}' .format(video_name))
else: 
  cap = cv2.VideoCapture('{}' .format(video_name))

frame_rate = cap.get(cv2.CAP_PROP_FPS)

i=0

%mkdir frames
%mkdir frame_res
%rm frames/*
while(cap.isOpened()):
    ret, frame = cap.read()
    if ret == False:
        break
    cv2.imwrite('frames/frame'+str(i)+'.jpg',frame)
    i+=1


cap.release()

check = cv2.imread('frames/frame1.jpg')
og_hh, og_ww, layers = check.shape
print('Original resolution {} X {}' .format(og_ww, og_hh))

mkdir: cannot create directory ‘frames’: File exists
mkdir: cannot create directory ‘frame_res’: File exists
Original resolution 1920 X 1080


In [None]:
width = og_ww
height = og_hh

should_resize = True
was_resized = False  # keep this False, after resizing happens it turns True
resize_factor = 3

In [None]:
if should_resize and not was_resized:
  width = width // resize_factor
  height = height // resize_factor
  newsize = (width, height)
  numnum = 0
  while os.path.isfile('frames/frame{}.jpg' .format(numnum)):
    image = cv2.imread('frames/frame{}.jpg' .format(numnum))
    os.remove('frames/frame{}.jpg' .format(numnum))
    cv2.imwrite('frames/frame{}.jpg' .format(numnum), cv2.resize(image, newsize))
    numnum+=1
  was_resized = True  # to ensure the size won't change again
  print('After resize - resolution {} X {}' .format(width, height))

After resize - resolution 640 X 360


## DINO

In [None]:
should_dino = True
should_test_dino = True

### Downloading Pretrained DINO net

In [None]:
%%capture
if should_dino:
  resnet50 = torch.hub.load('facebookresearch/dino:main', 'dino_resnet50')
  resnet50.eval()

### Data
To fit the clustering algorithm we need data. 
* place < class1 >'s training images in < data_dir >/train/< class1 >/< class1 > ().jpg
* same for < class2 > and for test data

You can use the drive if you want!

In [None]:
if should_dino:
  use_drive_4_dino = True


  if use_drive_4_dino:
    data_dir = '/content/drive/MyDrive/shoes/dataset_aug'
  else:
    data_dir = 'C:/Users/tomhe/Desktop/kagglecatsanddogs_3367a/PetImages'

  class1_dirname = 'highheel'
  class1_filename = 'highheel'
  class2_dirname = 'sneaker'
  class2_filename = 'sneaker'

  dino_was_trained = False  # keep false, once fitted itll turn true and we wouldn't have to run forward passes o the training data again

### Fitting

DINO forward passes on training data

In [None]:
if should_dino:
  DINO_width = 200
  DINO_height = 200
  channels = 3
  batch_size = 1

  device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

  if not dino_was_trained:
    
    # for progress
    # num_1_4_prog = 10 dataset has bad naming
    num_1_4_prog = 1
    num_2_4_prog = 1
    while os.path.isfile('{}/train/{}/{} ({}).jpg' .format(data_dir, class1_dirname, class1_filename, num_1_4_prog)):
      num_1_4_prog += 1
    while os.path.isfile('{}/train/{}/{} ({}).jpg' .format(data_dir, class2_dirname, class2_filename, num_2_4_prog)):
      num_2_4_prog += 1

    # num_1_4_prog -= 10
    # num_2_4_prog -= 10

    if num_1_4_prog > 500:
      num_1_4_prog = 500
    if num_2_4_prog > 500:
      num_2_4_prog = 500

    cls1_features = []
    for num_class1 in range(10, num_1_4_prog):
      cls1 = cv2.imread('{}/train/{}/{} ({}).jpg' .format(data_dir, class1_dirname, class1_filename, num_class1))
      cls1 = cv2.resize(cls1, (DINO_width, DINO_height), interpolation=cv2.INTER_CUBIC)
      im = np.expand_dims(cls1, axis=0)
      img = torch.from_numpy(im)  # to tensor
      images = img.to(device).view(batch_size, channels, DINO_width, DINO_height)  # reshaping 

      cls1_features.append(resnet50(images.float()).cpu().detach().numpy())
      
      print('\r Progress: {}%' .format(int(100*(num_class1)/(num_1_4_prog + num_2_4_prog))), end="")

    cls2_features = []
    for num_class2 in range(10, num_2_4_prog):
      cls2 = cv2.imread('{}/train/{}/{} ({}).jpg' .format(data_dir, class2_dirname, class2_filename, num_class2))
      cls2 = cv2.resize(cls2, (DINO_width, DINO_height), interpolation=cv2.INTER_CUBIC)
      im = np.expand_dims(cls2, axis=0)
      img = torch.from_numpy(im)  # to tensor
      images = img.to(device).view(batch_size, channels, DINO_width, DINO_height)  # reshaping 

      cls2_features.append(resnet50(images.float()).cpu().detach().numpy())

      print('\r Progress: {}%' .format(int(100*(num_class1 + num_class2)/(num_1_4_prog + num_2_4_prog))), end="")

    print('\nTRAIN SET: #class1 {}, #class2 {}'.format(num_1_4_prog, num_2_4_prog))


    if not (num_1_4_prog > 0 and num_2_4_prog > 0):
      print('Data Unavailable')
      raise Exception()

    train_features = np.squeeze(np.concatenate((cls1_features, cls2_features)), axis=1)
    train_labels = np.concatenate((np.zeros(num_1_4_prog), np.ones(num_2_4_prog)))  # 0 = class1, 1 = class2

    # FITTING 
    K = 3

    clfKNN = KNeighborsClassifier(n_neighbors=K)
    clfKNN.fit(train_features, train_labels)

    clfSVM = make_pipeline(StandardScaler(), SVC(gamma='auto'))
    clfSVM.fit(train_features, train_labels)

    
    dino_was_trained = True

 Progress: 99%
TRAIN SET: #class1 58, #class2 230


ValueError: ignored

### Evaluating

DINO forward passes on test data

In [None]:
if should_dino:
  for ind in range(2):
    if ind == 0:
      print('KNN\n')
      clf = clfKNN
    if ind == 1:
      print('SVM\n')
      clf = clfSVM
    if should_test_dino:

      # for progress
      num_1_4_prog = 1
      num_2_4_prog = 1
      while os.path.isfile('{}/test/{}/{} ({}).jpg' .format(data_dir, class1_dirname, class1_filename, num_1_4_prog)):
        num_1_4_prog += 1
      while os.path.isfile('{}/test/{}/{} ({}).jpg' .format(data_dir, class2_dirname, class2_filename, num_2_4_prog)):
        num_2_4_prog += 1

      if num_1_4_prog > 500:
        num_1_4_prog = 500
      if num_2_4_prog > 500:
        num_2_4_prog = 500


      cls1_features_test = []
      for num_class1_test in range(1, num_1_4_prog):
        cls1 = cv2.imread('{}/test/{}/{} ({}).jpg' .format(data_dir, class1_dirname, class1_filename, num_class1_test))
        cls1 = cv2.resize(cls1, (DINO_width, DINO_height), interpolation=cv2.INTER_CUBIC)
        im = np.expand_dims(cls1, axis=0)

        img = torch.from_numpy(im)  # to tensor
        images = img.to(device).view(batch_size, channels, DINO_width, DINO_height)  # reshaping 

        cls1_features_test.append(resnet50(images.float()).cpu().detach().numpy())
        
        print('\r Progress: {}%' .format(int(100*(num_class1_test)/(num_1_4_prog))), end="")


      cls2_features_test = []
      for num_class2_test in range(1, num_2_4_prog):
        cls2 = cv2.imread('{}/test/{}/{} ({}).jpg' .format(data_dir, class2_dirname, class2_filename, num_class2_test))
        cls2 = cv2.resize(cls2, (DINO_width, DINO_height), interpolation=cv2.INTER_CUBIC)
        im = np.expand_dims(cls2, axis=0)

        img = torch.from_numpy(im)  # to tensor
        images = img.to(device).view(batch_size, channels, DINO_width, DINO_height)  # reshaping 

        cls2_features_test.append(resnet50(images.float()).cpu().detach().numpy())

        print('\r Progress: {}%' .format(int(100*(num_class2_test)/(num_2_4_prog))), end="")
          
      print('\nTEST SET: #class1 {}, #class2 {}'.format(num_1_4_prog, num_2_4_prog))

      if not (num_1_4_prog > 0 and num_2_4_prog > 0):
        print('Data Unavailable')
        raise Exception()

      

    # 0 = class1, 1 = class2
    if should_test_dino:
      num_correct_cls1 = 0
      for i in range(num_class1_test):
          pred = clf.predict(cls1_features_test[i])
          if pred == 0:
              num_correct_cls1 += 1
              
      num_correct_cls2 = 0
      for i in range(num_class2_test):
          pred = clf.predict(cls2_features_test[i])
          if pred == 1:
              num_correct_cls2 += 1
              
      ttl_acc = 100*(num_correct_cls2+num_correct_cls1)/(num_class2_test+num_class1_test)
      cls2_acc = 100*num_correct_cls2/num_class2_test
      cls1_acc = 100*num_correct_cls1/num_class1_test
      print('Total Test Acc.  : {}%\nClass1 Acc. : {}\nClass2 Acc. : {}'.format(ttl_acc, cls1_acc, cls2_acc))


KNN

 Progress: 98%
TEST SET: #class1 167, #class2 91
Total Test Acc.  : 44.921875%
Class1 Acc. : 16.265060240963855
Class2 Acc. : 97.77777777777777
SVM

 Progress: 98%
TEST SET: #class1 167, #class2 91
Total Test Acc.  : 37.109375%
Class1 Acc. : 3.0120481927710845
Class2 Acc. : 100.0


Cheking Accuracy

## Create a Tensorflow session for Mask-RCNN
Now let us create a Tensorflow session to run the inference. You can either connect to a TPU or a normal CPU backend.

In [None]:
use_tpu = True 
if use_tpu:
  import os
  import pprint

  assert 'COLAB_TPU_ADDR' in os.environ, 'ERROR: Not connected to a TPU runtime; please see the first cell in this notebook for instructions!'
  TPU_ADDRESS = 'grpc://' + os.environ['COLAB_TPU_ADDR']
  print('TPU address is', TPU_ADDRESS)

  session = tf.Session(TPU_ADDRESS, graph=tf.Graph())
  print('TPU devices:')
  pprint.pprint(session.list_devices())
else:
  session = tf.Session(graph=tf.Graph())

TPU address is grpc://10.88.167.242:8470
TPU devices:
[_DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:CPU:0, CPU, -1, 10308121448120690991),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 16405083768195944995),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 13598905960868569781),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 4410537336732872944),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:2, TPU, 17179869184, 40015781556742980),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:3, TPU, 17179869184, 6152271583902510944),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:4, TPU, 17179869184, 3485075665373192257),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:5, TPU, 17179869184, 15413749610181014741),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:6, TPU, 17179869184, 7186223973127

## Load the pretrained model for Mask-RCNN
Loading the COCO pretrained saved model from the public GCS bucket. 

In [None]:
saved_model_dir = 'gs://cloud-tpu-checkpoints/mask-rcnn/1555659850' 
_ = tf.saved_model.loader.load(session, ['serve'], saved_model_dir)

Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.
INFO:tensorflow:Restoring parameters from gs://cloud-tpu-checkpoints/mask-rcnn/1555659850/variables/variables


##Prepare lobe.ai pretrained Net
We trained a net and uploded it to google drive


###Extract

In [None]:
model_name = "cheat_net_opt" #@param {type:"string"}


In [None]:
sh = """ 
if [ ! -d "/content/tf_model" ]
then 
  mkdir "/content/tf_model"
  unrar x "/content/drive/MyDrive/pretrained_models/{}.rar" "/content/tf_model/"
fi
""" .format(model_name)
with open('script.sh', 'w') as file:
  file.write(sh)

!bash script.sh 


UNRAR 5.50 freeware      Copyright (c) 1993-2017 Alexander Roshal


Extracting from /content/drive/MyDrive/pretrained_models/cheat_net_opt.rar

Creating    /content/tf_model/cheat_net_opt                           OK
Creating    /content/tf_model/cheat_net_opt/example                   OK
Extracting  /content/tf_model/cheat_net_opt/example/README.md              0%  OK 
Extracting  /content/tf_model/cheat_net_opt/example/requirements.txt       0%  OK 
Extracting  /content/tf_model/cheat_net_opt/example/tf_example.py          0%  OK 
Extracting  /content/tf_model/cheat_net_opt/labels.txt                     0%  OK 
Extracting  /content/tf_model/cheat_net_opt/saved_model.pb                 0%  OK 
Extracting  /content/tf_model/cheat_net_opt/signature.json                 0%  OK 
Creating    /content/tf_model/cheat_net_opt/variables                 OK
Extracting  /content/tf_model/cheat_net_opt/variables/variables.data-00000-of-00001 

###Functions for LobeNet

In [None]:
#  -------------------------------------------------------------
#   Copyright (c) Microsoft Corporation.  All rights reserved.
#  -------------------------------------------------------------
"""
Skeleton code showing how to load and run the TensorFlow SavedModel export package from Lobe.
"""
import argparse
import os
import json
import tensorflow as tf
from PIL import Image
import numpy as np

EXPORT_MODEL_VERSION = 1


class TFModel:
    def __init__(self, model_dir) -> None:
        # make sure our exported SavedModel folder exists
        self.model_dir = model_dir
        with open(os.path.join(model_dir, "signature.json"), "r") as f:
            self.signature = json.load(f)
        # self.model_file = "../" + self.signature.get("filename")
        self.model_file = self.signature.get("filename")
        if not os.path.isfile(os.path.join(self.model_dir, self.model_file)):
            raise FileNotFoundError(f"Model file does not exist")
        self.inputs = self.signature.get("inputs")
        self.outputs = self.signature.get("outputs")
        # placeholder for the tensorflow session
        self.session = None

        # Look for the version in signature file.
        # If it's not found or the doesn't match expected, print a message
        version = self.signature.get("export_model_version")
        if version is None or version != EXPORT_MODEL_VERSION:
            print(
                f"There has been a change to the model format. Please use a model with a signature 'export_model_version' that matches {EXPORT_MODEL_VERSION}."
            )

    def load(self) -> None:
        self.cleanup()
        # create a new tensorflow session
        self.session = tf.compat.v1.Session(graph=tf.Graph())
        # load our model into the session
        tf.compat.v1.saved_model.loader.load(sess=self.session, tags=self.signature.get("tags"), export_dir=self.model_dir)

    def predict(self, image: Image.Image) -> dict:
        # load the model if we don't have a session
        if self.session is None:
            self.load()

        image = self.process_image(image, self.inputs.get("Image").get("shape"))
        # create the feed dictionary that is the input to the model
        # first, add our image to the dictionary (comes from our signature.json file)
        feed_dict = {self.inputs["Image"]["name"]: [image]}

        # list the outputs we want from the model -- these come from our signature.json file
        # since we are using dictionaries that could have different orders, make tuples of (key, name) to keep track for putting
        # the results back together in a dictionary
        fetches = [(key, output["name"]) for key, output in self.outputs.items()]

        # run the model! there will be as many outputs from session.run as you have in the fetches list
        outputs = self.session.run(fetches=[name for _, name in fetches], feed_dict=feed_dict)
        return self.process_output(fetches, outputs)

    def process_image(self, image, input_shape) -> np.ndarray:
        """
        Given a PIL Image, center square crop and resize to fit the expected model input, and convert from [0,255] to [0,1] values.
        """
        width, height = image.size
        # ensure image type is compatible with model and convert if not
        if image.mode != "RGB":
            image = image.convert("RGB")
        # center crop image (you can substitute any other method to make a square image, such as just resizing or padding edges with 0)
        if width != height:
            square_size = min(width, height)
            left = (width - square_size) / 2
            top = (height - square_size) / 2
            right = (width + square_size) / 2
            bottom = (height + square_size) / 2
            # Crop the center of the image
            image = image.crop((left, top, right, bottom))
        # now the image is square, resize it to be the right shape for the model input
        input_width, input_height = input_shape[1:3]
        if image.width != input_width or image.height != input_height:
            image = image.resize((input_width, input_height))

        # make 0-1 float instead of 0-255 int (that PIL Image loads by default)
        image = np.asarray(image) / 255.0
        # format input as model expects
        return image.astype(np.float32)

    def process_output(self, fetches, outputs) -> dict:
        # do a bit of postprocessing
        out_keys = ["label", "confidence"]
        results = {}
        # since we actually ran on a batch of size 1, index out the items from the returned numpy arrays
        for i, (key, _) in enumerate(fetches):
            val = outputs[i].tolist()[0]
            if isinstance(val, bytes):
                val = val.decode()
            results[key] = val
        confs = results["Confidences"]
        labels = self.signature.get("classes").get("Label")
        output = [dict(zip(out_keys, group)) for group in zip(labels, confs)]
        sorted_output = {"predictions": sorted(output, key=lambda k: k["confidence"], reverse=True)}
        return sorted_output

    def cleanup(self) -> None:
        # close our tensorflow session if one exists
        if self.session is not None:
            self.session.close()
            self.session = None

    def __del__(self) -> None:
        self.cleanup()


### Initializing the model

In [None]:
lobe_net = TFModel('/content/tf_model/{}/' .format(model_name))
lobe_net.load()

INFO:tensorflow:Restoring parameters from /content/tf_model/cheat_net_opt/variables/variables


#Functions

In [None]:
def print_group_mat(group_matrix):
  print()
  for i, data in enumerate(group_matrix):
    print(str(i).zfill(2) + ":" + str(data))


def normalize(a):
  '''
  To compare between colors invariantly of luminance
  '''
  c = np.zeros(a.shape)
  for i in range(len(a)):
    b = a[i,:]
    m  = float(max(b))
    b = b/m
    c[i,:] = b
  return c


def find_groups(Id_vector, group_matrix, dx_thresh, small_changes_th, vel_diff_th):
  for i in range(len(Id_vector)):
    for j in range(len(Id_vector)):
      if (i != j and Id_vector[i][3] == 0 and Id_vector[i][0] > 0 and Id_vector[j][3] == 0 and Id_vector[j][0] > 0):
        x_prox = abs(Id_vector[i][0] - Id_vector[j][0]) < dx_thresh  # checks if they are close together
        walk_dir = Id_vector[i][11] * Id_vector[j][11] > 0  # if both are same sign, theyre walking in the same direction
        small_changes = (abs(Id_vector[i][11]) <= small_changes_th) or (abs(Id_vector[j][11]) <= small_changes_th)
        vel_diff = abs(Id_vector[i][9] -  Id_vector[j][9])*3.6  # 3.6 is meter/sec to km/h
        if (x_prox and (small_changes or walk_dir) and vel_diff <= vel_diff_th):
          group_matrix[i][j] = group_matrix[i][j] + 1
        else:
          group_matrix[i][j] = 0
  return group_matrix


def is_in_group_list(groups_list, ID):
  for i in range(len(groups_list)):
    for j in range(len(groups_list[i])):
     if(groups_list[i][j] == ID):
       return True
  return False


def create_groups_list(group_matrix, frames_to_group):

  groups_list = [[-1]]
  last_index = 0
  flag = 0

  for i in range(len(group_matrix)):
    if(is_in_group_list(groups_list, i) == False):
      groups_list[last_index][0] = i
      for j in range(len(group_matrix)): 
        if(is_in_group_list(groups_list, j) == False and group_matrix[i][j] > frames_to_group):
          flag = 1
          groups_list[last_index].append(j)

    if flag == 1:
      groups_list.append([-1])
      last_index = last_index + 1
    else:
      groups_list[last_index][0] = -1
    flag = 0

  return groups_list


def findXLimitsOfContours(contours):
  min2 = 0
  max2 = 0
  if len(contours) == 1:
    l1 = contours[0]
    max1 = 0
    min1 = 10000
    for cont in range(len(contours)):
      if (l1[cont][0][0] >= max1):
        max1 = l1[cont][0][0]
      if (l1[cont][0][0] <= min1):
        min1 = l1[cont][0][0]

  elif len(contours) == 2:
    l1 = contours[0]
    max1 = 0
    min1 = 10000
    l2 = contours[1]
    max2 = 0
    min2 = 10000
    for cont in range(len(contours)):
      if (l1[cont][0][0] >= max1):
        max1 = l1[cont][0][0]
      if (l1[cont][0][0] <= min1):
        min1 = l1[cont][0][0]
      if (l2[cont][0][0] >= max2):
        max2 = l2[cont][0][0]
      if (l2[cont][0][0] <= min2):
        min2 = l2[cont][0][0]
  return min1, max1, min2, max2



def findYLimitsOfContours(contours):
  l1 = contours[0]
  max = 0
  min = 10000
  for cont in range(len(contours)):
      if (l1[cont][0][1] >= max):
        max = l1[cont][0][1]
      if (l1[cont][0][1] <= min):
        min = l1[cont][0][1]

  return min, max

# Da Code

In [None]:
# +++++++++++++++ Options +++++++++++++++++++

vel_print = False
forward_passes_on = True  # should the classifer work (whether its conv net or dino)? also control printing of classification 
should_print_class_res = False and forward_passes_on  # whether to print the net's results or not (net must be activated)
print_flag = False

# classification = 'DINO & KNN'
# classification = 'DINO & SVM'
classification = 'CONV NET'

if classification == 'DINO & KNN':
  clf = clfKNN
elif classification == 'DINO & SVM':
  clf = clfSVM

  # +++++++++++++++++++++++++++++++++++++++++

In [None]:
size = (width,height)
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('output.mp4', fourcc, 20.0, size)
frame_rate = -1
print('resolution {} X {}' .format(width, height))

# ==================== CONSTANTS =================================

first_frame = 0
num_frames = 218


color_diff_th = 40

class_th = 0.4

group_distance_thresh = width/6.4 # 100 for part1. how close you have to be in order to be grouped
if frame_rate != -1:
  frames_to_group = frame_rate/2  # to be in a group togehter you need to be close for 0.5 seconds
else:
  frames_to_group = 10  # how many frames you have to be close in order to be grouped
small_ch_th = width/213.3 # 3 for part1
vel_diff_th = width*2/640  # 2 for part1 quick maths

close_th = width / 2.56 # 250 for part1

frame_stride = 1



class_name = 'Shoe' # what's the class' being made, will be printed in the output video

class1 = 'highheel'  # what's the class' name on the output of the net
class2 = 'sneakers'

class1_4_print = 'HH'  # what's the class' name on the output video
class2_4_print = 'SNKR'
# ================================================================

resolution 640 X 360


In [None]:
'''
////////////////////// ID VECTOR DICTIONARY////////////////////////////
0 - x_avg
1 - x_length
2 - y_height
3 - how many frames in a row this person wasn't detected
4 - BBOX's maximal y
5 - mask area in pixels
6 - BBOX's maximal x
7 - avg color in upper body
8 - avg color in bottom body
9 - velocity (after adjusments) 
10 - how many frames in total this person was seen in (doesn't have to be in a row)
11 - dx - how many pixels this person moved
12 - leg_dist
13 - steps - cooldown and frame counter (after X=10 frames you can make another step by closing your legs for Y=3 frames in a row)
14 - step counter
15 - classification score - grows if class 1, diminishes if class 2

///////////////////////////////////////////////////////////////////////
'''

start_time = time.time() # to check FPS

!rm for_lobe_net/*
!mkdir for_lobe_net

!rm bboxes_4_print/*
!mkdir bboxes_4_print



i = first_frame
total_counter = 0
color_diff = 0
number_of_id = 30
number_of_features = 17


Id_vector = [[0]*number_of_features]
last_avail_idx = 0
last_id = copy.deepcopy(Id_vector)



group_matrix = [0]*number_of_id
for k in range(len(group_matrix)):
  group_matrix[k] = [0]*number_of_id


while ((i - first_frame) <= num_frames):  # frames loop

  if print_flag:
    print('============================ FRAME {} ==================================\n\n\n\n\n' .format(i))
  # initializing variables
  flag_exist = 0

  #print("iter num:" ,i)
  image_path = 'frames/frame'+str(i)+'.jpg'
  i += frame_stride
  with open(image_path, 'rb') as f:
    np_image_string = np.array([f.read()])



  orig_img = cv2.imread(image_path) 

  image = Image.open(image_path)

  # width, height = image.size
  np_image = np.array(image.getdata()).reshape(height, width, 3).astype(np.uint8)

  # running the network
  num_detections, detection_boxes, detection_classes, detection_scores, detection_masks, image_info = session.run(
      ['NumDetections:0', 'DetectionBoxes:0', 'DetectionClasses:0', 'DetectionScores:0', 'DetectionMasks:0', 'ImageInfo:0'],
      feed_dict={'Placeholder:0': np_image_string})

  # extracting info from the output
  num_detections = np.squeeze(num_detections.astype(np.int32), axis=(0,))
  detection_boxes = np.squeeze(detection_boxes * image_info[0, 2], axis=(0,))[0:num_detections]
  detection_scores = np.squeeze(detection_scores, axis=(0,))[0:num_detections]
  detection_classes = np.squeeze(detection_classes.astype(np.int32), axis=(0,))[0:num_detections]
  detection_scores[detection_classes != 1] = 0
  instance_masks = np.squeeze(detection_masks, axis=(0,))[0:num_detections]
  ymin, xmin, ymax, xmax = np.split(detection_boxes, 4, axis=-1)
  processed_boxes = np.concatenate([xmin, ymin, xmax - xmin, ymax - ymin], axis=-1)
  segmentations = coco_metric.generate_segmentation_from_masks(instance_masks, processed_boxes, height, width)

  max_boxes_to_draw = 10 
  min_score_thresh = 0.99

  # producing visualization
  image_with_detections = visualization_utils.visualize_boxes_and_labels_on_image_array(
      np_image,
      detection_boxes,
      detection_classes,
      detection_scores,
      category_index,
      instance_masks=segmentations,
      use_normalized_coordinates=False,
      max_boxes_to_draw=max_boxes_to_draw,
      min_score_thresh=min_score_thresh)
  output_image_path = 'frame_res/frame results'+str(i)+'.jpg'
  Image.fromarray(image_with_detections.astype(np.uint8)).save(output_image_path)

  # bbox coordinates for all people detected in frame
  ymin_new = np.zeros(max_boxes_to_draw)
  xmin_new = np.zeros(max_boxes_to_draw)
  ymax_new = np.zeros(max_boxes_to_draw)
  xmax_new = np.zeros(max_boxes_to_draw)
  image = cv2.imread(output_image_path)
  index_person = detection_classes == 1 
  index_thresh = detection_scores >= min_score_thresh
 
  people_in_frame = 0
  for j in range(len(index_thresh)):
    if index_person[j] and index_thresh[j] and people_in_frame < max_boxes_to_draw:
      ymin_new[people_in_frame] = ymin[j]
      xmin_new[people_in_frame] = xmin[j]
      ymax_new[people_in_frame] = ymax[j]
      xmax_new[people_in_frame] = xmax[j]
      people_in_frame+=1

  last_id = copy.deepcopy(Id_vector)



  # -------------------- TRACKING ---------------------------

  for j in range(people_in_frame):  # for all people in frame
    if xmin_new[j] > 0 or xmax_new[j] < width:  # if in frame
      
      x_avg = (int(xmax_new[j])+int(xmin_new[j]))/2
      x_length = int(xmax_new[j])-int(xmin_new[j])
      y_height = int(ymax_new[j])-int(ymin_new[j])
      y_avg = int(((ymax_new[j])+(ymin_new[j]))/2)
      y_coordinate = int((int(ymin_new[j])*0.75+int(ymax_new[j])*0.25))
      

      # color calculations
      up = segmentations[index_person,:,:]
      up[:,y_avg:,:] = 0
      up[:,int(ymin_new[j]):int(ymin_new[j])+20,:] = 0
      up_person_avg = int(np.average(orig_img[up[j,:,:] == 1,]))

      down = segmentations[index_person,:,:]
      down[:,0:y_avg,:] = 0
      down_person_avg = int(np.average(orig_img[down[j,:,:] == 1,]))


      


      checkss = segmentations[index_person,:,:]
      mask_area = int((np.sum(checkss[j,:,:])))
      

      min_color_diff = 300  # arbitrary large value
      min_index = -1  # initial illegal value

      isnt_first_frame = (i != first_frame)
      if isnt_first_frame:
        # calculating match scores
        '''
        color_diff (takes properties of two people and produces scalar) is our target 
        function. 
        We are looking for the person 'k' in Id_vector with the least color_diff from 
        the person 'j' that was detected in this frame and coresponds with the variables
        <up_person_avg> and <down_person_avg>.
        '''

        if print_flag:  # produce masked bbox fot j_th person for printing only
            mbb = np.copy(orig_img)
            mask = [segmentations[index_person,:,:], segmentations[index_person,:,:], segmentations[index_person,:,:]]
            mbb[mask == 1] = 0
            masked_bbox_j = mbb[int(ymin_new[j]):int(ymax_new[j]), int(xmin_new[j]):int(xmax_new[j])]

        for k in range(len(last_id)): 
          if  last_id[k][3] >= 0 and last_id[k] != [0]*number_of_features:
            color_diff =  abs(last_id[k][7] - up_person_avg) + abs(last_id[k][8] - down_person_avg)
            is_close_enough = abs(last_id[k][0] - x_avg) <= close_th
            is_similar_size = (max((last_id[k][5]/mask_area),(mask_area/last_id[k][5])) <= 2)

            # side_margin = 25
            # is_in_sides = (xmin_new[j] <= side_margin or xmax_new[j] >= width - side_margin)


            if print_flag:
              print('\n\n\n-----------------------------------------------------------------\n')
              print('Current (j_th) person\nUpper Avg. = {}, Bottom Avg. = {}' .format(up_person_avg, down_person_avg))
              cv2_imshow(masked_bbox_j)
              print('\n\nCompared to (k_th) person\nUpper Avg. = {}, Bottom Avg. = {}' .format(last_id[k][7], last_id[k][8]))
              cv2_imshow(cv2.imread('bboxes_4_print/mask_id_{}.jpg' .format(k)))

              print('Match Score: {}' .format(color_diff))
              print('\n-----------------------------------------------------------------\n\n\n')
            if (color_diff <= min_color_diff and is_close_enough and is_similar_size):
              min_color_diff = color_diff
              min_index = k

        k = min_index
        color_diff = min_color_diff
        if print_flag: print('\n\nb\Best Match: ID {}, Score {}\n\n\n' .format(min_index, min_color_diff))
        #x_max_change = abs(last_id[min_index][6] - int(xmax_new[j]))
        


        # ========================= EXISTING PERSON ==============================
        '''
        if the best match is good enough' this is an existing person
        (hasn't died) we've already seen
        '''
        if ((color_diff <= color_diff_th and last_id[min_index][3] >= 0) or (color_diff <= 80 and last_id[min_index][3] >= 0 and (abs(y_height-last_id[min_index][2]) <= height/24))):
          flag_exist = 1  # this person does exist!

          if print_flag:
            cv2.imwrite('bboxes_4_print/mask_id_{}.jpg' .format(min_index), masked_bbox_j)

          dx = abs(x_avg - last_id[min_index][0])/(width/5)
          vel = 30*(dx/(last_id[min_index][3]+1))

          # ----------------- updating Id_vector -----------------------
          Id_vector[min_index][10] = Id_vector[min_index][10] + 1
          if (int(ymax_new[j]) >= 320):
            Id_vector[min_index][9] = ((Id_vector[min_index][9]*Id_vector[min_index][10] + vel*0.6)/(Id_vector[min_index][10]+1))
          elif (int(ymax_new[j]) <= 275):
            Id_vector[min_index][9] = ((Id_vector[min_index][9]*Id_vector[min_index][10] + vel*1.3)/(Id_vector[min_index][10]+1))
          else:
            Id_vector[min_index][9] = ((Id_vector[min_index][9]*Id_vector[min_index][10] + vel)/(Id_vector[min_index][10]+1))
          Id_vector[min_index][11] = x_avg - Id_vector[min_index][0]
          Id_vector[min_index][0] = x_avg
          Id_vector[min_index][1] = x_length
          Id_vector[min_index][2] = y_height
          Id_vector[min_index][4] = int(ymax_new[j])
          Id_vector[min_index][5] = mask_area
          Id_vector[min_index][6] = int(xmax_new[j])
          Id_vector[min_index][7] = up_person_avg
          Id_vector[min_index][8] = down_person_avg
          # ------------------------------------------------------------
          # -------------------- step counter --------------------------
          down_f_s = segmentations[index_person,:,:]
          down_f_s[j,0:int(ymax_new[j])-int(0.3*y_height),:] = 0
          yossi = down_f_s[j,:,:]
          kernel = np.ones((5,5), np.uint8)
          img_erosion = cv2.erode(yossi, kernel, iterations=1)
          yossi = cv2.dilate(img_erosion, kernel, iterations=1)
            
          contours, hierarchy = cv2.findContours(yossi, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
          if (len(contours) != 1 and len(contours) != 2):
            continue
          elif (len(contours) == 1):
            cnt2 = 0 # how many adjecent rows had two blobs
            max_blob_dist = 0
            actually_two_legs = False
            for y in range(Id_vector[min_index][4] - Id_vector[min_index][2], Id_vector[min_index][4]):
              prev = 0
              cnt = 0
              x1to0 = 0 
              x0to1 = 0
              for x in range(Id_vector[min_index][6] - Id_vector[min_index][1], Id_vector[min_index][6]):
                if yossi[y,x] == 1 and prev == 0:  # from black to white 
                  cnt += 1   
                  if cnt == 2:
                    x0to1 = x  # saving the x cordinate of the start of the second blob

                elif yossi[y,x] == 0 and prev == 1 and cnt == 1:  # from white to black
                    x1to0 = x 
                prev = yossi[y,x]
              if cnt == 2:
                cnt2 += 1
                if x0to1 - x1to0 > max_blob_dist:
                  max_blob_dist = x0to1 - x1to0
                if cnt2 == 5:
                  actually_two_legs = True
              else:
                cnt2 = 0
                
            if actually_two_legs:
              leg_dist = max_blob_dist
            else:
              leg_dist = 0
          elif (len(contours) == 2):
            l1 = contours[0]
            max1 = 0
            min1 = 1000
            l2 = contours[1]
            max2 = 0
            min2 = 1000
            for cont in range(len(contours)):
              if (l1[cont][0][0] >= max1):
                max1 = l1[cont][0][0]
              if (l1[cont][0][0] <= min1):
                min1 = l1[cont][0][0]
              if (l2[cont][0][0] >= max2):
                max2 = l2[cont][0][0]
              if (l2[cont][0][0] <= min2):
                min2 = l2[cont][0][0]
            if (min1 > min2):
              leg_dist = min1 - max2
            else:
              leg_dist = min2 - max1
          if (Id_vector[min_index][13] < 0):
            Id_vector[min_index][13] += 1
          else:
            if leg_dist < Id_vector[min_index][12] and leg_dist != 0: 
              Id_vector[min_index][13] += 1
            else:
              if abs(leg_dist - Id_vector[min_index][12] > 5):
                Id_vector[min_index][13] = 0
              else:
                Id_vector[min_index][13] = max(0, Id_vector[min_index][13] - 1)
          #if ((leg_dist == 0 and Id_vector[min_index][12] <= 50 and Id_vector[min_index][12] > 0) or (leg_dist <= 50 and leg_dist > 0 and Id_vector[min_index][12] == 0)):
          if (Id_vector[min_index][13] == 3):
            Id_vector[min_index][14] += 1
            Id_vector[min_index][13] = -10

          Id_vector[min_index][12] = leg_dist

          # ------------------------------------------------------------

          if forward_passes_on:
            # ----------------- forward pass -----------------------------
            # shlomo is the masked BBOX
            x_start = Id_vector[min_index][6] - Id_vector[min_index][1]
            x_end = Id_vector[min_index][6]
            y_start = int(Id_vector[min_index][4] - 1*Id_vector[min_index][2])
            y_end = Id_vector[min_index][4]
            shlomo = copy.deepcopy(orig_img)
            # shlomo[checkss[j,:,:] == 0] = 0  # make it masked
            height_shlomo = (int(ymax_new[j]) - int(ymin_new[j]))
            shlomo = shlomo[int(ymin_new[j]+0.85*height_shlomo):int(ymax_new[j]),int(xmin_new[j]):int(xmax_new[j]),:]
            
            if classification == 'CONV NET':
              # outputs = lobe_net.predict(Image.fromarray(np.uint8(shlomo*255)))
              outputs = lobe_net.predict(Image.fromarray(np.uint8(shlomo)))
              winner = outputs["predictions"][0]["label"]
              # print(f"Predicted: {outputs}")

            else:  # DINO and some clustering method
              sh_DINO = cv2.resize(shlomo, (DINO_width, DINO_height), interpolation=cv2.INTER_CUBIC)
              sh_DINO = np.expand_dims(sh_DINO, axis=0)
              sh_DINO_tensor = torch.from_numpy(sh_DINO)  # to tensor
              sh_DINO_tensors = sh_DINO_tensor.to(device).view(batch_size, channels, DINO_width, DINO_height)  # reshaping 

              shlomo_feature_map = resnet50(sh_DINO_tensors.float()).cpu().detach().numpy()
              # SelectStrongFeatures?
              pred = clf.predict(shlomo_feature_map)
              if pred == 0:
                winner = class1
              elif pred == 1:
                winner = class2
            # cv2.imwrite('/content/for_lobe_net/bbox_frame{}_id{}.jpg' .format(i, min_index), shlomo)
            if should_print_class_res:
              cv2_imshow(shlomo)
              print(f"Predicted: {winner}")
            if winner == class1:
              Id_vector[min_index][15] += 1
            elif winner == class2:
              Id_vector[min_index][15] -= 1
            # ------------------------------------------------------------

        # =================== END OF EXISTING PERSON =============================

        # ============================ NEW PERSON ================================
        if (flag_exist == 0):
          if print_flag:  # produce masked bbox fot j_th person for printing only
            cv2.imwrite('bboxes_4_print/mask_id_{}.jpg' .format(last_avail_idx), masked_bbox_j)

          Id_vector[last_avail_idx][0] = x_avg
          Id_vector[last_avail_idx][1] = x_length
          Id_vector[last_avail_idx][2] = y_height
          Id_vector[last_avail_idx][4] = int(ymax_new[j])
          Id_vector[last_avail_idx][5] = mask_area
          Id_vector[last_avail_idx][6] = int(xmax_new[j])
          Id_vector[last_avail_idx][7] = up_person_avg
          Id_vector[last_avail_idx][8] = down_person_avg
          Id_vector[last_avail_idx][10] = 1
          Id_vector[last_avail_idx][13] = 0
          Id_vector[last_avail_idx][14] = 0
          Id_vector[last_avail_idx][16] = 0 # frame counter


          if forward_passes_on:
            # ----------------- forward pass -----------------------------
            # shlomo is the masked BBOX
            x_start = Id_vector[last_avail_idx][6] - Id_vector[last_avail_idx][1]
            x_end = Id_vector[last_avail_idx][6]
            y_start = int(Id_vector[last_avail_idx][4] - 1*Id_vector[last_avail_idx][2])
            y_end = Id_vector[last_avail_idx][4]

            shlomo = copy.deepcopy(orig_img)
            # shlomo[checkss[j,:,:] == 0] = 0  # make it masked
            height_shlomo = (int(ymax_new[j]) - int(ymin_new[j]))
            shlomo = shlomo[int(ymin_new[j]+0.85*height_shlomo):int(ymax_new[j]),int(xmin_new[j]):int(xmax_new[j]),:]
            if classification == 'CONV NET':
              # outputs = lobe_net.predict(Image.fromarray(np.uint8(shlomo*255)))
              outputs = lobe_net.predict(Image.fromarray(np.uint8(shlomo)))
              winner = outputs["predictions"][0]["label"]
              # print(f"Predicted: {outputs}")

            else:  # DINO and some clustering method
              sh_DINO = cv2.resize(shlomo, (DINO_width, DINO_height), interpolation=cv2.INTER_CUBIC)
              sh_DINO = np.expand_dims(sh_DINO, axis=0)
              sh_DINO_tensor = torch.from_numpy(sh_DINO)  # to tensor
              sh_DINO_tensors = sh_DINO_tensor.to(device).view(batch_size, channels, DINO_width, DINO_height)  # reshaping 
              shlomo_feature_map = resnet50(sh_DINO_tensors.float()).cpu().detach().numpy()
              # SelectStrongFeatures?
              pred = clf.predict(shlomo_feature_map)
              if pred == 0:
                winner = class1
              elif pred == 1:
                winner = class2

            # cv2.imwrite('/content/for_lobe_net/bbox_frame{}_id{}.jpg' .format(i, min_index), shlomo)
            if should_print_class_res:
              cv2_imshow(shlomo)
              print(f"Predicted: {winner}")
            if winner == class1:
              Id_vector[last_avail_idx][15] += 1
            elif winner == class2:
              Id_vector[last_avail_idx][15] -= 1
            # ------------------------------------------------------------

          Id_vector.append([0]*number_of_features)
          last_id.append([0]*number_of_features)  
          last_avail_idx += 1
          
              
      else: # it is the first frame, shouldn't run match checks
        print('first Frame!')
        if print_flag:  # produce masked bbox fot j_th person for printing only
            mbb = np.copy(orig_img)
            mask = [segmentations[index_person,:,:], segmentations[index_person,:,:], segmentations[index_person,:,:]]
            mbb[mask == 1] = 0
            masked_bbox_j = mbb[int(ymin_new[j]):int(ymax_new[j]), int(xmin_new[j]):int(xmax_new[j])]
            cv2.imwrite('bboxes_4_print/mask_id_{}.jpg' .format(last_avail_idx), masked_bbox_j)
        Id_vector[last_avail_idx][0] = x_avg
        Id_vector[last_avail_idx][1] = x_length
        Id_vector[last_avail_idx][2] = y_height
        Id_vector[last_avail_idx][4] = int(ymax_new[j])
        Id_vector[last_avail_idx][5] = mask_area
        Id_vector[last_avail_idx][6] = int(xmax_new[j])
        Id_vector[last_avail_idx][7] = up_person_avg
        Id_vector[last_avail_idx][8] = down_person_avg
        Id_vector[last_avail_idx][10] = 1
        Id_vector[last_avail_idx][13] = 0
        Id_vector[last_avail_idx][14] = 0

        if forward_passes_on:
            # ----------------- forward pass -----------------------------
            # shlomo is the masked BBOX
            x_start = Id_vector[last_avail_idx][6] - Id_vector[last_avail_idx][1]
            x_end = Id_vector[last_avail_idx][6]
            y_start = int(Id_vector[last_avail_idx][4] - 1*Id_vector[last_avail_idx][2])
            y_end = Id_vector[last_avail_idx][4]

            shlomo = copy.deepcopy(orig_img)
            # shlomo[checkss[j,:,:] == 0] = 0  # make it masked
            height_shlomo = (int(ymax_new[j]) - int(ymin_new[j]))
            shlomo = shlomo[int(ymin_new[j]+0.85*height_shlomo):int(ymax_new[j]),int(xmin_new[j]):int(xmax_new[j]),:]
            if classification == 'CONV NET':
              # outputs = lobe_net.predict(Image.fromarray(np.uint8(shlomo*255)))
              outputs = lobe_net.predict(Image.fromarray(np.uint8(shlomo)))
              winner = outputs["predictions"][0]["label"]
              # print(f"Predicted: {outputs}")

            else:  # DINO and some clustering method
              sh_DINO = cv2.resize(shlomo, (DINO_width, DINO_height), interpolation=cv2.INTER_CUBIC)
              sh_DINO = np.expand_dims(sh_DINO, axis=0)
              sh_DINO_tensor = torch.from_numpy(sh_DINO)  # to tensor
              sh_DINO_tensors = sh_DINO_tensor.to(device).view(batch_size, channels, DINO_width, DINO_height)  # reshaping 

              shlomo_feature_map = resnet50(sh_DINO_tensors.float()).cpu().detach().numpy()
              # SelectStrongFeatures?
              pred = clf.predict(shlomo_feature_map)
              if pred == 0:
                winner = class1
              elif pred == 1:
                winner = class2

            # cv2.imwrite('/content/for_lobe_net/bbox_frame{}_id{}.jpg' .format(i, min_index), shlomo)
            if should_print_class_res:
              cv2_imshow(shlomo)
              print(f"Predicted: {winner}")
            if winner == class1:
              Id_vector[last_avail_idx][15] += 1
            elif winner == class2:
              Id_vector[last_avail_idx][15] -= 1
            # ------------------------------------------------------------

        Id_vector.append([0]*number_of_features)
        last_id.append([0]*number_of_features)  
        last_avail_idx += 1
      

      re = copy.deepcopy(orig_img)
      re[checkss[j,:,:] == 0] = 0
      re = re[int(ymin_new[j]):int(ymax_new[j]),int(xmin_new[j]):int(xmax_new[j]),:]
      # cv2.imwrite('aut_people/out'+str(i)+'_'+str(j)+'.jpg',re)

  for k in range(len(Id_vector)):
        if last_id[k][:] == Id_vector[k][:] and Id_vector[k][0] > 0 and Id_vector[k][3] >= 0:
          Id_vector[k][3] += 1
        elif Id_vector[k][3] >= 0:
          Id_vector[k][3] = 0
        if (Id_vector[k][3] >= 30 and (Id_vector[k][0] <= 40 or Id_vector[k][0] >= width - 40)):
          Id_vector[k][3] = -1
        elif (Id_vector[k][3] >= 80):
          Id_vector[k][3] = -1
        

  group_matrix = find_groups(Id_vector, group_matrix, group_distance_thresh, small_ch_th, vel_diff_th)

  groups_list = create_groups_list(group_matrix, frames_to_group)

  # PRINTS!
  for k in range(len(Id_vector)):
    if (Id_vector[k][0] > 0):
      total_counter += 1
      if Id_vector[k][3] == 0:
        cv2.putText(image, 'ID:' + str(k+1) + '.',( max(1,(int(Id_vector[k][0])-10)), Id_vector[k][4]), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0,0,0), 1)
        cv2.putText(image, 'steps:' + str(Id_vector[k][14]) + '.',( max(1,(int(Id_vector[k][0])-10)), min(height,Id_vector[k][4]+10)), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0,0,0), 1)
        if Id_vector[k][9] != 0 and vel_print:
          cv2.putText(image, 'V:' + str(int(round(Id_vector[k][9]*3.6))) + 'KM/H',( max(1,(int(Id_vector[k][0])-10)), Id_vector[k][4]-40), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0,0,0), 1)
        if forward_passes_on and Id_vector[k][10] > 0 and Id_vector[k][15]/Id_vector[k][10] >= class_th:
          cv2.putText(image, '{}: {}'.format(class_name, class1_4_print),( max(1,(int(Id_vector[k][0])-10)), Id_vector[k][4]-60), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0,0,0), 1)
        elif forward_passes_on:
          cv2.putText(image, '{}: {}'.format(class_name, class2_4_print),( max(1,(int(Id_vector[k][0])-10)), Id_vector[k][4]-60), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0,0,0), 1)

    else:
      break

  for group in groups_list:
    max_x = 0
    min_x = 100000
    min_y = 100000
    max_y = 0
    if (len(group) <= 1):
      continue
    if group[0] != -1:
      for person in group:
        if (Id_vector[person][3] == 0):  # don't print if missing from frame
          if (min(Id_vector[person][6], Id_vector[person][6]-Id_vector[person][1]) < min_x):
            min_x = min(Id_vector[person][6], Id_vector[person][6]-Id_vector[person][1])
          if (max(Id_vector[person][6], Id_vector[person][6]-Id_vector[person][1]) > max_x):
            max_x = max(Id_vector[person][6], Id_vector[person][6]-Id_vector[person][1])
          if (min(Id_vector[person][4], Id_vector[person][4]-Id_vector[person][2]) < min_y):
            min_y = min(Id_vector[person][4], Id_vector[person][4]-Id_vector[person][2])
          if (max(Id_vector[person][4], Id_vector[person][4]-Id_vector[person][2]) > max_y):
            max_y = max(Id_vector[person][4], Id_vector[person][4]-Id_vector[person][2])
      if (min_x != 100000 and min_y != 100000):
        cv2.rectangle(image, (int(max_x), int(min_y)), (int(min_x), int(max_y)), (0, 0, 255), 2)


  cv2.putText(image, 'number of people counted:' + str(total_counter) + '.', (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 1)
  total_counter = 0
  if print_flag: cv2_imshow(image)
  out.write(image)
  #print(" iter num {}, Id Vector {}".format(i,Id_vector))
  print("\r iter num {}, Id Vector {}".format(i,Id_vector), end="")
  # print(Id_vector[2])
  if print_flag: print('=======================================================================\n\n\n\n\n' .format(i))

out.release()

duration = time.time() - start_time
avg_fps = num_frames / duration
print('\nRunning took {} [sec]\nAVG FPS: {} [frames/sec]' .format(duration, avg_fps))



rm: cannot remove 'for_lobe_net/*': No such file or directory
mkdir: cannot create directory ‘for_lobe_net’: File exists
rm: cannot remove 'bboxes_4_print/*': No such file or directory
mkdir: cannot create directory ‘bboxes_4_print’: File exists
 iter num 219, Id Vector [[183.0, 50, 162, 0, 265, 4480, 208, 132, 90, 0.37152270735981296, 213, -1.0, 28, 0, 7, 154, 0], [340.0, 72, 211, 0, 287, 7638, 376, 69, 62, 0.6917348130841121, 213, 1.5, 33, -2, 6, 35, 0], [134.0, 58, 177, 0, 278, 5215, 163, 91, 109, 0.30957512315270946, 202, -2.0, 22, 0, 8, 180, 0], [388.5, 83, 199, 0, 283, 5728, 430, 100, 83, 2.5053442158830297, 217, -2.0, 37, 0, 4, 37, 0], [258.0, 96, 230, 0, 307, 8860, 306, 83, 85, 0.40234374999999983, 191, -4.5, 28, 1, 6, 161, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
Running took 422.43919491767883 [sec]
AVG FPS: 0.5160505999981415 [frames/sec]


In [None]:
# out.release()
from google.colab import files
files.download("/content/output.mp4")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# %rm frame_res/*
# %rm frames/*

In [None]:
%%capture
!zip -r /content/file.zip /content/for_lobe_net
from google.colab import files
files.download("/content/file.zip")

# Visualize the detection results
Time to check out the result!

In [None]:
max_boxes_to_draw = 30  
min_score_thresh = 0.5   

image_with_detections = visualization_utils.visualize_boxes_and_labels_on_image_array(
    np_image,
    detection_boxes,
    detection_classes,
    detection_scores,
    category_index,
    instance_masks=segmentations,
    use_normalized_coordinates=False,
    max_boxes_to_draw=max_boxes_to_draw,
    min_score_thresh=min_score_thresh)
output_image_path = 'test_results.jpg'
Image.fromarray(image_with_detections.astype(np.uint8)).save(output_image_path)
display.display(display.Image(output_image_path, width=1024))

#Using LobeNet

In [None]:
should_show = True

# preds = [[]]*num_frames 
 
for i in range(num_frames):
  frame_num = i + first_frame + 1
  flag = True
  id_num = 0
  while flag:
    im_path = 'for_lobe_net/bbox_frame{}_id{}.jpg' .format(frame_num, id_num)
    if os.path.isfile(im_path):
      photo = Image.open(im_path)
      outputs = lobe_net.predict(photo)
      # print(f"Predicted: {outputs}")
      winner = outputs["predictions"][0]["label"]
      # preds[i].append(winner)
      
      if should_show:
        photo2see = cv2.imread('for_lobe_net/bbox_frame{}_id{}.jpg' .format(frame_num, id_num))
        print('-------------\nPrediction: {}, ID:{}, Frame: {}' .format(winner, id_num, frame_num))
        cv2_imshow(photo2see)
        print('---------------------\n\n\n\n\n')
      id_num += 1
    else:
      flag = False
    
# print('#frame  |  #ID  |  prediction')
# for i in range(len(preds)):
#   for id_idx in range(len(preds[i])):
#     print("{}  ,  {}  ,  {}" .format(i + first_frame + 1, id_idx, preds[i][id_idx] ))

Evaluating model on test set:

In [None]:
men_total_cnt = 0
men_corret_cnt = 0
women_total_cnt = 0
women_corret_cnt = 0

for i in range(100):
  im_path = '/content/drive/MyDrive/people/test_set_part2/man/man ({}).png' .format(i)
  if os.path.isfile(im_path):
    photo = Image.open(im_path)
    outputs = lobe_net.predict(photo)
    # print(f"Predicted: {outputs}")
    winner = outputs["predictions"][0]["label"]
    men_total_cnt += 1
    if winner == 'man': 
      men_corret_cnt += 1

  im_path = '/content/drive/MyDrive/people/test_set_part2/woman/woman ({}).png' .format(i)
  if os.path.isfile(im_path):
    photo = Image.open(im_path)
    outputs = lobe_net.predict(photo)
    # print(f"Predicted: {outputs}")
    winner = outputs["predictions"][0]["label"]
    women_total_cnt += 1
    if winner == 'woman': 
      women_corret_cnt += 1

ttl_acc = 100*(women_corret_cnt + men_corret_cnt)/(women_total_cnt + men_total_cnt)
women_acc = 100*women_corret_cnt/women_total_cnt
men_acc = 100*men_corret_cnt/men_total_cnt
print('All Photos: {} ({} woman {} man)'.format(men_total_cnt + women_total_cnt, women_total_cnt, men_total_cnt))
print('Overall Acc.: {}% ({} woman {} man)'.format(ttl_acc, women_acc, men_acc))