In [27]:
import tensorflow as tf # importing libraries
import keras
from keras.applications.inception_resnet_v2 import InceptionResNetV2
import keras.utils as image
from tensorflow.keras.applications.inception_resnet_v2 import preprocess_input, decode_predictions
import numpy as np
from keras.models import Model,Sequential
from tensorflow.keras.layers import *
from tensorflow.keras import optimizers
import pandas as pd 
import cv2
import os 
import matplotlib.pyplot as plt
from tensorflow.keras.metrics import Precision,Recall
from skimage.metrics import structural_similarity
import csv

In [28]:
physical_devices =tf.config.list_physical_devices ('GPU') # setting computng device to GPU
tf.config.experimental.set_memory_growth(physical_devices[0],True)

In [29]:
def string_to_sec(start_time):
   hours = start_time[0:2]
   minutes = start_time[3:5]
   seconds = start_time[6:8]
   
   if(hours[0]=='0'):
      hours = hours[1]

   if(minutes[0]=='0'):
      minutes = minutes[1]

   if(seconds[0]=='0'):
      seconds =seconds[1]
 
   hours = int(hours)
   minutes =int(minutes)
   seconds =int(seconds)

   sec = (hours*3600 + minutes*60 + seconds)

   return sec

In [30]:
file_dir = './vosaic_model_verification/S2_T20_Comb/'
video_path = './test_videos/S2_T20.mp4' # setting up the data
video_name = 'S2_T20_comb'
csv_file = video_name+"_model_tags.csv"
video_buffer =  '00:00:00'
buffer = string_to_sec(video_buffer)

In [31]:
class L2Concat(Layer):
    def __init__(self,**kwargs):
        super().__init__()    

    def call(self,image_1_embedding,image_2_embedding):
        return  tf.concat([image_1_embedding,image_2_embedding],-1) # the concatenating layer of the custom model 

In [32]:
model =tf.keras.models.load_model('siamesemodel_multi.h5',compile=False,custom_objects={'L2Concat':L2Concat,'SparseCategoricalCrossentropy':tf.losses.SparseCategoricalCrossentropy}) # loading the model 

In [33]:
input_border_length =299 
# folder path


fields = ['timestamp_A','timestamp_B','action_code'] # make required columns in the csv file
model_tags_csv = os.path.join(file_dir,csv_file)   # file name of the csv

with open(model_tags_csv, 'w') as csvfile: 
    
    csvwriter = csv.writer(csvfile)# creating a csv writer object 
    
    csvwriter.writerow(fields)  

In [34]:
def compare_frame(frameA, frameB):
  grayA = cv2.cvtColor(frameA, cv2.COLOR_BGR2GRAY) # convert to grayscale
  grayB = cv2.cvtColor(frameB, cv2.COLOR_BGR2GRAY) # convert to grayscale
   
  score, diff = structural_similarity(grayA, grayB, full=True) # find SSIM of the image pairs 
  diff = (diff * 255).astype("uint8")
  
  print("SSIM: {}".format(score)) # prints similarity score 

  thresh = cv2.threshold(diff, 180, 255, cv2.THRESH_BINARY_INV)[1]    # if pixel values is greater than threshold (180) mapped to 0, else to 255 (THRESH_BINARY_INV)
  cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) # find the binary contours
  cnts=cnts[0] # returns the list of countour boundaries 

  return diff, thresh, cnts, score

def convert_box(cnts):
  box = []
  for c in cnts:
    x, y, w, h = cv2.boundingRect(c) # put a bounding box in contour areas
    if w > 2.5 and h > 8: 
      box.append([x, y, x+w, y+h]) # checks if change has happended in area greater than 16px * 5px size  ,and appends them to the array of box coordinates 
  box = np.array(box) #  making the array of boxes to a numpy array 
  # print(box)
  return box

def non_max_suppression(boxes, overlapThresh):
  if len(boxes) == 0:
    return []
  if boxes.dtype.kind == "i":  
    boxes = boxes.astype("float")  # checks the data type of the boxed array that is passed and if an integer makes it to float

  pick = []
  x1 = boxes[:,0]
  y1 = boxes[:,1]
  x2 = boxes[:,2]
  y2 = boxes[:,3]
  area = (x2 - x1 + 1) * (y2 - y1 + 1) # computes area of all boxes  using the  coordinates available 
  idxs = np.argsort(y2) # returns the indexes in ascending sorted order (it according to top to bottom depending on y axis) 

  while len(idxs) > 0:
   last = len(idxs) - 1
   i = idxs[last]
   pick.append(i)
   xx1 = np.maximum(x1[i], x1[idxs[:last]])
   yy1 = np.maximum(y1[i], y1[idxs[:last]])
   xx2 = np.minimum(x2[i], x2[idxs[:last]])
   yy2 = np.minimum(y2[i], y2[idxs[:last]])
   w = np.maximum(0, xx2 - xx1 + 1)
   h = np.maximum(0, yy2 - yy1 + 1)
   overlap = (w * h) / area[idxs[:last]] 
   idxs = np.delete(idxs, np.concatenate(([last], np.where(overlap > overlapThresh)[0]))) # removes the overlapping boxes  and has boxes with less than 0.3 threshold factor intersection

  return boxes[pick].astype("int")
  


def find_max(boxes_nms):
  if len(boxes_nms) == 0:
    return []
  boxes = []
  for box_nms in boxes_nms:
    box_nms = np.append(box_nms, (box_nms[2]-box_nms[0])*(box_nms[3]-box_nms[1]))
    boxes.append(box_nms) 
  boxes = np.array(boxes) # gets the area of the boxes and is appended as andditional column
  idx = np.argsort(boxes[:,4]) # sorts based on area 
  x_center = boxes[idx[-1]][0] + (boxes[idx[-1]][2] - boxes[idx[-1]][0]) / 2  
  y_center = boxes[idx[-1]][1] + (boxes[idx[-1]][3] - boxes[idx[-1]][1]) / 2
  box_max = np.append(boxes[idx[-1]], [x_center, y_center]) # stores coordinates, cneter points of x,y and area of the largest
  box_max = np.array(box_max, dtype = np.int32)
  return box_max

def find_max_region(boxes_nms):
  if len(boxes_nms) == 0:
    return []
  x1 = boxes_nms[:,0]
  y1 = boxes_nms[:,1]
  x2 = boxes_nms[:,2]
  y2 = boxes_nms[:,3]  
  xx1 = min(x1)
  yy1 = min(y1)
  xx2 = max(x2)
  yy2 = max(y2)
  max_region = np.array([xx1, yy1, xx2, yy2]) # computes the coordinates of  maximum sized box that can include the largest share of changes
  return max_region

def to_canvas(region):
  canvas = np.zeros((input_border_length, input_border_length, 3), np.uint8) # defines a canvas of 299x 299 
  print(region.shape)
  if region.shape[0] > region.shape[1]:
    if region.shape[1] % 2:
      canvas[:, int(input_border_length / 2 - (region.shape[1] / 2)):int(input_border_length / 2 + (region.shape[1] / 2) + 1)-1] = region
    else:
      canvas[:, int(input_border_length / 2 - (region.shape[1] / 2)):int(input_border_length / 2 + (region.shape[1] / 2))] = region
  else:
    if region.shape[0] % 2:
      canvas[int(input_border_length/ 2 - (region.shape[0] / 2)):int(input_border_length / 2 + (region.shape[0] / 2) + 1)-1, :] = region
    else:
      canvas[int(input_border_length / 2 - (region.shape[0] / 2)):int(input_border_length / 2 + (region.shape[0] / 2)), :] = region
  return canvas

In [35]:
# function to the predict the label of image pairs 
def tag(img_A,img_B):
 probs = model.predict(list(np.expand_dims([img_A,img_B],axis=1)))
 return np.argmax(probs) # gets the label (index is used) 

In [36]:
def change_region(frame_no_A,frame_no_B):
 
 cap = cv2.VideoCapture(video_path) #reads video from the path 
 w_frame, h_frame = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) # computes the dimension of the video frame
 h_min=round(0.3*h_frame) 
 h_max=round(0.6*h_frame) # cropping the area of interest
 w_min=round(0.5*w_frame)
 w_max=round(w_frame)
 w=w_max-w_min
 h=h_max-h_min

 cap.set(1,frame_no_A) # sets the video in the required frame number 
 ret,frame1 = cap.read() 
 frameA = frame1[h_min:h_max, w_min:w_max] # reads  the area of interest 

 cap.set(1,frame_no_B) # sets the video in the required frame number 
 ret,frame2 = cap.read() 
 frameB = frame2[h_min:h_max, w_min:w_max] # reads the area of interest 
 
 diff, thresh, cnts, score= compare_frame(frameA, frameB) # gets the image differences,threshold value
 mask_region = np.zeros((frameB.shape[0], frameB.shape[1]), np.uint8) #  produces a black image of the dimensions of the frame
 boxes = convert_box(cnts) 
 boxes_nms = non_max_suppression(boxes, 0.3) # removes overlaps 
 max_box = find_max(boxes_nms) # optional code
 max_region_box = find_max_region(boxes_nms) 
 for box in boxes_nms:
    cv2.rectangle(mask_region, (box[0], box[1]), (box[2], box[3]), (255, 255, 255), -1) # draws rectangles on the non intersecting boxes

 frameC = frameB.copy()
 frameD = frameB.copy()
 label =-1

 if len(boxes_nms):
    cv2.rectangle(frameC, (max_region_box[0], max_region_box[1]), (max_region_box[2], max_region_box[3]), (0, 255, 0), 2) # draws a rectangle
    max_region_A = frameA[max_region_box[1]:max_region_box[3], max_region_box[0]:max_region_box[2]].copy() # applies the rectangle box mask on frameA
    mask_region = mask_region[max_region_box[1]:max_region_box[3], max_region_box[0]:max_region_box[2]].copy()
    max_region_D = frameD[max_region_box[1]:max_region_box[3], max_region_box[0]:max_region_box[2]].copy() # applies rectangle box mask on frameB
    region_A = cv2.bitwise_and(max_region_A, max_region_A, mask = mask_region)
    region_D = cv2.bitwise_and(max_region_D, max_region_D, mask = mask_region) # compute the bitwise_and to get the image cutout
    f_rate =  input_border_length* 1.0 / max(region_A.shape[0], region_A.shape[1]) # scaling the image propotionally 
    region_A = cv2.resize(region_A, (0,0), fx = f_rate, fy = f_rate) 
    region_D = cv2.resize(region_D, (0,0), fx = f_rate, fy = f_rate) 
    img_A = to_canvas(region_A)
    img_D = to_canvas(region_D)

    ## Model predictions 
    label = tag(img_A,img_D)
    ## figuring out the index label 
    ## classification based on label 
    
    ## to csv 
 else:
    
    label = 0


 return label  

In [37]:
# code to convert milliseconds to seconds
def mil_convert(milliseconds):
  seconds, milliseconds = divmod(milliseconds, 1000)
  minutes, seconds = divmod(seconds, 60)
  hours ,minutes =divmod(minutes,60)
  
  hours_str   =  str(round(hours))
  minutes_str = str(round(minutes))
  seconds_str = str(round(seconds))

  if(round(hours)<10):
    hours_str = "0" + hours_str
  
  if(round(minutes)<10):
    minutes_str = "0" + minutes_str

  if(round(seconds)<10):
    seconds_str = "0" + seconds_str  

  time = hours_str +":"+ minutes_str +":"+seconds_str
  return time

In [38]:
#function to convert string time to milliseconds
def string_to_millis(start_time):
   hours = start_time[0:2]
   minutes = start_time[3:5]
   seconds = start_time[6:8]
   
   if(hours[0]=='0'):
      hours = hours[1]

   if(minutes[0]=='0'):
      minutes = minutes[1]

   if(seconds[0]=='0'):
      seconds =seconds[1]
 
   hours = int(hours)
   minutes =int(minutes)
   seconds =int(seconds)

   millis = (hours*3600 + minutes*60 + seconds)*1000

   return millis 

In [39]:
for i in range(buffer):
    timestamp_A = mil_convert((i*1000))
    timestamp_B = mil_convert((i+1)*1000)
    action_code = 0
    data = { 
             'timestamp_A' : [timestamp_A],  
             'timestamp_B' : [timestamp_B],
             'action_code': [action_code],
            }    
    df = pd.DataFrame(data)
    df.to_csv(model_tags_csv,mode ='a' , index=False, header=False)

In [40]:
cap = cv2.VideoCapture(video_path) # reading testing video
frame_count= int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) # frame counts
fps    = cap.get(cv2.CAP_PROP_FPS) # fps
print(frame_count)
print(int(fps))

171570
29


In [41]:
# loop to analyse frame data and write actions to 
i=0
while(int(fps)*(i+1)<=int(frame_count)):
 
 action_code = change_region(int(fps)*(i),int(fps)*(i+1)) # passes a frame pair ,one from the beginning of a 1 second interval and other from the end of a the interval
 timestamp_A =  mil_convert((i)*1000+buffer*1000)
 timestamp_B =  mil_convert((i+1)*1000+buffer*1000)
 data = { 
          'timestamp_A' : [timestamp_A],  
          'timestamp_B' : [timestamp_B],
          'action_code': [action_code],
          } 
 df = pd.DataFrame(data)
 df.to_csv(model_tags_csv,mode ='a' , index=False, header=False)  # write the data into the csv
 
 i+=1

SSIM: 0.9999741709918432
SSIM: 0.9999575098007176
SSIM: 0.9999575098007176
SSIM: 0.9999557004044791
SSIM: 0.9999227682578099
SSIM: 0.9999094839450775
SSIM: 0.9999638216883103
SSIM: 0.999017585231298
(299, 10, 3)
(299, 10, 3)
SSIM: 0.999994130049221
SSIM: 0.9999859632474644
SSIM: 0.9999825335568377
SSIM: 0.9997036200041032
(299, 50, 3)
(299, 50, 3)
SSIM: 0.9992266931599506
(299, 9, 3)
(299, 9, 3)
SSIM: 0.998434643117301
(299, 11, 3)
(299, 11, 3)
SSIM: 0.9999050564082057
SSIM: 1.0
SSIM: 0.9999949878543907
SSIM: 1.0
SSIM: 0.9996487493576498
(299, 9, 3)
(299, 9, 3)
SSIM: 0.9997937502997154
(299, 166, 3)
(299, 166, 3)
SSIM: 0.9996409065517599
(299, 7, 3)
(299, 7, 3)
SSIM: 0.9999890355327069
SSIM: 0.9999987243335476
SSIM: 1.0
SSIM: 0.9999989172739481
SSIM: 0.999957545333234
SSIM: 0.9999161749215555
SSIM: 1.0
SSIM: 0.9999674368960337
SSIM: 1.0
SSIM: 0.9999799894716328
SSIM: 0.9999799893597136
SSIM: 1.0
SSIM: 0.9996660113060054
(299, 8, 3)
(299, 8, 3)
SSIM: 0.998510659295208
(299, 10, 3)
(299,

In [None]:
df_1 = pd.read_csv('./vosaic_model_verification/S2_T20_Dinesh/S2_T20_Dinesh_vosaic_sketch.csv')  
df_2 = pd.read_csv('./vosaic_model_verification/S2_T20_Dinesh/S2_T20_Dinesh_model_tags.csv')

In [None]:
arr_1 = df_1.to_numpy()
arr_2 = df_2.to_numpy()

In [None]:
TP = 0 
FP = 0
TN = 0
FN = 0

In [None]:
for i in range(df_1.shape[0]):
    if(arr_1[i][2] == 0 and arr_2[i][2] == 0):  # calculating the classification parameters
        TN+=1
    elif(arr_1[i][2] == 0 and arr_2[i][2] == 1):
        FP+=1
    elif(arr_1[i][2] == 1 and arr_2[i][2] == 0):
        FN+=1 
    elif(arr_1[i][2] == 1 and arr_2[i][2] == 1):
        TP+=1


In [None]:
print(TP)
print(FP)
print(TN)
print(FN)

1486
889
2119
1230


In [None]:
precision =  TP/(TP+FP)
recall =  TP/(TP+FN)
accuracy =  (TP+TN)/(TP+TN+FP+FN) 
f1_score =  (precision*recall)/(precision+recall)

In [None]:
fields = ['precision','recall','accuracy','f1_score']
video_report_file = video_name+ "_report.csv" 
video_report_csv = os.path.join(file_dir,video_report_file)


with open(video_report_csv, 'w') as csvfile: 
    
    csvwriter = csv.writer(csvfile)# creating a csv writer object 
    
    csvwriter.writerow(fields)  

In [None]:
data = { 
          'precision' : [precision],  
           'recall' : [recall],
          'accuracy': [accuracy],
          'f1_score': [f1_score],
          } 
df = pd.DataFrame(data)
df.to_csv(video_report_csv,mode ='a' , index=False, header=False) 