In [1]:
# importing all the necessary libraries
import tensorflow as tf
import keras
from keras.applications.inception_resnet_v2 import InceptionResNetV2
import keras.utils as image
from tensorflow.keras.applications.inception_resnet_v2 import preprocess_input, decode_predictions
import numpy as np
from keras.models import Model,Sequential
from tensorflow.keras.layers import *
from tensorflow.keras import optimizers
import pandas as pd 
import cv2
import os 
import matplotlib.pyplot as plt
from tensorflow.keras.metrics import Precision,Recall
from skimage.metrics import structural_similarity
import csv

Data Pipeline

In [2]:
physical_devices =tf.config.list_physical_devices ('GPU') # setting computng device to GPU
tf.config.experimental.set_memory_growth(physical_devices[0],True) # setting the memory growth of GPU consumption  so that all memory is not allotted and is finished

In [60]:
csv_file_path = './data_2/train.csv'  # location of the csv file having the image - pair labels 
directory = './data_2/' # location of the directory having the image pair data
batch_size= 4 # batch size = number of pairs that will be send into the network at a time during training and validation

In [61]:
df = pd.read_csv(csv_file_path) # reads the csv file to pandas dataframe 
image1 = df['frame_A'].values # gets all frame 1 values
image2 = df['frame_B'].values # gets all frame 2 values
labels = df['label'].values # gets all label values

In [62]:
data = tf.data.Dataset.from_tensor_slices((image1,image2,labels))  #  sorts the data (image pairs names )with labels as tuples 

In [63]:
data # test the format of the read data 

<TensorSliceDataset element_spec=(TensorSpec(shape=(), dtype=tf.string, name=None), TensorSpec(shape=(), dtype=tf.string, name=None), TensorSpec(shape=(), dtype=tf.int64, name=None))>

In [64]:
# function to read  the image from the path 

def read_image(image_path):

   img = tf.io.read_file(image_path) # returns a tensor with the entire contents of the input filename
   img = tf.image.decode_image(img,channels=3,dtype=tf.float32) # performs the appropriate operation to convert the input bytes string into a Tensor of type dtype.
   
   return img

In [65]:
# passes the two image paths and gets the tensor form of pairs and the label as data
def read_pair(image1_path,image2_path,label):
   
   return (read_image(directory+image1_path),read_image(directory+image2_path),label)

In [9]:
# code to test the and view the data

test_pairs,test_label = read_pair(image_pairs[0],labels[0])
img_test =test_pairs[1]
a = img_test.numpy()
print(a.max())
plt.imshow(a)

In [66]:
data = data.map(read_pair) # maps the each of the image pairs to the labels
data = data.cache() # The first time the dataset is iterated over, its elements will be cached either in the specified file or in memory. Subsequent iterations will use the cached data.
data = data.shuffle(buffer_size=1024) # Randomly shuffles the elements of this dataset.

In [11]:
# view the data y accessing in array format

# samples = data.as_numpy_iterator()
# example = samples.next()
# print(len(example[0][0][0][0]))
# example

In [12]:
# plt.imshow(example[0])

In [67]:
# train test partition
train_data = data.take(round(len(data)*.7))  # Creates a Dataset with at most count elements from this dataset.
train_data = train_data.batch(4) # Combines consecutive elements of this dataset into batches.
train_data = train_data.prefetch(2) # allows later elements to be prepared while the current element is being processed

In [68]:
train_data

<PrefetchDataset element_spec=(TensorSpec(shape=<unknown>, dtype=tf.float32, name=None), TensorSpec(shape=<unknown>, dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))>

In [69]:
# view the data labes

# train_samples = train_data.as_numpy_iterator()
# train_sample =train_samples.next()
# train_sample[2]

array([0, 3, 1, 0], dtype=int64)

In [70]:
test_data = data.skip(round(len(data)*.7)) # Creates a Dataset that skips count elements from this dataset.
test_data=test_data.take(round(len(data)*.3))   # Creates a Dataset with at most count elements from this dataset.
test_data=test_data.batch(4) # Combines consecutive elements of this dataset into batches.
test_data=test_data.prefetch(2) # allows later elements to be prepared while the current element is being processed

In [71]:
test_data

<PrefetchDataset element_spec=(TensorSpec(shape=<unknown>, dtype=tf.float32, name=None), TensorSpec(shape=<unknown>, dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))>

In [72]:
# test_samples = test_data.as_numpy_iterator()
# test_sample =test_samples.next()
# test_sample[2]

Model Engineering

In [2]:
# function 

def make_embedding():
   inp = Input(shape = (299,299,3),name = 'input_image') # passing in the input 
   base_model = InceptionResNetV2(weights='imagenet', include_top=False,input_tensor=inp) # imports the InceptionResnetV2 model that is trained on the image net data without classifying
   l_1 = GlobalAveragePooling2D()(base_model.output) # the output of the InceptionResnetV2 model is flattened 
   return Model(inputs=[inp],outputs=[l_1],name='feature_extractor_cnn') # the model is returned 

In [4]:
embedding = make_embedding() # call the till embedding layer model that extracts the features

embedding.summary()
print(len(embedding.layers))

Model: "feature_extractor_cnn"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_image (InputLayer)       [(None, 299, 299, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d_20 (Conv2D)             (None, 149, 149, 32  864         ['input_image[0][0]']            
                                )                                                                 
                                                                                                  
 batch_normalization_20 (BatchN  (None, 149, 149, 32  96         ['conv2d_20[0][0]']              
 ormalization)                  )                                             

In [6]:
# making the concat layer which is build on top of the inception model 
class L2Concat(Layer):
    def __init__(self,**kwargs):
        super().__init__()

    def call(self,image_1_embedding,image_2_embedding):
        return  tf.concat([image_1_embedding,image_2_embedding],-1) # concatenates the features of the image pairs as the last layer 

In [7]:
def make_siamese_model():
    
    input_image_1 = Input(name ='input_img_1',shape = (299,299,3)) # gets feature of the first image in the image pair
    input_image_2 = Input(name ='input_img_2',shape = (299,299,3)) # gets feature of the second image in the image pair

    siamese_layer = L2Concat()  # calls the layer component that concatenates
    siamese_layer._name = 'total_features'  # naming the layer feature
    
    features = siamese_layer(embedding(input_image_1),embedding(input_image_2)) # extract the features of both images in a pair and is flattened

    # classifier = Dense(1,activation='sigmoid')(features)

    classifier = Dense(2,activation='softmax')(features) # Applies the softmax functions on the final layer of the network

    return Model(inputs = [input_image_1,input_image_2],outputs=classifier,name = 'SiameseNetwork') # the model is returned

In [8]:
siamese_model = make_siamese_model() # calling the complete model 

siamese_model.summary()

Model: "SiameseNetwork"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_img_1 (InputLayer)       [(None, 299, 299, 3  0           []                               
                                )]                                                                
                                                                                                  
 input_img_2 (InputLayer)       [(None, 299, 299, 3  0           []                               
                                )]                                                                
                                                                                                  
 feature_extractor_cnn (Functio  (None, 1536)        54336736    ['input_img_1[0][0]',            
 nal)                                                             'input_img_2[0][0]'

Training

In [78]:
# loss function 
categorical_cross_loss = tf.losses.SparseCategoricalCrossentropy() #  using sparse categorical cross entropy to compute the errors

# optimizer 
opt = tf.keras.optimizers.Adam(3e-4) # optimizer function reduces the losses that is generated and optmize the network parameters

In [79]:
# establishing  checkpoints 
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir,'ckpt')
checkpoint = tf.train.Checkpoint(opt =opt,siamese_model = siamese_model)

In [26]:
# test_batch=train_data.as_numpy_iterator()
# batch_1=test_batch.next()
# len(batch_1)

In [27]:
# len(batch_1[0])
# X=batch_1[:2]

In [28]:
# np.array(X).shape

In [29]:
# y=batch_1[2]
# y

In [80]:
#Build Train Step function
@tf.function
def train_step(batch):

    with tf.GradientTape() as tape: 

        X = batch[:2] # passes the first two images in a batch of 4  

        y = batch[2]  # passes the label  


        yhat = siamese_model(X,training =True)

        loss = categorical_cross_loss(y,yhat)

    print(loss)

    grad = tape.gradient(loss,siamese_model.trainable_variables) # Record operations for automatic differentiation.

    opt.apply_gradients(zip(grad,siamese_model.trainable_variables))   # Apply gradients to variables.

    return loss # returns the loss 

In [81]:
# build training loop 

def train(data,EPOCHS):

    for epoch in range(1,EPOCHS+1):
        print('\n Epoch {}/{}'.format(epoch,EPOCHS))
        progbar = tf.keras.utils.Progbar(len(data))  # shows the progress bar 

        for idx,batch in enumerate(data):

            train_step(batch)
            progbar.update(idx+1) 

        #checkpoints 
        if epoch%10 == 0:
            checkpoint.save(file_prefix = checkpoint_prefix)  

In [82]:
EPOCHS = 8

train(train_data,EPOCHS) # calling the training function


 Epoch 1/8
Tensor("sparse_categorical_crossentropy/weighted_loss/value:0", shape=(), dtype=float32)
Tensor("sparse_categorical_crossentropy/weighted_loss/value:0", shape=(), dtype=float32)

 Epoch 2/8

 Epoch 3/8

 Epoch 4/8

 Epoch 5/8

 Epoch 6/8

 Epoch 7/8

 Epoch 8/8


Evaluation 

In [33]:
# test_img1,test_img2,y_true = test_data.as_numpy_iterator().next() 

In [34]:
# y_true

In [35]:
# y_hat = siamese_model.predict([test_img1,test_img2])
# y_hat

In [36]:
# y_hat_round = []

# for i in range (0,len(y_hat)):
#     y_index = 0
#     y_element= y_hat[i][0]
#     for j in range(0,len(y_hat[0])):
#        if(y_hat[i][j]>y_element):
#          y_index = j 
#     y_hat_round.append(y_index)

# y_hat_round
        

In [37]:
# r = Recall()

# r.update_state(y_true,y_hat_round)

# recall_batch = r.result().numpy()


In [38]:
# p= Precision()

# p.update_state(y_true,y_hat)

# precision_batch = p.result().numpy()



In [83]:
# setting arrays for precsion ,recall for validation 

precision_values = []  
recall_values = []
y_true_total = []
y_hat_total = []

In [84]:
for i in range(0,79):
    
  test_img1,test_img2,y_true = test_data.as_numpy_iterator().next()  # iterator that iterates for numpy arrays having our data
    
  y_hat = siamese_model.predict([test_img1,test_img2])  # using the built model to predict the inputs 

  y_hat_round = [] 

  for i in range (0,len(y_hat)):
    y_index = np.argmax(y_hat[i]) # finding the category of the classification
    y_hat_round.append(y_index)   

     
  print(y_true)
  print(y_hat_round)


  y_true_total.extend(y_true)
  y_hat_total.extend(y_hat_round)



  r = Recall()
  r.update_state(y_true,y_hat_round)
  recall_batch = r.result().numpy() # computes recall of batch

  p = Precision()
  p.update_state(y_true,y_hat_round)   # computes precision of batch
  precision_batch = p.result().numpy()
    
  precision_values.append(precision_batch)
  recall_values.append(recall_batch)



[3 1 2 2]
[3, 1, 2, 2]
[0 3 0 3]
[0, 3, 0, 3]
[0 3 1 3]
[0, 3, 1, 3]
[1 2 0 0]
[1, 2, 0, 0]
[0 2 1 3]
[0, 2, 1, 3]
[1 3 3 2]
[1, 3, 3, 2]
[3 1 1 1]
[3, 1, 1, 1]
[1 0 1 3]
[1, 0, 1, 3]
[1 0 1 0]
[1, 0, 1, 0]
[0 1 0 3]
[0, 1, 0, 3]
[3 1 0 1]
[3, 1, 0, 1]
[3 0 2 3]
[3, 0, 2, 3]
[2 0 2 1]
[2, 0, 2, 1]
[0 0 2 0]
[0, 0, 2, 0]
[2 1 2 0]
[2, 1, 2, 0]
[2 2 2 0]
[2, 2, 2, 0]
[2 1 1 1]
[2, 1, 1, 1]
[1 3 0 0]
[1, 3, 0, 0]
[1 0 1 0]
[1, 0, 1, 0]
[2 1 3 0]
[2, 1, 0, 0]
[3 0 2 1]
[3, 0, 2, 1]
[3 2 3 2]
[3, 2, 3, 2]
[0 2 1 0]
[0, 2, 1, 0]
[1 3 0 3]
[1, 3, 0, 3]
[3 3 0 1]
[3, 3, 0, 1]
[3 3 0 1]
[3, 3, 0, 1]
[3 0 1 2]
[3, 0, 1, 2]
[1 1 0 3]
[1, 1, 0, 3]
[3 1 3 1]
[3, 1, 3, 1]
[1 0 2 0]
[1, 0, 2, 0]
[0 3 2 1]
[0, 3, 2, 0]
[2 0 2 1]
[2, 0, 2, 1]
[2 3 1 0]
[2, 3, 1, 1]
[0 3 0 3]
[0, 3, 0, 3]
[1 1 2 1]
[1, 1, 2, 1]
[0 1 2 1]
[0, 1, 2, 1]
[0 0 3 2]
[0, 0, 3, 2]
[3 1 3 0]
[3, 1, 3, 0]
[2 2 3 2]
[2, 2, 3, 2]
[3 0 1 0]
[3, 0, 1, 0]
[1 3 0 0]
[1, 3, 0, 0]
[2 3 0 1]
[2, 3, 0, 1]
[0 2 2 2]
[0, 2, 2, 2]
[0 1 3 2]
[

In [85]:
print(len(recall_values))
print(len(precision_values))
print(len(y_true_total))
print(len(y_hat_total))

79
79
316
316


In [86]:
# function to calculate average values 
def calc_avg(lst):
    return sum(lst) / len(lst) 

In [87]:
# function to calculate validation accuracy 

def calc_accuracy(true_values,predicted_values):
    res=0
    for i in range(0,len(true_values)):
        if(true_values[i]==predicted_values[i]):
            res+=1

    return ((res/len(true_values))*100)

In [88]:
recall_avg = calc_avg(recall_values)
precision_avg = calc_avg(precision_values)
accuracy = calc_accuracy(y_true_total,y_hat_total)

In [89]:
print('Precision = {}'.format(precision_avg))

Precision = 0.9968354430379747


In [90]:
print('Recall = {}'.format(recall_avg))

Recall = 0.9915611819375919


In [91]:
print('Accuracy = {}%'.format(accuracy))

Accuracy = 99.0506329113924%


In [92]:
# siamese_model.save('siamesemodel_multi.h5') # saving the model 



In [4]:
model =tf.keras.models.load_model('siamesemodel_multi.h5',compile=False,custom_objects={'L2Concat':L2Concat,'SparseCategoricalCrossentropy':tf.losses.SparseCategoricalCrossentropy}) # loading the model 

In [4]:
input_border_length =299 
# folder path
dir_path = './whiteboard_clips/S2_T10_Ambady/'
vosaic_buffer = "00:00:00"
res = []
# Iterate directory
for file in os.listdir(dir_path):
    # check only mp4 files
    if file.endswith('.mp4'):
        res.append(file)
print(res)

fields = ['timestamp_A','timestamp_B','action_code'] # make required columns in the csv file
filename = dir_path+"/{}.csv".format(str(res[clip_id-1][0:-4]))   # file name of the csv

with open(filename, 'w') as csvfile: 
    
    csvwriter = csv.writer(csvfile)# creating a csv writer object 
    
    csvwriter.writerow(fields)  

video_path = dir_path + res[clip_id-1]

['S2_T10_Ambady_wb_clip_1.mp4', 'S2_T10_Ambady_wb_clip_2.mp4', 'S2_T10_Ambady_wb_clip_3.mp4', 'S2_T10_Ambady_wb_clip_4.mp4', 'S2_T10_Ambady_wb_clip_5.mp4', 'S2_T10_Ambady_wb_clip_6.mp4', 'S2_T10_Ambady_wb_clip_7.mp4', 'S2_T10_Ambady_wb_clip_8.mp4']


In [5]:
def compare_frame(frameA, frameB):
  grayA = cv2.cvtColor(frameA, cv2.COLOR_BGR2GRAY) # convert to grayscale
  grayB = cv2.cvtColor(frameB, cv2.COLOR_BGR2GRAY) # convert to grayscale
   
  score, diff = structural_similarity(grayA, grayB, full=True) # find SSIM of the image pairs 
  diff = (diff * 255).astype("uint8")
  
  print("SSIM: {}".format(score)) # prints similarity score 

  thresh = cv2.threshold(diff, 180, 255, cv2.THRESH_BINARY_INV)[1]    # if pixel values is greater than threshold (180) mapped to 0, else to 255 (THRESH_BINARY_INV)
  cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) # find the binary contours
  cnts=cnts[0] # returns the list of countour boundaries 

  return diff, thresh, cnts, score

def convert_box(cnts):
  box = []
  for c in cnts:
    x, y, w, h = cv2.boundingRect(c) # put a bounding box in contour areas
    if w > 5 and h > 16: 
      box.append([x, y, x+w, y+h]) # checks if change has happended in area greater than 16px * 5px size  ,and appends them to the array of box coordinates 
  box = np.array(box) #  making the array of boxes to a numpy array 
  # print(box)
  return box

def non_max_suppression(boxes, overlapThresh):
  if len(boxes) == 0:
    return []
  if boxes.dtype.kind == "i":  
    boxes = boxes.astype("float")  # checks the data type of the boxed array that is passed and if an integer makes it to float

  pick = []
  x1 = boxes[:,0]
  y1 = boxes[:,1]
  x2 = boxes[:,2]
  y2 = boxes[:,3]
  area = (x2 - x1 + 1) * (y2 - y1 + 1) # computes area of all boxes  using the  coordinates available 
  idxs = np.argsort(y2) # returns the indexes in ascending sorted order (it according to top to bottom depending on y axis) 

  while len(idxs) > 0:
   last = len(idxs) - 1
   i = idxs[last]
   pick.append(i)
   xx1 = np.maximum(x1[i], x1[idxs[:last]])
   yy1 = np.maximum(y1[i], y1[idxs[:last]])
   xx2 = np.minimum(x2[i], x2[idxs[:last]])
   yy2 = np.minimum(y2[i], y2[idxs[:last]])
   w = np.maximum(0, xx2 - xx1 + 1)
   h = np.maximum(0, yy2 - yy1 + 1)
   overlap = (w * h) / area[idxs[:last]] 
   idxs = np.delete(idxs, np.concatenate(([last], np.where(overlap > overlapThresh)[0]))) # removes the overlapping boxes  and has boxes with less than 0.3 threshold factor intersection

  return boxes[pick].astype("int")
  


def find_max(boxes_nms):
  if len(boxes_nms) == 0:
    return []
  boxes = []
  for box_nms in boxes_nms:
    box_nms = np.append(box_nms, (box_nms[2]-box_nms[0])*(box_nms[3]-box_nms[1]))
    boxes.append(box_nms) 
  boxes = np.array(boxes) # gets the area of the boxes and is appended as andditional column
  idx = np.argsort(boxes[:,4]) # sorts based on area 
  x_center = boxes[idx[-1]][0] + (boxes[idx[-1]][2] - boxes[idx[-1]][0]) / 2  
  y_center = boxes[idx[-1]][1] + (boxes[idx[-1]][3] - boxes[idx[-1]][1]) / 2
  box_max = np.append(boxes[idx[-1]], [x_center, y_center]) # stores coordinates, cneter points of x,y and area of the largest
  box_max = np.array(box_max, dtype = np.int32)
  return box_max

def find_max_region(boxes_nms):
  if len(boxes_nms) == 0:
    return []
  x1 = boxes_nms[:,0]
  y1 = boxes_nms[:,1]
  x2 = boxes_nms[:,2]
  y2 = boxes_nms[:,3]  
  xx1 = min(x1)
  yy1 = min(y1)
  xx2 = max(x2)
  yy2 = max(y2)
  max_region = np.array([xx1, yy1, xx2, yy2]) # computes the coordinates of  maximum sized box that can include the largest share of changes
  return max_region

def to_canvas(region):
  canvas = np.zeros((input_border_length, input_border_length, 3), np.uint8) # defines a canvas of 299x 299 
  print(region.shape)
  if region.shape[0] > region.shape[1]:
    if region.shape[1] % 2:
      canvas[:, int(input_border_length / 2 - (region.shape[1] / 2)):int(input_border_length / 2 + (region.shape[1] / 2) + 1)-1] = region
    else:
      canvas[:, int(input_border_length / 2 - (region.shape[1] / 2)):int(input_border_length / 2 + (region.shape[1] / 2))] = region
  else:
    if region.shape[0] % 2:
      canvas[int(input_border_length/ 2 - (region.shape[0] / 2)):int(input_border_length / 2 + (region.shape[0] / 2) + 1)-1, :] = region
    else:
      canvas[int(input_border_length / 2 - (region.shape[0] / 2)):int(input_border_length / 2 + (region.shape[0] / 2)), :] = region
  return canvas

In [6]:
# function to the predict the label of image pairs 
def tag(img_A,img_B):
 probs = model.predict(list(np.expand_dims([img_A,img_B],axis=1)))
 return np.argmax(probs) # gets the label (index is used) 

In [7]:
def change_region(frame_no_A,frame_no_B):
 
 cap = cv2.VideoCapture(video_path) #reads video from the path 
 w_frame, h_frame = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) # computes the dimension of the video frame
 h_min=round(0.11*h_frame) 
 h_max=round(0.79*h_frame) # cropping the area of interest
 w_min=round(0)
 w_max=round(w_frame)
 w=w_max-w_min
 h=h_max-h_min

 cap.set(1,frame_no_A) # sets the video in the required frame number 
 ret,frame1 = cap.read() 
 frameA = frame1[h_min:h_max, w_min:w_max] # reads  the area of interest 

 cap.set(1,frame_no_B) # sets the video in the required frame number 
 ret,frame2 = cap.read() 
 frameB = frame2[h_min:h_max, w_min:w_max] # reads the area of interest 
 
 diff, thresh, cnts, score= compare_frame(frameA, frameB) # gets the image differences,threshold value
 mask_region = np.zeros((frameB.shape[0], frameB.shape[1]), np.uint8) #  produces a black image of the dimensions of the frame
 boxes = convert_box(cnts) 
 boxes_nms = non_max_suppression(boxes, 0.3) # removes overlaps 
 max_box = find_max(boxes_nms) # optional code
 max_region_box = find_max_region(boxes_nms) 
 for box in boxes_nms:
    cv2.rectangle(mask_region, (box[0], box[1]), (box[2], box[3]), (255, 255, 255), -1) # draws rectangles on the non intersecting boxes

 frameC = frameB.copy()
 frameD = frameB.copy()
 label =-1

 if len(boxes_nms):
    cv2.rectangle(frameC, (max_region_box[0], max_region_box[1]), (max_region_box[2], max_region_box[3]), (0, 255, 0), 2) # draws a rectangle
    max_region_A = frameA[max_region_box[1]:max_region_box[3], max_region_box[0]:max_region_box[2]].copy() # applies the rectangle box mask on frameA
    mask_region = mask_region[max_region_box[1]:max_region_box[3], max_region_box[0]:max_region_box[2]].copy()
    max_region_D = frameD[max_region_box[1]:max_region_box[3], max_region_box[0]:max_region_box[2]].copy() # applies rectangle box mask on frameB
    region_A = cv2.bitwise_and(max_region_A, max_region_A, mask = mask_region)
    region_D = cv2.bitwise_and(max_region_D, max_region_D, mask = mask_region) # compute the bitwise_and to get the image cutout
    f_rate =  input_border_length* 1.0 / max(region_A.shape[0], region_A.shape[1]) # scaling the image propotionally 
    region_A = cv2.resize(region_A, (0,0), fx = f_rate, fy = f_rate) 
    region_D = cv2.resize(region_D, (0,0), fx = f_rate, fy = f_rate) 
    img_A = to_canvas(region_A)
    img_D = to_canvas(region_D)

    ## Model predictions 
    label = tag(img_A,img_D)
    ## figuring out the index label 
    ## classification based on label 
    
    ## to csv 
 else:
    
    label = 0


 return label 
   #  filename_1 = str(frame_no_A) +"_"+ str(frame_no_B)+ "_"+ "1"
   #  filename_2 = str(frame_no_A) +"_"+ str(frame_no_B)+ "_"+ "2"
   #  cv2.imwrite("./clip_data/%s.jpg" % filename_1,img_A)
   #  cv2.imwrite("./clip_data/%s.jpg" % filename_2,img_D)

    

In [8]:
# code to convert milliseconds to seconds

def mil_convert(milliseconds):
  seconds, milliseconds = divmod(milliseconds, 1000)
  minutes, seconds = divmod(seconds, 60)
  hours ,minutes =divmod(minutes,60)
  
  hours_str   =  str(round(hours))
  minutes_str = str(round(minutes))
  seconds_str = str(round(seconds))

  if(round(hours)<10):
    hours_str = "0" + hours_str
  
  if(round(minutes)<10):
    minutes_str = "0" + minutes_str

  if(round(seconds)<10):
    seconds_str = "0" + seconds_str  

  time = hours_str +":"+ minutes_str +":"+seconds_str
  return time

In [9]:
def string_to_millis(start_time):
   hours = start_time[0:2]
   minutes = start_time[3:5]
   seconds = start_time[6:8]
   
   if(hours[0]=='0'):
      hours = hours[1]

   if(minutes[0]=='0'):
      minutes = minutes[1]

   if(seconds[0]=='0'):
      seconds =seconds[1]
 
   hours = int(hours)
   minutes =int(minutes)
   seconds =int(seconds)

   millis = (hours*3600 + minutes*60 + seconds)*1000

   return millis 

In [10]:
df = pd.read_csv(dir_path+'/white_board_time.csv')
arr = df.to_numpy()
video_start =string_to_millis(arr[clip_id-1][0]) + string_to_millis(vosaic_buffer)
print(video_start)

4846000


In [11]:
cap = cv2.VideoCapture(video_path) # reading testing video
frame_count= int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) # frame counts
fps    = cap.get(cv2.CAP_PROP_FPS) # fps
print(frame_count)
print(int(fps))

12271
30


In [12]:
# loop to analyse frame data and write actions to 
i=0
while(int(fps)*(i+1)<=int(frame_count)):
 
 action_code = change_region(int(fps)*(i),int(fps)*(i+1)) # passes a frame pair ,one from the beginning of a 1 second interval and other from the end of a the interval
 timestamp_A =  mil_convert((i)*1000+video_start)
 timestamp_B =  mil_convert((i+1)*1000+video_start)
 data = { 
          'timestamp_A' : [timestamp_A],  
          'timestamp_B' : [timestamp_B],
          'action_code': [action_code],
          } 
 df = pd.DataFrame(data)
 df.to_csv(filename,mode ='a' , index=False, header=False)  # write the data into the csv
 
 i+=1

SSIM: 0.8774240471645784
(114, 299, 3)
(114, 299, 3)
SSIM: 0.9952582284132637
SSIM: 0.9999877898157784
SSIM: 0.998974852711349
SSIM: 0.9926667783161682
(148, 299, 3)
(148, 299, 3)
SSIM: 0.9999827185104151
SSIM: 0.9992175754138883
(119, 299, 3)
(119, 299, 3)
SSIM: 0.9929267555293061
(160, 299, 3)
(160, 299, 3)
SSIM: 0.9976445776352872
(299, 299, 3)
(299, 299, 3)
SSIM: 0.9989677019817271
(299, 108, 3)
(299, 108, 3)
SSIM: 0.9994846393071678
(299, 245, 3)
(299, 245, 3)
SSIM: 0.9977816438516736
(29, 299, 3)
(29, 299, 3)
SSIM: 0.9949400297066401
(39, 299, 3)
(39, 299, 3)
SSIM: 0.9999922116437606
SSIM: 0.9999948281816567
SSIM: 0.9999956231666765
SSIM: 0.9987182086990898
SSIM: 0.775758059526852
(114, 299, 3)
(114, 299, 3)
SSIM: 0.9441916562946435
(140, 299, 3)
(140, 299, 3)
SSIM: 0.9992217664708475
(299, 187, 3)
(299, 187, 3)
SSIM: 0.767327210344302
(114, 299, 3)
(114, 299, 3)
SSIM: 0.9889219492804409
(39, 299, 3)
(39, 299, 3)
SSIM: 0.9992740622762787
(299, 224, 3)
(299, 224, 3)
SSIM: 0.995368

TypeError: 'NoneType' object is not subscriptable

In [None]:
# value = change_region(1410,1440)

# print(value)

In [None]:
# from numba import cuda 
# device = cuda.get_current_device()
# device.reset()