In [None]:
# Mount Google Drive at /content/drive; every dataset and output path used
# later in this notebook lives under that mount point (Colab-only cell).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
from tensorflow.keras import utils
from tensorflow.keras.models import Sequential,model_from_yaml
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D, LSTM, GRU
from tensorflow.keras.layers import TimeDistributed, Conv3D,MaxPooling3D, ZeroPadding3D
from tensorflow.keras import backend as K

import numpy as np
import cv2
import math
import random
import mediapipe as mp

In [None]:
# Short aliases for the MediaPipe solution modules.
# mp_drawing is not referenced by any cell visible in this notebook.
mp_drawing = mp.solutions.drawing_utils
# mp_face_mesh provides the FaceMesh detector used by preprocessing().
mp_face_mesh = mp.solutions.face_mesh

In [None]:
def preprocessing(cap):
    """Convert one video into a (25, 40) matrix of lip-landmark distances.

    A window of 25 consecutive frames is selected, starting at a random
    offset of 0-3 frames.  For every frame in the window, MediaPipe Face Mesh
    is run and the 40 lip landmarks are scaled by the frame width/height to
    integer pixel coordinates.  The feature vector of a frame is the
    Euclidean distance of each landmark from the centroid of the 40 points.

    Args:
        cap: A ``cv2.VideoCapture``.  It is always released before this
            function returns, even if processing raises.

    Returns:
        tuple: ``(euclid_dist, flag)`` where

        * ``euclid_dist`` is a float64 ``numpy.ndarray`` of shape (25, 40).
          Rows belonging to frames in which no face was detected, or that
          could not be read, are all zeros.
        * ``flag`` is 1 when all 25 frames of the window were read from the
          video and 0 otherwise (unreadable or too-short clip), so callers
          can skip the sample.
    """
    n_frames = 25

    # MediaPipe Face Mesh indices of the landmarks that outline the lips.
    lips = [0, 13, 14, 17, 37, 39, 40, 61, 78, 80, 81, 82, 84, 87, 88, 91, 95, 146, 178, 181, 185, 191, 267, 269, 270, 291, 308, 310, 311, 312, 314, 317, 318, 321, 324, 375, 402, 405, 409, 415]

    # Zero-initialised (not np.empty) so rows that never get written hold a
    # well-defined value instead of whatever happened to be in memory.
    euclid_dist = np.zeros(shape=(n_frames, len(lips)))

    # Randomise the 25-frame window: keys 0-3 are used directly as the start
    # frame, keys 25-28 are end frames; both map to a start offset of 0-3.
    rand_list = [0,1,2,3,25,26,27,28]
    key = random.choice(rand_list)
    start = key - n_frames if key > 24 else key
    window = range(start, start + n_frames)

    count, inner_count = 0, 0
    try:
        if not cap.isOpened():
            return euclid_dist, 0

        # One detector for the whole clip. With static_image_mode=True every
        # image is processed independently, so sharing the instance across
        # frames gives the same landmarks as rebuilding it per frame while
        # avoiding the cost of re-creating the graph 25 times.
        with mp_face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1, min_detection_confidence=0.5) as face_mesh:
            # Stop decoding as soon as the window has been consumed.
            while cap.isOpened() and count < window.stop:
                ret, frame = cap.read()
                if not ret:
                    break

                if count in window:
                    # MediaPipe expects RGB input; OpenCV decodes to BGR.
                    results = face_mesh.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                    if results.multi_face_landmarks:
                        height, width = frame.shape[:2]
                        landmark = results.multi_face_landmarks[0].landmark
                        # Landmark coordinates scaled to truncated pixel positions.
                        points = np.array(
                            [(int(landmark[n].x * width), int(landmark[n].y * height)) for n in lips],
                            dtype=np.float64,
                        )
                        centroid = points.mean(axis=0)
                        euclid_dist[inner_count] = np.hypot(
                            points[:, 0] - centroid[0], points[:, 1] - centroid[1]
                        )
                    # Advance the row even when no face was found so that row
                    # i always corresponds to the i-th frame of the window.
                    inner_count += 1
                count += 1
    finally:
        cap.release()
        cv2.destroyAllWindows()

    return euclid_dist, int(inner_count == n_frames)

In [None]:
def _load_split(path_template, word, indices, label, total):
    """Run preprocessing() over the given clip indices and stack the usable results.

    Args:
        path_template: Format string taking ``word`` as ``{0}`` and the
            1-based clip index as ``{1}``.
        word: Word whose clips are loaded.
        indices: Iterable of 1-based clip indices.
        label: Class label assigned to every returned sample.
        total: Number of clips requested; only used in the progress output.

    Returns:
        tuple: ``(X, y)`` with ``X`` a float32 array of shape (n, 25, 40) and
        ``y`` an array of ``n`` copies of ``label``, where ``n`` is the number
        of clips for which preprocessing() reported success.
    """
    samples = []
    for idx in indices:
        cap = cv2.VideoCapture(path_template.format(word, idx))
        temp, bool_flag = preprocessing(cap)
        if bool_flag == 1:
            samples.append(temp)
        print("{}/{}".format(len(samples), total))

    if samples:
        X = np.stack(samples).astype('float32')
    else:
        # Keep the rank callers expect even when nothing could be loaded.
        X = np.zeros((0, 25, 40), dtype='float32')
    y = np.full(len(samples), label)
    return X, y


def data_generator(word,n_train,n_val,n_test,class_dict,
                   dataset_dir='/content/drive/MyDrive/Project/LRW/project_dataset'):
    """Build train / validation / test feature arrays for a single word.

    Clips are expected at ``<dataset_dir>/<word>/<split>/<word>_NNNNN.mp4``
    with a zero-padded, 1-based, five-digit index.  Training uses clips
    ``1..n_train`` of the ``train`` folder, validation uses clips
    ``751..750+n_val`` of the same ``train`` folder, and testing uses clips
    ``1..n_test`` of the ``test`` folder.  Keep ``n_train <= 750`` so the
    training and validation clips do not overlap.

    Args:
        word: Word (folder and file-name prefix) to load.
        n_train: Number of training clips to read.
        n_val: Number of validation clips to read.
        n_test: Number of test clips to read.
        class_dict: Mapping from word to its class label.
        dataset_dir: Root directory of the dataset.

    Returns:
        tuple: ``(X_train, y_train, X_val, y_val, X_test, y_test)``.  Each
        ``X_*`` is float32 with shape (n, 25, 40); each ``y_*`` is a 1-D
        array holding ``class_dict[word]`` once per sample.
    """
    label = class_dict[word]
    # First validation clip is train/<word>_00751.mp4.
    val_offset = 750

    # {{0}} / {{1}} survive this first format() as the word and clip index
    # placeholders filled in by _load_split().
    train_template = '{0}/{{0}}/train/{{0}}_{{1:05d}}.mp4'.format(dataset_dir)
    test_template = '{0}/{{0}}/test/{{0}}_{{1:05d}}.mp4'.format(dataset_dir)

    #TRAINING SET
    X_train, y_train = _load_split(
        train_template, word, range(1, n_train + 1), label, n_train)

    #VALIDATION SET (held-out tail of the train folder)
    X_val, y_val = _load_split(
        train_template, word,
        range(val_offset + 1, val_offset + n_val + 1), label, n_val)

    #TEST SET
    X_test, y_test = _load_split(
        test_template, word, range(1, n_test + 1), label, n_test)

    return X_train,y_train,X_val,y_val,X_test,y_test

In [None]:
def create_dataset(class_dict, n_train=350, n_val=50, n_test=50):
    """Assemble the full dataset for every word in ``class_dict``.

    Calls data_generator() once per word, concatenates the per-word arrays
    along the sample axis and one-hot encodes the labels.

    Args:
        class_dict: Mapping from word to integer class label.
        n_train: Training clips to read per word.
        n_val: Validation clips to read per word.
        n_test: Test clips to read per word.

    Returns:
        tuple: ``(X_train, X_test, X_val, y_val, y_train, y_test)`` -- note
        the order, which differs from data_generator().  ``X_*`` arrays have
        shape (n, 25, 40); ``y_*`` arrays are one-hot with
        ``max(class_dict.values()) + 1`` columns.

    Raises:
        ValueError: If ``class_dict`` is empty.
    """
    if not class_dict:
        raise ValueError("class_dict must contain at least one word")

    # One bucket per array returned by data_generator(), in its return order:
    # X_train, y_train, X_val, y_val, X_test, y_test.
    buckets = [[] for _ in range(6)]
    for counter, word in enumerate(class_dict, start=1):
        parts = data_generator(word, n_train, n_val, n_test, class_dict)
        for bucket, part in zip(buckets, parts):
            bucket.append(part)
        print("Words processed:{}/{}".format(counter, len(class_dict)))

    # Concatenate once at the end instead of re-copying on every iteration.
    X_train, y_train, X_val, y_val, X_test, y_test = (
        np.concatenate(bucket, axis=0) for bucket in buckets
    )

    # Fix the one-hot width explicitly. Left to infer it, to_categorical uses
    # max(label) + 1 of each array separately, so a split that happened to
    # miss the highest class would come out with fewer columns.
    num_classes = max(class_dict.values()) + 1
    y_train = utils.to_categorical(y_train, num_classes=num_classes)
    y_test = utils.to_categorical(y_test, num_classes=num_classes)
    y_val = utils.to_categorical(y_val, num_classes=num_classes)

    return X_train,X_test,X_val,y_val,y_train,y_test

In [None]:
# Word -> integer class label. Each key is also used as the folder name and
# file-name prefix of that word's clips in data_generator().
# NOTE(review): labels start at 1, and create_dataset() passes them to
# utils.to_categorical without num_classes, so the one-hot vectors are
# expected to carry an unused column 0 -- confirm the model's output size.
class_dict = {'ABUSE':1,'BLACK':2,'CRIME':3,'EXACTLY':4}

In [None]:
# Build every split for all words in class_dict. The unpacking order mirrors
# create_dataset()'s return order (X_train, X_test, X_val, y_val, y_train,
# y_test), which is not the usual train/val/test ordering.
X_train,X_test,X_val,y_val,y_train,y_test = create_dataset(class_dict) 

In [None]:
# Output directory for the saved arrays. File names are appended by plain
# string concatenation below, so the trailing slash is required.
path='/content/drive/MyDrive/Project/LRW/main_file/'

In [None]:
# Write each split to its own .npy file under `path`.
for _file_name, _array in (
    ('X_train3.npy', X_train),
    ('X_test3.npy', X_test),
    ('X_val3.npy', X_val),
    ('y_train3.npy', y_train),
    ('y_test3.npy', y_test),
    ('y_val3.npy', y_val),
):
    np.save(path + _file_name, _array)