In [1]:
import torch
from torchvision import transforms, models ,datasets
from torch import nn,optim
from torch.nn import Module
from torch.nn import Conv2d
from torch.nn import Linear
from torch.nn import MaxPool2d
from torch.nn import ReLU
from torch.nn import LogSoftmax
from torch import flatten
from torch.optim import Adam
from torch import nn


import numpy as np
import matplotlib.pyplot as plt
import glob
from mpl_toolkits.axes_grid1 import ImageGrid
from PIL import Image

In [2]:
#Section 2
import time

num_groups = 4

class SVHN_NN(nn.Module):
    """CNN for the SVHN Datset"""
    def __init__(self):
        """CNN Builder."""
        super(SVHN_NN, self).__init__()
        self.conv_layer = nn.Sequential(
            # Conv Layer block 1
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
            # `paading=1` is the same as `padding='same'` for 3x3 kernels size
            nn.GroupNorm(num_groups, 32, eps=1e-05, affine=True, device=None, dtype=None),
            #nn.BatchNorm2d(32),
            nn.PReLU(),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.PReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Conv Layer block 2
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.GroupNorm(num_groups*num_groups, 128, eps=1e-05, affine=True, device=None, dtype=None),
            #nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.PReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout2d(p=0.05),
            # Conv Layer block 3
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.GroupNorm(8*num_groups, 256, eps=1e-05, affine=True, device=None, dtype=None),
            #nn.BatchNorm2d(256),
            nn.PReLU(),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=256, out_channels=128, kernel_size=7, padding=1),
            nn.PReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout2d(p=0.05),
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=15, padding=1),
            nn.GroupNorm(8*num_groups, 256, eps=1e-05, affine=True, device=None, dtype=None),
            )
        
        self.fc_layer = nn.Sequential(
            nn.Dropout(p=0.1),
            nn.Linear(1024, 512),
            nn.PReLU(),
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.1),
            nn.Linear(256,1024),
            nn.Dropout(p=0.1),
            nn.PReLU(),
            nn.Linear(1024,256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.1),
            nn.Linear(256,128),
            nn.Dropout(p=0.1),
            nn.Linear(128,27)
            )
        

    def forward(self, x):
            """Perform forward."""

            # conv layers
            x = self.conv_layer(x)

            # flatten
            x = x.view(x.size(0), -1)

            # fc layer
            x = self.fc_layer(x)
            return x



In [4]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()
model = SVHN_NN()
# change the path to your path
model.load_state_dict(torch.load('/kaggle/input/final3-classifier/classifier.pth'))
model.to(device)

SVHN_NN(
  (conv_layer): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): GroupNorm(4, 32, eps=1e-05, affine=True)
    (2): PReLU(num_parameters=1)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): PReLU(num_parameters=1)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): GroupNorm(16, 128, eps=1e-05, affine=True)
    (8): ReLU(inplace=True)
    (9): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (10): PReLU(num_parameters=1)
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Dropout2d(p=0.05, inplace=False)
    (13): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): GroupNorm(32, 256, eps=1e-05, affine=True)
    (15): PReLU(num_parameters=1)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=

In [5]:
classes = {
    0: "A",
    1: "B",
    2: "BLANK",
    3: "C",
    4: "D",
    5: "E",
    6: "F",
    7: "G",
    8: "H",
    9: "I",
    10: "J",
    11: "K",
    12: "L",
    13: "M",
    14: "N",
    15: "O",
    16: "P",
    17: "Q",
    18: "R",
    19: "S",
    20: "T",
    21: "U",
    22: "V",
    23: "W",
    24: "X",
    25: "Y",
    26: "Z",

}

In [6]:

def predict_sign(sign_path):
    
    test_img = Image.open(sign_path)



    # Define transformations
    t_r = transforms.Resize((256,256))
    t_t = transforms.ToTensor()

    image = test_img

    # Apply transformations
    test_img = t_r(test_img)
    test_img = t_t(test_img)
    


    image_predicted = test_img
    # Convert tensor to numpy array
    image_predicted_np = image_predicted.cpu().numpy()
    # Transpose dimensions to match the expected shape
    image_predicted_np = np.transpose(image_predicted_np, (1, 2, 0))


    # Move the tensor to CUDA if available
    if torch.cuda.is_available():
        test_img = test_img.cuda()

    
    test_img = test_img.unsqueeze(0)  # Add an extra dimension for the batch size

    # Move the tensor to GPU if available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    test_img = test_img.to(device)

    #classify image using our model
    res = torch.exp(model(test_img))

    _, predicted_label = torch.max(res, 1)
    
    prediction = classes[res.argmax().item()]
    return prediction,image,image_predicted_np


**cut video to frames**

In [91]:
!pip install opencv-python


[0m

In [318]:
! mkdir video_frames

In [7]:
from collections import Counter

def get_majority_label(labels):
    label_counts = Counter(labels)
    majority_label = label_counts.most_common(1)[0][0]
    return majority_label

In [8]:
import cv2
from IPython.display import Image, display
import numpy as np
from PIL import Image
import cv2
import torch
from torchvision import transforms
from collections import Counter

def detect_motion6(video_path):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise ValueError("Error opening video file.")

    frame_count = 0
    prev_frame_gray = None
    black_flag = 0
    b_im_displayed = 0
    sign_arr = []
    predictions_arr = []
    sign_cnt = 0
    num_signs = 1
    word = []
    while True:
        ret, curr_frame = cap.read()
        if not ret:
            break

        # Convert the current frame to grayscale
        curr_frame_gray = cv2.cvtColor(curr_frame, cv2.COLOR_BGR2GRAY)

        # Perform background subtraction
        if prev_frame_gray is not None:
            diff_frame = cv2.absdiff(curr_frame_gray, prev_frame_gray)

            # Apply thresholding to obtain binary image
            _, threshold = cv2.threshold(diff_frame, 30, 255, cv2.THRESH_BINARY)

            # Perform contour detection
            contours, _ = cv2.findContours(threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            motion_detected = False

            # Iterate over detected contours
            for contour in contours:
                # Filter out small contours
                if cv2.contourArea(contour) > 1000:
                    x, y, w, h = cv2.boundingRect(contour)
                    cv2.rectangle(curr_frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                    motion_detected = True

            if not motion_detected:
                # Display the frame with bounding boxes indicating motion
                #display(Image(data=cv2.imencode('.jpg', curr_frame)[1].tobytes()))
                
                # Save the frame to the output directory
                #output_path = f"{output_dir}/frame_{frame_count}.jpg"
                sign_arr.append(curr_frame)
                sign_cnt +=1
                #cv2.imwrite(output_path, curr_frame)
                black_flag = 1
                b_im_displayed = 0
            else : 
                black_flag = 0
                
                if sign_cnt > 10:
                    #directory_name = os.path.join(output_dir, str(num_signs))
                    #os.mkdir(directory_name)
                    tmp_frame_num = 0
                    for saved_frame in sign_arr :
                        tmp_frame_path = "/kaggle/working/tmp_frame.jpg"
                        tmp_frame_num += 1
                        cv2.imwrite(tmp_frame_path, saved_frame)
                        predictions_arr.append(predict_sign(tmp_frame_path)[0])
                        #print(predict_sign(tmp_frame_path)[0])
                    sign_prediction = get_majority_label(predictions_arr)
                    word.append(sign_prediction)
                    #print(sign_prediction)
                    num_signs += 1
                    sign_cnt = 0
                    sign_arr = []
                    predictions_arr = []

                
            if black_flag == 0 :
                height, width, channels = curr_frame.shape
                # Create a black image with the same shape
                black_image = np.zeros((height, width, channels))#, dtype=np.uint8)
                if b_im_displayed == 0 :
                    #print("**********************************************")
                    #display(Image(data=cv2.imencode('.jpg', black_image)[1].tobytes()))
                    #output_path = f"{output_dir}/frame_{frame_count}.jpg"
                    #cv2.imwrite(output_path, curr_frame)
                    b_im_displayed = 1
                black_flag = 1
                
                

        prev_frame_gray = curr_frame_gray
        frame_count += 1

    if sign_cnt > 10:
        #directory_name = os.path.join(output_dir, str(num_signs))
        #os.mkdir(directory_name)
        tmp_frame_num = 0
        for saved_frame in sign_arr :
            tmp_frame_path = "/kaggle/working/tmp_frame.jpg"
            tmp_frame_num += 1
            cv2.imwrite(tmp_frame_path, saved_frame)
            predictions_arr.append(predict_sign(tmp_frame_path)[0])
            #print(predict_sign(tmp_frame_path)[0])
        sign_prediction = get_majority_label(predictions_arr)
        #print(sign_prediction)
        word.append(sign_prediction)
        num_signs += 1
        sign_cnt = 0
        sign_arr = []
        predictions_arr = []
        
    cap.release()
    return word
    #cv2.destroyAllWindows()

In [9]:
def change_label(list_data):
    old_label = "BLANK"
    new_label = " "
    for i in range(len(list_data)):
        if list_data[i] == old_label:
            list_data[i] = new_label
    return list_data

In [12]:
import os
#export yourn video to a new directory, and replace tha path below with its path
directory = '/kaggle/input/test-videos/videos/videos'
i = 1
# Iterate over the files in the directory
for filename in os.listdir(directory):
    if filename.endswith('.mp4'):
        video_file = os.path.join(directory, filename)
        name_without_extension = os.path.splitext(filename)[0]
        word = detect_motion6(video_file)
        word = change_label(word)
        word_alligned = ''.join(word)
        #print(word)
        print(f"{i} - real word : \033[4m{name_without_extension}\033[0m")
        print(f"{i} - predicted word : \033[4m{word_alligned}\033[0m")
        print("\n")
        i += 1


1 - real word : [4mSAMER[0m
1 - predicted word : [4mSANER[0m


2 - real word : [4mAPPLE[0m
2 - predicted word : [4mAPPLE[0m


3 - real word : [4mALAM[0m
3 - predicted word : [4mALAM[0m


4 - real word : [4mJOBA[0m
4 - predicted word : [4mJOBA[0m


5 - real word : [4mDEEP[0m
5 - predicted word : [4mDEEP[0m


6 - real word : [4mNyquist[0m
6 - predicted word : [4mNYQUIST[0m


7 - real word : [4mWATER[0m
7 - predicted word : [4mWATER[0m


8 - real word : [4mGym[0m
8 - predicted word : [4mGYM[0m


9 - real word : [4mTECHNION[0m
9 - predicted word : [4mTECHNOEN[0m


10 - real word : [4mNoam[0m
10 - predicted word : [4mNOAM[0m


