# This project
Clean up the project such that only the MHI with VGG version exists

## Import Dependencies

In [1]:
!pip install opencv-python numpy fastdtw tensorflow scikit-learn matplotlib

Defaulting to user installation because normal site-packages is not writeable
Collecting fastdtw
  Downloading fastdtw-0.3.4.tar.gz (133 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m133.4/133.4 KB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting tensorflow
  Downloading tensorflow-2.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (589.8 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m589.8/589.8 MB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
[?25hCollecting scikit-learn
  Downloading scikit_learn-1.4.1.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.1 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.1/12.1 MB[0m [31m18.9 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m0:01[0m:01[0m
Collecting wrapt>=1.11.0
  Downloading wrapt-1.16.0-cp310-cp31

In [4]:
#all the required dependencies of the project
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time

# Motion History Image Method - CNN Shape
Motion History Image (MHI) represent motion in sequence frames as a single image. The intensity of each pixel in the MHI corresponds to the recency of motion at that location - brighter the pixel, more recent the location.

Algorithm generates MHI that captures the temporal aspects of motion by decaying older movements and highlight new ones - using further analysis like action recognition on 2DCNN.

In [3]:
def update_mhi(prev_frame, current_frame, mhi, decay=0.7):
    """
    Update the motion history image based on the current frame and the previous frame.
    """
    # Compute the absolute difference between the current and the previous frame
    frame_diff = cv2.absdiff(current_frame, prev_frame)
    gray_diff = cv2.cvtColor(frame_diff, cv2.COLOR_BGR2GRAY)
    _, binary_diff = cv2.threshold(gray_diff, 30, 255, cv2.THRESH_BINARY)

    # Convert binary_diff to float32 to match mhi's type
    binary_diff = np.float32(binary_diff / 255)  # Normalize to [0, 1] to maintain consistency

    # Update MHI: decay existing MHI values and increase values where motion is detected
    mhi = cv2.add(mhi * decay, binary_diff)
    return mhi


In [4]:
# Function to update MHI, adapted to handle color (BGR) frames directly
def update_mhi(prev_frame, current_frame, mhi, decay=0.02):
    frame_diff = cv2.absdiff(current_frame, prev_frame)
    # Applying threshold to get binary motion detection, works on grayscale
    gray_diff = cv2.cvtColor(frame_diff, cv2.COLOR_BGR2GRAY)
    _, binary_diff = cv2.threshold(gray_diff, 30, 255, cv2.THRESH_BINARY)
    binary_diff = np.repeat(binary_diff[:, :, np.newaxis], 3, axis=2)  # Make it 3-channel
    binary_diff = np.float32(binary_diff / 255)  # Normalize to [0, 1]

    # Update the MHI
    mhi = cv2.add(mhi * decay, binary_diff)
    return mhi

In [56]:
#USE THIS - decay rate of 0.09
def update_mhi(prev_frame, current_frame, mhi, decay=0.09):
    """
    Update the motion history image (MHI) to retain motion across the entire video,
    adapted for a 3-channel (color) MHI.
    
    - prev_frame: The previous frame in the video (BGR).
    - current_frame: The current frame in the video (BGR).
    - mhi: The current state of the MHI, a 3-channel image.
    - decay: The rate at which previous motion history fades.
    """
    # Compute the absolute difference between the current and the previous frame
    frame_diff = cv2.absdiff(current_frame, prev_frame)
    
    # Convert the frame difference to grayscale and threshold it
    gray_diff = cv2.cvtColor(frame_diff, cv2.COLOR_BGR2GRAY)
    _, motion_mask = cv2.threshold(gray_diff, 25, 1, cv2.THRESH_BINARY)
    
    # Convert the single-channel motion mask to a 3-channel mask
    motion_mask_3ch = cv2.merge([motion_mask, motion_mask, motion_mask])
    
    # Update the MHI: New motion is added with a value of 1, and existing motion history decays
    # Ensure that both mhi and motion_mask_3ch are floats for correct operation
    mhi = (mhi * (1 - decay)) + np.float32(motion_mask_3ch)
    
    # Ensure MHI values are capped at 1
    mhi = np.clip(mhi, 0, 1)
    
    return mhi

In [57]:
# Capture video from input video
cap = cv2.VideoCapture('bsl_dataset/manual-script/sorry/sorry_28.mp4')

# Read the first frame
ret, prev_frame = cap.read()
if not ret:
    print("Failed to read video")
    cap.release()
    cv2.destroyAllWindows()
    exit()

# Initialize MHI with 3 channels to keep color information
mhi = np.zeros((*prev_frame.shape[:2], 3), dtype=np.float32)

# List to store MHIs
mhis = []

# Process video to generate MHI
while True:
    ret, current_frame = cap.read()
    if not ret:
        break  # End of video
    
    # Update MHI
    mhi = update_mhi(prev_frame, current_frame, mhi)
    
    # Prepare for next iteration
    prev_frame = current_frame.copy()

    # Optionally, visualize the MHI
    normalized_mhi = np.uint8(np.clip((mhi / mhi.max()) * 255, 0, 255))
    cv2.imshow("MHI", normalized_mhi)
    
    resized_mhi = cv2.resize(normalized_mhi, (224, 224))

    # Store the MHI for this frame
    mhis.append(resized_mhi.copy())

    # Break the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Save the last MHI image to a file
output_path = 'mhi_image28_alt_0.09.png'  # Adjust the output path as needed
cv2.imwrite(output_path, resized_mhi)

# Clean up
cap.release()
cv2.destroyAllWindows()

# NOTE: Further processing to fit MHIs into MobileNetV2 would follow here,
# including resizing and normalization as needed for your application.

In [59]:
output_path
mhi_array = np.array(mhi)
normalized_mhi_array = np.array(resized_mhi)
print("Shape of the mhi_array:", mhi_array.shape)
#print("Shape of stacked_mhi:", stacked_mhis.shape)
print("Shape of normalized:", resized_mhi.shape)

Shape of the mhi_array: (720, 1280, 3)
Shape of normalized: (224, 224, 3)


## Motion History Image - 3 channel decay method
Idea: Instead of duplicating the MHI into 3 channel for RBG into mobile net, set different decay rate and log it into the shape.
Then the input should be fed into MobileNetv2.

This is still in progress - might need to expand this experiment later on.

In [None]:
def update_mhi(prev_frame, current_frame, mhi, decay_rate):
    """
    Update the motion history image (MHI) to retain motion across the entire video,
    adapted for a 3-channel (color) MHI.
    
    - prev_frame: The previous frame in the video (BGR).
    - current_frame: The current frame in the video (BGR).
    - mhi: The current state of the MHI, a 3-channel image.
    - decay: The rate at which previous motion history fades.
    """
    # Compute the absolute difference between the current and the previous frame
    frame_diff = cv2.absdiff(current_frame, prev_frame)
    
    # Convert the frame difference to grayscale and threshold it
    gray_diff = cv2.cvtColor(frame_diff, cv2.COLOR_BGR2GRAY)
    _, motion_mask = cv2.threshold(gray_diff, 25, 1, cv2.THRESH_BINARY)
    
    # Convert the single-channel motion mask to a 3-channel mask
    #motion_mask_3ch = cv2.merge([motion_mask, motion_mask, motion_mask])
    
    # Update the MHI: New motion is added with a value of 1, and existing motion history decays
    # Ensure that both mhi and motion_mask_3ch are floats for correct operation
    mhi_1 = (mhi_1 * (1 - decay_rate)) + np.float32(motion_mask)
    
    mhi_2 = (mhi_2 * (1 - decay_rate)) + np.float32(motion_mask)
    
    mhi_3 = (mhi_3 * (1 - decay_rate)) + np.float32(motion_mask)
    
    mhi_combined = cv2.merge([mhi_1, mhi_2, mhi_3])
    
    # Ensure MHI values are capped at 1
    mhi_combined = np.clip(mhi_combined, 0, 1)
    
    return mhi_combined

# Setup Folders for Collection

In [8]:
#Defining Data
# Path for exported data, numpy arrays
DATA_PATH = os.path.join('MHI_Data') 

# Actions that we try to detect
actions = np.array(['thanks', 'sorry'])
#no_sequences = 100


# Collect Extracted Matrix Values for Training

Loop through the videos in the video folder, they are labelled with ids that separate them.
Then for each subfolder get the actions and loop through it, collecting their frame difference matrices.

In [9]:
#Folder of dataset
dataset_folder = "bsl_dataset/manual-script"
#D:\BSL_project\bsl_dataset\manual-script

In [34]:
#data from BSL code version
from tqdm import tqdm
if not os.path.exists(DATA_PATH):
    os.makedirs(DATA_PATH)

for action_folder in os.listdir(dataset_folder):
    action_path = os.path.join(dataset_folder, action_folder)
    if os.path.isdir(action_path) and (actions is None or action in actions):
        print(f"Processing action: {action}")
        for video_file in tqdm(os.listdir(action_path)):
            if video_file.endswith(".mp4"):
                video_path = os.path.join(action_path, video_file)
                cap = cv2.VideoCapture(video_path)

                ret, prev_frame = cap.read()
                if not ret:
                    print("Failed to read video", video_path)
                    cap.release()
                    continue

                mhi = np.zeros((*prev_frame.shape[:2], 3), dtype=np.float32)
                mhis = []

                while True:
                    ret, current_frame = cap.read()
                    if not ret:
                        break

                    mhi = update_mhi(prev_frame, current_frame, mhi)
                    prev_frame = current_frame.copy()
                    
                    normalized_mhi = np.uint8(np.clip((mhi / mhi.max()) * 255, 0, 255))
                    

                    #Resize MHI here - for MobileNetV2
                    resized_mhi = cv2.resize(normalized_mhi, (200, 200))
                    
                    #store MHI for this frame
                    mhis.append(resized_mhi.copy())

                

                # Generate an output path for the MHI npy file
                output_npy_path = os.path.join(DATA_PATH, action_folder,  f"{video_file.split('.')[0]}.npy")
                # Save the MHI list as a .npy file
                np.save(output_npy_path, np.array(resized_mhi))
                #output_path = os.path.join(DATA_PATH, action_folder,  f"{video_file.split('.')[0]}.png")
                #cv2.imwrite(output_path, resized_mhi)

cap.release()
cv2.destroyAllWindows()  # Make sure to destroy all cv2 windows outside the loop

Processing action: thanks


  normalized_mhi = np.uint8(np.clip((mhi / mhi.max()) * 255, 0, 255))
  normalized_mhi = np.uint8(np.clip((mhi / mhi.max()) * 255, 0, 255))
100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [02:03<00:00,  1.24s/it]


Processing action: thanks


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [01:57<00:00,  1.18s/it]


test the shape of the npy file saved fits MobileNetv2 expected shape of (200, 200, 3) shape.

In [9]:
npy_file_path = 'MHI_Data/thanks/thanks_02.npy'  # Adjust the path as needed

# Load the npy file
test_array = np.load(npy_file_path)

# Output the shape of the array
print(test_array.shape)

(200, 200, 3)


In [60]:
DATA_PATH = os.path.join('MHI_ASL_DATA') 

In [None]:
from tqdm import tqdm

# dataset from asl:
dataset_folder = "asl_dataset_word/archive (1)/extracted_videos_asl"  # Updated to point to your dataset directory

# Defining Data
# Path for exported data, numpy arrays
DATA_PATH = os.path.join('MHI_ASL_DATA') 

# Actions will be dynamically populated based on folder names
actions = []

# Check and create DATA_PATH if not exists
if not os.path.exists(DATA_PATH):
    os.makedirs(DATA_PATH)

# Populate actions based on folder names in dataset_folder
for entry in os.listdir(dataset_folder):
    if os.path.isdir(os.path.join(dataset_folder, entry)):
        actions.append(entry)

actions = np.array(actions)  # Convert list of actions to numpy array

print(f"Detected actions: {actions}")

for action in actions:
    action_folder = os.path.join(dataset_folder, action)
    action_data_path = os.path.join(DATA_PATH, action)
    if not os.path.exists(action_data_path):
        os.makedirs(action_data_path)
    
    print(f"Processing action: {action}")
    for video_file in tqdm(os.listdir(action_folder)):
        if video_file.endswith(".mp4"):
            # Generate an output path for the MHI npy file
            output_npy_path = os.path.join(action_data_path, f"{video_file.split('.')[0]}.npy")
            
            

            video_path = os.path.join(action_folder, video_file)
            cap = cv2.VideoCapture(video_path)

            ret, prev_frame = cap.read()
            if not ret:
                print("Failed to read video", video_path)
                cap.release()
                continue

            mhi = np.zeros((*prev_frame.shape[:2], 3), dtype=np.float32)
            mhis = []

            while True:
                ret, current_frame = cap.read()
                if not ret:
                    break

                mhi = update_mhi(prev_frame, current_frame, mhi)
                prev_frame = current_frame.copy()
                #normalized_mhi = np.uint8(np.clip((mhi / mhi.max()) * 255, 0, 255))
                normalized_mhi = np.uint8(np.clip((mhi / mhi.max()) * 255, 0, 255))
                resized_mhi = cv2.resize(normalized_mhi, (224, 224))
                mhis.append(resized_mhi.copy())

            # Save the MHI list as a .npy file
            np.save(output_npy_path, np.array(resized_mhi.copy()))
            #output_path_img = os.path.join(action_data_path, f"{video_file.split('.')[0]}.png")
            #cv2.imwrite(output_path_img, resized_mhi)
            cap.release()

            

cv2.destroyAllWindows()  # Make sure to destroy all cv2 windows


Detected actions: ['book' 'drink' 'computer' ... 'weigh' 'wheelchair' 'whistle']
Processing action: book


  normalized_mhi = np.uint8(np.clip((mhi / mhi.max()) * 255, 0, 255))
  normalized_mhi = np.uint8(np.clip((mhi / mhi.max()) * 255, 0, 255))
100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:07<00:00,  1.22s/it]


Processing action: drink


100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [00:18<00:00,  1.23s/it]


Processing action: computer


100%|██████████████████████████████████████████████████████████████████████████████████| 14/14 [00:35<00:00,  2.54s/it]


Processing action: before


100%|██████████████████████████████████████████████████████████████████████████████████| 16/16 [00:23<00:00,  1.50s/it]


Processing action: chair


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:09<00:00,  1.31s/it]


Processing action: go


100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [00:11<00:00,  1.28it/s]


Processing action: clothes


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:06<00:00,  1.23s/it]


Processing action: who


100%|██████████████████████████████████████████████████████████████████████████████████| 13/13 [00:22<00:00,  1.77s/it]


Processing action: candy


100%|██████████████████████████████████████████████████████████████████████████████████| 13/13 [00:25<00:00,  1.99s/it]


Processing action: cousin


100%|██████████████████████████████████████████████████████████████████████████████████| 14/14 [00:15<00:00,  1.13s/it]


Processing action: deaf


100%|██████████████████████████████████████████████████████████████████████████████████| 11/11 [00:16<00:00,  1.46s/it]


Processing action: fine


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:08<00:00,  1.02it/s]


Processing action: help


100%|██████████████████████████████████████████████████████████████████████████████████| 14/14 [00:19<00:00,  1.37s/it]


Processing action: no


100%|██████████████████████████████████████████████████████████████████████████████████| 11/11 [00:12<00:00,  1.14s/it]


Processing action: thin


100%|██████████████████████████████████████████████████████████████████████████████████| 16/16 [00:27<00:00,  1.69s/it]


Processing action: walk


100%|██████████████████████████████████████████████████████████████████████████████████| 11/11 [00:11<00:00,  1.03s/it]


Processing action: year


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:14<00:00,  1.42s/it]


Processing action: yes


100%|██████████████████████████████████████████████████████████████████████████████████| 11/11 [00:12<00:00,  1.10s/it]


Processing action: all


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:15<00:00,  1.99s/it]


Processing action: black


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:13<00:00,  1.35s/it]


Processing action: cool


100%|██████████████████████████████████████████████████████████████████████████████████| 16/16 [00:23<00:00,  1.48s/it]


Processing action: finish


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:08<00:00,  1.05it/s]


Processing action: hot


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:13<00:00,  1.31s/it]


Processing action: like


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:11<00:00,  1.12s/it]


Processing action: many


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:15<00:00,  1.58s/it]


Processing action: mother


100%|██████████████████████████████████████████████████████████████████████████████████| 11/11 [00:14<00:00,  1.27s/it]


Processing action: now


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:07<00:00,  1.26it/s]


Processing action: orange


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:12<00:00,  1.26s/it]


Processing action: table


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:08<00:00,  1.72s/it]


Processing action: thanksgiving


100%|██████████████████████████████████████████████████████████████████████████████████| 13/13 [00:14<00:00,  1.12s/it]


Processing action: what


100%|██████████████████████████████████████████████████████████████████████████████████| 11/11 [00:14<00:00,  1.27s/it]


Processing action: woman


100%|██████████████████████████████████████████████████████████████████████████████████| 11/11 [00:11<00:00,  1.00s/it]


Processing action: bed


100%|██████████████████████████████████████████████████████████████████████████████████| 13/13 [00:16<00:00,  1.23s/it]


Processing action: blue


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:08<00:00,  1.09s/it]


Processing action: bowling


100%|██████████████████████████████████████████████████████████████████████████████████| 13/13 [00:19<00:00,  1.53s/it]


Processing action: can


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:09<00:00,  1.08s/it]


Processing action: dog


100%|██████████████████████████████████████████████████████████████████████████████████| 11/11 [00:11<00:00,  1.06s/it]


Processing action: family


100%|██████████████████████████████████████████████████████████████████████████████████| 11/11 [00:12<00:00,  1.15s/it]


Processing action: fish


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:11<00:00,  1.19s/it]


Processing action: graduate


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:13<00:00,  1.38s/it]


Processing action: hat


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:10<00:00,  1.20s/it]


Processing action: hearing


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:08<00:00,  1.11s/it]


Processing action: kiss


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:12<00:00,  1.41s/it]


Processing action: language


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:19<00:00,  1.92s/it]


Processing action: later


100%|██████████████████████████████████████████████████████████████████████████████████| 12/12 [00:18<00:00,  1.58s/it]


Processing action: man


100%|██████████████████████████████████████████████████████████████████████████████████| 12/12 [00:16<00:00,  1.35s/it]


Processing action: shirt


100%|██████████████████████████████████████████████████████████████████████████████████| 12/12 [00:10<00:00,  1.12it/s]


Processing action: study


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:10<00:00,  1.09s/it]


Processing action: tall


100%|██████████████████████████████████████████████████████████████████████████████████| 13/13 [00:18<00:00,  1.45s/it]


Processing action: white


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:12<00:00,  1.41s/it]


Processing action: wrong


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:12<00:00,  1.56s/it]


Processing action: accident


100%|██████████████████████████████████████████████████████████████████████████████████| 13/13 [00:25<00:00,  1.96s/it]


Processing action: apple


100%|██████████████████████████████████████████████████████████████████████████████████| 11/11 [00:13<00:00,  1.23s/it]


Processing action: bird


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:18<00:00,  1.84s/it]


Processing action: change


100%|██████████████████████████████████████████████████████████████████████████████████| 12/12 [00:14<00:00,  1.24s/it]


Processing action: color


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:11<00:00,  1.47s/it]


Processing action: corn


100%|██████████████████████████████████████████████████████████████████████████████████| 12/12 [00:17<00:00,  1.46s/it]


Processing action: cow


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:12<00:00,  1.41s/it]


Processing action: dance


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:11<00:00,  1.68s/it]


Processing action: dark


100%|██████████████████████████████████████████████████████████████████████████████████| 12/12 [00:20<00:00,  1.69s/it]


Processing action: doctor


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:09<00:00,  1.10it/s]


Processing action: eat


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:06<00:00,  1.07it/s]


Processing action: enjoy


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:15<00:00,  1.99s/it]


Processing action: forget


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:14<00:00,  2.00s/it]


Processing action: give


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:16<00:00,  1.66s/it]


Processing action: last


100%|██████████████████████████████████████████████████████████████████████████████████| 12/12 [00:10<00:00,  1.16it/s]


Processing action: meet


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:16<00:00,  1.83s/it]


Processing action: pink


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:12<00:00,  1.34s/it]


Processing action: pizza


100%|██████████████████████████████████████████████████████████████████████████████████| 12/12 [00:21<00:00,  1.83s/it]


Processing action: play


100%|██████████████████████████████████████████████████████████████████████████████████| 11/11 [00:12<00:00,  1.17s/it]


Processing action: school


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:13<00:00,  1.46s/it]


Processing action: secretary


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:13<00:00,  1.32s/it]


Processing action: short


100%|██████████████████████████████████████████████████████████████████████████████████| 13/13 [00:15<00:00,  1.16s/it]


Processing action: time


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:05<00:00,  1.19it/s]


Processing action: want


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:07<00:00,  1.02it/s]


Processing action: work


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:14<00:00,  1.42s/it]


Processing action: africa


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:20<00:00,  2.24s/it]


Processing action: basketball


100%|██████████████████████████████████████████████████████████████████████████████████| 12/12 [00:20<00:00,  1.70s/it]


Processing action: birthday


100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:05<00:00,  1.04it/s]


Processing action: brown


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:09<00:00,  1.16s/it]


Processing action: but


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:05<00:00,  1.26it/s]


Processing action: cheat


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:12<00:00,  1.27s/it]


Processing action: city


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:16<00:00,  1.84s/it]


Processing action: cook


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:09<00:00,  1.14s/it]


Processing action: decide


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:08<00:00,  1.06it/s]


Processing action: full


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:11<00:00,  1.13s/it]


Processing action: how


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:16<00:00,  1.84s/it]


Processing action: jacket


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:13<00:00,  1.87s/it]


Processing action: letter


100%|██████████████████████████████████████████████████████████████████████████████████| 11/11 [00:11<00:00,  1.06s/it]


Processing action: medicine


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:09<00:00,  1.24s/it]


Processing action: need


100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:10<00:00,  1.68s/it]


Processing action: paint


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:11<00:00,  1.60s/it]


Processing action: paper


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:09<00:00,  1.13s/it]


Processing action: pull


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:19<00:00,  2.17s/it]


Processing action: purple


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:08<00:00,  1.12s/it]


Processing action: right


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:10<00:00,  1.22s/it]


Processing action: same


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:12<00:00,  1.37s/it]


Processing action: son


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:16<00:00,  1.67s/it]


Processing action: tell


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:07<00:00,  1.42it/s]


Processing action: thursday


100%|██████████████████████████████████████████████████████████████████████████████████| 11/11 [00:13<00:00,  1.22s/it]


Processing action: visit


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:14<00:00,  1.64s/it]


Processing action: wait


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:15<00:00,  1.50s/it]


Processing action: water


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:06<00:00,  1.44it/s]


Processing action: wife


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:10<00:00,  1.27s/it]


Processing action: yellow


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:06<00:00,  1.09it/s]


Processing action: backpack


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:18<00:00,  2.26s/it]


Processing action: bar


100%|██████████████████████████████████████████████████████████████████████████████████| 11/11 [00:13<00:00,  1.25s/it]


Processing action: brother


100%|██████████████████████████████████████████████████████████████████████████████████| 11/11 [00:21<00:00,  1.92s/it]


Processing action: cat


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:10<00:00,  1.20s/it]


Processing action: check


100%|██████████████████████████████████████████████████████████████████████████████████| 11/11 [00:12<00:00,  1.13s/it]


Processing action: class


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:08<00:00,  1.04it/s]


Processing action: cry


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:08<00:00,  1.12it/s]


Processing action: different


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:06<00:00,  1.28it/s]


Processing action: door


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:11<00:00,  1.61s/it]


Processing action: green


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:06<00:00,  1.11it/s]


Processing action: hair


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:12<00:00,  1.35s/it]


Processing action: have


100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:09<00:00,  1.65s/it]


Processing action: headache


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:09<00:00,  1.04s/it]


Processing action: inform


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:07<00:00,  1.13it/s]


Processing action: knife


100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:04<00:00,  1.27it/s]


Processing action: laugh


100%|██████████████████████████████████████████████████████████████████████████████████| 11/11 [00:18<00:00,  1.70s/it]


Processing action: learn


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:11<00:00,  1.38s/it]


Processing action: movie


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:07<00:00,  1.44s/it]


Processing action: rabbit


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:11<00:00,  1.49s/it]


Processing action: read


100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:08<00:00,  1.45s/it]


Processing action: red


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:07<00:00,  1.00s/it]


Processing action: room


100%|██████████████████████████████████████████████████████████████████████████████████| 11/11 [00:15<00:00,  1.40s/it]


Processing action: run


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:07<00:00,  1.14s/it]


Processing action: show


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:14<00:00,  1.62s/it]


Processing action: sick


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:11<00:00,  1.43s/it]


Processing action: snow


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:16<00:00,  1.61s/it]


Processing action: take


100%|██████████████████████████████████████████████████████████████████████████████████| 11/11 [00:10<00:00,  1.05it/s]


Processing action: tea


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:17<00:00,  2.45s/it]


Processing action: teacher


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:09<00:00,  1.22s/it]


Processing action: week


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:07<00:00,  1.25it/s]


Processing action: why


100%|██████████████████████████████████████████████████████████████████████████████████| 11/11 [00:14<00:00,  1.36s/it]


Processing action: with


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:11<00:00,  1.67s/it]


Processing action: write


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:11<00:00,  1.33s/it]


Processing action: yesterday


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:18<00:00,  1.82s/it]


Processing action: again


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:09<00:00,  1.14s/it]


Processing action: bad


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:09<00:00,  1.04it/s]


Processing action: ball


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:12<00:00,  1.42s/it]


Processing action: bathroom


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:06<00:00,  1.00it/s]


Processing action: blanket


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:16<00:00,  1.66s/it]


Processing action: buy


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:20<00:00,  2.24s/it]


Processing action: call


100%|██████████████████████████████████████████████████████████████████████████████████| 12/12 [00:13<00:00,  1.16s/it]


Processing action: coffee


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:09<00:00,  1.38s/it]


Processing action: cold


100%|██████████████████████████████████████████████████████████████████████████████████| 12/12 [00:15<00:00,  1.27s/it]


Processing action: college


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:09<00:00,  1.37s/it]


Processing action: copy


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:11<00:00,  1.45s/it]


Processing action: cute


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:07<00:00,  1.05s/it]


Processing action: daughter


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:18<00:00,  1.84s/it]


Processing action: example


100%|██████████████████████████████████████████████████████████████████████████████████| 11/11 [00:16<00:00,  1.54s/it]


Processing action: far


100%|██████████████████████████████████████████████████████████████████████████████████| 11/11 [00:24<00:00,  2.26s/it]


Processing action: first


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:07<00:00,  1.27it/s]


Processing action: friend


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:06<00:00,  1.07it/s]


Processing action: good


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:09<00:00,  1.08it/s]


Processing action: happy


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:07<00:00,  1.10it/s]


Processing action: home


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:06<00:00,  1.03it/s]


Processing action: know


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:09<00:00,  1.17s/it]


Processing action: late


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:11<00:00,  1.41s/it]


Processing action: leave


100%|██████████████████████████████████████████████████████████████████████████████████| 11/11 [00:17<00:00,  1.57s/it]


Processing action: list


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:10<00:00,  1.18s/it]


Processing action: lose


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:10<00:00,  1.14s/it]


Processing action: name


100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:09<00:00,  1.50s/it]


Processing action: old


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:09<00:00,  1.36s/it]


Processing action: person


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:06<00:00,  1.04it/s]


Processing action: police


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:08<00:00,  1.12s/it]


Processing action: problem


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:10<00:00,  1.13s/it]


Processing action: remember


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:09<00:00,  1.31s/it]


Processing action: share


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:10<00:00,  1.47s/it]


Processing action: soon


100%|██████████████████████████████████████████████████████████████████████████████████| 11/11 [00:13<00:00,  1.20s/it]


Processing action: stay


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:14<00:00,  1.65s/it]


Processing action: sunday


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:17<00:00,  2.15s/it]


Processing action: test


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:08<00:00,  1.10s/it]


Processing action: tired


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:15<00:00,  2.28s/it]


Processing action: trade


100%|██████████████████████████████████████████████████████████████████████████████████| 13/13 [00:25<00:00,  1.94s/it]


Processing action: travel


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:12<00:00,  1.34s/it]


Processing action: window


100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:05<00:00,  1.08it/s]


Processing action: you


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:06<00:00,  1.07it/s]


Processing action: about


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:08<00:00,  1.19s/it]


Processing action: approve


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:06<00:00,  1.23it/s]


Processing action: arrive


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:12<00:00,  1.60s/it]


Processing action: balance


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:12<00:00,  1.23s/it]


Processing action: banana


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:17<00:00,  1.92s/it]


Processing action: beard


100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:10<00:00,  1.18s/it]


Processing action: because


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:12<00:00,  1.26s/it]


Processing action: boy


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:12<00:00,  1.50s/it]


Processing action: business


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:06<00:00,  1.02it/s]


Processing action: careful


 44%|█████████████████████████████████████▎                                              | 4/9 [00:01<00:02,  2.10it/s]

In [25]:
#code that loops through folders to get actions
dataset_folder = "asl_dataset_word/archive (1)/extracted_videos_asl"  # Update this to your dataset directory

# Path for exported data (not used for video processing here, but kept for structure)
DATA_PATH = os.path.join('MHI_ASL_DATA') 

# Ensure DATA_PATH exists
if not os.path.exists(DATA_PATH):
    os.makedirs(DATA_PATH)

# List to hold the names of actions based on folder names in the dataset_folder
actions = []
action_count = 100
counter = 0;
#action count - limit the amount of labels.

# Populate actions list with the names of directories in dataset_folder
for entry in os.listdir(dataset_folder):
    if counter >= action_count:
        break
    if os.path.isdir(os.path.join(dataset_folder, entry)):
        actions.append(entry)
    counter += 1
    

# Convert the list of actions to a numpy array (optional, depending on further use)
actions = np.array(actions)

# Print detected actions
print(f"Detected actions: {actions}")

# Optionally, prepare folders for each action in a separate data path
for action in actions:
    action_data_path = os.path.join(DATA_PATH, action)
    if not os.path.exists(action_data_path):
        os.makedirs(action_data_path)
    print(f"Prepared directory for action: {action}")


Detected actions: ['book' 'drink' 'computer' 'before' 'chair' 'go' 'clothes' 'who' 'candy'
 'cousin' 'deaf' 'fine' 'help' 'no' 'thin' 'walk' 'year' 'yes' 'all'
 'black' 'cool' 'finish' 'hot' 'like' 'many' 'mother' 'now' 'orange'
 'table' 'thanksgiving' 'what' 'woman' 'bed' 'blue' 'bowling' 'can' 'dog'
 'family' 'fish' 'graduate' 'hat' 'hearing' 'kiss' 'language' 'later'
 'man' 'shirt' 'study' 'tall' 'white' 'wrong' 'accident' 'apple' 'bird'
 'change' 'color' 'corn' 'cow' 'dance' 'dark' 'doctor' 'eat' 'enjoy'
 'forget' 'give' 'last' 'meet' 'pink' 'pizza' 'play' 'school' 'secretary'
 'short' 'time' 'want' 'work' 'africa' 'basketball' 'birthday' 'brown'
 'but' 'cheat' 'city' 'cook' 'decide' 'full' 'how' 'jacket' 'letter'
 'medicine' 'need' 'paint' 'paper' 'pull' 'purple' 'right' 'same' 'son'
 'tell' 'thursday']
Prepared directory for action: book
Prepared directory for action: drink
Prepared directory for action: computer
Prepared directory for action: before
Prepared directory for action

In [63]:
#test the size of the npy file has the same image size frame
npy_file_path = 'MHI_ASL_Data/drink/17709.npy'  # Adjust the path as needed

# Load the npy file
test_array = np.load(npy_file_path)

# Output the shape of the array
print(test_array.shape)

#list out all the action labels we have
print(actions.shape)

(224, 224, 3)
(2000,)


# Preprocess Data, Create Labels and Features

In [9]:
#train_test is a function to split dataset into training and testing set
from sklearn.model_selection import train_test_split

#Converts class vectors to binary class matrix for categorial crossentropy
from tensorflow.keras.utils import to_categorical

In [26]:
#Create Label Map
label_map = {label:num for num, label in enumerate(actions)}

In [27]:
label_map

{'book': 0,
 'drink': 1,
 'computer': 2,
 'before': 3,
 'chair': 4,
 'go': 5,
 'clothes': 6,
 'who': 7,
 'candy': 8,
 'cousin': 9,
 'deaf': 10,
 'fine': 11,
 'help': 12,
 'no': 13,
 'thin': 14,
 'walk': 15,
 'year': 16,
 'yes': 17,
 'all': 18,
 'black': 19,
 'cool': 20,
 'finish': 21,
 'hot': 22,
 'like': 23,
 'many': 24,
 'mother': 25,
 'now': 26,
 'orange': 27,
 'table': 28,
 'thanksgiving': 29,
 'what': 30,
 'woman': 31,
 'bed': 32,
 'blue': 33,
 'bowling': 34,
 'can': 35,
 'dog': 36,
 'family': 37,
 'fish': 38,
 'graduate': 39,
 'hat': 40,
 'hearing': 41,
 'kiss': 42,
 'language': 43,
 'later': 44,
 'man': 45,
 'shirt': 46,
 'study': 47,
 'tall': 48,
 'white': 49,
 'wrong': 50,
 'accident': 51,
 'apple': 52,
 'bird': 53,
 'change': 54,
 'color': 55,
 'corn': 56,
 'cow': 57,
 'dance': 58,
 'dark': 59,
 'doctor': 60,
 'eat': 61,
 'enjoy': 62,
 'forget': 63,
 'give': 64,
 'last': 65,
 'meet': 66,
 'pink': 67,
 'pizza': 68,
 'play': 69,
 'school': 70,
 'secretary': 71,
 'short': 72,
 '

In [12]:
#For MHI - use this.
sequences, labels = [], []
for action_folder in os.listdir(DATA_PATH):
    action_path = os.path.join(DATA_PATH, action_folder)
    if os.path.isdir(action_path):
        for file_name in os.listdir(action_path):
            if file_name.endswith('.npy'):
                # Load MHI data
                mhi_path = os.path.join(action_path, file_name)
                mhi_data = np.load(mhi_path)
                
                sequences.append(mhi_data)
                
                # Use the label_map to convert action names to integers
                labels.append(label_map[action_folder])

KeyError: 'deaf'

In [28]:
#modify code such that it only takes the actions - and preprocess it.
# Initialize lists to store sequences and labels
sequences, labels = [], []

# Process each folder in DATA_PATH
for action_folder in os.listdir(DATA_PATH):
    if action_folder in actions:
        action_path = os.path.join(DATA_PATH, action_folder)
        if os.path.isdir(action_path):
            for file_name in os.listdir(action_path):
                if file_name.endswith('.npy'):
                    # Load MHI data
                    mhi_path = os.path.join(action_path, file_name)
                    mhi_data = np.load(mhi_path)
                    
                    sequences.append(mhi_data)
                    
                    # Use the label_map to convert action names to integers
                    labels.append(label_map[action_folder])

In [29]:
np.array(sequences).shape

(1007, 200, 200, 3)

In [30]:
np.array(labels).shape

(1007,)

In [31]:
X = np.array(sequences)
X.shape

(1007, 200, 200, 3)

In [32]:
#from sklearn.preprocessing import LabelEncoder
#label_encoder = LabelEncoder()
#integer_encoded = label_encoder.fit_transform(labels)
y = to_categorical(labels).astype(int)
#y = to_categorical(integer_encoded)

In [33]:
y

array([[1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1]])

In [34]:
#Here, split into train, test, and dev (validate) dataset
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.10)

# First, split into temp training and test sets
X_temp, X_dev, y_temp, y_dev = train_test_split(X, y, test_size=0.05)

# Then, split the temp training set into final training and dev (validation) sets
X_train, X_test, y_train, y_test = train_test_split(X_temp, y_temp, test_size=0.05)

In [35]:
X_train.shape, X_test.shape, X_dev.shape

((908, 200, 200, 3), (48, 200, 200, 3), (51, 200, 200, 3))

In [36]:
y_train.dtype

dtype('int32')

# Build and Train CNN Neural Network
Taking from pre-trained image CNN
Create base model from pre-trained model: MobileNet V3Small
Step 1: Create Base Model from MobileNet V2 model developed at Google.
Step 2: Freeze layers
Step 3: Train new layers on dataset
Step 4: Improve model via fine-tuning

In [47]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.callbacks import TensorBoard
from keras.layers import Dense, GlobalAveragePooling2D, Conv2D, MaxPooling2D , Flatten


In [50]:
#create logs to view learning in real time
log_dir = os.path.join('Logs_MHI_ASL_MobileNetV2')
#tb_callback = TensorBoard(log_dir=log_dir)

tb_callback = [TensorBoard(log_dir=log_dir,
                         histogram_freq=1,
                         write_graph=True,
                         write_images=True,
                         update_freq='epoch',
                         profile_batch=2,
                         embeddings_freq=1)]

In [54]:
#FD_SHAPE = (444, 444, 3)
#MobileNetV3 uses input shape of (224, 224, 3)
MHI_SHAPE = (224,224,3)
#reshaped_data = data.reshape(-1, 480, 640, 1)
# get the base model, exclude final dense layers - we will modify/output this
base_model = tf.keras.applications.MobileNetV3Small(input_shape = MHI_SHAPE,
                                               include_top = False,
                                               weights = 'imagenet',
                                               pooling='max')

In [40]:
# Freeze the convolutional base
base_model.trainable = False

In [41]:
#get the base model summary
base_model.summary()

Model: "MobilenetV3small"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 200, 200, 3)]        0         []                            
                                                                                                  
 rescaling (Rescaling)       (None, 200, 200, 3)          0         ['input_1[0][0]']             
                                                                                                  
 Conv (Conv2D)               (None, 100, 100, 16)         432       ['rescaling[0][0]']           
                                                                                                  
 Conv/BatchNorm (BatchNorma  (None, 100, 100, 16)         64        ['Conv[0][0]']                
 lization)                                                                         

 ion)                                                                                             
                                                                                                  
 re_lu_4 (ReLU)              (None, 25, 25, 72)           0         ['expanded_conv_1/depthwise/Ba
                                                                    tchNorm[0][0]']               
                                                                                                  
 expanded_conv_1/project (C  (None, 25, 25, 24)           1728      ['re_lu_4[0][0]']             
 onv2D)                                                                                           
                                                                                                  
 expanded_conv_1/project/Ba  (None, 25, 25, 24)           96        ['expanded_conv_1/project[0][0
 tchNorm (BatchNormalizatio                                         ]']                           
 n)       

                                                                     'tf.math.multiply_3[0][0]']  
                                                                                                  
 expanded_conv_3/squeeze_ex  (None, 1, 1, 96)             0         ['multiply_2[0][0]']          
 cite/AvgPool (GlobalAverag                                                                       
 ePooling2D)                                                                                      
                                                                                                  
 expanded_conv_3/squeeze_ex  (None, 1, 1, 24)             2328      ['expanded_conv_3/squeeze_exci
 cite/Conv (Conv2D)                                                 te/AvgPool[0][0]']            
                                                                                                  
 expanded_conv_3/squeeze_ex  (None, 1, 1, 24)             0         ['expanded_conv_3/squeeze_exci
 cite/Relu

                                                                                                  
 tf.math.add_7 (TFOpLambda)  (None, 1, 1, 240)            0         ['expanded_conv_4/squeeze_exci
                                                                    te/Conv_1[0][0]']             
                                                                                                  
 re_lu_12 (ReLU)             (None, 1, 1, 240)            0         ['tf.math.add_7[0][0]']       
                                                                                                  
 tf.math.multiply_7 (TFOpLa  (None, 1, 1, 240)            0         ['re_lu_12[0][0]']            
 mbda)                                                                                            
                                                                                                  
 expanded_conv_4/squeeze_ex  (None, 13, 13, 240)          0         ['multiply_4[0][0]',          
 cite/Mul 

 expanded_conv_5/squeeze_ex  (None, 13, 13, 240)          0         ['multiply_6[0][0]',          
 cite/Mul (Multiply)                                                 'tf.math.multiply_10[0][0]'] 
                                                                                                  
 expanded_conv_5/project (C  (None, 13, 13, 40)           9600      ['expanded_conv_5/squeeze_exci
 onv2D)                                                             te/Mul[0][0]']                
                                                                                                  
 expanded_conv_5/project/Ba  (None, 13, 13, 40)           160       ['expanded_conv_5/project[0][0
 tchNorm (BatchNormalizatio                                         ]']                           
 n)                                                                                               
                                                                                                  
 expanded_

 expanded_conv_7/expand (Co  (None, 13, 13, 144)          6912      ['expanded_conv_6/project/Batc
 nv2D)                                                              hNorm[0][0]']                 
                                                                                                  
 expanded_conv_7/expand/Bat  (None, 13, 13, 144)          576       ['expanded_conv_7/expand[0][0]
 chNorm (BatchNormalization                                         ']                            
 )                                                                                                
                                                                                                  
 tf.math.add_14 (TFOpLambda  (None, 13, 13, 144)          0         ['expanded_conv_7/expand/Batch
 )                                                                  Norm[0][0]']                  
                                                                                                  
 re_lu_19 

                                                                                                  
 re_lu_22 (ReLU)             (None, 13, 13, 288)          0         ['tf.math.add_17[0][0]']      
                                                                                                  
 tf.math.multiply_17 (TFOpL  (None, 13, 13, 288)          0         ['re_lu_22[0][0]']            
 ambda)                                                                                           
                                                                                                  
 multiply_11 (Multiply)      (None, 13, 13, 288)          0         ['expanded_conv_8/expand/Batch
                                                                    Norm[0][0]',                  
                                                                     'tf.math.multiply_17[0][0]'] 
                                                                                                  
 expanded_

 (DepthwiseConv2D)                                                                                
                                                                                                  
 expanded_conv_9/depthwise/  (None, 7, 7, 576)            2304      ['expanded_conv_9/depthwise[0]
 BatchNorm (BatchNormalizat                                         [0]']                         
 ion)                                                                                             
                                                                                                  
 tf.math.add_21 (TFOpLambda  (None, 7, 7, 576)            0         ['expanded_conv_9/depthwise/Ba
 )                                                                  tchNorm[0][0]']               
                                                                                                  
 re_lu_26 (ReLU)             (None, 7, 7, 576)            0         ['tf.math.add_21[0][0]']      
          

 re_lu_29 (ReLU)             (None, 7, 7, 576)            0         ['tf.math.add_24[0][0]']      
                                                                                                  
 tf.math.multiply_24 (TFOpL  (None, 7, 7, 576)            0         ['re_lu_29[0][0]']            
 ambda)                                                                                           
                                                                                                  
 multiply_16 (Multiply)      (None, 7, 7, 576)            0         ['expanded_conv_10/depthwise/B
                                                                    atchNorm[0][0]',              
                                                                     'tf.math.multiply_24[0][0]'] 
                                                                                                  
 expanded_conv_10/squeeze_e  (None, 1, 1, 576)            0         ['multiply_16[0][0]']         
 xcite/Avg

In [42]:
#MobileNetV3 without additional dense layer - add softmax classification layer
model = Sequential()
model.add(base_model)
model.add(Dense(actions.shape[0], activation='softmax'))

In [44]:
from tensorflow.keras.models import Model

# Add classification head/own layers
z = base_model.output
# If you want to include GlobalAveragePooling2D, you can directly use 'pooling=max' in the base model as done above
z = Dense(1024, activation='relu')(z)  # Dense layer 1
z = Dense(1024, activation='relu')(z)  # Dense layer 2
z = Dense(512, activation='relu')(z)   # Dense layer 3
preds = Dense(actions.shape[0], activation='softmax')(z)  # Final layer with softmax activation

# Create the full model
model = Model(inputs=base_model.input, outputs=preds)

In [43]:
#Fixed learning rate: adam_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0003) #0.001 can be changed.

#learning rate decay for Adam
initial_learning_rate = 0.0003  # Starting learning rate
decay_steps = 100000           # After how many steps to apply decay
decay_rate = 0.96              # Decay rate
staircase = True               # Apply decay in a staircase fashion

lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps,
    decay_rate,
    staircase=staircase
)

adam_optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)  # Use the learning rate schedule here

In [44]:
model.compile(optimizer=adam_optimizer, loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [46]:
#Train the model - MobileNetV3Small as pretrained model. Tensorboard callback.
model.fit(X_train, y_train, epochs=1000, validation_data = (X_dev, y_dev), batch_size = 8, callbacks=[tb_callback])

Epoch 1/2000
Epoch 2/2000
Epoch 3/2000
Epoch 4/2000
Epoch 5/2000
Epoch 6/2000
Epoch 7/2000
Epoch 8/2000
Epoch 9/2000
Epoch 10/2000
Epoch 11/2000
Epoch 12/2000
Epoch 13/2000
Epoch 14/2000
Epoch 15/2000
Epoch 16/2000
Epoch 17/2000
Epoch 18/2000
Epoch 19/2000
Epoch 20/2000
Epoch 21/2000
Epoch 22/2000
Epoch 23/2000
Epoch 24/2000
Epoch 25/2000
Epoch 26/2000
Epoch 27/2000
Epoch 28/2000
Epoch 29/2000
Epoch 30/2000

KeyboardInterrupt: 

In [74]:
#save the model (for future load and training purposes)
model.save('MobileNetV3Small_first.keras')

## Try out ResNet

## VGG From Scratch
VGG16 is a CNN architecture. 

In [26]:
MHI_SHAPE = (200,200,3)

model_VGG = Sequential()
#Block 1
model_VGG.add(Conv2D(input_shape=MHI_SHAPE,filters=64,kernel_size=(3,3),padding="same", activation="relu"))
model_VGG.add(Conv2D(filters=64,kernel_size=(3,3),padding="same", activation="relu"))
model_VGG.add(MaxPooling2D(pool_size=(2,2),strides=(2,2)))
#Block 2
model_VGG.add(Conv2D(filters=128, kernel_size=(3,3), padding="same", activation="relu"))
model_VGG.add(Conv2D(filters=128, kernel_size=(3,3), padding="same", activation="relu"))
model_VGG.add(MaxPooling2D(pool_size=(2,2),strides=(2,2)))
#Block 3
model_VGG.add(Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))
model_VGG.add(Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))
model_VGG.add(Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))
model_VGG.add(MaxPooling2D(pool_size=(2,2),strides=(2,2)))
#Block 4
model_VGG.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model_VGG.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model_VGG.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model_VGG.add(MaxPooling2D(pool_size=(2,2),strides=(2,2)))
#Block 5
model_VGG.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model_VGG.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model_VGG.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model_VGG.add(MaxPooling2D(pool_size=(2,2),strides=(2,2)))
#Fully Connected Layers
model_VGG.add(Flatten())
model_VGG.add(Dense(units=4096,activation="relu"))
model_VGG.add(Dense(units=4096,activation="relu"))
#softmax units is dependent on the labels we want to extract out.
model_VGG.add(Dense(units=actions.shape[0], activation="softmax"))

#then, generate model summary
model_VGG.summary()

  super().__init__(
2024-04-07 03:32:12.055439: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-04-07 03:32:12.057118: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2251] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
2024-04-07 03:32:12.234082: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 301989888 exceeds 10% of free system memory.
2024-04-07 03:32:12.310465: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 

In [27]:
#custom learning rate for Adam

initial_learning_rate = 0.0003  # Starting learning rate
decay_steps = 100000           # After how many steps to apply decay
decay_rate = 0.96              # Decay rate
staircase = True               # Apply decay in a staircase fashion

lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps,
    decay_rate,
    staircase=staircase
)

adam_opt = tf.keras.optimizers.Adam(learning_rate=lr_schedule)  # Use the learning rate schedule here

In [28]:
model_VGG.compile(optimizer=adam_opt, loss='categorical_crossentropy', metrics=['categorical_accuracy'], run_eagerly=True)

In [31]:
# Callbacks, implement model checkpoint and early stopping
from keras.callbacks import ModelCheckpoint, EarlyStopping
#val - represent validation metrics. therefore getting val_accuracy as metrics.
checkpoint = ModelCheckpoint("vgg16_best.keras", monitor='val_categorial_accuracy', verbose=1, save_best_only=True, save_weights_only=False, mode='auto')
#early = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=20, verbose=1, mode='auto')


#Tensorboard
#create logs to view learning in real time
log_path = os.path.join('Logs_MHI_ASL_VGG')
tb_callback = TensorBoard(log_dir=log_path)

my_callbacks = [
    #keras.callbacks.EarlyStopping(patience=2),
    keras.callbacks.ModelCheckpoint("vgg16_best.keras", monitor='val_categorial_accuracy', verbose=1, save_best_only=True, save_weights_only=False, mode='auto'),
    keras.callbacks.TensorBoard(log_dir=log_path),
]

In [32]:
# Finally, train the model using fit, callback for modelsavepoint and tensorboard (to visualize ML training)
model_VGG.fit(X_train, y_train, epochs=1000, validation_data=(X_dev, y_dev), batch_size=32, validation_steps = 10, callbacks=[tb_callback, checkpoint])

Epoch 1/1000
[1m337/337[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3173s[0m 9s/step - categorical_accuracy: 3.5496e-04 - loss: 7.6668 - val_categorical_accuracy: 0.0000e+00 - val_loss: 7.6033
Epoch 2/1000


  self._save_model(epoch=epoch, batch=None, logs=logs)


[1m337/337[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3166s[0m 9s/step - categorical_accuracy: 0.0017 - loss: 7.5982 - val_categorical_accuracy: 0.0000e+00 - val_loss: 7.6085
Epoch 3/1000


2024-04-07 05:18:57.813682: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
  self.gen.throw(typ, value, traceback)


[1m337/337[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3185s[0m 9s/step - categorical_accuracy: 7.2651e-04 - loss: 7.5907 - val_categorical_accuracy: 0.0000e+00 - val_loss: 7.6264
Epoch 4/1000
[1m337/337[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3181s[0m 9s/step - categorical_accuracy: 0.0017 - loss: 7.5700 - val_categorical_accuracy: 0.0036 - val_loss: 7.6710
Epoch 5/1000


2024-04-07 07:05:03.019732: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


[1m337/337[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3189s[0m 9s/step - categorical_accuracy: 0.0010 - loss: 7.5565 - val_categorical_accuracy: 0.0000e+00 - val_loss: 7.6743
Epoch 6/1000
[1m337/337[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3163s[0m 9s/step - categorical_accuracy: 8.3488e-04 - loss: 7.5544 - val_categorical_accuracy: 0.0036 - val_loss: 7.7149
Epoch 7/1000


2024-04-07 08:50:55.119266: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


[1m337/337[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3167s[0m 9s/step - categorical_accuracy: 0.0013 - loss: 7.5453 - val_categorical_accuracy: 0.0000e+00 - val_loss: 7.6763
Epoch 8/1000
[1m337/337[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3167s[0m 9s/step - categorical_accuracy: 0.0015 - loss: 7.5519 - val_categorical_accuracy: 0.0000e+00 - val_loss: 7.7096
Epoch 9/1000


2024-04-07 10:36:28.843219: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


[1m337/337[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3176s[0m 9s/step - categorical_accuracy: 0.0011 - loss: 7.5508 - val_categorical_accuracy: 0.0000e+00 - val_loss: 7.6822
Epoch 10/1000
[1m337/337[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3173s[0m 9s/step - categorical_accuracy: 0.0011 - loss: 7.5501 - val_categorical_accuracy: 0.0000e+00 - val_loss: 7.7055
Epoch 11/1000


2024-04-07 12:22:17.721709: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


[1m 84/337[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m39:34[0m 9s/step - categorical_accuracy: 0.0034 - loss: 7.5409

KeyboardInterrupt: 

# Test the Values

In [None]:
trained_cnn = load_model('MobileNetV2_first.keras')

In [None]:
#Optional: continue to train the model (when increased the dataset)

In [75]:
res = model.predict(X_test)



In [78]:
actions[np.argmax(res[0])]

'sorry'

In [79]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score
yhat = model.predict(X_test)

ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()
print(ytrue)
len(yhat)




[1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1]


20

In [80]:
multilabel_confusion_matrix(ytrue, yhat)

array([[[6, 4],
        [4, 6]],

       [[6, 4],
        [4, 6]]], dtype=int64)

In [81]:
accuracy_score(ytrue, yhat)

0.6