In [27]:
import tensorflow as tf
import os
import mediapipe as mp
import numpy as np
import cv2
from tqdm import tqdm

In [28]:
mp_hands = mp.solutions.hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5)

In [29]:
def extract_pose_features(image):
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = mp_hands.process(image_rgb)
    if results.multi_hand_landmarks:
        # Extract landmarks from the first detected hand
        hand_landmarks = results.multi_hand_landmarks[0]
        # Normalize landmarks to use as features
        landmarks = np.array([(landmark.x, landmark.y) for landmark in hand_landmarks.landmark]).flatten()
        return landmarks
    return np.zeros(42)

In [30]:
def process_image_batch(img_paths):
    features = []
    for img_path in img_paths:
        img = cv2.imread(img_path)
        if img is not None:
            img = cv2.resize(img, (32, 32))
            pose_feature = extract_pose_features(img)
            features.append(pose_feature)
        else:
            features.append(None)
    return features

In [31]:
def process_directory(directory, batch_size=100):
    label = os.path.basename(directory)
    img_paths = [os.path.join(directory, img_name) for img_name in sorted(os.listdir(directory))]
    total_batches = (len(img_paths) + batch_size - 1) // batch_size
    features = []
    
    print(f"Processing {label}:")
    for i in tqdm(range(total_batches), desc=f"{label} Progress"):
        batch_paths = img_paths[i*batch_size:(i+1)*batch_size]
        batch_features = process_image_batch(batch_paths)
        features.extend(batch_features)
    
    return label, features

In [32]:
def load_and_process_images_with_dict(directory):
    data_dict = {}
    label_map = {chr(i + ord('a')): i for i in range(26)}  # Mapping 'a' to 'z' to 0-25
    labels = sorted([name for name in os.listdir(directory) if os.path.isdir(os.path.join(directory, name))])
    
    print("Starting batch processing of images...")
    for label in labels:
        directory_path = os.path.join(directory, label)
        label, features = process_directory(directory_path)
        data_dict[label] = [feature for feature in features if feature is not None]
    
    return data_dict

In [33]:
directory = os.path.join('..','dataset','trainingData')
train_data = load_and_process_images_with_dict(directory)

Starting batch processing of images...
Processing a:


a Progress: 100%|██████████████████████████████████████████████████████████████████████| 80/80 [02:25<00:00,  1.82s/it]


Processing b:


b Progress: 100%|██████████████████████████████████████████████████████████████████████| 80/80 [03:28<00:00,  2.60s/it]


Processing c:


c Progress: 100%|██████████████████████████████████████████████████████████████████████| 80/80 [02:49<00:00,  2.12s/it]


Processing d:


d Progress: 100%|██████████████████████████████████████████████████████████████████████| 80/80 [02:17<00:00,  1.72s/it]


Processing e:


e Progress: 100%|██████████████████████████████████████████████████████████████████████| 80/80 [02:53<00:00,  2.17s/it]


Processing f:


f Progress: 100%|██████████████████████████████████████████████████████████████████████| 80/80 [02:36<00:00,  1.96s/it]


Processing g:


g Progress: 100%|██████████████████████████████████████████████████████████████████████| 80/80 [02:36<00:00,  1.96s/it]


Processing h:


h Progress: 100%|██████████████████████████████████████████████████████████████████████| 80/80 [02:54<00:00,  2.18s/it]


Processing i:


i Progress: 100%|██████████████████████████████████████████████████████████████████████| 80/80 [03:20<00:00,  2.51s/it]


Processing j:


j Progress: 100%|██████████████████████████████████████████████████████████████████████| 80/80 [03:24<00:00,  2.55s/it]


Processing k:


k Progress: 100%|██████████████████████████████████████████████████████████████████████| 80/80 [03:18<00:00,  2.48s/it]


Processing l:


l Progress: 100%|██████████████████████████████████████████████████████████████████████| 80/80 [03:16<00:00,  2.46s/it]


Processing m:


m Progress: 100%|██████████████████████████████████████████████████████████████████████| 80/80 [03:07<00:00,  2.35s/it]


Processing n:


n Progress: 100%|██████████████████████████████████████████████████████████████████████| 80/80 [03:20<00:00,  2.50s/it]


Processing o:


o Progress: 100%|██████████████████████████████████████████████████████████████████████| 80/80 [03:20<00:00,  2.51s/it]


Processing p:


p Progress: 100%|██████████████████████████████████████████████████████████████████████| 80/80 [03:06<00:00,  2.34s/it]


Processing q:


q Progress: 100%|██████████████████████████████████████████████████████████████████████| 80/80 [03:20<00:00,  2.51s/it]


Processing r:


r Progress: 100%|██████████████████████████████████████████████████████████████████████| 80/80 [03:32<00:00,  2.65s/it]


Processing s:


s Progress: 100%|██████████████████████████████████████████████████████████████████████| 80/80 [03:35<00:00,  2.70s/it]


Processing t:


t Progress: 100%|██████████████████████████████████████████████████████████████████████| 80/80 [03:24<00:00,  2.55s/it]


Processing u:


u Progress: 100%|██████████████████████████████████████████████████████████████████████| 80/80 [03:02<00:00,  2.28s/it]


Processing v:


v Progress: 100%|██████████████████████████████████████████████████████████████████████| 80/80 [03:27<00:00,  2.60s/it]


Processing w:


w Progress: 100%|██████████████████████████████████████████████████████████████████████| 80/80 [03:11<00:00,  2.40s/it]


Processing x:


x Progress: 100%|██████████████████████████████████████████████████████████████████████| 80/80 [03:05<00:00,  2.32s/it]


Processing y:


y Progress: 100%|██████████████████████████████████████████████████████████████████████| 80/80 [03:10<00:00,  2.38s/it]


Processing z:


z Progress: 100%|██████████████████████████████████████████████████████████████████████| 80/80 [03:28<00:00,  2.60s/it]


In [40]:
import pandas as pd
train_df = pd.DataFrame(train_data.items(), columns=['Alphabet', 'Landmarks'])

In [55]:
expanded_landmarks = df['Landmarks'].apply(pd.Series)

In [61]:
train_data_df = pd.concat([train_df['Alphabet'], expanded_landmarks], axis=1)

In [63]:
train_data_df.columns = ['alphabets']+ [f'landmark_{i}' for i in range(8000)]
train_data_df.head(1)

Unnamed: 0,alphabets,landmark_0,landmark_1,landmark_2,landmark_3,landmark_4,landmark_5,landmark_6,landmark_7,landmark_8,...,landmark_7990,landmark_7991,landmark_7992,landmark_7993,landmark_7994,landmark_7995,landmark_7996,landmark_7997,landmark_7998,landmark_7999
0,a,"[0.4838511645793915, 0.6284142732620239, 0.369...","[0.48559656739234924, 0.6370050311088562, 0.37...","[0.4946433901786804, 0.6508243083953857, 0.376...","[0.48447081446647644, 0.6698311567306519, 0.37...","[0.49145880341529846, 0.6666201949119568, 0.36...","[0.4821345806121826, 0.6677781343460083, 0.358...","[0.48449715971946716, 0.6721535325050354, 0.35...","[0.4841638207435608, 0.6722722053527832, 0.357...","[0.4823983311653137, 0.671943187713623, 0.3552...",...,"[0.489443838596344, 0.6730363368988037, 0.3607...","[0.4863154888153076, 0.6711098551750183, 0.361...","[0.4835115075111389, 0.6675708889961243, 0.361...","[0.48151394724845886, 0.667807936668396, 0.359...","[0.486027330160141, 0.6649206280708313, 0.3578...","[0.48650282621383667, 0.6637972593307495, 0.35...","[0.47774815559387207, 0.6650660037994385, 0.35...","[0.47892794013023376, 0.6657238006591797, 0.35...","[0.4793429970741272, 0.6666581630706787, 0.356...","[0.48166096210479736, 0.6673048734664917, 0.35..."


In [64]:
train_data_df.to_parquet('train_data.parquet')

In [65]:
train_data_df.to_csv('train_data.csv',index=False)

In [66]:
df = pd.read_parquet('train_data.parquet')
df.head(1)

Unnamed: 0,alphabets,landmark_0,landmark_1,landmark_2,landmark_3,landmark_4,landmark_5,landmark_6,landmark_7,landmark_8,...,landmark_7990,landmark_7991,landmark_7992,landmark_7993,landmark_7994,landmark_7995,landmark_7996,landmark_7997,landmark_7998,landmark_7999
0,a,"[0.4838511645793915, 0.6284142732620239, 0.369...","[0.48559656739234924, 0.6370050311088562, 0.37...","[0.4946433901786804, 0.6508243083953857, 0.376...","[0.48447081446647644, 0.6698311567306519, 0.37...","[0.49145880341529846, 0.6666201949119568, 0.36...","[0.4821345806121826, 0.6677781343460083, 0.358...","[0.48449715971946716, 0.6721535325050354, 0.35...","[0.4841638207435608, 0.6722722053527832, 0.357...","[0.4823983311653137, 0.671943187713623, 0.3552...",...,"[0.489443838596344, 0.6730363368988037, 0.3607...","[0.4863154888153076, 0.6711098551750183, 0.361...","[0.4835115075111389, 0.6675708889961243, 0.361...","[0.48151394724845886, 0.667807936668396, 0.359...","[0.486027330160141, 0.6649206280708313, 0.3578...","[0.48650282621383667, 0.6637972593307495, 0.35...","[0.47774815559387207, 0.6650660037994385, 0.35...","[0.47892794013023376, 0.6657238006591797, 0.35...","[0.4793429970741272, 0.6666581630706787, 0.356...","[0.48166096210479736, 0.6673048734664917, 0.35..."


In [67]:
df2 = pd.read_csv('train_data.csv')

In [68]:
df2.head(1)

Unnamed: 0,alphabets,landmark_0,landmark_1,landmark_2,landmark_3,landmark_4,landmark_5,landmark_6,landmark_7,landmark_8,...,landmark_7990,landmark_7991,landmark_7992,landmark_7993,landmark_7994,landmark_7995,landmark_7996,landmark_7997,landmark_7998,landmark_7999
0,a,[0.48385116 0.62841427 0.36930937 0.58170748 0...,[0.48559657 0.63700503 0.37010401 0.58281869 0...,[0.49464339 0.65082431 0.37655279 0.58630484 0...,[0.48447081 0.66983116 0.37541434 0.58903718 0...,[0.4914588 0.66662019 0.36118621 0.59223318 0...,[0.48213458 0.66777813 0.35807601 0.59006488 0...,[0.48449716 0.67215353 0.35762829 0.59155536 0...,[0.48416382 0.67227221 0.35702679 0.59087288 0...,[0.48239833 0.67194319 0.35522836 0.5920341 0...,...,[0.48944384 0.67303634 0.36077929 0.58941799 0...,[0.48631549 0.67110986 0.36124492 0.5886299 0...,[0.48351151 0.66757089 0.3611235 0.58922809 0...,[0.48151395 0.66780794 0.3595365 0.58900297 0...,[0.48602733 0.66492063 0.3578341 0.59189183 0...,[0.48650283 0.66379726 0.35887694 0.59170502 0...,[0.47774816 0.665066 0.35399839 0.59159082 0...,[0.47892794 0.6657238 0.3536199 0.5903883 0...,[0.479343 0.66665816 0.35681063 0.58615822 0...,[0.48166096 0.66730487 0.35899919 0.58521336 0...
