# 1. Import and Install Dependencies

In [184]:
!pip install tensorflow==2.12.0 opencv-python mediapipe sklearn matplotlib

Defaulting to user installation because normal site-packages is not writeable




In [185]:
pip install tensorflow

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.




In [186]:
pip install pandas

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.




In [187]:
pip install pyspark

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.




In [188]:
pip install pandas scikit-learn pyarrow

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.




In [222]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp
import pandas as pd

# 2. Collect Keypoint Values for Training and Testing

In [223]:

df_from_parquet = pd.read_parquet(r"C:\Users\ranjan.patra\OneDrive - Lingaro Sp. z o. o\DATA\IITJ\Course\Projects\ISL\ISL_CSLRT_Corpus\Mp_Data\LandMarks")

In [226]:
df_from_parquet.head()

Unnamed: 0,Label,sample_type,Sample_Number,frame_shape,Frame_Number,Keypoints
0,are you free today,Video,0,"[1080, 1920, 3]",1.0,"[0.47676751017570496, 0.3017379641532898, -0.6..."
1,are you free today,Video,0,"[1080, 1920, 3]",2.0,"[0.47681325674057007, 0.30156728625297546, -0...."
2,are you free today,Video,0,"[1080, 1920, 3]",3.0,"[0.4772616922855377, 0.3015567362308502, -0.63..."
3,are you free today,Video,0,"[1080, 1920, 3]",4.0,"[0.477945476770401, 0.30148565769195557, -0.62..."
4,are you free today,Video,0,"[1080, 1920, 3]",5.0,"[0.47859013080596924, 0.30141815543174744, -0...."


In [256]:
df=df_from_parquet.groupby(["Label","Sample_Number"])["Frame_Number"].count()

In [257]:
display(df)

Label                               Sample_Number
He is going into the room           0                130
                                    1                146
                                    2                114
                                    3                128
                                    4                113
                                                    ... 
you need a medicine, take this one  0                118
                                    1                155
                                    2                111
                                    3                115
                                    4                118
Name: Frame_Number, Length: 487, dtype: int64

## 2.1 Label indexing

In [192]:

df_label=df_from_parquet["Label"].unique()
label_map = {label:num for num, label in enumerate(df_label)}
df_label_index = pd.DataFrame(list(label_map.items()), columns=['Label', 'Label_id'])

## 2.2 sample_type indexing

In [193]:
df_sample_type=df_from_parquet["sample_type"].unique()
sample_type_map = {sample_type:num for num, sample_type in enumerate(df_sample_type)}
df_sample_type_index = pd.DataFrame(list(sample_type_map.items()), columns=['sample_type', 'sample_type_id'])

## 2.3 Data Frame Clean up

In [194]:
inner_join = df_from_parquet.merge(df_label_index, on='Label', how='left')
df_raw=inner_join.merge(df_sample_type_index, on='sample_type', how='left')


In [195]:
df_clean=df_raw[["Label_id","sample_type_id","Sample_Number","Frame_Number","Keypoints"]]


In [196]:
df_clean.head()

Unnamed: 0,Label_id,sample_type_id,Sample_Number,Frame_Number,Keypoints
0,0,0,0,1.0,"[0.47676751017570496, 0.3017379641532898, -0.6..."
1,0,0,0,2.0,"[0.47681325674057007, 0.30156728625297546, -0...."
2,0,0,0,3.0,"[0.4772616922855377, 0.3015567362308502, -0.63..."
3,0,0,0,4.0,"[0.477945476770401, 0.30148565769195557, -0.62..."
4,0,0,0,5.0,"[0.47859013080596924, 0.30141815543174744, -0...."


# 3. Train Test Split

In [197]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [198]:
X=df_clean[["Label_id","sample_type_id","Sample_Number"]].drop_duplicates()

In [199]:
x.shape

(487, 3)

In [200]:
y=df_raw[["Label_id","Sample_Number","Frame_Number","Keypoints"]]

In [201]:
y.head()

Unnamed: 0,Label_id,Sample_Number,Frame_Number,Keypoints
0,0,0,1.0,"[0.47676751017570496, 0.3017379641532898, -0.6..."
1,0,0,2.0,"[0.47681325674057007, 0.30156728625297546, -0...."
2,0,0,3.0,"[0.4772616922855377, 0.3015567362308502, -0.63..."
3,0,0,4.0,"[0.477945476770401, 0.30148565769195557, -0.62..."
4,0,0,5.0,"[0.47859013080596924, 0.30141815543174744, -0...."


In [202]:

y_new=y.groupby(["Label_id","Sample_Number"])["Keypoints"].apply(list).reset_index(name='Holistic_landmarks')

In [203]:
y=y_new["Holistic_landmarks"]

In [216]:
print(y)

0      [[0.47676751017570496, 0.3017379641532898, -0....
1      [[0.47763150930404663, 0.29973751306533813, -0...
2      [[0.4733687937259674, 0.2801719903945923, -0.6...
3      [[0.4928557276725769, 0.3106054663658142, -0.6...
4      [[0.5247958302497864, 0.30657368898391724, -0....
                             ...                        
482    [[0.4479582607746124, 0.2940359115600586, -0.7...
483    [[0.4725050926208496, 0.30356115102767944, -0....
484    [[0.4662730097770691, 0.22107839584350586, -0....
485    [[0.49099552631378174, 0.21547842025756836, -0...
486    [[0.4966232478618622, 0.3735995292663574, -0.6...
Name: Holistic_landmarks, Length: 487, dtype: object


In [221]:
y.apply(len).max()

98

In [209]:
new_column_names = [f'{"col"}_{i}' for i in range(1, max_length + 1)]

In [None]:
df[new_column_names] = pd.DataFrame(y.tolist())


In [214]:
display(df)

Unnamed: 0,ID,Column1,col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,...,col_183,col_184,col_185,col_186,col_187,col_188,col_189,col_190,col_191,col_192
0,1,"[1, 2, 3]","[0.47676751017570496, 0.3017379641532898, -0.6...","[0.47681325674057007, 0.30156728625297546, -0....","[0.4772616922855377, 0.3015567362308502, -0.63...","[0.477945476770401, 0.30148565769195557, -0.62...","[0.47859013080596924, 0.30141815543174744, -0....","[0.47888728976249695, 0.30137962102890015, -0....","[0.47923916578292847, 0.3013561964035034, -0.6...","[0.47959932684898376, 0.30111372470855713, -0....",...,,,,,,,,,,
1,1,"[4, 5, 6]","[0.47763150930404663, 0.29973751306533813, -0....","[0.4782756567001343, 0.2994726300239563, -0.67...","[0.47850731015205383, 0.29945608973503113, -0....","[0.4785272479057312, 0.3004192113876343, -0.68...","[0.47854700684547424, 0.30042022466659546, -0....","[0.47854405641555786, 0.3005640208721161, -0.6...","[0.47845259308815, 0.30022111535072327, -0.667...","[0.4783184826374054, 0.29987362027168274, -0.6...",...,,,,,,,,,,
2,1,"[7, 8, 9]","[0.4733687937259674, 0.2801719903945923, -0.63...","[0.4735981523990631, 0.28375244140625, -0.6696...","[0.47362133860588074, 0.28485777974128723, -0....","[0.47345322370529175, 0.2850269675254822, -0.7...","[0.4734543263912201, 0.2849951386451721, -0.69...","[0.4735454022884369, 0.2849554419517517, -0.69...","[0.4737289547920227, 0.28500890731811523, -0.7...","[0.4738963842391968, 0.2853124737739563, -0.78...",...,,,,,,,,,,


In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05)

In [None]:
y.shape

(487,)

# 4. Build and Train LSTM Neural Network

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [None]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [None]:
model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(30,1662)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(label.shape[0], activation='softmax'))

NameError: name 'label' is not defined

In [None]:
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [None]:
model.fit(X_train, y_train, epochs=2000, callbacks=[tb_callback])

In [None]:
model.summary()

# 8. Make Predictions

In [None]:
res = model.predict(X_test)

In [None]:
actions[np.argmax(res[4])]

In [None]:
actions[np.argmax(y_test[4])]

# 9. Save Weights

In [None]:
model.save('action.h5')

In [None]:
del model

In [None]:
model.load_weights('action.h5')

# 10. Evaluation using Confusion Matrix and Accuracy

In [None]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [None]:
yhat = model.predict(X_test)

In [None]:
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()

In [None]:
multilabel_confusion_matrix(ytrue, yhat)

In [None]:
accuracy_score(ytrue, yhat)

# 11. Test in Real Time

In [None]:
from scipy import stats

In [None]:
colors = [(245,117,16), (117,245,16), (16,117,245)]
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
        
    return output_frame

In [None]:
plt.figure(figsize=(18,18))
plt.imshow(prob_viz(res, actions, image, colors))

In [None]:
# 1. New detection variables
sequence = []
sentence = []
predictions = []
threshold = 0.5

cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        print(results)
        
        # Draw landmarks
        draw_styled_landmarks(image, results)
        
        # 2. Prediction logic
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-30:]
        
        if len(sequence) == 30:
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            print(actions[np.argmax(res)])
            predictions.append(np.argmax(res))
            
            
        #3. Viz logic
            if np.unique(predictions[-10:])[0]==np.argmax(res): 
                if res[np.argmax(res)] > threshold: 
                    
                    if len(sentence) > 0: 
                        if actions[np.argmax(res)] != sentence[-1]:
                            sentence.append(actions[np.argmax(res)])
                    else:
                        sentence.append(actions[np.argmax(res)])

            if len(sentence) > 5: 
                sentence = sentence[-5:]

            # Viz probabilities
            image = prob_viz(res, actions, image, colors)
            
        cv2.rectangle(image, (0,0), (640, 40), (245, 117, 16), -1)
        cv2.putText(image, ' '.join(sentence), (3,30), 
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        
        # Show to screen
        cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()