In [1]:
import os

import librosa
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
from sklearn.metrics import accuracy_score, classification_report, f1_score, recall_score
from tensorflow.keras.models import load_model

# Load the pretrained VGGish model through TensorFlow Hub; it is used for audio feature extraction.
# load_and_preprocess_audio() calls this object directly on the prepared waveform array.
vggish_model = hub.load('https://www.kaggle.com/models/google/vggish/TensorFlow2/vggish/1')

def load_and_preprocess_audio(file_path, target_duration=3, target_sr=44100):
    """Turn one audio file into VGGish embeddings computed on a fixed-length clip.

    The file is read at its native sampling rate, resampled to ``target_sr``
    when the two differ, then zero-padded at the end or truncated so that it
    holds exactly ``int(target_sr * target_duration)`` samples before being
    passed to the global ``vggish_model``.

    Args:
        file_path: Path of the audio file to read.
        target_duration: Clip length in seconds after padding/truncation.
        target_sr: Sampling rate (Hz) the waveform is resampled to.

    Returns:
        The VGGish output for the clip, converted to a NumPy array.

    NOTE(review): the published VGGish model card describes its input as
    16 kHz mono audio, while the default here is 44.1 kHz. Confirm this
    matches the preprocessing used when the downstream classifier was trained
    before changing it.
    """
    # sr=None asks librosa to keep the file's own sampling rate.
    waveform, native_sr = librosa.load(file_path, sr=None)
    if native_sr != target_sr:
        waveform = librosa.resample(waveform, orig_sr=native_sr, target_sr=target_sr)

    n_samples = int(target_sr * target_duration)
    shortfall = n_samples - len(waveform)
    if shortfall > 0:
        # Too short: append trailing zeros (silence) up to the target length.
        waveform = np.pad(waveform, (0, shortfall))
    else:
        # Long enough: keep only the leading n_samples samples.
        waveform = waveform[:n_samples]

    return vggish_model(waveform).numpy()















In [2]:
# Path to the saved audio classifier (HDF5 file).
# NOTE(review): this is an absolute, machine-specific path; the notebook will not run on
# another machine unless it is edited. The frame model further down uses a relative path.
model_path = "E:/cse499b/vggish_model/vggish_multimodal_correction.h5"

# Load the trained audio model; predict_audio_probabilities() reads it as a global.
audio_model = load_model(model_path)



In [3]:
def predict_audio_probabilities(audio_files):
    """Run the audio classifier on a list of audio files.

    Each file is converted to VGGish embeddings by
    ``load_and_preprocess_audio`` (with its default duration and sampling
    rate); the per-file results are stacked into one array and passed to the
    global ``audio_model`` in a single ``predict`` call.

    Args:
        audio_files: Iterable of audio file paths.

    Returns:
        Whatever ``audio_model.predict`` returns for the stacked features;
        one row of class probabilities per input file in the recorded run.
    """
    embeddings_per_file = []
    for path in audio_files:
        embeddings_per_file.append(load_and_preprocess_audio(path))

    feature_batch = np.array(embeddings_per_file)
    return audio_model.predict(feature_batch)


In [4]:
# Example audio file paths (relative to the notebook's working directory).
# Smoke test of the audio branch alone: two clips in, one probability row per clip out.
audio_test_files = ['audio_mod/maa/maa_adib.wav', 'audio_mod/sahajjo/sahajjo_adib.wav']
audio_predictions = predict_audio_probabilities(audio_test_files)
print("Predicted probabilities:", audio_predictions)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step
Predicted probabilities: [[0.31195778 0.05451614 0.08916745 0.45903382 0.08532491]
 [0.02022971 0.0137376  0.02984658 0.00514909 0.931037  ]]


In [5]:
# import os
# import numpy as np
# import tensorflow as tf
# from tensorflow.keras.models import load_model
# from tensorflow.keras.preprocessing.image import img_to_array, load_img

# # Load the trained frame model from the specified path
# frame_model_path = 'E:/cse499b/correction_model/final_model_openpose_v1_correction.keras'  # Adjust this path
# frame_model = load_model(frame_model_path)

import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras.models import load_model

# Define the path to the trained frame model.
# The path is relative, so it is resolved against the notebook's current working directory.
frame_model_path = 'frames_model_five/sign_language_final_model_correction.keras'  # Adjust this path as necessary
# predict_video_probabilities() reads frame_model as a global.
frame_model = load_model(frame_model_path)


In [6]:
def load_and_preprocess_video_sequence(video_path, sequence_length=30, img_width=64, img_height=64):
    """
    Load the first ``sequence_length`` frame images of one video as a single-item batch.

    Args:
        video_path: Directory that contains the video's frame images (one file per frame).
        sequence_length: Number of frames the model expects.
        img_width: Width each frame is resized to.
        img_height: Height each frame is resized to.

    Returns:
        An array of shape ``(1, sequence_length, img_height, img_width, channels)``
        with pixel values divided by 255, or ``None`` (after printing a warning)
        when the directory holds fewer than ``sequence_length`` frame files.

    NOTE(review): frames are ordered by plain string sorting, so un-padded names
    (``frame_10`` before ``frame_2``) would be read out of temporal order.
    Confirm the frame files use zero-padded numbering.
    """
    # Keep regular files only: a stray sub-directory would otherwise be counted
    # as a frame and then make load_img fail.
    candidates = (os.path.join(video_path, fname) for fname in os.listdir(video_path))
    frames = sorted(path for path in candidates if os.path.isfile(path))
    if len(frames) < sequence_length:
        print("Warning: Not enough frames in video to match the expected sequence length.")
        return None  # Or consider padding strategies

    # Select the first 'sequence_length' frames
    frames = frames[:sequence_length]
    # Keras' load_img takes target_size as (height, width), not (width, height).
    frame_array = np.array(
        [img_to_array(load_img(frame, target_size=(img_height, img_width))) for frame in frames]
    )
    frame_array = np.expand_dims(frame_array, axis=0)  # Add an extra dimension for batch (model expects batches)
    return frame_array / 255.0  # Normalize the frames

def predict_video_probabilities(video_paths):
    """
    Run the frame model on each video and collect one probability row per video.

    Every path is loaded with ``load_and_preprocess_video_sequence`` using its
    default arguments and passed to the global ``frame_model`` as a
    single-item batch. Videos for which the loader returns ``None`` are left
    out, so the result can have fewer rows than ``video_paths`` has entries
    and row indices then no longer line up with the input list.

    Args:
        video_paths: Iterable of frame-directory paths, one per video.

    Returns:
        NumPy array built from the per-video probability vectors (an empty
        array when nothing could be predicted).
    """
    per_video = []
    for path in video_paths:
        sequence = load_and_preprocess_video_sequence(path)
        if sequence is None:
            # Loader could not build a full sequence; skip this video.
            continue
        batch_output = frame_model.predict(sequence)
        per_video.append(batch_output[0])  # batch of one -> take its only row
    return np.array(per_video)

In [13]:
# Example usage: smoke test of the video branch alone.
# Despite the '.mp4' suffix, each path is handed to os.listdir() by
# load_and_preprocess_video_sequence(), so it must be a directory of frame images.
# NOTE(review): absolute, machine-specific paths; adjust before running elsewhere.
video_test_paths = ['E:/openpose_ten/bondhu/p12_c_bondhu.mp4', 'E:/openpose_ten/sahajjo/p9_f_sahajjo.mp4']
video_predictions = predict_video_probabilities(video_test_paths)
print("Predicted probabilities:", video_predictions)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
Predicted probabilities: [[5.1959499e-04 9.9889529e-01 1.4265205e-05 3.7874738e-04 1.9207477e-04]
 [5.2799780e-05 1.0947231e-03 1.8673112e-04 2.1462294e-05 9.9864417e-01]]


In [14]:

# # Example usage
# audio_test_files = ['audio_mod/dhonnobad/dhonnobad_mahim.wav', 'audio_mod/sahajjo/sahajjo_adib.wav']
# video_test_paths = ['E:/openpose_ten/dhonnobad/p1_c_dhonnobad.mp4', 'E:/openpose_ten/sahajjo/p9_f_sahajjo.mp4']

# # Call the late fusion function
# fused_probabilities, final_predictions = late_fusion(audio_test_files, video_test_paths)
# print("Fused probabilities:", fused_probabilities)
# print("Final predictions:", final_predictions)
# Manually specified test files

In [20]:
def late_fusion(audio_test_files, video_test_paths):
    """Fuse audio and video predictions by averaging their class probabilities.

    The two lists are paired by position: row ``i`` of the audio predictions
    is averaged with row ``i`` of the video predictions, both weighted equally.

    Args:
        audio_test_files: Audio file paths, passed to ``predict_audio_probabilities``.
        video_test_paths: Video frame-directory paths, passed to
            ``predict_video_probabilities``.

    Returns:
        Tuple ``(fused_probs, final_predictions)``: the averaged probability
        rows and, for each row, the index of its largest value.

    Raises:
        ValueError: If the two models return a different number of predictions
            (for example because a video was skipped by the video predictor).
    """
    p_audio = predict_audio_probabilities(audio_test_files)
    p_video = predict_video_probabilities(video_test_paths)

    # Position-wise averaging only makes sense with one video row per audio row.
    n_audio, n_video = len(p_audio), len(p_video)
    if n_audio != n_video:
        raise ValueError("The number of audio and video predictions must match")

    mean_probs = (p_audio + p_video) / 2
    predicted_classes = np.argmax(mean_probs, axis=1)
    return mean_probs, predicted_classes

# Manually specified test set: 20 audio/video pairs, four per sign, grouped in the
# order baba, bondhu, dhonnobad, maa, sahajjo.
# late_fusion() pairs the two lists by position (audio_test_files[i] is fused with
# video_test_paths[i]), so both lists must stay the same length and keep the same
# class at every index.
audio_test_files = [
    'audio_mod/baba/baba_mahim.wav',
    'audio_mod/baba/baba_adib.wav',
    'audio_mod/baba/baba_ramim.wav',
    'audio_mod/baba/baba_riaz.wav',
    'audio_mod/bondhu/bondhu_adib.wav',
    'audio_mod/bondhu/bondhu_mahim.wav',
    'audio_mod/bondhu/bondhu_riaz.wav',
    'audio_mod/bondhu/bondhu_ramim.wav',
    'audio_mod/dhonnobad/dhonnobad_adib.wav',
    'audio_mod/dhonnobad/dhonnobad_mahim.wav',
    'audio_mod/dhonnobad/dhonnobad_ramim.wav',
    'audio_mod/dhonnobad/dhonnobad_riaz.wav',
    'audio_mod/maa/maa_riaz.wav',
    'audio_mod/maa/maa_adib.wav',
    'audio_mod/maa/maa_ramim.wav',
    'audio_mod/maa/maa_mahim.wav',
    'audio_mod/sahajjo/sahajjo_ramim.wav',
    'audio_mod/sahajjo/sahajjo_riaz.wav',
    'audio_mod/sahajjo/sahajjo_adib.wav',
    'audio_mod/sahajjo/sahajjo_mahim.wav'
]
# Despite the '.mp4' suffix, each entry is listed with os.listdir() by the video
# loader, so it must be a directory of frame images.
# NOTE(review): absolute, machine-specific paths; adjust before running elsewhere.
video_test_paths = [
    'E:/openpose_ten/baba/p1_c_baba.mp4',
    'E:/openpose_ten/baba/p10_c_baba.mp4',
    'E:/openpose_ten/baba/p7_c_baba.mp4',
    'E:/openpose_ten/baba/p11_c_baba.mp4',
    'E:/openpose_ten/bondhu/p1_c_bondhu.mp4',
    'E:/openpose_ten/bondhu/p10_c_bondhu.mp4',
    'E:/openpose_ten/bondhu/p5_c_bondhu.mp4',
    'E:/openpose_ten/bondhu/p7_c_bondhu.mp4',
    'E:/openpose_ten/dhonnobad/p1_c_dhonnobad.mp4',
    'E:/openpose_ten/dhonnobad/p10_c_dhonnobad.mp4',
    'E:/openpose_ten/dhonnobad/p11_c_dhonnobad.mp4',
    'E:/openpose_ten/dhonnobad/p9_c_dhonnobad.mp4',
    'E:/openpose_ten/maa/p1_c_maa.mp4',
    'E:/openpose_ten/maa/p10_c_maa.mp4',
    'E:/openpose_ten/maa/p7_c_maa.mp4',
    'E:/openpose_ten/maa/p11_c_maa.mp4',
    'E:/openpose_ten/sahajjo/p9_f_sahajjo.mp4',
    'E:/openpose_ten/sahajjo/p1_c_sahajjo.mp4',
    'E:/openpose_ten/sahajjo/p5_c_sahajjo.mp4',
    'E:/openpose_ten/sahajjo/p6_c_sahajjo.mp4'
]

# Call the late fusion function
# fused_probabilities holds the averaged class probabilities per pair;
# final_predictions holds the arg-max class index per pair (used by the metrics cell).
fused_probabilities, final_predictions = late_fusion(audio_test_files, video_test_paths)
print("Fused probabilities:", fused_probabilities)
print("Final predictions:", final_predictions)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35

In [15]:
# true_labels = [2, 4, 2, 4]  # Adjust these labels according to your actual class assignment

# # Call the late fusion function
# fused_probabilities, final_predictions = late_fusion(audio_test_files, video_test_paths)

# # Calculate accuracy
# accuracy = accuracy_score(true_labels, final_predictions)
# print("Accuracy of the model:", accuracy)

# # Get a classification report
# report = classification_report(true_labels, final_predictions, target_names=['Class 0', 'Class 1','Class 2','Class 3','Class 4'])
# print("Classification Report:\n", report)

# from sklearn.metrics import classification_report, accuracy_score

# # Define labels for all classes expected in the model
# all_class_labels = [0, 1, 2, 3, 4]  # Assuming classes are 0-indexed and you have 5 classes total

# # Provide names for these classes (if these are specific signs, replace 'Class X' with the actual sign names)
# target_names = ['Class 0', 'Class 1', 'Class 2', 'Class 3', 'Class 4']

# # Make sure the true labels are correct based on your description
# true_labels = [2, 4, 2, 4]  # Adjust these labels if necessary

# # Assuming you have already executed your late fusion and obtained 'final_predictions'
# # Call the classification report with all classes
# report = classification_report(
#     true_labels, final_predictions,
#     labels=all_class_labels,
#     target_names=target_names
# )

# print("Classification Report:\n", report)



In [21]:
# True labels for the test data, one class index per test pair, in the same order
# as the audio/video lists given to late_fusion (four samples for each class).
# Presumably 0=baba, 1=bondhu, 2=dhonnobad, 3=maa, 4=sahajjo, judging by the order
# of the test lists. Verify against the label encoding used in training.
true_labels = [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4]  # Adjust these as needed to match your data

# Calculate accuracy, recall, and F1-score.
# average='macro' takes the unweighted mean over classes, so each class counts equally.
accuracy = accuracy_score(true_labels, final_predictions)
recall = recall_score(true_labels, final_predictions, average='macro')
f1 = f1_score(true_labels, final_predictions, average='macro')

print("Accuracy of the model:", accuracy)
print("Recall of the model:", recall)
print("F1 Score of the model:", f1)

# Define labels and target names for all classes in the model.
# The names are derived from the label list so the two can never get out of sync.
all_class_labels = [0, 1, 2, 3, 4]  # Assuming classes are 0-indexed and you have 5 classes total
target_names = [f'Class {label}' for label in all_class_labels]

# Generate the classification report; passing labels= keeps a row for every class
# even if one of them never appears in the labels or predictions.
report = classification_report(
    true_labels, final_predictions,
    labels=all_class_labels,
    target_names=target_names
)
print("Classification Report:\n", report)

Accuracy of the model: 1.0
Recall of the model: 1.0
F1 Score of the model: 1.0
Classification Report:
               precision    recall  f1-score   support

     Class 0       1.00      1.00      1.00         4
     Class 1       1.00      1.00      1.00         4
     Class 2       1.00      1.00      1.00         4
     Class 3       1.00      1.00      1.00         4
     Class 4       1.00      1.00      1.00         4

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20

