In [10]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

def generate_fusion_inputs(model_gesture, emotion_model, scaler, X_lstm, X, y_gesture):
    gesture_probs = model_gesture.predict(X_lstm)
    gesture_probs = gesture_probs.reshape(gesture_probs.shape[0], -1)
    emotion_probs = emotion_model.predict_proba(scaler.transform(X))
    X_fused = np.concatenate([gesture_probs, emotion_probs], axis=1)
    final_labels = y_gesture
    return X_fused, final_labels

X_fused, final_labels = generate_fusion_inputs(
    model_gesture,
    emotion_model,
    scaler,
    X_lstm,
    X,
    y_gesture
)

num_classes = len(np.unique(final_labels))
y = to_categorical(final_labels, num_classes)

X_train, X_test, y_train, y_test = train_test_split(
    X_fused,
    y,
    test_size=0.2,
    random_state=42,
    stratify=final_labels
)

input_layer = Input(shape=(X_fused.shape[1],))

x = Dense(128, activation="relu")(input_layer)
x = Dropout(0.3)(x)

x = Dense(64, activation="relu")(x)
x = Dropout(0.2)(x)

output_layer = Dense(num_classes, activation="softmax")(x)

fusion_model = Model(inputs=input_layer, outputs=output_layer)

fusion_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

fusion_model.summary()


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 


In [11]:
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)


fusion_model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=32,
    validation_data=(X_test, y_test),
    callbacks=[early_stop]
)

Epoch 1/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 47ms/step - accuracy: 0.2194 - loss: 1.3879 - val_accuracy: 0.1833 - val_loss: 1.3889
Epoch 2/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.2693 - loss: 1.3847 - val_accuracy: 0.2167 - val_loss: 1.3853
Epoch 3/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.1951 - loss: 1.3924 - val_accuracy: 0.2833 - val_loss: 1.3850
Epoch 4/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.3000 - loss: 1.3818 - val_accuracy: 0.2833 - val_loss: 1.3857
Epoch 5/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.2796 - loss: 1.3833 - val_accuracy: 0.2667 - val_loss: 1.3864
Epoch 6/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.2673 - loss: 1.3878 - val_accuracy: 0.3000 - val_loss: 1.3864
Epoch 7/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7d9952ae1820>

In [14]:
fusion_model.save("fusion_model_optimized.h5")



In [18]:
import numpy as np

# Define example input data for prediction. Using a single sample from existing data.
sensor_input = X_lstm[0:1] # Assuming X_lstm is available and shaped for model_gesture
emotion_input_scaled = scaler.transform(X[0:1]) # Assuming X and scaler are available

# Stage-1: Predict gesture probabilities
gesture_out = model_gesture.predict(sensor_input)
gesture_out = gesture_out.reshape(gesture_out.shape[0], -1) # Reshape to 2D

# Stage-2: Predict emotion probabilities
emotion_out = emotion_model.predict_proba(emotion_input_scaled)

# Fusion: Concatenate the outputs from both models
fused_input = np.concatenate([gesture_out, emotion_out], axis=1)

# Final prediction using the fusion model
final_output = fusion_model.predict(fused_input)

# Get the final class decision
final_class = np.argmax(final_output)
print("Final Decision Class:", final_class)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step
Final Decision Class: 1


In [26]:
import json
from sklearn.metrics import accuracy_score
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split # Added this import

emotion_profiles = {
    "Neutral": {
        "pitch": 1.0,
        "speed": 1.0,
        "loudness": 1.0
    },
    "Happy": {
        "pitch": 1.2,
        "speed": 1.15,
        "loudness": 1.1
    },
    "Sad": {
        "pitch": 0.85,
        "speed": 0.9,
        "loudness": 0.85
    },
    "Angry": {
        "pitch": 1.3,
        "speed": 1.2,
        "loudness": 1.25
    },
    "Fear": {
        "pitch": 1.4,
        "speed": 1.3,
        "loudness": 1.15
    }
}


with open("emotion_speech_profiles.json", "w") as f:
    json.dump(emotion_profiles, f, indent=4)

def apply_speech_modulation(text, emotion, profiles):
    params = profiles[emotion]

    pitch = params["pitch"]
    speed = params["speed"]
    loudness = params["loudness"]

    print(f"Speaking: '{text}'")
    print(f"Pitch: {pitch}, Speed: {speed}, Loudness: {loudness}")

    # This is where TTS API / engine call goes


apply_speech_modulation(
    text="I need help",
    emotion="Fear",
    profiles=emotion_profiles
)

# The output 'Speaking: 'I need help'\nPitch: 1.4, Speed: 1.3, Loudness: 1.15' was part of the original cell content and not generated by execution.

# Split X_lstm and y_gesture to create a test set for the gesture model
# Use the same random_state and test_size as the fusion model split for consistency
# Note: We only need the test sets here, so _ are used for train sets.
_, Xg_test, _, yg_test = train_test_split(
    X_lstm,
    y_gesture,
    test_size=0.2, # Matching the fusion model split
    random_state=42, # Matching the fusion model split
    stratify=y_gesture # Stratify based on gesture labels
)

# Prepare Xg_test for the LSTM model.
# Since X_lstm (and thus Xg_test) is already 3D (samples, timesteps, features),
# no reshaping is needed if timesteps=1.
# The original line `Xg_test.values.reshape(...)` was incorrect for a numpy array.
Xg_test_reshaped = Xg_test # Xg_test should already be in the correct shape (samples, 1, features)

# Predict probabilities using the LSTM gesture model
y_pred_probs_gesture = model_gesture.predict(Xg_test_reshaped)
# Corrected argmax to get a 1D array of predicted class labels
y_pred_gesture = np.argmax(y_pred_probs_gesture, axis=2).flatten()

# yg_test already contains the integer labels directly from the split.
y_true_gesture_labels = yg_test

gesture_accuracy = accuracy_score(y_true_gesture_labels, y_pred_gesture)
print("Gesture Model (LSTM) Accuracy:", gesture_accuracy)


Speaking: 'I need help'
Pitch: 1.4, Speed: 1.3, Loudness: 1.15
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
Gesture Model (LSTM) Accuracy: 0.2833333333333333


In [38]:
import time
import numpy as np

start_time = time.time()

gesture_out = model_gesture.predict(sensor_input)
gesture_out = gesture_out.reshape(gesture_out.shape[0], -1) # Reshape to 2D
emotion_out = emotion_model.predict_proba(emotion_input_scaled)
fusion_out = fusion_model.predict(
    np.concatenate([gesture_out, emotion_out], axis=1)
)

end_time = time.time()

latency_ms = (end_time - start_time) * 1000
print("System Latency (ms):", latency_ms)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
System Latency (ms): 201.34782791137695


In [42]:
latencies = [] # Initialize the list to store latencies
for _ in range(50):
    start = time.time()
    model_gesture.predict(sensor_input) # Changed from gesture_model to model_gesture
    emotion_model.predict(emotion_input_scaled) # Changed from emotion_input to emotion_input_scaled
    end = time.time()
    latencies.append((end - start) * 1000)

print("Average Latency (ms):", sum(latencies)/len(latencies))
emotion_accuracy = 0.0 # Placeholder for missing y_emotion data

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43

In [43]:
metrics = {
    "gesture_accuracy": gesture_accuracy,
    "emotion_accuracy": emotion_accuracy,
    "average_latency_ms": sum(latencies)/len(latencies)
}

print(metrics)

{'gesture_accuracy': 0.2833333333333333, 'emotion_accuracy': 0.0, 'average_latency_ms': 117.52631664276123}
