### Pair the Emoton Prediction Model with Musical Logic

In [9]:
import cv2
import dlib
import numpy as np
import pygame
import joblib
import pandas as pd
from collections import deque
from keras.models import load_model
import tensorflow as tf
from scipy.special import softmax
import os
import random
from pygame import mixer
# Initialize pygame mixer for music
pygame.mixer.init()


In [11]:
# Load pre-trained models
rf_model = joblib.load('/Users/nixi/Desktop/Final-thesis-folder-24/AI-for-Media-project-23-24/random_forest_model.pkl')
scaler = joblib.load('/Users/nixi/Desktop/Final-thesis-folder-24/AI-for-Media-project-23-24/scaler.pkl')
cnn_model = load_model('/Users/nixi/Desktop/Final-thesis-folder-24/AI-for-Media-project-23-24/model.h5', custom_objects={'swish': tf.nn.swish})

# Compile the CNN model (not necessary for prediction, but useful for debugging)
cnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Load Haar Cascade for face detection
face_cascade = cv2.CascadeClassifier('/Users/nixi/Desktop/Final-thesis-folder-24/AI-for-Media-project-23-24/haar/haarcascade_frontalface_alt2.xml')
if face_cascade.empty():
    raise IOError("Failed to load Haar Cascade file. Please check the file path.")

# Load Dlib's shape predictor for facial landmarks
shape_predictor = dlib.shape_predictor('/Users/nixi/Desktop/Final-thesis-folder-24/AI-for-Media-project-23-24/shape_predictor_68_face_landmarks.dat')

# Define emotions map with integer keys
emotion_map = {0: 'angry-face', 1: 'happy-face', 2: 'neutral-face', 3: 'sad-face'}

# Emotion buffer for smoothing predictions
emotion_buffer = deque(maxlen=20)  # Keep last 20 frames

# Map each emotion to a music folder
emotion_music_map = {
    'happy-face': '/Users/nixi/Desktop/Final-thesis-folder-24/AI-for-Media-project-23-24/Music/happy',
    'sad-face': '/Users/nixi/Desktop/Final-thesis-folder-24/AI-for-Media-project-23-24/Music/sad',
    'angry-face': '/Users/nixi/Desktop/Final-thesis-folder-24/AI-for-Media-project-23-24/Music/angry',
    'neutral-face': '/Users/nixi/Desktop/Final-thesis-folder-24/AI-for-Media-project-23-24/Music/neutral'
}

# Function to play a random song from a specified folder
def play_random_song_from_folder(folder_path):
    songs = [f for f in os.listdir(folder_path) if f.endswith('.mp3')]
    if songs:
        song = random.choice(songs)
        mixer.music.load(os.path.join(folder_path, song))
        mixer.music.play()
    else:
        print(f"No songs found in folder: {folder_path}")

# Initialize video capture
cap = cv2.VideoCapture(0) 

if not cap.isOpened():
    print("Error: Could not open video capture.")
    exit()

current_emotion = 'neutral-face'

while True:
    ret, frame = cap.read()
    if not ret:
        print("Error: Could not read frame.")
        break

    # Convert to grayscale for face detection
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)

    for (x, y, w, h) in faces:
        # Draw bounding box around the face
        cv2.rectangle(frame, (x, y), (x + w, y + h), (36, 255, 12), 2)

        # Get the landmarks
        dlib_rect = dlib.rectangle(x, y, x + w, y + h)
        landmarks = shape_predictor(gray, dlib_rect)

        # Extract facial features for Random Forest
        left_eye = np.array([landmarks.part(36).x, landmarks.part(36).y])
        right_eye = np.array([landmarks.part(45).x, landmarks.part(45).y])
        nose_tip = np.array([landmarks.part(30).x, landmarks.part(30).y])
        mouth_left = np.array([landmarks.part(48).x, landmarks.part(48).y])
        mouth_right = np.array([landmarks.part(54).x, landmarks.part(54).y])
        chin = np.array([landmarks.part(8).x, landmarks.part(8).y])
        left_eyebrow = np.array([landmarks.part(21).x, landmarks.part(21).y])
        right_eyebrow = np.array([landmarks.part(22).x, landmarks.part(22).y])
        
        # Calculate distances and angles (features used during training)
        total_face_area = w * h
        left_eyebrow_angle = np.arctan2(left_eyebrow[1] - left_eye[1], left_eyebrow[0] - left_eye[0])
        right_eyebrow_angle = np.arctan2(right_eyebrow[1] - right_eye[1], right_eyebrow[0] - right_eye[0])
        eyebrow_distance = np.linalg.norm(left_eyebrow - right_eyebrow)
        eyes_dist = np.linalg.norm(left_eye - right_eye)
        eyes_to_nose_dist = np.linalg.norm((left_eye + right_eye) / 2 - nose_tip)
        nose_to_mouth_dist = np.linalg.norm(nose_tip - (mouth_left + mouth_right) / 2)
        mouth_angle = np.arctan2(mouth_right[1] - mouth_left[1], mouth_right[0] - mouth_left[0])
        nose_angle = np.arctan2(nose_tip[1] - chin[1], nose_tip[0] - chin[0])

        # Prepare the feature vector for Random Forest
        features = np.array([[total_face_area, left_eyebrow_angle, right_eyebrow_angle, eyebrow_distance, 
                              eyes_dist, eyes_to_nose_dist, nose_to_mouth_dist, mouth_angle, nose_angle]])

        # Use the original feature names used during fitting
        original_feature_names = ['TotalFaceArea', 'LeftEyebrowAngle', 'RightEyebrowAngle', 'EyebrowDistance', 
                                  'EyesDist', 'EyesToNoseDist', 'NoseToMouthDist', 'MouthAngle', 'NoseAngle']
        features_df = pd.DataFrame(features, columns=original_feature_names)

        # Scale the features
        features_scaled = scaler.transform(features_df)

        # Predict the emotion with Random Forest
        rf_emotion_label = rf_model.predict(features_scaled)[0]
        rf_emotion_text = emotion_map.get(rf_emotion_label, "Unknown")
        print("RF Emotion Label:", rf_emotion_label, "RF Emotion Text:", rf_emotion_text)  # Debugging: Print RF label and text

        # Prepare the image for CNN (150x150 as per your earlier configuration)
        face_img = cv2.resize(frame[y:y+h, x:x+w], (150, 150))
        face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
        face_img = face_img / 255.0  # Normalize to [0, 1]
        face_img = np.expand_dims(face_img, axis=-1)  # Add channel dimension for grayscale
        face_img = np.expand_dims(face_img, axis=0)  # Add batch dimension

        # Predict the emotion with CNN
        cnn_predictions = cnn_model.predict(face_img)
        cnn_probabilities = softmax(cnn_predictions, axis=1)
        cnn_emotion_label = int(np.argmax(cnn_probabilities, axis=1)[0])
        cnn_emotion_text = emotion_map.get(cnn_emotion_label, "Unknown")
        print("CNN Emotion Label:", cnn_emotion_label, "CNN Emotion Text:", cnn_emotion_text)  # Debugging: Print CNN label and text

        # Determine the final emotion text
        if rf_emotion_text == "Unknown":
            final_emotion_text = cnn_emotion_text
        elif rf_emotion_text == cnn_emotion_text:
            final_emotion_text = rf_emotion_text
        else:
            # Use softmax probabilities to weigh the decision
            rf_weight = 0.5  # Example weight for Random Forest
            cnn_weight = cnn_probabilities[0][cnn_emotion_label]  # Use the probability of the predicted class

            # Apply bias correction for "happy-face"
            if cnn_emotion_text == 'happy-face':
                cnn_weight *= 0.8  # Reduce the weight for "happy-face" predictions

            if cnn_weight > rf_weight:
                final_emotion_text = cnn_emotion_text
            else:
                final_emotion_text = rf_emotion_text

        print("Final Emotion Text:", final_emotion_text)  # Debugging: Print final emotion decision

        # Display the emotion on the frame
        cv2.putText(frame, final_emotion_text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)

        # Add the predicted emotion to the buffer
        emotion_buffer.append(final_emotion_text)

        # Get the most common emotion in the buffer
        common_emotion = max(set(emotion_buffer), key=emotion_buffer.count)

        # Change the music only if the emotion has stabilized
        if common_emotion != current_emotion and emotion_buffer.count(common_emotion) > 15:
            current_emotion = common_emotion
            if current_emotion in emotion_music_map:
                music_folder = emotion_music_map[current_emotion]
                play_random_song_from_folder(music_folder)
            else:
                print(f"Emotion {current_emotion} not recognized.")

    # Display the frame with face and emotion overlay
    cv2.imshow('Emotion Recognition', frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the capture and close all windows
cap.release()
cv2.destroyAllWindows()
pygame.mixer.quit()



RF Emotion Label: happy-face RF Emotion Text: Unknown
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
CNN Emotion Label: 2 CNN Emotion Text: neutral-face
Final Emotion Text: neutral-face
RF Emotion Label: happy-face RF Emotion Text: Unknown
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
CNN Emotion Label: 2 CNN Emotion Text: neutral-face
Final Emotion Text: neutral-face
RF Emotion Label: happy-face RF Emotion Text: Unknown
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
CNN Emotion Label: 2 CNN Emotion Text: neutral-face
Final Emotion Text: neutral-face
RF Emotion Label: happy-face RF Emotion Text: Unknown
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
CNN Emotion Label: 2 CNN Emotion Text: neutral-face
Final Emotion Text: neutral-face
RF Emotion Label: happy-face RF Emotion Text: Unknown
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
CNN Emotion Label: 2 CNN Emotion

KeyboardInterrupt: 

: 