Step 1: Install Requirements

In [None]:
pip install opencv-python-headless tensorflow keras librosa transformers streamlit

1. Facial Emotion Recognition (OpenCV + CNN)

In [None]:
import cv2
from tensorflow.keras.models import load_model
import numpy as np

# Load the trained facial-emotion network once, at import time, so that every
# call to detect_emotion_from_image() reuses the same model object. The path is
# relative, so the file is looked up in the current working directory.
model = load_model('emotion_model.h5')  # Pretrained model (FER2013)
# Human-readable class names, indexed by the argmax of the model's output.
# The order must match the label encoding the model was trained with
# (assumed to be the usual FER2013 ordering — TODO confirm against the model).
emotion_labels = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

def detect_emotion_from_image(image_path):
    """Return the predicted emotion for the first face found in an image.

    The image is converted to grayscale, faces are located with OpenCV's
    frontal-face Haar cascade, and the first detected face is cropped,
    resized to 48x48, scaled to [0, 1] and passed to the module-level
    ``model``.

    Args:
        image_path: Path to an image file readable by ``cv2.imread``.

    Returns:
        The entry of ``emotion_labels`` with the highest predicted score for
        the first detected face, or the string ``"No Face Detected"`` when
        the cascade finds no face.

    Raises:
        ValueError: If the file is missing or cannot be decoded as an image.
    """
    img = cv2.imread(image_path)
    # cv2.imread reports failure by returning None instead of raising; without
    # this check the error would surface later as an opaque cv2.error.
    if img is None:
        raise ValueError(f"Could not read image: {image_path!r}")

    face_cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
    )
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4)

    # detectMultiScale yields an empty sequence when nothing is found.
    if len(faces) == 0:
        return "No Face Detected"

    # Only the first detection is classified.
    x, y, w, h = faces[0]
    roi = cv2.resize(gray[y:y + h, x:x + w], (48, 48))
    roi = roi.astype("float") / 255.0
    # Add batch and channel axes: (48, 48) -> (1, 48, 48, 1).
    roi = roi[np.newaxis, :, :, np.newaxis]

    prediction = model.predict(roi)
    return emotion_labels[np.argmax(prediction)]

2. Voice Emotion Detection (Librosa + SVM)

In [None]:
import librosa
import joblib
import numpy as np

# Load the trained voice-emotion classifier once, at import time. The path is
# relative, so the file is looked up in the current working directory.
# SECURITY: joblib.load unpickles the file, and unpickling can execute
# arbitrary code — only load model files that come from a trusted source.
# NOTE(review): this rebinds the module-level name `model`, which the facial
# emotion section also assigns. That is harmless when each section is saved as
# its own module, but the later assignment wins if both cells are run in the
# same notebook kernel — confirm how this code is meant to be executed.
model = joblib.load('voice_emotion_model.pkl')  # Pretrained SVM

def extract_features(file):
    """Build a single-row feature matrix describing an audio file.

    The audio is loaded with ``librosa.load`` and three spectral features are
    computed (MFCC, chroma STFT, mel spectrogram). Each feature is averaged
    over its time axis and the three averages are concatenated.

    Args:
        file: Path (or file-like object) accepted by ``librosa.load``.

    Returns:
        A 2-D NumPy array of shape ``(1, n_features)``, suitable as input to
        a scikit-learn style ``predict``. With librosa's default settings
        this is presumably 20 + 12 + 128 = 160 values — verify against the
        feature layout the classifier was trained on.
    """
    signal, sample_rate = librosa.load(file)
    # librosa >= 0.10 made the audio arguments keyword-only; passing them by
    # name works on both old and new releases.
    mfcc = librosa.feature.mfcc(y=signal, sr=sample_rate)
    chroma = librosa.feature.chroma_stft(y=signal, sr=sample_rate)
    mel = librosa.feature.melspectrogram(y=signal, sr=sample_rate)

    # Collapse the time (frame) axis so clips of any length give a fixed-size
    # vector.
    features = np.hstack([
        np.mean(mfcc, axis=1),
        np.mean(chroma, axis=1),
        np.mean(mel, axis=1),
    ])
    return features.reshape(1, -1)

def detect_emotion_from_voice(file):
    """Predict the emotion expressed in an audio file.

    Args:
        file: Audio file to analyze; passed unchanged to ``extract_features``.

    Returns:
        The first (and only) prediction that the module-level ``model``
        produces for the file's feature row.
    """
    predictions = model.predict(extract_features(file))
    return predictions[0]

3. Text Sentiment Detection (BERT or VADER)

In [None]:
# text_emotion.py
from transformers import pipeline

# Build the sentiment pipeline once, at import time, so every call to
# detect_emotion_from_text() reuses it. No model is named, so the library's
# default for the "sentiment-analysis" task is used.
# NOTE(review): the default model is presumably downloaded on first use and
# may change between transformers releases — consider pinning one explicitly.
classifier = pipeline("sentiment-analysis")

def detect_emotion_from_text(text):
    """Classify the sentiment of a piece of text.

    Args:
        text: The text to pass to the module-level ``classifier``.

    Returns:
        A string of the form ``"<label> (<score>%)"``, where the score is the
        classifier's confidence as a percentage rounded to two decimals.
    """
    top_result = classifier(text)[0]
    label = top_result['label']
    percent = round(top_result['score'] * 100, 2)
    return f"{label} ({percent}%)"

4. GUI and Multi-Modal Fusion (Streamlit)

In [None]:
# app.py
# Streamlit front end: the sidebar selects one input type (image, voice or
# text) and the matching detector's result is shown. Each modality is analyzed
# on its own; no fusion of predictions is performed here.
import os
import tempfile

import streamlit as st
from facial_emotion import detect_emotion_from_image
from voice_emotion import detect_emotion_from_voice
from text_emotion import detect_emotion_from_text


def _analyze_upload(uploaded, detector):
    """Run a path-based detector on an uploaded file via a private temp file.

    The upload is written to a uniquely named temporary file that keeps the
    original extension, so concurrent sessions cannot overwrite each other
    and format sniffing sees the right suffix. The file is always removed
    afterwards.

    Args:
        uploaded: The object returned by ``st.file_uploader``.
        detector: Callable taking a file path and returning the result.

    Returns:
        Whatever ``detector`` returns for the saved file.
    """
    suffix = os.path.splitext(uploaded.name)[1]
    # delete=False so the detector can reopen the file by path (needed on
    # Windows); cleanup happens in the finally block below.
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        tmp.write(uploaded.getvalue())
        tmp_path = tmp.name
    try:
        return detector(tmp_path)
    finally:
        os.remove(tmp_path)


st.title("🎭 Multi-Modal Emotion Detection System")

tab = st.sidebar.radio("Select Input Type", ["Image", "Voice", "Text"])

if tab == "Image":
    img = st.file_uploader("Upload an image", type=["jpg", "png"])
    if img is not None:
        emotion = _analyze_upload(img, detect_emotion_from_image)
        st.success(f"Detected Emotion: {emotion}")

elif tab == "Voice":
    audio = st.file_uploader("Upload a voice file", type=["wav", "mp3"])
    if audio is not None:
        emotion = _analyze_upload(audio, detect_emotion_from_voice)
        st.success(f"Detected Emotion: {emotion}")

elif tab == "Text":
    text = st.text_input("Enter text:")
    if text:
        emotion = detect_emotion_from_text(text)
        st.success(f"Sentiment Analysis: {emotion}")

Notes:
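You'll need a pretrained facial emotion model (emotion_model.h5) and a pretrained voice emotion SVM model (voice_emotion_model.pkl). Both are loaded by relative path, so place them in the directory you run the code from. Neither file is provided here; you must train them or obtain them separately.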

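This is a simple prototype: each input type is analyzed independently, and no fusion is implemented yet. To fuse the modalities, you could combine their predictions using majority voting or a meta-classifier.

To run the app, save sections 1–3 as facial_emotion.py, voice_emotion.py, and text_emotion.py next to app.py (these are the module names app.py imports), then start it with `streamlit run app.py`.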