In [None]:
import sys
import subprocess


def install_package(package, failure_message=None):
    """Install ``package`` with pip into the interpreter running this notebook.

    Args:
        package: Requirement string passed to ``pip install``.
        failure_message: Text printed when the installation fails. Defaults to
            the standard failure marker followed by the package name.

    Returns:
        True when pip exits successfully, False when pip fails or cannot be
        launched.
    """
    try:
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', package])
    except (subprocess.CalledProcessError, OSError):
        # Catch only pip/launch failures so that KeyboardInterrupt and
        # SystemExit still interrupt the cell instead of being swallowed.
        print(failure_message if failure_message is not None else f"‚úó {package}")
        return False
    print(f"‚úì {package}")
    return True


# Required packages that could not be installed; reported at the end so the
# final message does not claim success after a failure.
failed_packages = []

print("Installing core packages...")

packages = [
    'opencv-python',
    'SpeechRecognition',
    'transformers',
    'torch',
    'numpy',
    'pandas',
    'pillow',
    'gdown',
    'tqdm'
]

for package in packages:
    if not install_package(package):
        failed_packages.append(package)

print("\nInstalling TensorFlow...")
if not install_package('tensorflow-cpu'):
    failed_packages.append('tensorflow-cpu')

print("\nInstalling DeepFace...")
if not install_package('deepface'):
    failed_packages.append('deepface')

print("\nInstalling PyAudio...")
# PyAudio is optional, so a failure here is reported but not counted.
install_package(
    'PyAudio',
    failure_message="‚ö† PyAudio (optional - needs compiler, microphone might not work)",
)

if failed_packages:
    print(f"\n‚ö† Installation finished with failures: {', '.join(failed_packages)}")
else:
    print("\n‚úÖ Installation complete!")


Installing core packages...
‚úì opencv-python
‚úì opencv-python
‚úì SpeechRecognition
‚úì SpeechRecognition
‚úì transformers
‚úì torch
‚úì numpy
‚úì pandas
‚úì pillow
‚úì gdown
‚úì tqdm

Installing TensorFlow...
‚úó tensorflow-cpu

Installing DeepFace...
‚úó deepface

Installing PyAudio...
‚úì PyAudio

‚úÖ Installation complete!


In [None]:
import cv2
import speech_recognition as sr
from transformers import pipeline
import threading
import time
import torch

print("üöÄ Initializing Multimodal Emotion Recognition System...")
print("=" * 60)

print("üì¶ Loading emotion detection models...")


def _load_face_detector():
    """Return a FER face-emotion detector, or None when it cannot be loaded.

    If ``fer`` is not importable, one ``pip install fer`` is attempted with the
    current interpreter. A failed installation, a failed import afterwards, or
    a failure while constructing the detector is reported and results in None
    instead of an exception that would abort the whole cell.
    """
    import importlib
    import subprocess
    import sys

    try:
        from fer import FER
    except ImportError:
        print("‚ö† FER not available. Installing...")
        try:
            subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', 'fer'])
        except (subprocess.CalledProcessError, OSError) as e:
            print(f"‚ö† Video emotion detector unavailable (fer install failed): {e}")
            return None
        # Let the import system notice the freshly installed package.
        importlib.invalidate_caches()
        try:
            from fer import FER
        except ImportError as e:
            print(f"‚ö† Video emotion detector unavailable (fer import failed): {e}")
            return None

    try:
        detector = FER(mtcnn=False)
    except Exception as e:
        print(f"‚ö† Video emotion detector failed: {e}")
        return None

    print("‚úì Video emotion detector loaded (FER)")
    return detector


# face_detector is None and USE_VIDEO is False when video analysis is unavailable.
face_detector = _load_face_detector()
USE_VIDEO = face_detector is not None

# Text-emotion classifier applied to the speech transcript; uses the first GPU
# when CUDA is available and the CPU otherwise.
try:
    emotion_classifier = pipeline(
        "text-classification",
        model="bhadresh-savani/distilbert-base-uncased-emotion",
        device=0 if torch.cuda.is_available() else -1
    )
    print("‚úì Audio emotion detector loaded (DistilBERT)")
    USE_AUDIO = True
except Exception as e:
    print(f"‚ö† Audio emotion detector failed: {e}")
    USE_AUDIO = False

print("=" * 60)
print()

def recognize_speech(stop_event=None):
    """Listen on the default microphone and print the emotion of each phrase.

    Each captured phrase is transcribed with ``recognize_google`` and the
    transcript is classified with the module-level ``emotion_classifier``.
    Returns immediately when ``USE_AUDIO`` is false or no microphone can be
    opened.

    Args:
        stop_event: Optional ``threading.Event``. When given, the loop ends
            once the event is set. When omitted, the loop runs until the
            process (or the daemon thread hosting it) ends.
    """
    if not USE_AUDIO:
        return

    recognizer = sr.Recognizer()

    try:
        mic = sr.Microphone()
    except (OSError, AttributeError) as e:
        # SpeechRecognition raises AttributeError when PyAudio is missing,
        # and OSError when no input device is available.
        print(f"‚ö† Microphone not available: {e}")
        return

    with mic as source:
        print("üé§ Listening for speech...")
        recognizer.adjust_for_ambient_noise(source, duration=1)

        while stop_event is None or not stop_event.is_set():
            try:
                audio = recognizer.listen(source, timeout=5, phrase_time_limit=10)
                print("üîÑ Analyzing Speech...")

                text = recognizer.recognize_google(audio)
                print(f"üìù Recognized: '{text}'")

                emotions = emotion_classifier(text)[0]
                print(f"üòä Speech Emotion: {emotions['label']} ({emotions['score']:.2%} confidence)")
                print("-" * 60)

            except sr.WaitTimeoutError:
                # Nothing was said within the timeout; listen again right away.
                continue
            except sr.RequestError as e:
                print(f"‚ö† Speech recognition service error: {e}")
                time.sleep(2)
            except sr.UnknownValueError:
                print("‚ö† Could not understand audio")
            except Exception as e:
                # Keep the listener alive on unexpected per-phrase failures.
                print(f"‚ö† Error: {e}")

            time.sleep(0.5)


def _draw_face_annotations(frame, faces):
    """Draw a box and a dominant-emotion label on ``frame`` for each face."""
    for face in faces:
        x, y, w, h = face["box"]

        emotion_scores = face["emotions"]
        dominant_emotion = max(emotion_scores, key=emotion_scores.get)
        confidence = emotion_scores[dominant_emotion]

        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)

        text = f"{dominant_emotion}: {confidence:.2%}"
        # Clamp the label so it stays visible for faces at the top edge.
        cv2.putText(frame, text, (x, max(y-10, 20)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)


def detect_video_emotion(stop_event):
    """Show the webcam feed annotated with detected facial emotions.

    Detection runs on every fifth frame with the module-level
    ``face_detector``; the most recent detections are drawn on every frame so
    the overlay does not flicker. The loop ends when ``stop_event`` is set,
    when a frame cannot be read, or when 'q' is pressed in the video window
    (which also sets ``stop_event``). Returns immediately when ``USE_VIDEO``
    is false.

    Args:
        stop_event: ``threading.Event`` used to request and signal shutdown.
    """
    if not USE_VIDEO:
        return

    print("üìπ Starting video emotion detection...")
    print("Press 'q' to quit video window")

    video_capture = cv2.VideoCapture(0)

    if not video_capture.isOpened():
        print("‚ö† Cannot open webcam")
        video_capture.release()
        return

    frame_count = 0
    last_faces = []
    error_reported = False

    try:
        while not stop_event.is_set():
            ret, frame = video_capture.read()

            if not ret:
                print("‚ö† Failed to grab frame")
                break

            frame_count += 1

            try:
                if frame_count % 5 == 0:
                    last_faces = face_detector.detect_emotions(frame) or []
                _draw_face_annotations(frame, last_faces)
            except Exception as e:
                # Keep streaming video, but report the failure once instead of
                # hiding it, and drop detections that may be malformed.
                last_faces = []
                if not error_reported:
                    print(f"‚ö† Emotion detection error: {e}")
                    error_reported = True

            cv2.putText(frame, "Multimodal Emotion Recognition", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
            cv2.putText(frame, "Press 'q' to quit", (10, 60),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

            cv2.imshow("Emotion Detection - Video", frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                stop_event.set()
                break
    finally:
        # Release the camera and close the window even if the loop raised.
        video_capture.release()
        cv2.destroyAllWindows()
        print("üìπ Video detection stopped")


def main():
    """Start the audio and video detectors and wait until they should stop.

    The audio listener runs as a daemon thread and the video loop as a regular
    thread. Waiting ends when the stop event is set (for example by pressing
    'q' in the video window), when Ctrl+C is pressed, or when no worker thread
    is alive any more (for example because no webcam or microphone could be
    opened), so the call cannot hang with nothing running.
    """
    print("\n" + "=" * 60)
    print("üé≠ MULTIMODAL EMOTION RECOGNITION SYSTEM")
    print("=" * 60)
    print("This system analyzes emotions from:")
    print("  üìπ Video (facial expressions)")
    print("  üé§ Audio (speech content)")
    print()
    print("Instructions:")
    print("  - Speak naturally into your microphone")
    print("  - Look at your webcam for facial emotion detection")
    print("  - Press 'q' in video window to stop")
    print("  - Press Ctrl+C in console to force stop")
    print("=" * 60)
    print()

    stop_event = threading.Event()

    threads = []

    if USE_AUDIO:
        speech_thread = threading.Thread(target=recognize_speech, daemon=True)
        speech_thread.start()
        threads.append(speech_thread)
        print("‚úì Audio detection thread started")

    if USE_VIDEO:
        video_thread = threading.Thread(target=detect_video_emotion, args=(stop_event,))
        video_thread.start()
        threads.append(video_thread)
        print("‚úì Video detection thread started")

    if not threads:
        print("‚ö† No emotion detector is available; nothing to run")

    print()

    try:
        # Also stop waiting once every worker has exited on its own.
        while not stop_event.is_set() and any(t.is_alive() for t in threads):
            time.sleep(0.1)
    except KeyboardInterrupt:
        print("\n\n‚èπ Stopping...")
    finally:
        stop_event.set()
        # Give non-daemon workers time to release the camera and window.
        # The daemon audio thread is left to end with the process.
        for worker in threads:
            if not worker.daemon:
                worker.join(timeout=5)
        cv2.destroyAllWindows()
        print("\n‚úÖ System shutdown complete")
        print("=" * 60)


if __name__ == "__main__":
    main()


  from .autonotebook import tqdm as notebook_tqdm


üöÄ Initializing Multimodal Emotion Recognition System...
üì¶ Loading emotion detection models...
‚ö† FER not available. Installing...


CalledProcessError: Command '['d:\\Multimodal Emotion Recognition Integrating Audio and Video Analysis\\env_emotion\\Scripts\\python.exe', '-m', 'pip', 'install', '-q', 'fer']' returned non-zero exit status 1.