In [2]:
import sys
import os
import shutil
import cv2
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from scipy.io import wavfile
from scipy.fftpack import dct
from moviepy.editor import AudioFileClip, AudioFileClip
from decord import VideoReader
import tensorflow as tf
from tensorflow import keras
# keras.layers provides the layer classes of Keras, the high-level neural-network API used here on top of TensorFlow.
from keras.layers import TFSMLayer
from keras import Model, Input
from PyQt5.QtWidgets import (
    QApplication, QWidget, QLabel, QPushButton,
    QFileDialog, QVBoxLayout, QHBoxLayout
)
from PyQt5.QtCore import Qt, QUrl
from PyQt5.QtMultimedia import QMediaPlayer, QMediaContent
from PyQt5.QtMultimediaWidgets import QVideoWidget

# Settings
# Edge length (pixels) of the square frames; matches the 224x224 size used
# throughout the preprocessing functions below.
input_size = 224
# NOTE(review): num_frame and sampling_rate are not referenced by the code
# visible in this cell -- confirm whether anything else depends on them.
num_frame = 8
sampling_rate = 6

# Load model
# The SavedModel directory is wrapped in a TFSMLayer and exposed through a
# functional Keras model that accepts clips with a variable number of frames
# (leading ``None`` dimension) of input_size x input_size RGB images.
model_path = "D:/GUI-Emotion-Detection/saved_model"
input_tensor = Input(shape=(None, input_size, input_size, 3))
tfsm_layer = TFSMLayer(model_path, call_endpoint='serving_default')
model = Model(inputs=input_tensor, outputs=tfsm_layer(input_tensor))

# Class index -> emotion name, in the order of the model's output classes.
uc_id2label = dict(enumerate(
    ['anger', 'happiness', 'surprise', 'disgust', 'fear', 'sadness']
))

def normalize_audio(audio):
    """Scale *audio* so that its largest absolute sample equals 1.0.

    Args:
        audio: Array-like of samples (any shape). Integer PCM data is
            converted to float64 first; floating-point input keeps its dtype.

    Returns:
        A floating-point array of the same shape as *audio*. Empty or
        all-zero input is returned unscaled (as floats) instead of producing
        an error or NaNs.
    """
    samples = np.asarray(audio)
    # Convert integer PCM to float *before* taking abs(): np.abs() on an
    # int16 array wraps -32768 back to -32768, which corrupts the peak.
    if not np.issubdtype(samples.dtype, np.floating):
        samples = samples.astype(np.float64)
    if samples.size == 0:
        return samples
    peak = np.max(np.abs(samples))
    if peak == 0:
        # Pure silence: dividing would yield NaNs.
        return samples
    return samples / peak

def MFCC(signal, sample_rate):
    """Compute 13 mel-frequency cepstral coefficients per analysis frame.

    Pipeline: pre-emphasis (0.97) -> 25 ms frames with 10 ms stride ->
    Hamming window -> 512-point power spectrum -> 40 triangular mel filters
    -> log (20 * log10) -> DCT-II, keeping coefficients 1..13.

    Args:
        signal: Non-empty array-like of audio samples.
        sample_rate: Sampling rate of *signal* in Hz.

    Returns:
        Array of shape ``(num_frames, 13)``; ``num_frames`` is at least 1.

    Raises:
        ValueError: If *signal* is empty.
    """
    signal = np.asarray(signal)
    if signal.size == 0:
        raise ValueError("MFCC requires a non-empty signal")

    # Pre-emphasis boosts the high frequencies.
    # NOTE(review): np.append flattens its arguments, so a multi-channel
    # (N, C) input ends up as one interleaved 1-D signal -- confirm that this
    # is what the model was trained on before changing it.
    pre_emphasis = 0.97
    emphasized_signal = np.append(signal[0], signal[1:] - pre_emphasis * signal[:-1])

    # Framing.
    frame_size = 0.025
    frame_stride = 0.01
    frame_length = int(round(frame_size * sample_rate))
    frame_step = int(round(frame_stride * sample_rate))
    signal_length = len(emphasized_signal)
    # Always produce at least one frame. Signals no longer than one frame are
    # zero-padded into a single frame rather than yielding zero frames
    # (length == frame_length) or extra frames made only of padding
    # (length < frame_length).
    overhang = max(signal_length - frame_length, 0)
    num_frames = max(1, int(np.ceil(float(overhang) / frame_step)))
    pad_signal_length = num_frames * frame_step + frame_length
    pad_signal = np.append(emphasized_signal, np.zeros(pad_signal_length - signal_length))

    # Row i of `indices` holds the sample positions of frame i.
    frame_starts = np.arange(0, num_frames * frame_step, frame_step)
    indices = frame_starts[:, np.newaxis] + np.arange(frame_length)
    frames = pad_signal[indices] * np.hamming(frame_length)

    # Power spectrum.
    # NOTE(review): rfft crops each frame to NFFT samples when frame_length
    # exceeds 512 (sample rates above ~20 kHz) -- confirm this is intended.
    NFFT = 512
    mag_frames = np.absolute(np.fft.rfft(frames, NFFT))
    pow_frames = (1.0 / NFFT) * (mag_frames ** 2)

    # Triangular filters spaced evenly on the mel scale from 0 Hz to Nyquist.
    nfilt = 40
    low_freq_mel = 0
    high_freq_mel = 2595 * np.log10(1 + (sample_rate / 2) / 700)
    mel_points = np.linspace(low_freq_mel, high_freq_mel, nfilt + 2)
    hz_points = 700 * (10 ** (mel_points / 2595) - 1)
    # FFT-bin index of every filter edge (named to avoid shadowing bin()).
    bin_edges = np.floor((NFFT + 1) * hz_points / sample_rate)
    fbank = np.zeros((nfilt, int(np.floor(NFFT / 2 + 1))))
    for m in range(1, nfilt + 1):
        left, center, right = bin_edges[m - 1], bin_edges[m], bin_edges[m + 1]
        for k in range(int(left), int(center)):      # rising slope
            fbank[m - 1, k] = (k - left) / (center - left)
        for k in range(int(center), int(right)):     # falling slope
            fbank[m - 1, k] = (right - k) / (right - center)

    filter_banks = np.dot(pow_frames, fbank.T)
    # Replace exact zeros so the logarithm stays finite.
    filter_banks = np.where(filter_banks == 0, np.finfo(float).eps, filter_banks)
    filter_banks = 20 * np.log10(filter_banks)

    # Drop coefficient 0 and keep the next 13.
    mfcc = dct(filter_banks, type=2, axis=1, norm='ortho')[:, 1:14]
    return mfcc

def process_video(input_video_path, output_video_path):
    """Build the 224x224, 16 fps clip that is fed to the emotion model.

    The output video consists of 8 frames that are MFCC images rendered from
    consecutive slices of the input's audio track, followed by up to 8 frames
    taken from the start of the input video (resized to 224x224).

    Args:
        input_video_path: Path of the source video; it must contain audio.
        output_video_path: Path of the mp4 file to write.

    Returns:
        ``output_video_path``.

    Raises:
        ValueError: If the audio track is too short to be split into slices.

    The capture, the writer, the audio clip and the scratch files
    ``temp.wav`` / ``mfcc_temp.jpg`` (created in the current working
    directory) are released/removed even when processing fails.
    """
    temp_wav = "temp.wav"
    temp_img = "mfcc_temp.jpg"

    cap = cv2.VideoCapture(input_video_path)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, 16.0, (224, 224))
    try:
        # Dump the audio track to a WAV file so scipy can read the samples.
        audio_clip = AudioFileClip(input_video_path)
        try:
            audio_clip.write_audiofile(temp_wav, verbose=False, logger=None)
        finally:
            audio_clip.close()
        fs, Audiodata = wavfile.read(temp_wav)
        Audiodata = normalize_audio(Audiodata)

        # The audio is cut into ninths; the first eight are used below.
        step = len(Audiodata) // 9
        if step == 0:
            raise ValueError("audio track is too short to extract MFCC frames")
        tx = np.arange(0, len(Audiodata), step)

        for i in range(8):
            signal = Audiodata[tx[i]:tx[i + 1]]
            mfcc = MFCC(signal, fs)
            fig, ax = plt.subplots(figsize=(4, 4))
            try:
                ax.matshow(np.transpose(mfcc), interpolation="nearest", aspect="auto", origin="lower")
                plt.axis('off')
                fig.savefig(temp_img)
            finally:
                plt.close(fig)
            with Image.open(temp_img) as img:
                # NOTE(review): PIL yields RGB while cv2.VideoWriter expects
                # BGR, so the colour channels are swapped here. Left as-is in
                # case the model was trained on frames produced this way.
                out.write(np.array(img.resize((224, 224))))

        frame_count = 0
        while cap.isOpened() and frame_count < 8:
            ret, frame = cap.read()
            if not ret:
                break
            out.write(cv2.resize(frame, (224, 224)))
            frame_count += 1
    finally:
        cap.release()
        out.release()
        for scratch in (temp_img, temp_wav):
            if os.path.exists(scratch):
                os.remove(scratch)
    return output_video_path

def read_video(file_path):
    """Decode every frame of *file_path* and return them via format_frames.

    All frames are fetched in a single batch with decord and then converted
    to float32 and resized to 224x224 by ``format_frames``.
    """
    reader = VideoReader(file_path)
    every_index = range(len(reader))
    decoded = reader.get_batch(every_index).asnumpy()
    return format_frames(decoded, output_size=(224, 224))

def format_frames(frames, output_size):
    """Convert *frames* to float32 and resize them to *output_size*.

    Args:
        frames: Image batch accepted by ``tf.image`` functions.
        output_size: ``(height, width)`` of the resized frames.

    Returns:
        The converted, resized frames as a tensor.
    """
    as_float = tf.image.convert_image_dtype(frames, tf.float32)
    target = list(output_size)
    return tf.image.resize(as_float, size=target)

def load_video(file_path):
    """Load *file_path* as a float32 tensor with a leading batch axis of 1."""
    frames = read_video(file_path)
    as_tensor = tf.convert_to_tensor(frames, dtype=tf.float32)
    # The model expects a batch dimension in front of the frame axis.
    return tf.expand_dims(as_tensor, axis=0)

class VideoProcessorApp(QWidget):
    """Main window of the emotion-detection GUI.

    The user can either pick an AVI file or record a short clip from the
    default camera. The clip is run through ``process_video``, the processed
    video is played back in the window, and the emotion predicted by the
    module-level ``model`` is shown in ``emotion_label``.

    Attributes:
        label: Instruction / status text at the top of the window.
        emotion_label: Shows the predicted emotion or an error message.
        btn, extra_btn1, detect_btn, extra_btn2: The four action buttons.
        video_widget, media_player: Playback of the processed clip.
        processed_video_path: Path of the last processed clip ("" if none).
    """

    # Style sheet shared by all four buttons.
    _BUTTON_STYLE = (
        "background-color: #001f3f; color: white; font-size: 15px; font-weight: bold;"
    )

    def __init__(self):
        """Build the widgets, wire the button signals, create the player."""
        super().__init__()
        self.setWindowTitle("Real-time Emotion Detection")
        self.resize(600, 400)
        self.setStyleSheet("background-color : #3399FF")

        layout = QVBoxLayout()

        self.label = QLabel(
            "Welcome to the Real-Time Emotion Detection System. You can either upload a video file to analyze emotions or activate your camera for live emotion detection"
        )
        self.label.setAlignment(Qt.AlignCenter)
        self.label.setWordWrap(True)
        self.label.setStyleSheet("font-size: 16px; font-weight: bold; color: #001f3f;")
        layout.addWidget(self.label)

        # Left column: input selection.
        choose_layout = QVBoxLayout()
        self.btn = self._make_button("Choose Video", 180, self.load_video)
        choose_layout.addWidget(self.btn)
        choose_layout.addSpacing(20)
        self.extra_btn1 = self._make_button("Launch Camera", 180, self.capture_from_camera)
        choose_layout.addWidget(self.extra_btn1)

        # Right column: detection. Both camera buttons trigger the same
        # record-process-predict slot.
        detect_layout = QVBoxLayout()
        self.detect_btn = self._make_button("Detect Emotion", 220, self.detect_emotion)
        detect_layout.addWidget(self.detect_btn)
        detect_layout.addSpacing(20)
        self.extra_btn2 = self._make_button(
            "Detect Emotion from Camera", 220, self.capture_from_camera
        )
        detect_layout.addWidget(self.extra_btn2)

        btn_layout = QHBoxLayout()
        btn_layout.addLayout(choose_layout)
        btn_layout.addSpacing(100)
        btn_layout.addLayout(detect_layout)
        layout.addLayout(btn_layout)

        self.emotion_label = QLabel("")
        self.emotion_label.setAlignment(Qt.AlignCenter)
        self.emotion_label.setStyleSheet("font-size: 20px; font-weight: bold; color: white;")
        layout.addWidget(self.emotion_label)

        self.video_widget = QVideoWidget()
        layout.addWidget(self.video_widget)

        self.setLayout(layout)

        self.media_player = QMediaPlayer(None, QMediaPlayer.VideoSurface)
        self.media_player.setVideoOutput(self.video_widget)
        self.processed_video_path = ""

    def _make_button(self, text, width, slot):
        """Return a styled, 50 px high button whose click calls *slot*."""
        button = QPushButton(text)
        button.setFixedSize(width, 50)
        button.setStyleSheet(self._BUTTON_STYLE)
        button.clicked.connect(slot)
        return button

    def _release_media(self):
        """Stop playback and drop the current media.

        Called before a clip is (re)written so the output file is not being
        overwritten while the player still has it loaded.
        """
        self.media_player.stop()
        self.media_player.setMedia(QMediaContent())

    def _play(self, video_path):
        """Load *video_path* into the player and start playback."""
        self.media_player.setMedia(QMediaContent(QUrl.fromLocalFile(video_path)))
        self.media_player.play()

    @staticmethod
    def _predict_label(video_path):
        """Run the model on *video_path* and return the emotion name."""
        video_tensor = load_video(video_path)  # module-level loader, not the slot
        prediction = model.predict(video_tensor)
        # A SavedModel serving signature may return a dict of named outputs;
        # np.argmax on a dict silently yields 0, so unwrap the tensor first.
        if isinstance(prediction, dict):
            prediction = next(iter(prediction.values()))
        return uc_id2label[int(np.argmax(prediction))]

    def load_video(self):
        """Let the user pick an AVI file, process it and play the result."""
        input_dir = r"D:/GUI-Emotion-Detection/input_videos"
        output_dir = r"D:/GUI-Emotion-Detection/output_videos"

        file_name, _ = QFileDialog.getOpenFileName(self, "Open AVI File", "", "AVI Files (*.avi)")
        if not file_name:
            return

        os.makedirs(input_dir, exist_ok=True)
        os.makedirs(output_dir, exist_ok=True)

        base_name = os.path.basename(file_name)
        input_path = os.path.join(input_dir, base_name)
        # Always refresh the working copy so a different file with the same
        # name is not mistaken for an earlier one; skip only when the user
        # picked the working copy itself.
        already_in_place = os.path.exists(input_path) and os.path.samefile(file_name, input_path)
        if not already_in_place:
            shutil.copy(file_name, input_path)

        # splitext handles ".AVI" and names that contain ".avi" elsewhere.
        stem = os.path.splitext(base_name)[0]
        output_path = os.path.join(output_dir, stem + '_processed.mp4')

        self._release_media()
        self.emotion_label.setText("")
        try:
            self.processed_video_path = process_video(input_path, output_path)
        except Exception as exc:
            # UI boundary: an exception escaping a slot aborts a PyQt5
            # application, so report the failure instead.
            self.processed_video_path = ""
            self.label.setText(f"Could not process {base_name}: {exc}")
            return

        self.label.setText(f"Processed: {base_name}")
        self._play(self.processed_video_path)

    def detect_emotion(self):
        """Predict the emotion of the last processed clip, if there is one."""
        if not self.processed_video_path:
            return
        predicted_label = self._predict_label(self.processed_video_path)
        self.emotion_label.setText(f"Emotion Detected: {predicted_label}")

    def capture_from_camera(self):
        """Record up to 32 frames from camera 0, process them and predict."""
        input_dir = "D:/GUI-Emotion-Detection/input_videos_camera"
        output_dir = "D:/GUI-Emotion-Detection/output_videos_camera"
        os.makedirs(input_dir, exist_ok=True)
        os.makedirs(output_dir, exist_ok=True)

        input_path = os.path.join(input_dir, "camera_input.avi")
        output_path = os.path.join(output_dir, "camera_output_processed.mp4")

        cap = cv2.VideoCapture(0)
        if not cap.isOpened():
            cap.release()
            self.emotion_label.setText("Could not open the camera")
            return

        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = None
        frame_count = 0
        max_frames = 32  # 2 seconds at 16 FPS
        try:
            while frame_count < max_frames:
                ret, frame = cap.read()
                if not ret:
                    break
                if out is None:
                    # The writer's frame size must match the frames it is
                    # given, so take it from the first captured frame.
                    height, width = frame.shape[:2]
                    out = cv2.VideoWriter(input_path, fourcc, 16.0, (width, height))  # 16 FPS
                out.write(frame)
                frame_count += 1
                cv2.imshow("Recording from Camera", frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            cap.release()
            if out is not None:
                out.release()
            cv2.destroyAllWindows()

        if frame_count == 0:
            self.emotion_label.setText("No frames were captured from the camera")
            return

        # Same preprocessing pipeline as for uploaded files.
        # NOTE(review): cv2.VideoWriter records no audio track, while
        # process_video extracts audio from its input -- presumably that step
        # fails for camera clips; confirm and add audio capture if so.
        self._release_media()
        try:
            processed_path = process_video(input_path, output_path)
        except Exception as exc:
            # UI boundary: see load_video.
            self.processed_video_path = ""
            self.emotion_label.setText(f"Could not process camera recording: {exc}")
            return

        self.processed_video_path = processed_path
        self._play(processed_path)

        # Emotion detection.
        predicted_label = self._predict_label(processed_path)
        self.emotion_label.setText(f"Detected Emotion: {predicted_label}")



# Entry point: create the Qt application, show the main window and hand
# control to the Qt event loop; its return code becomes the exit status.
if __name__ == "__main__":
    app = QApplication(sys.argv)
    window = VideoProcessorApp()
    window.show()
    exit_code = app.exec_()
    sys.exit(exit_code)





SystemExit: 0

In [3]:
!pip install streamlit

Defaulting to user installation because normal site-packages is not writeable

ERROR: Could not find a version that satisfies the requirement streamlit (from versions: none)
ERROR: No matching distribution found for streamlit





In [None]:
import os
import shutil
import cv2
import numpy as np
import streamlit as st
import matplotlib.pyplot as plt
from PIL import Image
from scipy.io import wavfile
from scipy.fftpack import dct
from moviepy.editor import AudioFileClip
import tempfile
import tensorflow as tf
from keras.layers import Input
from keras import Model
from keras.layers import TFSMLayer
from decord import VideoReader

# Constants
# Edge length (pixels) of the square frames; matches the 224x224 size used
# throughout the preprocessing functions below.
input_size = 224
# NOTE(review): num_frame and sampling_rate are not referenced by the code
# visible in this cell -- confirm whether anything else depends on them.
num_frame = 8
sampling_rate = 6

# Load model
# The SavedModel directory is wrapped in a TFSMLayer and exposed through a
# functional Keras model that accepts clips with a variable number of frames
# (leading ``None`` dimension) of input_size x input_size RGB images.
model_path = '/home/jovyan/first-testing-workspace-upd/Emotion-recognition/Models/saved_model'
input_tensor = Input(shape=(None, input_size, input_size, 3))
tfsm_layer = TFSMLayer(model_path, call_endpoint='serving_default')
model = Model(inputs=input_tensor, outputs=tfsm_layer(input_tensor))

# Class index -> emotion name, in the order of the model's output classes.
uc_id2label = dict(enumerate(
    ['anger', 'happiness', 'surprise', 'disgust', 'fear', 'sadness']
))

def normalize_audio(audio):
    """Scale *audio* so that its largest absolute sample equals 1.0.

    Args:
        audio: Array-like of samples (any shape). Integer PCM data is
            converted to float64 first; floating-point input keeps its dtype.

    Returns:
        A floating-point array of the same shape as *audio*. Empty or
        all-zero input is returned unscaled (as floats) instead of producing
        an error or NaNs.
    """
    samples = np.asarray(audio)
    # Convert integer PCM to float *before* taking abs(): np.abs() on an
    # int16 array wraps -32768 back to -32768, which corrupts the peak.
    if not np.issubdtype(samples.dtype, np.floating):
        samples = samples.astype(np.float64)
    if samples.size == 0:
        return samples
    peak = np.max(np.abs(samples))
    if peak == 0:
        # Pure silence: dividing would yield NaNs.
        return samples
    return samples / peak

def MFCC(signal, sample_rate):
    """Compute 13 mel-frequency cepstral coefficients per analysis frame.

    Pipeline: pre-emphasis (0.97) -> 25 ms frames with 10 ms stride ->
    Hamming window -> 512-point power spectrum -> 40 triangular mel filters
    -> log (20 * log10) -> DCT-II, keeping coefficients 1..13.

    Args:
        signal: Non-empty array-like of audio samples.
        sample_rate: Sampling rate of *signal* in Hz.

    Returns:
        Array of shape ``(num_frames, 13)``; ``num_frames`` is at least 1.

    Raises:
        ValueError: If *signal* is empty.
    """
    signal = np.asarray(signal)
    if signal.size == 0:
        raise ValueError("MFCC requires a non-empty signal")

    # Pre-emphasis boosts the high frequencies.
    # NOTE(review): np.append flattens its arguments, so a multi-channel
    # (N, C) input ends up as one interleaved 1-D signal -- confirm that this
    # is what the model was trained on before changing it.
    pre_emphasis = 0.97
    emphasized_signal = np.append(signal[0], signal[1:] - pre_emphasis * signal[:-1])

    # Framing.
    frame_size = 0.025
    frame_stride = 0.01
    frame_length = int(round(frame_size * sample_rate))
    frame_step = int(round(frame_stride * sample_rate))
    signal_length = len(emphasized_signal)
    # Always produce at least one frame. Signals no longer than one frame are
    # zero-padded into a single frame rather than yielding zero frames
    # (length == frame_length) or extra frames made only of padding
    # (length < frame_length).
    overhang = max(signal_length - frame_length, 0)
    num_frames = max(1, int(np.ceil(float(overhang) / frame_step)))
    pad_signal_length = num_frames * frame_step + frame_length
    pad_signal = np.append(emphasized_signal, np.zeros(pad_signal_length - signal_length))

    # Row i of `indices` holds the sample positions of frame i.
    frame_starts = np.arange(0, num_frames * frame_step, frame_step)
    indices = frame_starts[:, np.newaxis] + np.arange(frame_length)
    frames = pad_signal[indices] * np.hamming(frame_length)

    # Power spectrum.
    # NOTE(review): rfft crops each frame to NFFT samples when frame_length
    # exceeds 512 (sample rates above ~20 kHz) -- confirm this is intended.
    NFFT = 512
    mag_frames = np.absolute(np.fft.rfft(frames, NFFT))
    pow_frames = (1.0 / NFFT) * (mag_frames ** 2)

    # Triangular filters spaced evenly on the mel scale from 0 Hz to Nyquist.
    nfilt = 40
    low_freq_mel = 0
    high_freq_mel = 2595 * np.log10(1 + (sample_rate / 2) / 700)
    mel_points = np.linspace(low_freq_mel, high_freq_mel, nfilt + 2)
    hz_points = 700 * (10 ** (mel_points / 2595) - 1)
    # FFT-bin index of every filter edge (named to avoid shadowing bin()).
    bin_edges = np.floor((NFFT + 1) * hz_points / sample_rate)
    fbank = np.zeros((nfilt, int(np.floor(NFFT / 2 + 1))))
    for m in range(1, nfilt + 1):
        left, center, right = bin_edges[m - 1], bin_edges[m], bin_edges[m + 1]
        for k in range(int(left), int(center)):      # rising slope
            fbank[m - 1, k] = (k - left) / (center - left)
        for k in range(int(center), int(right)):     # falling slope
            fbank[m - 1, k] = (right - k) / (right - center)

    filter_banks = np.dot(pow_frames, fbank.T)
    # Replace exact zeros so the logarithm stays finite.
    filter_banks = np.where(filter_banks == 0, np.finfo(float).eps, filter_banks)
    filter_banks = 20 * np.log10(filter_banks)

    # Drop coefficient 0 and keep the next 13.
    mfcc = dct(filter_banks, type=2, axis=1, norm='ortho')[:, 1:14]
    return mfcc

def process_video(input_path):
    """Build the 224x224, 16 fps clip that is fed to the emotion model.

    The output video consists of 8 frames that are MFCC images rendered from
    consecutive slices of the input's audio track, followed by up to 8 frames
    taken from the start of the input video (resized to 224x224).

    Args:
        input_path: Path of the source video; it must contain audio.

    Returns:
        Path of a newly created temporary ``.mp4`` file. The caller owns the
        file and is responsible for deleting it.

    Raises:
        ValueError: If the audio track is too short to be split into slices.

    The capture, the writer, the audio clip and the scratch WAV/JPEG files
    are released/removed even when processing fails; on failure the
    unfinished output file is removed as well.
    """
    temp_output = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
    temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
    temp_img = tempfile.NamedTemporaryFile(delete=False, suffix=".jpg").name

    cap = cv2.VideoCapture(input_path)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(temp_output, fourcc, 16.0, (224, 224))
    succeeded = False
    try:
        # Dump the audio track to a WAV file so scipy can read the samples.
        audio_clip = AudioFileClip(input_path)
        try:
            audio_clip.write_audiofile(temp_wav, verbose=False, logger=None)
        finally:
            audio_clip.close()
        fs, Audiodata = wavfile.read(temp_wav)
        Audiodata = normalize_audio(Audiodata)

        # The audio is cut into ninths; the first eight are used below.
        step = len(Audiodata) // 9
        if step == 0:
            raise ValueError("audio track is too short to extract MFCC frames")
        tx = np.arange(0, len(Audiodata), step)

        for i in range(8):
            signal = Audiodata[tx[i]:tx[i + 1]]
            mfcc = MFCC(signal, fs)
            fig, ax = plt.subplots(figsize=(4, 4))
            try:
                ax.matshow(np.transpose(mfcc), interpolation="nearest", aspect="auto", origin="lower")
                plt.axis('off')
                fig.savefig(temp_img)
            finally:
                plt.close(fig)
            with Image.open(temp_img) as img:
                # NOTE(review): PIL yields RGB while cv2.VideoWriter expects
                # BGR, so the colour channels are swapped here. Left as-is in
                # case the model was trained on frames produced this way.
                out.write(np.array(img.resize((224, 224))))

        frame_count = 0
        while cap.isOpened() and frame_count < 8:
            ret, frame = cap.read()
            if not ret:
                break
            out.write(cv2.resize(frame, (224, 224)))
            frame_count += 1
        succeeded = True
    finally:
        cap.release()
        out.release()
        leftovers = [temp_wav, temp_img]
        if not succeeded:
            leftovers.append(temp_output)
        for scratch in leftovers:
            if os.path.exists(scratch):
                os.remove(scratch)
    return temp_output

def read_video(file_path):
    """Decode every frame of *file_path* and return them via format_frames.

    All frames are fetched in a single batch with decord and then converted
    to float32 and resized to 224x224 by ``format_frames``.
    """
    reader = VideoReader(file_path)
    every_index = range(len(reader))
    decoded = reader.get_batch(every_index).asnumpy()
    return format_frames(decoded, output_size=(224, 224))

def format_frames(frames, output_size):
    """Convert *frames* to float32 and resize them to *output_size*.

    Args:
        frames: Image batch accepted by ``tf.image`` functions.
        output_size: ``(height, width)`` of the resized frames.

    Returns:
        The converted, resized frames as a tensor.
    """
    as_float = tf.image.convert_image_dtype(frames, tf.float32)
    target = list(output_size)
    return tf.image.resize(as_float, size=target)

def load_video_tensor(file_path):
    """Load *file_path* as a float32 tensor with a leading batch axis of 1."""
    frames = read_video(file_path)
    as_tensor = tf.convert_to_tensor(frames, dtype=tf.float32)
    # The model expects a batch dimension in front of the frame axis.
    return tf.expand_dims(as_tensor, axis=0)

def predict_emotion(video_tensor):
    """Return the emotion name the model predicts for *video_tensor*.

    Args:
        video_tensor: Batched clip as produced by ``load_video_tensor``; only
            the first item of the batch is classified.

    Returns:
        The label from ``uc_id2label`` with the highest model score.
    """
    predictions = model(video_tensor)
    # A SavedModel serving signature may return a dict of named outputs
    # rather than a bare tensor; unwrap it before taking the argmax.
    if isinstance(predictions, dict):
        predictions = next(iter(predictions.values()))
    predicted_label = int(tf.argmax(predictions, axis=-1).numpy()[0])
    return uc_id2label[predicted_label]

# === Streamlit Interface ===
# Streamlit re-executes this script from the top on every widget interaction
# (including the "Detect Emotion" click). The processed clip is therefore
# cached in st.session_state, keyed by the upload, so each uploaded file is
# written to disk and processed only once.
st.set_page_config(page_title="🎥 Real-Time Emotion Detection", layout="centered")
st.title("🎥 Real-Time Emotion Detection System")
st.markdown("Upload a video file (preferably `.avi`) to detect emotions.")

uploaded_file = st.file_uploader("📁 Upload a video", type=["avi", "mp4", "mov"])

if uploaded_file:
    upload_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("upload_key") != upload_key:
        # Keep the real extension so mp4/mov uploads are not mislabelled.
        suffix = os.path.splitext(uploaded_file.name)[1] or ".avi"
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as f:
            # getvalue() does not depend on the buffer's read position.
            f.write(uploaded_file.getvalue())
            temp_input_path = f.name

        st.info("🔄 Processing video and extracting MFCC + frames...")
        try:
            new_processed_path = process_video(temp_input_path)
        finally:
            os.remove(temp_input_path)

        # Drop the clip produced for the previous upload, if any.
        stale_path = st.session_state.get("processed_path")
        if stale_path and os.path.exists(stale_path):
            os.remove(stale_path)
        st.session_state["processed_path"] = new_processed_path
        st.session_state["upload_key"] = upload_key

    processed_path = st.session_state["processed_path"]

    st.video(processed_path)

    if st.button("🧠 Detect Emotion"):
        st.info("Running inference...")
        video_tensor = load_video_tensor(processed_path)
        emotion = predict_emotion(video_tensor)
        st.success(f"Detected Emotion: **{emotion.upper()}**")
