# In [5]:
import cv2
import numpy as np
import threading
import tkinter as tk
from tkinter import messagebox
import time
import torch
import pyaudio
import wave
import speech_recognition as sr
import pandas as pd
import random
import re
from collections import Counter
from transformers import ElectraForSequenceClassification, AutoTokenizer
from tensorflow.keras.models import load_model

# Load the video model
video_model = load_model('MPANET.h5')

# Load the text model and its tokenizer
num_classes = 4
text_model = ElectraForSequenceClassification.from_pretrained("monologg/koelectra-base-v3-discriminator", num_labels=num_classes)
tokenizer = AutoTokenizer.from_pretrained("monologg/koelectra-base-v3-discriminator")
# map_location="cpu" lets a checkpoint that was saved on a GPU machine load on
# a CPU-only one; nothing in this file moves the model off the CPU.
text_model.load_state_dict(torch.load("model.pt", map_location="cpu"))
# The model is only used for prediction here, so make inference mode explicit.
text_model.eval()

# Emotion labels; the index of a label is used to map model outputs to a name
emotion_labels = ['anger', 'happiness', 'panic', 'sadness']

# Initial scores (start_recording resets the first three to these same values)
emotion_score, text_score, keyword_score, total_score = 10, 0, 10, 0

# Webcam and microphone state: `recording` gates the capture loop and
# `frames` accumulates the raw audio chunks read from the microphone
recording, frames = False, []

# PyAudio settings
FORMAT, CHANNELS, RATE, CHUNK = pyaudio.paInt16, 1, 44100, 2048
WAVE_OUTPUT_FILENAME = "output.wav"
audio = pyaudio.PyAudio()

# Read the CSV file and set up the questions and their keywords
df = pd.read_csv('Software Questions.csv')
questions, keywords_list = df['question'].tolist(), df['keyword'].tolist()
# Currently selected question and its keywords (filled by select_random_question)
random_question, random_keywords = "", []

def select_random_question():
    """Pick a random question and publish it, with its keywords, as globals.

    Sets ``random_question`` to a randomly chosen entry of ``questions`` and
    ``random_keywords`` to the '/'-separated keywords of the matching entry
    in ``keywords_list``.

    Surrounding whitespace is stripped from each keyword and empty segments
    are dropped, so "a/ b/" yields ``['a', 'b']``.  A non-string entry (for
    example the NaN pandas produces for an empty CSV cell) yields an empty
    keyword list instead of raising ``AttributeError``.

    Raises:
        ValueError: if ``questions`` is empty.
    """
    global random_question, random_keywords
    random_index = random.randrange(len(questions))
    raw_keywords = keywords_list[random_index]
    random_question = questions[random_index]
    if isinstance(raw_keywords, str):
        random_keywords = [part.strip() for part in raw_keywords.split('/') if part.strip()]
    else:
        # Missing keyword cell: there is nothing to look for in the answer.
        random_keywords = []

# Select the initial question and keywords
select_random_question()

def record_video_and_audio():
    """Record webcam video and microphone audio until ``recording`` is cleared.

    Opens camera 0, writes frames to 'output.avi' and appends one audio chunk
    per captured frame to the global ``frames`` list.  The loop ends when
    ``recording`` becomes False, when a frame cannot be read, or when 'q' is
    pressed in the preview window.  Afterwards the audio is saved with
    ``save_audio`` and analysis is started with ``analyze_video_and_audio``.

    If the camera cannot be opened within a few seconds an error dialog is
    shown and the function returns without recording.
    """
    global frames, recording
    cap = cv2.VideoCapture(0)
    # Bound the wait so an unavailable camera cannot hang this thread forever.
    deadline = time.time() + 5.0
    while not cap.isOpened() and time.time() < deadline:
        time.sleep(0.1)
    if not cap.isOpened():
        cap.release()
        recording = False
        messagebox.showerror("Error", "Could not open the webcam.")
        return

    messagebox.showinfo("Info", "로딩이 완료 되었습니다. 답변을 진행해주세요.")

    # Size the writer from the capture itself so cameras that do not deliver
    # 640x480 frames still produce a usable file; fall back to 640x480 when
    # the backend reports 0.
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) or 640
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) or 480

    out = None
    stream = None
    try:
        out = cv2.VideoWriter('output.avi', cv2.VideoWriter_fourcc(*'XVID'), 20.0, (width, height))
        stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)

        while recording:
            ret, frame = cap.read()
            if not ret:
                break
            out.write(frame)
            cv2.imshow('Recording', frame)
            # Video work between reads can let the input buffer overflow;
            # tolerate that instead of aborting the whole recording.
            frames.append(stream.read(CHUNK, exception_on_overflow=False))
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release every device even if capture failed part-way through, and
        # leave the flag consistent when the loop ended on its own.
        recording = False
        cap.release()
        if out is not None:
            out.release()
        if stream is not None:
            stream.stop_stream()
            stream.close()
        cv2.destroyAllWindows()

    save_audio()
    analyze_video_and_audio()

def save_audio():
    """Write the audio chunks collected in ``frames`` to WAVE_OUTPUT_FILENAME.

    The WAV header is filled from the module-level CHANNELS, FORMAT and RATE
    settings; the chunks are concatenated and written in one call.
    """
    with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wav_file:
        wav_file.setnchannels(CHANNELS)
        sample_width = audio.get_sample_size(FORMAT)
        wav_file.setsampwidth(sample_width)
        wav_file.setframerate(RATE)
        pcm_bytes = b''.join(frames)
        wav_file.writeframes(pcm_bytes)

def predict_emotion_video(frame):
    """Run the video model on a single frame and return its raw predictions.

    The frame is resized to 224x224, scaled by 1/255 and reshaped into a
    batch of one (1, 224, 224, 3) before being passed to ``video_model``.
    """
    resized = cv2.resize(frame, (224, 224))
    scaled = resized / 255.0
    batch = scaled.reshape((1, 224, 224, 3))
    return video_model.predict(batch)

def analyze_video():
    """Score the recorded video by sampling frames from 'output.avi'.

    Roughly one frame per half second is passed to ``predict_emotion_video``.
    When the top prediction has confidence >= 0.75 its label is reported to
    ``update_emotion_score``.  After the file has been read, the per-emotion
    counters and the score display are refreshed.
    """
    cap = cv2.VideoCapture('output.avi')
    # Clamp to 1: a reported FPS below 2 (or 0 when the metadata is missing)
    # would otherwise make the interval 0 and the modulo below would raise
    # ZeroDivisionError.
    frame_interval = max(1, int(cap.get(cv2.CAP_PROP_FPS) * 0.5))
    frame_count = 0

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            if frame_count % frame_interval == 0:
                predictions = predict_emotion_video(frame)
                max_index = int(np.argmax(predictions))
                confidence = predictions[0][max_index]
                if confidence >= 0.75:
                    update_emotion_score(emotion_labels[max_index])
            frame_count += 1
    finally:
        # Always release the file handle, even if prediction raises.
        cap.release()
    update_video_counts()
    update_score()

def update_emotion_score(emotion):
    """Record one detection of ``emotion`` and adjust the emotion score.

    Increments the counter for ``emotion`` in ``video_emotion_counts``, then
    adds 1 to ``emotion_score`` for 'happiness' and subtracts 0.5 for any
    other emotion.
    """
    global emotion_score, video_emotion_counts
    video_emotion_counts[emotion] = video_emotion_counts[emotion] + 1
    if emotion == 'happiness':
        delta = 1
    else:
        delta = -0.5
    emotion_score = emotion_score + delta

def analyze_audio():
    """Transcribe the recorded answer and derive the text and keyword scores.

    The WAV file is transcribed with Google speech recognition (Korean).  The
    transcript is classified with ``predict_text_emotion`` and searched for
    the current question's keywords; both results are shown in the GUI.

    ``text_score`` is 10 for 'happiness', 5 for 'panic' or 'sadness' and 0
    otherwise.  ``keyword_score`` starts at 10 and loses an equal share for
    every keyword that does not occur in the transcript (10 when there are no
    keywords).  If the audio cannot be transcribed, an error dialog is shown
    and the scores are left untouched.
    """
    global text_score, keyword_score
    recognizer = sr.Recognizer()
    # Work on a snapshot: the question (and with it ``random_keywords``) can
    # be changed from the GUI while this runs in a background thread, which
    # would otherwise make the counts and the lookup below disagree.
    keywords = list(random_keywords)

    with sr.AudioFile(WAVE_OUTPUT_FILENAME) as source:
        audio_data = recognizer.record(source)

    # Only the recognition call can raise the speech-recognition errors.
    try:
        text = recognizer.recognize_google(audio_data, language='ko-KR')
    except (sr.UnknownValueError, sr.RequestError) as e:
        messagebox.showerror("Error", f"Could not process audio: {e}")
        return

    emotion = predict_text_emotion(text)
    keyword_counts = count_keywords(text, keywords)
    update_text_results(text, emotion, keyword_counts)

    # Text score based on the predicted emotion
    text_score = {'happiness': 10, 'panic': 5, 'sadness': 5, 'anger': 0}.get(emotion, 0)

    # Keyword score: every missed keyword costs an equal share of 10 points
    if keywords:
        missed_keywords = sum(1 for k in keywords if keyword_counts[k] == 0)
        penalty_per_keyword = 10 / len(keywords)
        keyword_score = max(0, 10 - missed_keywords * penalty_per_keyword)
    else:
        keyword_score = 10

    update_score()


def analyze_video_and_audio():
    """Start the video analysis and the audio analysis, each in its own thread."""
    for job in (analyze_video, analyze_audio):
        worker = threading.Thread(target=job)
        worker.start()

def count_keywords(text, keywords):
    """Count case-insensitive, non-overlapping occurrences of each keyword.

    Args:
        text: The text to search.
        keywords: Iterable of keyword strings, matched literally.

    Returns:
        A dict mapping every keyword, exactly as given, to the number of
        times it occurs in ``text``.
    """
    # Lower-case both sides: lowering only the text would make any keyword
    # that contains an uppercase letter (e.g. "API") impossible to match.
    lowered_text = text.lower()
    return {keyword: lowered_text.count(keyword.lower()) for keyword in keywords}

def update_video_counts():
    """Refresh each emotion counter label with its current detection count."""
    for emotion_name in emotion_labels:
        counter_widget = video_count_labels[emotion_name]
        counter_widget.config(text=f'{emotion_name}: {video_emotion_counts[emotion_name]}')

def update_text_results(text, emotion, keyword_counts):
    """Show the transcript, its predicted emotion and the keyword counts.

    The result label receives the transcript and emotion followed by one
    "keyword: count" line per entry of ``keyword_counts``.
    """
    keyword_lines = [f"{keyword}: {count}" for keyword, count in keyword_counts.items()]
    header = f"Text: {text}\nEmotion: {emotion}\n\nKeyword Counts:\n"
    text_result_label.config(text=header + "\n".join(keyword_lines))

def update_score():
    """Recompute ``total_score`` and display all four scores.

    The total is the sum of the emotion, text and keyword scores; every
    value is shown with one decimal place.
    """
    global total_score
    total_score = emotion_score + text_score + keyword_score
    summary = (
        f"Emotion Score: {emotion_score:.1f}\n"
        f"Text Score: {text_score:.1f}\n"
        f"Keyword Score: {keyword_score:.1f}\n"
        f"Total Score: {total_score:.1f}"
    )
    score_label.config(text=summary)


def predict_text_emotion(text):
    """Classify ``text`` with the text model and return its emotion label.

    Args:
        text: The transcript to classify; tokenized with truncation to at
            most 128 tokens.

    Returns:
        The entry of ``emotion_labels`` whose logit is the largest.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    # Prediction only: skip building the autograd graph.
    with torch.no_grad():
        logits = text_model(**inputs).logits
    # Softmax is monotonic, so the argmax of the raw logits selects the same
    # class; .item() turns the 0-dim tensor into a plain int list index.
    return emotion_labels[torch.argmax(logits).item()]

def start_recording():
    """Reset the per-answer state and start recording in a background thread.

    Resets the audio buffer, the scores and the per-emotion counters, sets
    ``recording`` to True and runs ``record_video_and_audio`` in a new thread.
    A call made while the previous recording thread is still alive is
    ignored, so repeated clicks cannot start a second capture on the same
    camera and microphone or wipe the answer that is being recorded.
    """
    global recording, frames, emotion_score, text_score, keyword_score, video_emotion_counts
    # The worker is remembered on the function object so this guard needs no
    # additional module-level state.
    previous_worker = getattr(start_recording, "_worker", None)
    if previous_worker is not None and previous_worker.is_alive():
        return

    recording, frames, emotion_score, text_score, keyword_score = True, [], 10, 0, 10
    video_emotion_counts = {label: 0 for label in emotion_labels}
    worker = threading.Thread(target=record_video_and_audio)
    start_recording._worker = worker
    worker.start()

def stop_recording():
    """Clear the ``recording`` flag so the capture loop in ``record_video_and_audio`` exits."""
    global recording
    recording = False

def change_question():
    """Select a new random question and show it in the question label."""
    select_random_question()
    caption = "Q: {}".format(random_question)
    question_label.config(text=caption)

def create_label(frame, text, font, fg, bg):
    """Create a ``tk.Label`` inside ``frame``, pack it with 10px vertical padding and return it."""
    options = {"text": text, "font": font, "fg": fg, "bg": bg}
    widget = tk.Label(frame, **options)
    widget.pack(pady=10)
    return widget

def create_button(frame, text, command, font, fg, bg):
    """Create a ``tk.Button`` inside ``frame``, pack it with 10px vertical padding and return it."""
    options = {"text": text, "command": command, "font": font, "fg": fg, "bg": bg}
    widget = tk.Button(frame, **options)
    widget.pack(pady=10)
    return widget

def start_gui():
    """Build the interview-practice window and run the Tk main loop.

    Creates three fixed-size panels: the current question with a
    change-question button, the start/stop buttons, and the results area
    (per-emotion counters, transcript result and scores).  The widgets that
    other functions update are published as the globals ``question_label``,
    ``score_label``, ``text_result_label`` and ``video_count_labels``.
    Blocks until the window is closed.
    """
    global question_label, score_label, text_result_label, video_count_labels

    chalk_font = ("Comic Sans MS", 12, "bold")
    fg_color = "#000000"
    bg_color = "#FFFFFF"

    root = tk.Tk()
    root.title("Interview Practice")
    root.geometry("800x1050")
    root.resizable(False, False)
    root.configure(bg=bg_color)

    def make_panel(width, height):
        # Fixed-size bordered frame; propagation is disabled so the frame
        # keeps the requested size regardless of its children.
        panel = tk.Frame(root, padx=10, pady=10, bd=2, relief=tk.GROOVE, bg=bg_color, width=width, height=height)
        panel.pack_propagate(False)
        panel.pack(pady=10)
        return panel

    def add_label(parent, text):
        # Label in the shared font and colours.
        return create_label(parent, text, chalk_font, fg_color, bg_color)

    def add_button(parent, text, command):
        # Button in the shared font and colours.
        return create_button(parent, text, command, chalk_font, fg_color, bg_color)

    # Question panel
    question_frame = make_panel(760, 140)
    question_label = add_label(question_frame, f"Q: {random_question}")
    add_button(question_frame, "질문 변경", change_question)

    # Start / stop panel
    button_frame = make_panel(760, 140)
    add_button(button_frame, "답변 시작", start_recording)
    add_button(button_frame, "답변 종료", stop_recording)

    # Results panel
    result_frame = make_panel(760, 700)
    add_label(result_frame, "얼굴 감정 분석:")
    counter_widgets = {}
    for label in emotion_labels:
        counter_widgets[label] = add_label(result_frame, f'{label}: 0')
    video_count_labels = counter_widgets

    add_label(result_frame, "음성 감정 분석:")
    text_result_label = add_label(result_frame, "")
    text_result_label.config(wraplength=500, justify="left")

    score_label = add_label(result_frame, "Emotion Score: 0\nText Score: 0\nKeyword Score: 0\nTotal Score: 0")

    root.mainloop()

# Launch the GUI only when this file is run as a script, not when imported.
if __name__ == "__main__":
    start_gui()


# Notebook output captured when the cell above was run:
# Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-v3-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
# You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


