# In [None]:
import sys
import torch
import speech_recognition as sr
from transformers import BertTokenizer, BertForSequenceClassification
import pygame

# Initialize pygame before any display or font call further down the file.
pygame.init()

# Load the pre-trained classification checkpoint and its matching tokenizer
# once at module level; predict_emotion() reuses both on every call.
# NOTE(review): predict_emotion() maps class indices 0-4, so this checkpoint
# is presumably a 5-class sentiment model -- confirm against the model card.
model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name)

# Shared recognizer used by start_recording() for ambient-noise calibration,
# audio capture, and transcription.
recognizer = sr.Recognizer()

# Function to start recording
def start_recording(duration=2):
    """Record from the microphone, transcribe the audio, and show the emotion.

    Calibrates the shared ``recognizer`` for ambient noise, records
    ``duration`` seconds from the default microphone, transcribes the audio
    with Google Speech Recognition, classifies the transcript with
    ``predict_emotion`` and renders both lines through ``draw_text``.

    A microphone that cannot be opened, unintelligible audio, and a failed
    request to the recognition service are each reported on stdout and on
    screen instead of being raised.

    Args:
        duration: Length of the recording in seconds.
    """
    print("Recording...")
    try:
        with sr.Microphone() as source:
            recognizer.adjust_for_ambient_noise(source)  # Adjust for ambient noise
            audio_data = recognizer.record(source, duration=duration)
    except OSError as e:
        # Raised when no usable input device is available; report it in the
        # window rather than letting the whole UI crash.
        print(f"Could not access the microphone; {e}")
        draw_text("Could not access the microphone.", "")
        return

    # Recognize speech using Google Speech Recognition. Only the network call
    # lives in the try body so the handlers cannot mask unrelated errors.
    try:
        text = recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        print("Google Speech Recognition could not understand the audio.")
        draw_text("Google Speech Recognition could not understand the audio.", "")
        return
    except sr.RequestError as e:
        print(f"Could not request results from Google Speech Recognition service; {e}")
        draw_text("Could not request results from Google Speech Recognition service.", "")
        return

    print("Text:", text)
    predicted_label = predict_emotion(text)
    print(predicted_label)
    recorded_text = f"Recorded Text: {text}"
    predicted_text = f"Predicted Emotion: {predicted_label}"
    print(predicted_text)
    # Show the labelled line, matching the "Recorded Text: ..." line above it
    # (previously the bare label was drawn and predicted_text went unused).
    draw_text(recorded_text, predicted_text)

# Function to predict emotion from text
def predict_emotion(text):
    """Classify ``text`` and return a coarse emotion label.

    The text is tokenized with the module-level ``tokenizer``, run through
    the module-level ``model`` without gradient tracking, and the index of
    the highest logit is mapped to a label.

    Args:
        text: The transcript to classify.

    Returns:
        One of ``"SAD"``, ``"Neutral"``, ``"HAPPY"``, ``"Excited"``, or
        ``"Unknown"`` when the predicted index is outside 0-4.
    """
    # Truncate to 512 tokens: BERT-base checkpoints have 512 position
    # embeddings, so a longer input would fail inside the model instead of
    # producing a prediction.
    tokens = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

    # Perform inference; no gradients are needed for prediction.
    with torch.no_grad():
        outputs = model(**tokens)

    # Index of the highest-scoring class for the single input sequence.
    predicted_label = torch.argmax(outputs.logits, dim=1).item()

    # Map predicted class index to emotion.
    # NOTE(review): presumably indices 0-4 run from most negative to most
    # positive sentiment -- confirm against the checkpoint's label order.
    emotion_mapping = {
        0: "SAD",
        1: "SAD",
        2: "Neutral",
        3: "HAPPY",
        4: "Excited",
    }

    return emotion_mapping.get(predicted_label, "Unknown")

# Function to draw text on the screen
def draw_text(recorded_text, predicted_text):
    """Wipe the window to white and show two lines of black text.

    ``recorded_text`` is drawn at (50, 50) and ``predicted_text`` at
    (50, 100) on the module-level ``screen``; the display is then flipped so
    the new frame becomes visible.
    """
    black = (0, 0, 0)
    line_positions = ((50, 50), (50, 100))

    screen.fill((255, 255, 255))
    font = pygame.font.Font(None, 30)

    # Render both lines first, then place each one at its position.
    rendered_lines = [
        font.render(message, True, black)
        for message in (recorded_text, predicted_text)
    ]
    for surface, top_left in zip(rendered_lines, line_positions):
        screen.blit(surface, top_left)

    pygame.display.flip()

# Set up the screen: a fixed 600x200 window that draw_text() and the main
# loop both paint on.
WIDTH, HEIGHT = 600, 200
screen = pygame.display.set_mode((WIDTH, HEIGHT))
pygame.display.set_caption("Emotion Recognition")

# Set up the button: render the label once and centre its rectangle in the
# window. The same rect is used for drawing the button and for click
# hit-testing in the main loop.
button_font = pygame.font.Font(None, 36)
button_text = button_font.render("Start Recording", True, (0, 0, 0))
button_rect = button_text.get_rect(center=(WIDTH // 2, HEIGHT // 2))

# How long (in milliseconds) the output drawn by start_recording() stays on
# screen before the button is painted over it again.
RESULT_DISPLAY_MS = 3000
# Upper bound on loop iterations per second, so an idle window does not
# busy-spin a CPU core.
MAX_FPS = 30

# Main loop
clock = pygame.time.Clock()
result_visible_until = 0  # pygame tick (ms) up to which the result is held
running = True
try:
    while running:
        showing_result = pygame.time.get_ticks() < result_visible_until

        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False
            elif event.type == pygame.MOUSEBUTTONDOWN:
                # The button is not drawn while a result is on screen, so
                # clicks in that period (including any queued while the
                # blocking recording ran) are ignored.
                if not showing_result and button_rect.collidepoint(event.pos):
                    start_recording()
                    # start_recording() has just drawn its result; hold off
                    # repainting so the text is readable instead of being
                    # wiped on the very next iteration.
                    result_visible_until = pygame.time.get_ticks() + RESULT_DISPLAY_MS
                    showing_result = True

        if not showing_result:
            # Clear the screen
            screen.fill((255, 255, 255))

            # Draw the button
            pygame.draw.rect(screen, (200, 200, 200), button_rect)
            pygame.draw.rect(screen, (0, 0, 0), button_rect, 2)
            screen.blit(button_text, button_rect.topleft)

            # Update the display
            pygame.display.flip()

        clock.tick(MAX_FPS)
finally:
    # Quit pygame even when an unexpected error escapes the loop.
    pygame.quit()
sys.exit()