In [1]:
import torch
import gradio as gr
import torch.nn as nn
import torch.nn.functional as F
from transformers import RobertaTokenizer, RobertaModel, BertTokenizer, BertModel
from torch.utils.data import DataLoader, Dataset
from datasets import load_dataset
from pytorch_metric_learning.losses import NTXentLoss, SupConLoss, CrossBatchMemory
from sklearn.metrics import adjusted_mutual_info_score, adjusted_rand_score, fowlkes_mallows_score
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import pairwise_distances
import numpy as np
import umap.umap_ as umap
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
from tqdm import tqdm
from timeit import default_timer as dtimer

In [2]:
# Emotion labels. A label's position in this list is its class index:
# predict() returns emotions_list[<index of the highest classifier score>],
# so the order must not be changed.
# NOTE(review): the list holds 28 labels while the classifier is built with
# num_classes = 27, so the last entry ('neutral') can never be returned by
# predict() - confirm this matches the label order used when training.
emotions_list = [
    'admiration', 'amusement', 'anger', 'annoyance',
    'approval', 'caring', 'confusion', 'curiosity',
    'desire', 'disappointment', 'disapproval', 'disgust',
    'embarrassment', 'excitement', 'fear', 'gratitude',
    'grief', 'joy', 'love', 'nervousness',
    'optimism', 'pride', 'realization', 'relief',
    'remorse', 'sadness', 'surprise', 'neutral',
]

In [3]:
# Device handed to torch.load(..., map_location=device) when the saved
# weights are read in the cells below. Pinned to the CPU; the automatic
# GPU/CPU selection is kept here for reference:
#device = 'cuda' if torch.cuda.is_available() else 'cpu'
device = 'cpu'

In [4]:
class RoBertaEmo(nn.Module):
    """Contrastive RoBERTa model: a RoBERTa encoder plus a projection head.

    Args:
        pretrained_model_name: checkpoint name passed to
            ``RobertaModel.from_pretrained``.
        projection_dim: output width of both linear layers in the head.
        drop_out_rate: dropout probability used inside the head.
    """

    def __init__(self, pretrained_model_name='roberta-base', projection_dim=64, drop_out_rate=0.1):
        super().__init__()
        self.encoder = RobertaModel.from_pretrained(pretrained_model_name)
        hidden_size = self.encoder.config.hidden_size
        # The position of each layer determines its parameter name
        # (projection.0.*, projection.3.*), so keep this order stable for
        # saved checkpoints to keep loading.
        head_layers = [
            nn.Linear(hidden_size, projection_dim),
            nn.ReLU(),
            nn.Dropout(p=drop_out_rate),
            nn.Linear(projection_dim, projection_dim),
        ]
        self.projection = nn.Sequential(*head_layers)

    def forward(self, input_ids, attention_mask):
        """Encode a batch and project its first-token embedding.

        Returns:
            A pair ``(projection, cls_embedding)``: the head output
            L2-normalised along dim 1, and the encoder's hidden state at
            sequence position 0 (the [CLS]-position token) before projection.
        """
        encoded = self.encoder(input_ids=input_ids, attention_mask=attention_mask)
        first_token = encoded.last_hidden_state[:, 0]
        projected = self.projection(first_token)
        unit_projected = F.normalize(projected, p=2, dim=1)
        return unit_projected, first_token

In [5]:
class EmotionClassifier(nn.Module):
    """Fully connected classification head: input -> 384 -> 256 -> 128 -> classes.

    Args:
        input_features: width of each input vector.
        num_classes: number of output scores per input.
        drop_out_rate: dropout probability applied after each hidden layer.
    """

    def __init__(self, input_features, num_classes, drop_out_rate=0.5):
        super().__init__()
        # The position of each module determines its parameter name
        # (layers.0.*, layers.2.*, ...), so the order below must stay fixed
        # for saved checkpoints to keep loading.
        stack = [
            nn.Linear(input_features, 384),
            nn.ReLU(),
            nn.BatchNorm1d(384),
            nn.Dropout(p=drop_out_rate),
            nn.Linear(384, 256),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Dropout(p=drop_out_rate),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(p=drop_out_rate),
            nn.Linear(128, num_classes),
        ]
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for ``x``."""
        return self.layers(x)

In [6]:
# Build the contrastive RoBERTa model and restore its saved weights.
robertaEmoModel = RoBertaEmo()
# This notebook only runs inference, so put the whole module (including the
# Dropout in the projection head) in evaluation mode. Done before loading so
# that load_state_dict stays the cell's last expression and its key-matching
# report is still displayed.
robertaEmoModel.eval()
robertaEmoModel.load_state_dict(torch.load('RoBertaEmoElastic.pth', map_location=device, weights_only=True))

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


<All keys matched successfully>

In [7]:
# Dimensions used to construct EmotionClassifier in the next cell.
# Width of each embedding vector fed to the classifier
# (presumably the hidden size of roberta-base - TODO confirm).
input_features = 768
# Number of scores the classifier produces per input.
# NOTE(review): emotions_list has 28 entries, so with 27 classes its last
# entry ('neutral') is never selected - confirm this is intended.
num_classes = 27

In [8]:
# Build the classification head and restore its saved weights.
emoClassifer = EmotionClassifier(input_features, num_classes)
# The head contains BatchNorm1d and Dropout layers, which behave differently
# in training mode; this notebook only runs inference, so switch to
# evaluation mode here instead of relying on each caller to do it. Done
# before loading so that load_state_dict stays the cell's last expression and
# its key-matching report is still displayed.
emoClassifer.eval()
emoClassifer.load_state_dict(torch.load('RoBertaEmoClassifier.pth', map_location=device, weights_only=True))

<All keys matched successfully>

In [9]:
# Tokenizer used by predict() to turn the input sentence into model inputs.
# Loaded from the same "roberta-base" name that RoBertaEmo uses by default.
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")



In [10]:
def predict(sentence):
    """
    Predict the primary emotion expressed in an English sentence.

    The sentence is tokenized, passed through the encoder of
    ``robertaEmoModel``, and the hidden state at sequence position 0 is fed
    to ``emoClassifer``. The index of the highest classifier score is then
    looked up in ``emotions_list``.

    Args:
      sentence : English sentence for which the primary emotion will be predicted

    Returns:
      A tuple ``(predicted_emotion, total_time)``: the predicted emotion as a
      string, and the elapsed prediction time in seconds rounded to two
      decimal places.
    """
    start_time = dtimer()

    # Set evaluation mode explicitly on both networks so the result does not
    # depend on whatever mode earlier cells happened to leave them in
    # (Dropout / BatchNorm behave differently in training mode).
    robertaEmoModel.eval()
    emoClassifer.eval()

    inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True)

    # One gradient-free context for the whole forward pass.
    with torch.inference_mode():
        encoder_output = robertaEmoModel.encoder(**inputs)
        embeddings = encoder_output.last_hidden_state[:, 0]
        logits = emoClassifer(embeddings)

    # .item() requires exactly one prediction, i.e. a single input sentence.
    predicted = logits.argmax(dim=1).item()
    predicted_emotion = emotions_list[predicted]

    total_time = round(dtimer() - start_time, 2)
    return predicted_emotion, total_time

In [11]:
# Text passed to gr.Interface below as its title, description and article.
title = "RobertaEmo based Emotion classifier"
description = "Predicts primary emotion in an english sentence"
article = "Created for CS 626 - Course Project"

In [12]:
# Assemble the Gradio interface around predict(): one text input, and two
# outputs that line up with the (emotion, elapsed seconds) pair it returns.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Text(label="English Sentence"),
    outputs=[
        gr.Text(label="Predicted Primary Emotion"),
        gr.Number(label="Prediction time (seconds)"),
    ],
    title=title,
    description=description,
    article=article,
)

In [13]:
# Launch the demo. With share=True Gradio also publishes a temporary public
# URL in addition to the local one, so the interface is reachable from
# outside this machine - use share=False to keep the demo local-only.
demo.launch(share=True)

* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://6a999ab31a3270dd4f.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


