- **TEST CASE LEADERBOARD: model performance on specific datasets and computational efficiency**

    1. Load the data from `sentiment_test_cases.csv`.

    2. Model Selection        

        * Accuracy, Precision, Recall, 
        * Weighted F1 Score for Multi-Class Classification Models
        * Computational Efficiency
        

In [None]:
import time
import pandas as pd
import matplotlib.pyplot as plt
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

import warnings
# Suppress the warning message
warnings.filterwarnings("ignore")

class SentimentAnalyzer:
    """
    Runs several sequence-classification models over a DataFrame of texts,
    writes one predictions CSV per model and one CSV with the wall-clock time
    each model took.
    """

    def __init__(self):
        # Canonical sentiment name -> every raw label spelling that maps to it.
        self.sentiment_mapping = {
            "positive": ["positive", "POS", "LABEL_2"],
            "neutral": ["neutral", "NEU", "LABEL_1"],
            "negative": ["negative", "NEG", "LABEL_0"]
        }

    def standardize_sentiment_label(self, label):
        """
        This function standardizes the sentiment label according to the specified mapping.

        The comparison is case-insensitive. Labels that match no alias, and
        values that are not strings, are returned unchanged.

        :param label: The input sentiment label to standardize.
        :return: The standardized sentiment label.
        """

        if not isinstance(label, str):
            return label

        # Lower-case the input once instead of once per mapping entry
        needle = label.lower()
        for canonical, aliases in self.sentiment_mapping.items():
            if any(needle == alias.lower() for alias in aliases):
                return canonical
        return label

    def _analyze_text(self, text, tokenizer, model):
        """
        This function performs sentiment analysis on a single text using the given model.

        :param text: The input text to analyze.
        :param tokenizer: The tokenizer loaded for the model.
        :param model: The sequence-classification model to run.
        :return: A tuple containing the standardized label and the confidence score in percent.
        """

        # Truncate so texts longer than the tokenizer's maximum length
        # do not overflow the model's input size
        inputs = tokenizer(text, return_tensors="pt", truncation=True)

        # Inference only: no gradients are needed
        with torch.no_grad():
            logits = model(**inputs).logits

        # Get the predicted class index and the probability of the top class
        predicted_label = logits.argmax(dim=-1).item()
        confidence_score = logits.softmax(dim=-1).max().item()

        # Convert the predicted class index to a string and standardize it
        label_str = self.standardize_sentiment_label(model.config.id2label[predicted_label])

        return label_str, round(confidence_score * 100, 2)

    def sentiment_analysis(self, df, path, model_selection_list, output_dir="../dataset/output"):
        """
        This function performs sentiment analysis on the input DataFrame using the specified models.

        For every model it writes ``output_<model>_sentiment_test.csv`` with the
        columns text, expected_sentiment, model_output and confidence_score, and
        finally writes ``computational_efficiency.csv`` with one row per model.
        The recorded time covers loading the model, running it on every text and
        writing that model's CSV.

        :param df: The input DataFrame; must contain the columns "text" and "expected_sentiment".
        :param path: The path to the downloaded repository containing the models.
        :param model_selection_list: A list of model names to use for sentiment analysis.
        :param output_dir: The directory the CSV files are written to; created if missing.
        """

        # Imported locally so this method does not depend on a module-level `import os`
        import os

        # to_csv does not create missing directories
        os.makedirs(output_dir, exist_ok=True)

        # Create an empty list to store the computational efficiency results
        computational_efficiency = []

        # Iterate over the list of models
        for model_name in model_selection_list:
            # Start the timer (perf_counter is monotonic, unlike time.time)
            start_time = time.perf_counter()

            # Define the path to the downloaded repository for the current model
            repo_path = f"{path}/{model_name}"

            # Load the tokenizer and model
            tokenizer = AutoTokenizer.from_pretrained(repo_path)
            model = AutoModelForSequenceClassification.from_pretrained(repo_path)

            # Run the model on every text; plain lists avoid building one
            # pd.Series per row and also work for an empty DataFrame
            predictions = [self._analyze_text(text, tokenizer, model) for text in df["text"]]

            # Assemble the output columns in their final order
            output_df = df[["text", "expected_sentiment"]].copy()
            output_df["model_output"] = [label for label, _ in predictions]
            output_df["confidence_score"] = [score for _, score in predictions]

            # Save the resulting DataFrame to a CSV file
            output_df.to_csv(f"{output_dir}/output_{model_name}_sentiment_test.csv", index=False)

            # Stop the timer and calculate the elapsed time
            elapsed_time = time.perf_counter() - start_time

            # Append the current model's computational efficiency to the list
            computational_efficiency.append({"model": model_name, "time": elapsed_time})

        # Convert the computational efficiency results to a DataFrame and save it to a CSV file;
        # explicit columns keep the header even when no model was run
        efficiency_df = pd.DataFrame(computational_efficiency, columns=["model", "time"])
        efficiency_df.to_csv(f"{output_dir}/computational_efficiency.csv", index=False)

# Read the sentiment test cases from disk
df = pd.read_csv('../dataset/test_dataset/sentiment_test_cases.csv')

# Count the test cases per expected sentiment and draw them as bars
sentiment_counts = df['expected_sentiment'].value_counts()
ax = sentiment_counts.plot(kind='bar')

# Label the axes and title the chart on the axes the plot was drawn on
ax.set_xlabel('Sentiment')
ax.set_ylabel('Count')
ax.set_title('Distribution of Expected Sentiment')

# Render the figure
plt.show()

# Directory holding the downloaded model repositories
path = "../models"

# Names of the models to benchmark
model_selection_list = [
    "twitter-roberta-base-sentiment-latest",
    "bertweet-base-sentiment-analysis",
    "twitter-xlm-roberta-base-sentiment",
    "twitter-roberta-base-sentiment",
]

# Run every listed model over the test cases
analyzer = SentimentAnalyzer()
analyzer.sentiment_analysis(df, path, model_selection_list)



# Use the model `twitter-roberta-base-sentiment-latest`


Generate the final output files required for the task

In [5]:
import os
import shutil

# Result files that are copied into the final results directory
files_to_copy = ['output_twitter-roberta-base-sentiment-latest_sentiment_test.csv', 'computational_efficiency.csv', 'initial_benchmark.csv']

src_dir = '../dataset/output'
des_dir = '../results/output'

# Create the destination directory; exist_ok makes the cell safe to re-run
os.makedirs(des_dir, exist_ok=True)

# Copy specified files from source to destination
for filename in files_to_copy:
    src = os.path.join(src_dir, filename)
    des = os.path.join(des_dir, filename)
    # Best effort: a file missing from the source directory is skipped
    if os.path.exists(src):
        shutil.copy(src, des)

# Give the selected model's predictions a model-independent file name
src = '../results/output/output_twitter-roberta-base-sentiment-latest_sentiment_test.csv'
des = '../results/output/output_sentiment_test.csv'
if os.path.exists(src):
    # os.replace overwrites an existing destination on every platform, whereas
    # os.rename raises FileExistsError on Windows when the cell is re-run
    os.replace(src, des)
