# **Evaluation of fine-tuned pre-trained models, using sentiment analysis as an example**

In [1]:
# Importing the necessary libraries
import gradio as gr
from google.colab import drive
from transformers import AutoTokenizer, AutoModelForSequenceClassification, DataCollatorWithPadding,  Trainer
from datasets import load_dataset, Dataset
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import confusion_matrix, accuracy_score, precision_recall_fscore_support, precision_score
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import altair as alt

# Entries offered in the model dropdown: the two fine-tuned transformer
# checkpoints ("FT:" prefix) and the logistic-regression model.
model_list = [
    "FT:distilbert-base-uncased-finetuned-sst-2-english",
    "FT:english-yelp-sentiment",
    "logistic-regression",
]


# Function to load the pretrained models and evaluate them
# Google Drive directories holding the fine-tuned checkpoints, keyed by the
# dropdown entry that selects them.
_MODEL_DIRS = {
    "FT:distilbert-base-uncased-finetuned-sst-2-english": "/content/drive/My Drive/Mein_Modellverzeichnis",
    "FT:english-yelp-sentiment": "/content/drive/My Drive/Mein_Modellverzeichnis_2",
}


def pre_trained_model(dropdown=None):
    """Evaluate the model selected in the dropdown on the validation split.

    Args:
        dropdown: Name of the selected model. Either one of the fine-tuned
            transformer entries (loaded from Google Drive) or
            ``"logistic-regression"``, which is delegated to
            ``evaluate_logistic_regression``.

    Returns:
        A tuple ``(metrics_df, heatmap_plot)``: a DataFrame with the columns
        ``"Metric"`` and ``"Value"`` (accuracy, error rate and macro-averaged
        precision, recall and F1) and the matplotlib figure containing the
        confusion-matrix heatmap.

    Raises:
        ValueError: If ``dropdown`` is not one of the known model names.
    """
    # The logistic-regression model is trained on the fly and needs neither
    # Google Drive nor a tokenizer, so handle it before any expensive setup.
    if dropdown == "logistic-regression":
        return evaluate_logistic_regression()

    # Fail early with a clear message instead of letting from_pretrained()
    # choke on an empty path. The isinstance check also keeps unhashable
    # values (e.g. a list) from raising TypeError in the dict lookup.
    if not isinstance(dropdown, str) or dropdown not in _MODEL_DIRS:
        known = sorted(_MODEL_DIRS) + ["logistic-regression"]
        raise ValueError(
            f"Unknown model selection {dropdown!r}; expected one of {known}"
        )
    path = _MODEL_DIRS[dropdown]

    # The checkpoints live on Google Drive, which has to be mounted first.
    drive.mount('/content/drive')

    tokenizer = AutoTokenizer.from_pretrained(path)
    model = AutoModelForSequenceClassification.from_pretrained(path)

    # Only the validation split is evaluated here, so it is the only file
    # that is downloaded and tokenized.
    URL_validation = "https://raw.githubusercontent.com/laurenzbrahner/BigDataTask2/main/data/Sentiment_Val.csv"
    df_val = pd.read_csv(URL_validation, sep=";")

    # Binary mapping: original labels 0-3 become class 0 ("less than 5 stars"),
    # label 4 becomes class 1 ("5 stars").
    star_mapping = {
        0: 0,
        1: 0,
        2: 0,
        3: 0,
        4: 1
    }
    df_val['label'] = df_val['label'].map(star_mapping)

    val_dataset = Dataset.from_pandas(df_val)

    def tokenize_function(examples):
        # No padding here: DataCollatorWithPadding pads each batch to its
        # longest sequence, which avoids padding every review to the model
        # maximum.
        return tokenizer(examples["text"], truncation=True)

    tokenized_val = val_dataset.map(tokenize_function, batched=True)

    data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="pt")
    trainer = Trainer(model=model, data_collator=data_collator)

    predictions = trainer.predict(tokenized_val)

    true_labels = predictions.label_ids
    # Predicted class = index of the largest logit.
    preds = np.argmax(predictions.predictions, axis=-1)

    # Confusion matrix (rows: actual classes, columns: predicted classes)
    cm = confusion_matrix(true_labels, preds)

    # Human-readable names for the two classes
    labels = ["less than 5 stars", "5 stars"]

    # Draw the confusion matrix as a heatmap on its own figure/axes pair
    fig, ax = plt.subplots(figsize=(10, 7))
    sns.heatmap(cm, annot=True, fmt='g', xticklabels=labels, yticklabels=labels, ax=ax)
    ax.set_xlabel('predicted classes')
    ax.set_ylabel('actual classes')

    # Keep the figure object so it can be returned to the caller
    heatmap_plot = fig

    accuracy = accuracy_score(true_labels, preds)
    error_rate = 1 - accuracy

    # Macro-averaged precision, recall and F1; the support value is not used.
    precision, recall, f1, _ = precision_recall_fscore_support(true_labels, preds, average='macro')

    metrics_data = {
        "Metric": ["Accuracy", "Error Rate", "Precision", "Recall", "F1-Measure"],
        "Value": [accuracy, error_rate, precision, recall, f1]
    }
    metrics_df = pd.DataFrame(metrics_data)

    return metrics_df, heatmap_plot


# Function to evaluate the logistic regression model
def evaluate_logistic_regression():
    """Train a bag-of-words logistic regression and evaluate it on the test split.

    The training and test CSV files are downloaded, the labels are collapsed
    to two classes, a ``CountVectorizer`` is fitted on the training texts and
    a ``LogisticRegression`` model is trained on the resulting counts.

    Returns:
        A tuple ``(metrics_df, heatmap_plot)``: a DataFrame with the columns
        ``"Metric"`` and ``"Value"`` (accuracy, error rate and macro-averaged
        precision, recall and F1) and the matplotlib figure containing the
        confusion-matrix heatmap.
    """
    # NOTE(review): this function scores the *test* split, whereas
    # pre_trained_model() scores the *validation* split - confirm which split
    # the comparison between the models is meant to use.
    URL_test = "https://raw.githubusercontent.com/laurenzbrahner/BigDataTask2/main/data/Sentiment_Test.csv"
    URL_training = "https://raw.githubusercontent.com/laurenzbrahner/BigDataTask2/main/data/Sentiment_training_extended.csv"

    # Load the CSV files from the URLs
    df_train = pd.read_csv(URL_training, sep=";")
    df_test = pd.read_csv(URL_test, sep=";")

    # Binary mapping: original labels 0-3 become class 0 ("less than 5 stars"),
    # label 4 becomes class 1 ("5 stars").
    star_mapping = {
        0: 0,
        1: 0,
        2: 0,
        3: 0,
        4: 1
    }

    df_train['label'] = df_train['label'].map(star_mapping)
    df_test['label'] = df_test['label'].map(star_mapping)

    y_train = df_train['label'].values
    y_test = df_test['label']

    # Fit the vocabulary on the training texts only, then reuse it for the
    # test texts.
    vectorizer = CountVectorizer()
    X_train = vectorizer.fit_transform(df_train['text'].values)
    X_test = vectorizer.transform(df_test['text'].values)

    # Logistic Regression
    lr = LogisticRegression(max_iter=3500)
    lr.fit(X_train, y_train)

    # Prediction
    y_pred = lr.predict(X_test)

    # Confusion matrix (rows: actual classes, columns: predicted classes)
    cm = confusion_matrix(y_test, y_pred)

    # Visualize Confusion Matrix
    labels = ["less than 5 stars", "5 stars"]
    fig, ax = plt.subplots(figsize=(10, 7))
    sns.heatmap(cm, annot=True, fmt='g', xticklabels=labels, yticklabels=labels, ax=ax)
    ax.set_xlabel('Predicted Classes')
    ax.set_ylabel('Actual Classes')
    heatmap_plot = fig

    # Calculate Metrics
    accuracy = accuracy_score(y_test, y_pred)
    # Reported so the table has the same rows as the one produced by
    # pre_trained_model().
    error_rate = 1 - accuracy
    # Macro-averaged precision, recall and F1; the support value is not used.
    precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred, average='macro')

    # Return the metrics
    metrics_data = {
        "Metric": ["Accuracy", "Error Rate", "Precision", "Recall", "F1-Measure"],
        "Value": [accuracy, error_rate, precision, recall, f1]
    }
    metrics_df = pd.DataFrame(metrics_data)

    return metrics_df, heatmap_plot



# Dropdown from which the user picks the model to evaluate
dropdown = gr.Dropdown(model_list, label="Choose a pretrained model to view its evaluation measures.")


# Interface: the selected model name is passed to pre_trained_model, whose two
# return values (metrics table, confusion-matrix figure) fill the two outputs.
demo = gr.Interface(
    fn=pre_trained_model,
    inputs=dropdown,
    # The figure returned by the evaluation functions is a confusion-matrix
    # heatmap, so label the plot accordingly.
    outputs=[gr.Dataframe(label="Measures"), gr.Plot(label="Confusion Matrix")],
    title="Fine-Tuned-Models -- evaluation and comparison",
    allow_flagging="never"
    )

# Launch the interface
demo.launch(debug=True)






ModuleNotFoundError: No module named 'gradio'