In [None]:
import gradio as gr
from google.colab import drive
from transformers import AutoTokenizer, AutoModelForSequenceClassification, DataCollatorWithPadding,  Trainer
from datasets import load_dataset, Dataset
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score, precision_recall_fscore_support
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import altair as alt

# Model names offered in the dropdown; each one is resolved to a checkpoint
# location when the evaluation function is called.
model_list = [
    "rubert-base-cased-finetuned",
    "DeBERTa-v3-base-finetuned",
    "rubert-base-cased-not_finetuned",
    "DeBERTa-v3-base-not_finetuned",
]

# Location of each selectable model. The "finetuned" entries are directories on
# Google Drive; the "not_finetuned" entries are Hugging Face Hub model ids.
_MODEL_PATHS = {
    "rubert-base-cased-finetuned": "/content/drive/My Drive/Mein_Modellverzeichnis_1",
    "DeBERTa-v3-base-finetuned": "/content/drive/My Drive/Mein_Modellverzeichnis_2",
    "rubert-base-cased-not_finetuned": "cointegrated/rubert-base-cased-nli-threeway",
    "DeBERTa-v3-base-not_finetuned": "MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli",
}

# Google Drive mount point; only paths below it require the Drive to be mounted.
_DRIVE_MOUNT_POINT = "/content/drive"

# Semicolon-separated CSV holding the validation split.
_VALIDATION_URL = "https://raw.githubusercontent.com/laurenzbrahner/BigDataTask2/main/data/Recipes_Validation.csv"

# Cuisine names in label-id order: the list index is the numeric class id.
_CUISINE_LABELS = [
    "cajun_creole",
    "chinese",
    "french",
    "indian",
    "italian",
    "mexican",
    "southern_us",
    "thai",
]


def _resolve_model_path(dropdown):
    """Return the checkpoint location for a dropdown entry or raise ValueError."""
    # isinstance guard first: unhashable values (e.g. a list) cannot be used
    # for the dictionary membership test.
    if not isinstance(dropdown, str) or dropdown not in _MODEL_PATHS:
        raise ValueError(
            f"Unknown model {dropdown!r}; choose one of: {', '.join(_MODEL_PATHS)}"
        )
    return _MODEL_PATHS[dropdown]


def _load_validation_frame():
    """Download the validation CSV and make sure 'cuisine' holds numeric class ids."""
    frame = pd.read_csv(_VALIDATION_URL, sep=";")

    # Leave the column alone when it is already numeric; mapping numbers
    # through a name->id dictionary would turn every value into NaN.
    if pd.api.types.is_numeric_dtype(frame["cuisine"]):
        return frame

    cuisine_mapping = {name: idx for idx, name in enumerate(_CUISINE_LABELS)}
    encoded = frame["cuisine"].map(cuisine_mapping)
    unmapped = encoded.isna()
    if unmapped.any():
        unknown = sorted(set(frame.loc[unmapped, "cuisine"].astype(str)))
        raise ValueError(f"Validation data contains unknown cuisines: {unknown}")
    return frame.assign(cuisine=encoded.astype(int))


def _most_frequent_misclassifications(cm, labels):
    """Return, per actual class, its most frequent wrong prediction, sorted by count.

    Classes without any misclassification are omitted. The result always has
    the columns 'Actual', 'Predicted' and 'Count', even when it is empty.
    """
    rows = []
    for i, counts in enumerate(cm):
        off_diagonal = counts.copy()
        off_diagonal[i] = 0  # ignore correct predictions
        j = int(np.argmax(off_diagonal))
        if off_diagonal[j] > 0:
            rows.append(
                {
                    "Actual": labels[i],
                    "Predicted": labels[j],
                    "Count": int(off_diagonal[j]),
                }
            )

    # Explicit columns keep the sort below working when there are no errors.
    frame = pd.DataFrame(rows, columns=["Actual", "Predicted", "Count"])
    frame = frame.astype({"Count": "int64"})
    return frame.sort_values(by="Count", ascending=False)


def _build_misclassification_chart(df_sorted):
    """Build the Altair bar chart of the most frequent misclassification per class."""
    return alt.Chart(df_sorted).mark_bar().encode(
        x=alt.X('Actual:N', sort='-y', title="Actual Class"),
        y=alt.Y('Count:Q', title="Error Count"),
        color=alt.Color(field='Predicted', type='nominal', legend=alt.Legend(title='Predicted Class')),
        tooltip=['Actual', 'Predicted', 'Count']
    ).properties(
        width=600,
        height=400,
        title="Most frequent misclassification per Class"
    ).configure_title(
        fontSize=25,
        anchor='start'
    ).configure_axis(
        labelFontSize=12,
        titleFontSize=24,
        titleColor='gray',
        labelColor='gray',
        titlePadding=7,
        grid=False
    ).configure_view(
        strokeWidth=0,
    ).configure_axisX(
        labelAngle=0,
        titleAnchor='start'
    ).configure_axisY(
        grid=False,
        titleAnchor='end',
        titleFontSize=20
    )


# Function to load the pretrained model and evaluate it on the validation set
def pre_trained_model(dropdown=None):
    """Evaluate the selected model on the validation split.

    Args:
        dropdown: One of the model names in the dropdown (a key of
            ``_MODEL_PATHS``).

    Returns:
        A tuple ``(metrics_df, heatmap_plot, chart)``:
        a DataFrame with accuracy, error rate and macro precision/recall/F1,
        a matplotlib figure with the confusion-matrix heatmap, and
        an Altair chart of the most frequent misclassification per class.

    Raises:
        ValueError: If ``dropdown`` is not a known model name, or if the
            validation data contains a cuisine outside the known classes.
    """
    # Validate first so a bad selection fails fast with a clear message,
    # before mounting the Drive or downloading anything.
    path = _resolve_model_path(dropdown)

    # Only the fine-tuned checkpoints are stored on Google Drive; the Hub
    # models can be loaded without mounting it.
    if path.startswith(_DRIVE_MOUNT_POINT):
        drive.mount(_DRIVE_MOUNT_POINT)

    tokenizer = AutoTokenizer.from_pretrained(path)

    # ignore_mismatched_sizes allows loading a checkpoint whose classification
    # head has a different number of labels than requested here.
    # NOTE(review): for the not-fine-tuned NLI checkpoints this presumably
    # means the head is freshly initialised - confirm this baseline is intended.
    model = AutoModelForSequenceClassification.from_pretrained(
        path, num_labels=len(_CUISINE_LABELS), ignore_mismatched_sizes=True
    )

    # Only the validation split is evaluated, so only that split is loaded.
    val_dataset = Dataset.from_pandas(_load_validation_frame())

    def tokenize_function(examples):
        # Tokenize the ingredients and attach the cuisine ids as labels.
        # No padding here: the data collator pads each batch to its longest
        # sequence, which avoids padding every example to the model maximum.
        tokenized_inputs = tokenizer(examples["ingredients"], truncation=True)
        tokenized_inputs["labels"] = examples["cuisine"]
        return tokenized_inputs

    tokenized_val = val_dataset.map(tokenize_function, batched=True)

    data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="pt")
    trainer = Trainer(model=model, data_collator=data_collator)

    predictions = trainer.predict(tokenized_val)
    true_labels = predictions.label_ids
    preds = np.argmax(predictions.predictions, axis=-1)

    # Pass the full list of class ids so the matrix always has one row/column
    # per cuisine, even if a class is missing from both labels and predictions.
    class_ids = list(range(len(_CUISINE_LABELS)))
    cm = confusion_matrix(true_labels, preds, labels=class_ids)

    # Confusion-matrix heatmap
    fig, ax = plt.subplots(figsize=(10, 7))
    sns.heatmap(
        cm,
        annot=True,
        fmt='g',
        xticklabels=_CUISINE_LABELS,
        yticklabels=_CUISINE_LABELS,
        ax=ax,
    )
    ax.set_xlabel('predicted classes')
    ax.set_ylabel('actual classes')
    heatmap_plot = fig

    # Summary metrics
    accuracy = accuracy_score(true_labels, preds)
    error_rate = 1 - accuracy
    # Precision, Recall, F1-Measure, and Support (support is not used here)
    precision, recall, f1, _ = precision_recall_fscore_support(true_labels, preds, average='macro')

    metrics_df = pd.DataFrame(
        {
            "Metric": ["Accuracy", "Error Rate", "Precision", "Recall", "F1-Measure"],
            "Value": [accuracy, error_rate, precision, recall, f1],
        }
    )

    # Most frequent misclassification per class, as a bar chart
    df_sorted = _most_frequent_misclassifications(cm, _CUISINE_LABELS)
    chart = _build_misclassification_chart(df_sorted)

    return metrics_df, heatmap_plot, chart




  



# Input widget: lets the user pick which entry of model_list to evaluate.
dropdown = gr.Dropdown(
    choices=model_list,
    label="Choose a pretrained model to view its evaluation measures",
)

# Interface: passes the selected model name to pre_trained_model and shows its
# three return values (metrics table, heatmap figure, misclassification chart).
demo = gr.Interface(
    title="Fine-Tuned-Models -- evaluation and comparison",
    fn=pre_trained_model,
    inputs=dropdown,
    outputs=[
        gr.Dataframe(label="Metrics"),
        gr.Plot(label="Heatmap"),
        gr.Plot(label=" Misclasification Barplot"),
    ],
    allow_flagging="never",
)

# Start the Gradio app.
demo.launch()




