# Generative AI - Plots

## Libraries

In [60]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import linregress

## Load Data

In [61]:
# Read the experiment-results workbook into a DataFrame
path = "Training_GenAI.xlsx"
df = pd.read_excel(io=path)

In [62]:
# Write the loaded frame to the experiments folder (sheet "Data", no index column).
# NOTE(review): no cleaning step is visible between the load and this save — confirm the intent.
df.to_excel(
    excel_writer='GenAI Experiments/Training_GenAI.xlsx',
    sheet_name='Data',
    index=False,
    engine='openpyxl',
)

## Análisis General

In [63]:
# Class names and the plot color used for each, kept side by side
class_palette = [
    ('Comentario Positivo', 'darkgreen'),
    ('Comentario Negativo', 'darkred'),
]
labels = [name for name, _ in class_palette]
colors = [color for _, color in class_palette]

In [None]:
# Keep only the "Análisis General" experiments.
# .copy() yields an independent frame, so the later in-place edits
# (rename / column assignment / sort / reset_index) do not act on a slice of `df`.
ageneral_df = df[df["Type"] == "analisis_general"].copy()
len(ageneral_df)

In [None]:
# Class name -> numeric code used as prefix in the per-class metric columns
mapping = {'Comentario Positivo': 0, 'Comentario Negativo': 1}

# '<code>_<metric>' -> '<class name>_<metric>' for every per-class metric column
rename_dict = {
    f'{value}_{metric}': f'{key}_{metric}'
    for key, value in mapping.items()
    for metric in ('precision', 'recall', 'f1-score', 'support')
}

# Rebind instead of inplace=True: ageneral_df comes from a boolean-mask
# selection of `df`, and in-place edits on such a slice are flagged by pandas
# (SettingWithCopyWarning).
ageneral_df = ageneral_df.rename(columns=rename_dict)

In [None]:
# Display the "Análisis General" frame as the cell output
ageneral_df

### Windows

In [None]:
# Parse this frame's own 'Timestamp' column, order the experiments
# chronologically and renumber the rows 0..n-1.
# The timestamps are deliberately NOT taken from `df`: that relied on index
# alignment with the parent frame, so re-running the cell after the index had
# been reset would attach timestamps from unrelated rows.
# The result is rebound (no inplace=True) so nothing is mutated on a slice of `df`.
ageneral_df = (
    ageneral_df
    .assign(Timestamp=pd.to_datetime(ageneral_df['Timestamp']))
    .sort_values('Timestamp')
    .reset_index(drop=True)
)

In [None]:
# Metric columns to average
numeric_cols = [
    'Accuracy_Global', 'Std_Global', 'Time (s)',
    'weighted avg_precision', 'weighted avg_recall', 'weighted avg_f1-score',
    'Comentario Positivo_f1-score', 'Comentario Negativo_f1-score',
]

# Mean of every metric per context
df_grouped_context = (
    ageneral_df
    .groupby(['Context'])[numeric_cols]
    .mean()
    .reset_index()
)

df_grouped_context


In [None]:
# Metric columns to average
numeric_cols = [
    'Accuracy_Global', 'Std_Global', 'Time (s)',
    'weighted avg_precision', 'weighted avg_recall', 'weighted avg_f1-score',
    'Comentario Positivo_f1-score', 'Comentario Negativo_f1-score',
]

# Mean of every metric per prompt
df_grouped_prompt = (
    ageneral_df
    .groupby(['Prompt'])[numeric_cols]
    .mean()
    .reset_index()
)

df_grouped_prompt


In [None]:
# Metric columns to average
numeric_cols = [
    'Accuracy_Global', 'Std_Global', 'Time (s)',
    'weighted avg_precision', 'weighted avg_recall', 'weighted avg_f1-score',
    'Comentario Positivo_f1-score', 'Comentario Negativo_f1-score',
]

# Mean of every metric per (context, prompt) pair.
# The result is stored under the name df_grouped_prompt.
df_grouped_prompt = (
    ageneral_df
    .groupby(['Context', 'Prompt'])[numeric_cols]
    .mean()
    .reset_index()
)

df_grouped_prompt


#### F1-Score

In [None]:
# Timestamps at which vertical marker lines are drawn, parsed to datetimes
specific_dates = pd.to_datetime([
    '2024-08-10 02:55:23', '2024-08-10 04:19:28', '2024-08-10 05:50:11',
    '2024-08-10 07:15:46', '2024-08-10 08:47:33', '2024-08-10 10:13:58',
    '2024-08-10 11:45:28', '2024-08-10 13:13:12', '2024-08-10 14:35:16',
])

# Row labels of the experiments whose timestamp is one of those dates
specific_indices = (
    ageneral_df
    .loc[ageneral_df['Timestamp'].isin(specific_dates)]
    .index
    .tolist()
)
specific_indices

In [None]:
plt.figure(figsize=(24, 8))
ax = plt.gca()
experiment_idx = ageneral_df.index
weighted_f1 = ageneral_df['weighted avg_f1-score']

# Vertical markers: solid, thicker line at index 32; dashed everywhere else
for _, idx in zip(specific_dates, specific_indices):
    highlighted = idx == 32
    ax.axvline(
        x=idx,
        color='#1a2e49',
        linestyle='-' if highlighted else '--',
        linewidth=3 if highlighted else 2,
    )

# Weighted F1 of every experiment, as a line with round markers
ax.plot(
    experiment_idx, weighted_f1,
    linewidth=6, linestyle='-', marker='o', markersize=16,
    color='black', alpha=0.7, label='Weighted Avg. F1-Score',
)

# Least-squares trend over all experiments; R² is shown in the legend
slope, intercept, r_value, p_value, std_err = linregress(experiment_idx, weighted_f1)
ax.plot(
    experiment_idx, intercept + slope * experiment_idx,
    linewidth=2, color='darkred', linestyle='-', alpha=0.5,
    label=f'Trend Line (R² = {r_value**2:.2f})',
)

# Legend centered at the top of the axes
ax.legend(loc='upper center', bbox_to_anchor=(0.5, 0.99), ncol=2, fontsize=22)

# Title, axis labels, fixed y range and tick label size
ax.set_title('Weighted Avg. F1-Score for "Análisis General"', fontsize=42)
ax.set_xlabel('Experiments', fontsize=30)
ax.set_ylabel('F1-Score', fontsize=30)
ax.set_ylim(0.75, 0.85)
ax.tick_params(axis='both', labelsize=22)

# Horizontal grid lines only
ax.grid(axis='y', linestyle='--', alpha=0.7)

# Hide all four spines
for side in ('top', 'right', 'left', 'bottom'):
    ax.spines[side].set_visible(False)

plt.tight_layout()

# Export as PDF before showing
plt.savefig("../../../IMAGES/Análisis General/GenAI/GenAI_Experiments in _Análisis General_ weighted.pdf", format='pdf')

plt.show()

In [None]:
plt.figure(figsize=(24, 24))
ax = plt.gca()
experiment_idx = ageneral_df.index

# Vertical markers: solid, thicker line at index 32; dashed everywhere else
for _, idx in zip(specific_dates, specific_indices):
    highlighted = idx == 32
    ax.axvline(
        x=idx,
        color='#1a2e49',
        linestyle='-' if highlighted else '--',
        linewidth=3 if highlighted else 2,
    )

# Weighted average first, then one line per class in its own color
ax.plot(
    experiment_idx, ageneral_df['weighted avg_f1-score'],
    linewidth=9, linestyle='-', marker='o', markersize=18,
    color='black', alpha=0.7, label="Weighted Avg.",
)
for label, color in zip(labels, colors):
    ax.plot(
        experiment_idx, ageneral_df[f'{label}_f1-score'],
        linewidth=7, linestyle='-', marker='o', markersize=16,
        color=color, alpha=0.7, label=label,
    )

# Unlabelled least-squares trend per class, drawn after the data lines
for label, color in zip(labels, colors):
    slope, intercept, r_value, p_value, std_err = linregress(experiment_idx, ageneral_df[f'{label}_f1-score'])
    ax.plot(
        experiment_idx, intercept + slope * experiment_idx,
        linewidth=2, color=color, linestyle='-', alpha=0.5,
    )

ax.legend(loc='upper center', fontsize=22, bbox_to_anchor=(0.5, 0.99), ncol=3)

# Title, axis labels, fixed y range and tick label size
ax.set_title('F1-Score per Class for "Análisis General"', fontsize=42)
ax.set_xlabel('Experiments', fontsize=30)
ax.set_ylabel('F1-Score', fontsize=30)
ax.set_ylim(0.63, 0.90)
ax.tick_params(axis='both', labelsize=22)

# Hide all four spines
for side in ('top', 'right', 'left', 'bottom'):
    ax.spines[side].set_visible(False)

# Horizontal grid lines only
ax.grid(axis='y', linestyle='--', alpha=0.7)

plt.tight_layout()

# Export as PDF and as PNG with a transparent background
plt.savefig("../../../IMAGES/Análisis General/GenAI/GenAI_Experiments in _Análisis General_ f1score.pdf", format='pdf')
plt.savefig("../../../IMAGES/Análisis General/GenAI/GenAI_Experiments in _Análisis General_ f1score.png", format='png', transparent=True)

plt.show()


In [None]:
# One stacked panel per class, all sharing the x axis
fig, axes = plt.subplots(len(labels), 1, figsize=(24, len(labels) * 4), sharex=True)

for ax, label, color in zip(axes, labels, colors):
    class_f1 = ageneral_df[f'{label}_f1-score']

    # F1 of this class for every experiment. The line is labelled so that
    # ax.legend() below has an entry to show (it previously had none).
    ax.plot(ageneral_df.index, class_f1,
            linewidth=4, linestyle='-', marker='o', markersize=12,
            color=color, alpha=0.7, label=label)

    # One least-squares trend line per window between consecutive markers.
    # Both ends are included, so neighbouring windows share their boundary point.
    for start_idx, end_idx in zip(specific_indices, specific_indices[1:]):
        window = slice(start_idx, end_idx + 1)
        window_indices = ageneral_df.index[window]
        # .iloc makes the positional slicing explicit
        slope, intercept, r_value, p_value, std_err = linregress(window_indices, class_f1.iloc[window])
        ax.plot(window_indices, intercept + slope * window_indices,
                linewidth=2, color=color, linestyle='-', alpha=0.5)

    ax.set_title(f'F1-Score for "{label}"', fontsize=20)
    ax.set_ylabel('F1-Score', fontsize=14)
    ax.grid(True)
    ax.legend(loc='lower right', fontsize=10)

    # Dashed vertical line at every window boundary
    for index in specific_indices:
        ax.axvline(x=index, color='#1a2e49', linestyle='--', linewidth=2)


# Common x label for the whole figure
fig.text(0.5, 0.04, 'Experiments', ha='center', fontsize=18)

# Leave room for the common label when tightening the layout
plt.tight_layout(rect=[0.03, 0.03, 1, 0.97])
plt.show()


### Context

In [None]:
from matplotlib.gridspec import GridSpec

# Seaborn theme for the following figures
sns.set(style="whitegrid")

# Upper y limit: mean plus four standard deviations of the weighted F1
weighted_f1 = ageneral_df['weighted avg_f1-score']
y_max = weighted_f1.mean() + 4 * weighted_f1.std()

# Single-panel figure laid out through a 1x1 GridSpec
fig = plt.figure(figsize=(24, 8))
gs = GridSpec(1, 1, figure=fig)
ax1 = fig.add_subplot(gs[0, 0])

# Weighted F1 per context, one bar per model (hue), no error bars.
# NOTE(review): `ci` is deprecated in seaborn >= 0.12 in favor of `errorbar=None`;
# left unchanged because the installed seaborn version is not visible here.
sns.barplot(
    data=ageneral_df, x='Context', y='weighted avg_f1-score', hue='Model',
    palette='crest_r', alpha=0.7, ci=None, ax=ax1,
)
ax1.set_title('Weighted Avg. F1-Score for Different Contexts', fontsize=30)
ax1.set_xlabel('', fontsize=18)
ax1.set_ylabel('F1-Score', fontsize=18)
for axis_name in ('x', 'y'):
    ax1.tick_params(axis=axis_name, labelsize=18)
ax1.set_ylim(0.5, y_max)

# Legend centered at the top of the axes
ax1.legend(loc='upper center', bbox_to_anchor=(0.5, 0.99), ncol=4, fontsize=14.75)

# Grid on both axes
ax1.grid(True)

plt.tight_layout()
plt.show()

In [None]:
# Context files to compare, one panel each
contexts = ['context/context_0.txt', 'context/context_1.txt']

# Stacked panels sharing the x axis
fig, axes = plt.subplots(len(contexts), 1, figsize=(24, len(contexts) * 4), sharex=True)

for ax, context in zip(axes, contexts):
    # Experiments run with this context (the filter is computed once and reused)
    subset = ageneral_df[ageneral_df['Context'] == context]

    # Weighted F1
    line1, = ax.plot(subset.index, subset['weighted avg_f1-score'],
                     linewidth=6, linestyle='-', marker='o', markersize=12,
                     color='black', alpha=0.7, label=context)

    # "Comentario Positivo" F1
    line2, = ax.plot(subset.index, subset['Comentario Positivo_f1-score'],
                     linewidth=4, linestyle='-', marker='o', markersize=9,
                     color=colors[0], alpha=0.7, label=context)

    # "Comentario Negativo" F1
    line3, = ax.plot(subset.index, subset['Comentario Negativo_f1-score'],
                     linewidth=4, linestyle='-', marker='o', markersize=9,
                     color=colors[1], alpha=0.7, label=context)

    ax.set_title(f'F1-Score for "{context}"', fontsize=20)
    ax.set_ylabel('F1-Score', fontsize=14)
    ax.set_ylim(0.6, 1)
    ax.grid(True)

# Legend handles are the three lines of the last panel; the text comes from
# labels_legend, not from the per-line `label=` values
lines = [line1, line2, line3]
labels_legend = ['Weighted Avg. F1-Score', 'Comentario Positivo', 'Comentario Negativo']

# Common x label for the whole figure
fig.text(0.5, 0.02, 'Experiments', ha='center', fontsize=18)

# One shared legend at the top of the figure
fig.legend(lines, labels_legend, loc='upper center', bbox_to_anchor=(0.5, 0.99), ncol=3, fontsize=14)

# Leave room for the legend and the common label
plt.tight_layout(rect=[0.03, 0.03, 1, 0.95])
plt.show()


### Prompt

In [None]:
from matplotlib.gridspec import GridSpec

# Seaborn theme for the following figures
sns.set(style="whitegrid")

# Upper y limit: mean plus four standard deviations of the weighted F1
weighted_f1 = ageneral_df['weighted avg_f1-score']
y_max = weighted_f1.mean() + 4 * weighted_f1.std()

# Single-panel figure laid out through a 1x1 GridSpec
fig = plt.figure(figsize=(24, 8))
gs = GridSpec(1, 1, figure=fig)
ax1 = fig.add_subplot(gs[0, 0])

# Weighted F1 per prompt, one bar per model (hue), no error bars.
# NOTE(review): `ci` is deprecated in seaborn >= 0.12 in favor of `errorbar=None`;
# left unchanged because the installed seaborn version is not visible here.
sns.barplot(
    data=ageneral_df, x='Prompt', y='weighted avg_f1-score', hue='Model',
    palette='crest_r', alpha=0.7, ci=None, ax=ax1,
)
ax1.set_title('Weighted Avg. F1-Score for Different Prompts', fontsize=30)
ax1.set_xlabel('', fontsize=18)
ax1.set_ylabel('F1-Score', fontsize=18)
for axis_name in ('x', 'y'):
    ax1.tick_params(axis=axis_name, labelsize=18)
ax1.set_ylim(0.5, y_max)

# Legend centered at the top of the axes
ax1.legend(loc='upper center', bbox_to_anchor=(0.5, 0.99), ncol=4, fontsize=14.75)

# Grid on both axes
ax1.grid(True)

plt.tight_layout()
plt.show()

In [None]:
# Prompt files to compare, one panel each
prompts = [
    'prompts/analisis_general/prompt_0_EN.txt',
    'prompts/analisis_general/prompt_0_ES.txt',
    'prompts/analisis_general/prompt_1_EN.txt',
    'prompts/analisis_general/prompt_1_ES.txt',
]

# Stacked panels sharing the x axis
fig, axes = plt.subplots(len(prompts), 1, figsize=(24, len(prompts) * 4), sharex=True)

for ax, prompt in zip(axes, prompts):
    # Experiments run with this prompt (the filter is computed once and reused)
    subset = ageneral_df[ageneral_df['Prompt'] == prompt]

    # Weighted F1
    line1, = ax.plot(subset.index, subset['weighted avg_f1-score'],
                     linewidth=6, linestyle='-', marker='o', markersize=12,
                     color='black', alpha=0.7, label=prompt)

    # "Comentario Positivo" F1
    line2, = ax.plot(subset.index, subset['Comentario Positivo_f1-score'],
                     linewidth=4, linestyle='-', marker='o', markersize=9,
                     color=colors[0], alpha=0.7, label=prompt)

    # "Comentario Negativo" F1
    line3, = ax.plot(subset.index, subset['Comentario Negativo_f1-score'],
                     linewidth=4, linestyle='-', marker='o', markersize=9,
                     color=colors[1], alpha=0.7, label=prompt)

    ax.set_title(f'F1-Score for "{prompt}"', fontsize=20)
    ax.set_ylabel('F1-Score', fontsize=14)
    ax.set_ylim(0.6, 1)
    ax.grid(True)

# Legend handles are the three lines of the last panel; the text comes from
# labels_legend, not from the per-line `label=` values
lines = [line1, line2, line3]
labels_legend = ['Weighted Avg. F1-Score', 'Comentario Positivo', 'Comentario Negativo']

# Common x label for the whole figure
fig.text(0.5, 0.02, 'Experiments', ha='center', fontsize=18)

# One shared legend at the top of the figure
fig.legend(lines, labels_legend, loc='upper center', bbox_to_anchor=(0.5, 0.99), ncol=3, fontsize=14)

# Leave room for the legend and the common label
plt.tight_layout(rect=[0.03, 0.03, 1, 0.95])
plt.show()


### Performance (Time)

In [None]:
from matplotlib.gridspec import GridSpec

# Seaborn theme for the following figures
sns.set(style="whitegrid")

# Upper y limit: mean plus four standard deviations of the run time
run_time = ageneral_df['Time (s)']
y_max = run_time.mean() + 4 * run_time.std()

# Single-panel figure laid out through a 1x1 GridSpec
fig = plt.figure(figsize=(24, 8))
gs = GridSpec(1, 1, figure=fig)
ax1 = fig.add_subplot(gs[0, 0])

# Run time per prompt, one bar per model (hue), no error bars.
# NOTE(review): `ci` is deprecated in seaborn >= 0.12 in favor of `errorbar=None`;
# left unchanged because the installed seaborn version is not visible here.
sns.barplot(
    data=ageneral_df, x='Prompt', y='Time (s)', hue='Model',
    palette='crest_r', alpha=0.7, ci=None, ax=ax1,
)
ax1.set_title('Average Performance Time for Different Prompts', fontsize=30)
ax1.set_xlabel('', fontsize=18)
ax1.set_ylabel('Average Time (s)', fontsize=18)
for axis_name in ('x', 'y'):
    ax1.tick_params(axis=axis_name, labelsize=18)
ax1.set_ylim(0, y_max)

# Legend centered at the top of the axes
ax1.legend(loc='upper center', bbox_to_anchor=(0.5, 0.99), ncol=4, fontsize=15)

# Grid on both axes
ax1.grid(True)

plt.tight_layout()
plt.show()

In [None]:
from matplotlib.gridspec import GridSpec

# Seaborn theme for the following figures
sns.set(style="whitegrid")

# Upper y limit: mean plus four standard deviations of the run time
run_time = ageneral_df['Time (s)']
y_max = run_time.mean() + 4 * run_time.std()

# Single-panel figure laid out through a 1x1 GridSpec
fig = plt.figure(figsize=(24, 8))
gs = GridSpec(1, 1, figure=fig)
ax1 = fig.add_subplot(gs[0, 0])

# Run time per context, one bar per model (hue), no error bars.
# NOTE(review): `ci` is deprecated in seaborn >= 0.12 in favor of `errorbar=None`;
# left unchanged because the installed seaborn version is not visible here.
sns.barplot(
    data=ageneral_df, x='Context', y='Time (s)', hue='Model',
    palette='crest_r', alpha=0.7, ci=None, ax=ax1,
)
ax1.set_title('Average Performance Time for Different Contexts', fontsize=30)
ax1.set_xlabel('', fontsize=18)
ax1.set_ylabel('Average Time (s)', fontsize=18)
for axis_name in ('x', 'y'):
    ax1.tick_params(axis=axis_name, labelsize=18)
ax1.set_ylim(0, y_max)

# Legend centered at the top of the axes
ax1.legend(loc='upper center', bbox_to_anchor=(0.5, 0.99), ncol=5, fontsize=15)

# Grid on both axes
ax1.grid(True)

plt.tight_layout()
plt.show()

In [None]:
# Pairwise correlation between the F1 scores, global accuracy/std and run time
metric_cols = [
    'weighted avg_f1-score',
    f'{labels[0]}_f1-score',
    f'{labels[1]}_f1-score',
    'Accuracy_Global',
    'Std_Global',
    'Time (s)',
]
correlation_matrix = ageneral_df[metric_cols].corr()

# Seaborn theme
sns.set(style="whitegrid")

# Annotated heatmap of the correlation matrix
plt.figure(figsize=(12, 10))
heatmap = sns.heatmap(
    correlation_matrix,
    annot=True, fmt='.2f',
    cmap='crest_r',
    linewidths=0.5, linecolor='black',
)

plt.title('Correlation Matrix of Performance Metrics', fontsize=20)

plt.tight_layout()
plt.show()


### Confusion Matrix - Best Model

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion-matrix data for model 'c1c0f5ad-38ae-4c8b-be38-c98019e67891',
# expanded into one true/predicted label per sample
y_true = ['Comentario Positivo'] * 1034 + ['Comentario Negativo'] * 616
y_pred = (
    # samples whose true class is 'Comentario Positivo'
    ['Comentario Positivo'] * 1006 + ['Comentario Negativo'] * 28
    # samples whose true class is 'Comentario Negativo'
    + ['Comentario Positivo'] * 277 + ['Comentario Negativo'] * 339
)

# Build the confusion matrix with rows/columns ordered as in `labels`
cm = confusion_matrix(y_true, y_pred, labels=labels)

# Draw it as an annotated heatmap without a color bar
plt.figure(figsize=(24, 22))
sns.heatmap(
    cm,
    annot=True, fmt='d', annot_kws={"size": 52},
    cmap='crest',
    xticklabels=labels, yticklabels=labels,
    cbar=False,
)

plt.xlabel('Predicted label', fontsize=30)
plt.ylabel('True label', fontsize=30)
plt.xticks(rotation=45, fontsize=50)
plt.yticks(rotation=0, fontsize=50)

plt.tight_layout()

# Export as PDF and as PNG with a transparent background
plt.savefig("../../../IMAGES/Análisis General/GenAI/GenAI_Experiments in _Análisis General_ best_model.pdf", format='pdf')
plt.savefig("../../../IMAGES/Análisis General/GenAI/GenAI_Experiments in _Análisis General_ best_model.png", format='png', transparent=True)

plt.show()


## Contenido Negativo

In [83]:
# Class names and the plot color used for each, kept side by side
class_palette = [
    ('Desprestigiar Víctima', '#478CCF'),
    ('Desprestigiar Acto', '#36C2CE'),
    ('Insultos', '#77E4C8'),
    ('Desprestigiar Deportista Autora', '#4535C1'),
]
labels = [name for name, _ in class_palette]
colors = [color for _, color in class_palette]

In [None]:
# Keep only the "Contenido Negativo" experiments.
# .copy() yields an independent frame, so the later in-place edits
# (rename / column assignment / sort / reset_index) do not act on a slice of `df`.
cnegativo_df = df[df["Type"] == "contenido_negativo"].copy()
len(cnegativo_df)

In [None]:
# Display the "Contenido Negativo" frame as the cell output
cnegativo_df

In [None]:
# Class name -> numeric code used as prefix in the per-class metric columns
mapping = {'Desprestigiar Víctima': 0, 'Desprestigiar Acto': 1, 'Insultos': 2, 'Desprestigiar Deportista Autora': 3}

# '<code>_<metric>' -> '<class name>_<metric>' for every per-class metric column
rename_dict = {
    f'{value}_{metric}': f'{key}_{metric}'
    for key, value in mapping.items()
    for metric in ('precision', 'recall', 'f1-score', 'support')
}

# Rebind instead of inplace=True: cnegativo_df comes from a boolean-mask
# selection of `df`, and in-place edits on such a slice are flagged by pandas
# (SettingWithCopyWarning).
cnegativo_df = cnegativo_df.rename(columns=rename_dict)

### Windows

In [None]:
# Parse this frame's own 'Timestamp' column, order the experiments
# chronologically and renumber the rows 0..n-1.
# The timestamps are deliberately NOT taken from `df`: that relied on index
# alignment with the parent frame, so re-running the cell after the index had
# been reset would attach timestamps from unrelated rows.
# The result is rebound (no inplace=True) so nothing is mutated on a slice of `df`.
cnegativo_df = (
    cnegativo_df
    .assign(Timestamp=pd.to_datetime(cnegativo_df['Timestamp']))
    .sort_values('Timestamp')
    .reset_index(drop=True)
)

In [None]:
# Metric columns to average: global metrics plus the F1 of the first four classes in `labels`
numeric_cols = [
    'Accuracy_Global', 'Std_Global', 'Time (s)',
    'weighted avg_precision', 'weighted avg_recall', 'weighted avg_f1-score',
    *(f'{labels[k]}_f1-score' for k in range(4)),
]

# Mean of every metric per context
df_grouped_context = (
    cnegativo_df
    .groupby(['Context'])[numeric_cols]
    .mean()
    .reset_index()
)

df_grouped_context


In [None]:
# Metric columns to average: global metrics plus the F1 of the first four classes in `labels`
numeric_cols = [
    'Accuracy_Global', 'Std_Global', 'Time (s)',
    'weighted avg_precision', 'weighted avg_recall', 'weighted avg_f1-score',
    *(f'{labels[k]}_f1-score' for k in range(4)),
]

# Mean of every metric per prompt
df_grouped_prompt = (
    cnegativo_df
    .groupby(['Prompt'])[numeric_cols]
    .mean()
    .reset_index()
)

df_grouped_prompt


In [None]:
# Metric columns to average: global metrics plus the F1 of the first four classes in `labels`
numeric_cols = [
    'Accuracy_Global', 'Std_Global', 'Time (s)',
    'weighted avg_precision', 'weighted avg_recall', 'weighted avg_f1-score',
    *(f'{labels[k]}_f1-score' for k in range(4)),
]

# Mean of every metric per (context, prompt) pair.
# The result is stored under the name df_grouped_prompt.
df_grouped_prompt = (
    cnegativo_df
    .groupby(['Context', 'Prompt'])[numeric_cols]
    .mean()
    .reset_index()
)

df_grouped_prompt


#### F1-Score

In [None]:
# Timestamps at which vertical marker lines are drawn, parsed to datetimes
specific_dates = pd.to_datetime([
    '2024-08-10 15:23:40', '2024-08-10 15:42:11', '2024-08-10 16:04:27',
    '2024-08-10 16:25:22', '2024-08-10 16:46:02', '2024-08-10 17:07:23',
    '2024-08-10 17:27:53', '2024-08-10 17:48:57', '2024-08-10 18:07:47',
])

# Row labels of the experiments whose timestamp is one of those dates
specific_indices = (
    cnegativo_df
    .loc[cnegativo_df['Timestamp'].isin(specific_dates)]
    .index
    .tolist()
)
specific_indices

In [None]:
plt.figure(figsize=(24, 10))
ax = plt.gca()
experiment_idx = cnegativo_df.index
weighted_f1 = cnegativo_df['weighted avg_f1-score']

# Vertical markers: solid, thicker line at index 32; dashed everywhere else.
# NOTE(review): 32 is the same highlighted index used in the "Análisis General"
# plots — confirm it is also the intended window for this dataset.
for _, idx in zip(specific_dates, specific_indices):
    highlighted = idx == 32
    ax.axvline(
        x=idx,
        color='#1a2e49',
        linestyle='-' if highlighted else '--',
        linewidth=3 if highlighted else 2,
    )

# Weighted F1 of every experiment, as a line with round markers
ax.plot(
    experiment_idx, weighted_f1,
    linewidth=6, linestyle='-', marker='o', markersize=16,
    color='black', alpha=0.7, label='Weighted Avg. F1-Score',
)

# Least-squares trend over all experiments; R² is shown in the legend
slope, intercept, r_value, p_value, std_err = linregress(experiment_idx, weighted_f1)
ax.plot(
    experiment_idx, intercept + slope * experiment_idx,
    linewidth=2, color='darkred', linestyle='-', alpha=0.5,
    label=f'Trend Line (R² = {r_value**2:.2f})',
)

# Legend centered at the top of the axes
ax.legend(loc='upper center', bbox_to_anchor=(0.5, 0.99), ncol=2, fontsize=22)

# Title, axis labels, fixed y range and tick label size
ax.set_title('Weighted Avg. F1-Score for "Contenido Negativo"', fontsize=42)
ax.set_xlabel('Experiments', fontsize=30)
ax.set_ylabel('F1-Score', fontsize=30)
ax.set_ylim(0.25, 0.40)
ax.tick_params(axis='both', labelsize=22)

# Horizontal grid lines only
ax.grid(axis='y', linestyle='--', alpha=0.7)

# Hide all four spines
for side in ('top', 'right', 'left', 'bottom'):
    ax.spines[side].set_visible(False)

plt.tight_layout()

# Export as PDF before showing
plt.savefig("../../../IMAGES/Contenido Negativo/GenAI/GenAI_Experiments in _ContenidoNegativo_ weighted.pdf", format='pdf')

plt.show()

In [None]:
plt.figure(figsize=(24, 18))
ax = plt.gca()
experiment_idx = cnegativo_df.index

# Vertical markers: solid, thicker line at index 32; dashed everywhere else.
# NOTE(review): 32 is the same highlighted index used in the "Análisis General"
# plots — confirm it is also the intended window for this dataset.
for _, idx in zip(specific_dates, specific_indices):
    highlighted = idx == 32
    ax.axvline(
        x=idx,
        color='#1a2e49',
        linestyle='-' if highlighted else '--',
        linewidth=3 if highlighted else 2,
    )

# Weighted average first, then one line per class in its own color
ax.plot(
    experiment_idx, cnegativo_df['weighted avg_f1-score'],
    linewidth=9, linestyle='-', marker='o', markersize=18,
    color='black', alpha=0.7, label="Weighted Avg.",
)
for label, color in zip(labels, colors):
    ax.plot(
        experiment_idx, cnegativo_df[f'{label}_f1-score'],
        linewidth=7, linestyle='-', marker='o', markersize=16,
        color=color, alpha=0.7, label=label,
    )

# Unlabelled least-squares trend per class, drawn after the data lines
for label, color in zip(labels, colors):
    slope, intercept, r_value, p_value, std_err = linregress(experiment_idx, cnegativo_df[f'{label}_f1-score'])
    ax.plot(
        experiment_idx, intercept + slope * experiment_idx,
        linewidth=2, color=color, linestyle='-', alpha=0.5,
    )

# Legend centered at the top of the axes
ax.legend(loc='upper center', bbox_to_anchor=(0.5, 0.99), ncol=5, fontsize=22)

# Title, axis labels, fixed y range and tick label size
ax.set_title('F1-Score per Class for "Contenido Negativo"', fontsize=42)
ax.set_xlabel('Experiments', fontsize=30)
ax.set_ylabel('F1-Score', fontsize=30)
ax.set_ylim(-0.01, 0.5)
ax.tick_params(axis='both', labelsize=22)

# Horizontal grid lines only
ax.grid(axis='y', linestyle='--', alpha=0.7)

# Hide all four spines
for side in ('top', 'right', 'left', 'bottom'):
    ax.spines[side].set_visible(False)

plt.tight_layout()

# Export as PDF and as PNG with a transparent background
plt.savefig("../../../IMAGES/Contenido Negativo/GenAI/GenAI_Experiments in _ContenidoNegativo_f1score.pdf", format='pdf')
plt.savefig("../../../IMAGES/Contenido Negativo/GenAI/GenAI_Experiments in _ContenidoNegativo_f1score.png", format='png', transparent=True)

plt.show()

### Performance (Time)

In [None]:
from matplotlib.gridspec import GridSpec

# Seaborn theme for the following figures
sns.set(style="whitegrid")

# Upper y limit: mean plus four standard deviations of the run time
run_time = cnegativo_df['Time (s)']
y_max = run_time.mean() + 4 * run_time.std()

# Single-panel figure laid out through a 1x1 GridSpec
fig = plt.figure(figsize=(24, 8))
gs = GridSpec(1, 1, figure=fig)
ax1 = fig.add_subplot(gs[0, 0])

# Run time per prompt, one bar per model (hue), no error bars.
# NOTE(review): `ci` is deprecated in seaborn >= 0.12 in favor of `errorbar=None`;
# left unchanged because the installed seaborn version is not visible here.
sns.barplot(
    data=cnegativo_df, x='Prompt', y='Time (s)', hue='Model',
    palette='Blues', alpha=0.7, ci=None, ax=ax1,
)
ax1.set_title('Average Performance Time for Different Prompts', fontsize=30)
ax1.set_xlabel('', fontsize=18)
ax1.set_ylabel('Average Time (s)', fontsize=18)
for axis_name in ('x', 'y'):
    ax1.tick_params(axis=axis_name, labelsize=18)
ax1.set_ylim(0, y_max)

# Legend centered at the top of the axes
ax1.legend(loc='upper center', bbox_to_anchor=(0.5, 0.99), ncol=4, fontsize=15)

# Grid on both axes
ax1.grid(True)

plt.tight_layout()
plt.show()

In [None]:
from matplotlib.gridspec import GridSpec

# Seaborn theme for the following figures
sns.set(style="whitegrid")

# Upper y limit: mean plus four standard deviations of the run time
run_time = cnegativo_df['Time (s)']
y_max = run_time.mean() + 4 * run_time.std()

# Single-panel figure laid out through a 1x1 GridSpec
fig = plt.figure(figsize=(24, 8))
gs = GridSpec(1, 1, figure=fig)
ax1 = fig.add_subplot(gs[0, 0])

# Run time per context, one bar per model (hue), no error bars.
# NOTE(review): `ci` is deprecated in seaborn >= 0.12 in favor of `errorbar=None`;
# left unchanged because the installed seaborn version is not visible here.
sns.barplot(
    data=cnegativo_df, x='Context', y='Time (s)', hue='Model',
    palette='Blues', alpha=0.7, ci=None, ax=ax1,
)
ax1.set_title('Average Performance Time for Different Contexts', fontsize=30)
ax1.set_xlabel('', fontsize=18)
ax1.set_ylabel('Average Time (s)', fontsize=18)
for axis_name in ('x', 'y'):
    ax1.tick_params(axis=axis_name, labelsize=18)
ax1.set_ylim(0, y_max)

# Legend centered at the top of the axes
ax1.legend(loc='upper center', bbox_to_anchor=(0.5, 0.99), ncol=5, fontsize=15)

# Grid on both axes
ax1.grid(True)

plt.tight_layout()
plt.show()

In [None]:
# Pairwise correlation between the F1 scores, global accuracy/std and run time.
# NOTE(review): only labels[0..2] are included; the fourth class in `labels`
# is left out — confirm whether that omission is intentional.
metric_cols = [
    'weighted avg_f1-score',
    f'{labels[0]}_f1-score',
    f'{labels[1]}_f1-score',
    f'{labels[2]}_f1-score',
    'Accuracy_Global',
    'Std_Global',
    'Time (s)',
]
correlation_matrix = cnegativo_df[metric_cols].corr()

# Seaborn theme
sns.set(style="whitegrid")

# Annotated heatmap of the correlation matrix
plt.figure(figsize=(12, 10))
heatmap = sns.heatmap(
    correlation_matrix,
    annot=True, fmt='.2f',
    cmap='Blues',
    linewidths=0.5, linecolor='black',
)

plt.title('Correlation Matrix of Performance Metrics', fontsize=20)

plt.tight_layout()
plt.show()


### Confusion Matrix - Best Model

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion-matrix data for model '1f8f1a43-c705-4c45-8894-7ed7f610917c'.
# prediction_counts[i][j] is the number of samples whose true class is
# class_order[i] and whose predicted class is class_order[j].
class_order = [
    'Desprestigiar Víctima',
    'Desprestigiar Acto',
    'Insultos',
    'Desprestigiar Deportista Autora',
]
prediction_counts = [
    [42, 41, 20, 68],
    [6, 9, 0, 9],
    [22, 20, 39, 63],
    [1, 3, 1, 8],
]

# Expand the count table into parallel true/predicted label lists
y_true = [
    true_class
    for true_class, row in zip(class_order, prediction_counts)
    for _ in range(sum(row))
]
y_pred = [
    predicted_class
    for row in prediction_counts
    for predicted_class, count in zip(class_order, row)
    for _ in range(count)
]

# Build the matrix with rows/columns ordered as in `labels`
cm = confusion_matrix(y_true, y_pred, labels=labels)

# Draw the matrix as an annotated heatmap without a colour bar
plt.figure(figsize=(32, 30))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    cbar=False,
    annot_kws={"size": 52},
    xticklabels=labels,
    yticklabels=labels,
)

plt.xlabel('Predicted label', fontsize=30)
plt.ylabel('True label', fontsize=30)
plt.xticks(rotation=45, fontsize=46)
plt.yticks(rotation=0, fontsize=46)

plt.tight_layout()

# Export the figure as PDF and as a PNG with transparent background
plt.savefig("../../../IMAGES/Contenido Negativo/GenAI/GenAI_Experiments in _ContenidoNegativo_ best_model.pdf", format='pdf')
plt.savefig("../../../IMAGES/Contenido Negativo/GenAI/GenAI_Experiments in _ContenidoNegativo_ best_model.png", format='png', transparent=True)

plt.show()


## Insultos

In [98]:
# Class names of the "Insultos" task and the colour paired with each one
# (the per-class plots draw labels[i] with colors[i])
labels = [
    'Sexistas/misóginos',
    'Genéricos',
    'Deseo de Dañar',
]
colors = [
    '#FFAF45',
    '#FB6D48',
    '#D74B76',
]

In [None]:
# Keep only the rows whose "Type" is "insultos".
# .copy() makes insultos_df an independent DataFrame: the cells below rename
# its columns, assign to it and sort it in place, which on a plain boolean
# filter of df raises pandas' SettingWithCopyWarning.
insultos_df = df[df["Type"] == "insultos"].copy()
len(insultos_df)

In [None]:
# Replace the numeric class prefixes of the per-class metric columns
# (e.g. '0_precision') with the class names (e.g. 'Sexistas/misóginos_precision').
mapping = {'Sexistas/misóginos': 0, 'Genéricos': 1, 'Deseo de Dañar': 2}

# Metric suffixes that exist once per class
metric_suffixes = ('precision', 'recall', 'f1-score', 'support')

# old column name -> new column name, for every class/metric combination
rename_dict = {
    f'{class_id}_{metric}': f'{class_name}_{metric}'
    for class_name, class_id in mapping.items()
    for metric in metric_suffixes
}

# Rebind to the renamed frame instead of renaming in place: insultos_df was
# obtained by filtering df, and in-place edits of such a frame trigger
# pandas' SettingWithCopyWarning.
insultos_df = insultos_df.rename(columns=rename_dict)

### Windows

In [None]:
# Parse 'Timestamp' as datetimes, order the experiments chronologically and
# renumber the rows 0..n-1 (the plots below use the row index as x position).
#
# The timestamps are read from insultos_df itself. Reading them from the
# unfiltered df only works through index alignment, which silently pairs
# rows with the wrong timestamps once the index has been reset (for example
# when this cell is executed a second time).
# Building a new frame also avoids in-place edits of a filtered DataFrame.
insultos_df = (
    insultos_df
    .assign(Timestamp=pd.to_datetime(insultos_df['Timestamp']))
    .sort_values('Timestamp')
    .reset_index(drop=True)
)

In [None]:
# Metrics to average: global accuracy/std, runtime, weighted averages and
# the F1 score of the first three classes in `labels`
numeric_cols = [
    'Accuracy_Global', 'Std_Global', 'Time (s)',
    'weighted avg_precision', 'weighted avg_recall', 'weighted avg_f1-score',
    *(f'{class_name}_f1-score' for class_name in labels[:3]),
]

# Mean of every metric for each value of 'Context'
context_groups = insultos_df.groupby(['Context'])
df_grouped_context = context_groups[numeric_cols].mean().reset_index()

df_grouped_context


In [None]:
# Metrics to average: global accuracy/std, runtime, weighted averages and
# the F1 score of the first three classes in `labels`
per_class_f1_cols = [f'{class_name}_f1-score' for class_name in labels[:3]]
numeric_cols = [
    'Accuracy_Global',
    'Std_Global',
    'Time (s)',
    'weighted avg_precision',
    'weighted avg_recall',
    'weighted avg_f1-score',
] + per_class_f1_cols

# Mean of every metric for each value of 'Prompt'
prompt_means = insultos_df.groupby(['Prompt'])[numeric_cols].mean()
df_grouped_prompt = prompt_means.reset_index()

df_grouped_prompt


In [None]:
# Metrics to average: global accuracy/std, runtime, weighted averages and
# the F1 score of the first three classes in `labels`
numeric_cols = (
    ['Accuracy_Global', 'Std_Global', 'Time (s)']
    + ['weighted avg_precision', 'weighted avg_recall', 'weighted avg_f1-score']
    + [f'{class_name}_f1-score' for class_name in labels[:3]]
)

# Mean of every metric for each (Context, Prompt) combination.
# NOTE(review): this rebinds df_grouped_prompt, replacing the prompt-only
# table computed in the previous cell - confirm that is intended.
grouping_keys = ['Context', 'Prompt']
df_grouped_prompt = insultos_df.groupby(grouping_keys)[numeric_cols].mean().reset_index()

df_grouped_prompt


#### F1-Score

In [None]:
# Timestamps at which the F1-score plots draw a vertical marker
specific_dates = pd.to_datetime([
    '2024-08-10 22:17:29',
    '2024-08-10 22:28:59',
    '2024-08-10 22:41:58',
    '2024-08-10 22:54:20',
    '2024-08-10 23:06:20',
    '2024-08-10 23:19:04',
    '2024-08-10 23:30:54',
    '2024-08-10 23:43:35',
    '2024-08-10 23:54:36',
])

# Index values of the rows whose 'Timestamp' equals one of those dates
is_marked = insultos_df['Timestamp'].isin(specific_dates)
specific_indices = insultos_df.loc[is_marked].index.tolist()
specific_indices

In [None]:
# Weighted-average F1 score of every "Insultos" experiment (x = row index of
# insultos_df), with vertical markers and a linear trend line.
plt.figure(figsize=(24, 10))
ax_weighted = plt.gca()

experiment_idx = insultos_df.index
weighted_f1 = insultos_df['weighted avg_f1-score']

# Vertical markers: solid and thicker at index 32 (the "best window"
# according to the original comment), dashed everywhere else
for _, marker_pos in zip(specific_dates, specific_indices):
    highlighted = marker_pos == 32
    ax_weighted.axvline(
        x=marker_pos,
        color='#1a2e49',
        linestyle='-' if highlighted else '--',
        linewidth=3 if highlighted else 2,
    )

# F1 score as a thick line with round markers
ax_weighted.plot(
    experiment_idx,
    weighted_f1,
    color='black',
    alpha=0.7,
    linestyle='-',
    linewidth=9,
    marker='o',
    markersize=18,
    label='Weighted Avg. F1-Score',
)

# Least-squares line through the scores; its R² goes into the legend
slope, intercept, r_value, p_value, std_err = linregress(experiment_idx, weighted_f1)
trend = intercept + slope * experiment_idx
ax_weighted.plot(
    experiment_idx,
    trend,
    color='darkred',
    alpha=0.5,
    linestyle='-',
    linewidth=2,
    label=f'Trend Line (R² = {r_value**2:.2f})',
)

ax_weighted.legend(loc='upper center', bbox_to_anchor=(0.5, 0.99), ncol=2, fontsize=22)

# Title, axis labels, fixed y-range and tick sizes
ax_weighted.set_title('Weighted Avg. F1-Score for "Insultos"', fontsize=42)
ax_weighted.set_xlabel('Experiments', fontsize=30)
ax_weighted.set_ylabel('F1-Score', fontsize=30)
ax_weighted.set_ylim(0.3, 0.8)
for axis_name in ('x', 'y'):
    ax_weighted.tick_params(axis=axis_name, labelsize=22)

# Dashed horizontal grid lines only
ax_weighted.grid(axis='y', linestyle='--', alpha=0.7)

# Hide all four spines for a clean look
for side in ('top', 'right', 'left', 'bottom'):
    ax_weighted.spines[side].set_visible(False)

plt.tight_layout()

# Export the figure as PDF
plt.savefig("../../../IMAGES/Insultos/GenAI/GenAI_Experiments in _Insultos_ weighted.pdf", format='pdf')

plt.show()


In [None]:
# F1 score of every "Insultos" experiment (x = row index of insultos_df):
# the weighted average plus one line and one linear trend per class.
plt.figure(figsize=(24, 18))
ax_classes = plt.gca()

experiment_idx = insultos_df.index

# Vertical markers: solid and thicker at index 32 (the "best window"
# according to the original comment), dashed everywhere else
for _, marker_pos in zip(specific_dates, specific_indices):
    highlighted = marker_pos == 32
    ax_classes.axvline(
        x=marker_pos,
        color='#1a2e49',
        linestyle='-' if highlighted else '--',
        linewidth=3 if highlighted else 2,
    )

# Weighted average first, then one slightly thinner line per class
ax_classes.plot(
    experiment_idx,
    insultos_df['weighted avg_f1-score'],
    color='black',
    alpha=0.7,
    linestyle='-',
    linewidth=9,
    marker='o',
    markersize=18,
    label="Weighted Avg.",
)
for class_name, class_color in zip(labels, colors):
    ax_classes.plot(
        experiment_idx,
        insultos_df[f'{class_name}_f1-score'],
        color=class_color,
        alpha=0.7,
        linestyle='-',
        linewidth=7,
        marker='o',
        markersize=16,
        label=class_name,
    )

# Unlabelled least-squares trend line per class, in the class colour
for class_name, class_color in zip(labels, colors):
    slope, intercept, r_value, p_value, std_err = linregress(
        experiment_idx, insultos_df[f'{class_name}_f1-score']
    )
    trend = intercept + slope * experiment_idx
    ax_classes.plot(experiment_idx, trend, color=class_color, alpha=0.5, linestyle='-', linewidth=2)

ax_classes.legend(loc='upper center', bbox_to_anchor=(0.5, 0.99), ncol=5, fontsize=22)

# Title, axis labels, fixed y-range and tick sizes
ax_classes.set_title('F1-Score per Class for "Insultos"', fontsize=42)
ax_classes.set_xlabel('Experiments', fontsize=30)
ax_classes.set_ylabel('F1-Score', fontsize=30)
ax_classes.set_ylim(0, 0.8)
for axis_name in ('x', 'y'):
    ax_classes.tick_params(axis=axis_name, labelsize=22)

# Dashed horizontal grid lines only
ax_classes.grid(axis='y', linestyle='--', alpha=0.7)

# Hide all four spines for a clean look
for side in ('top', 'right', 'left', 'bottom'):
    ax_classes.spines[side].set_visible(False)

plt.tight_layout()

# Export the figure as PDF and as a PNG with transparent background
plt.savefig("../../../IMAGES/Insultos/GenAI/GenAI_Experiments in _Insultos_f1score.pdf", format='pdf')
plt.savefig("../../../IMAGES/Insultos/GenAI/GenAI_Experiments in _Insultos_f1score.png", format='png', transparent=True)

plt.show()


### Performance (Time)

In [None]:
# Grouped bar chart of "Time (s)" per prompt, one bar colour per model,
# for the rows held in insultos_df.
from matplotlib.gridspec import GridSpec

sns.set(style="whitegrid")

# Upper y-limit derived from the data: mean plus four standard deviations
time_values = insultos_df['Time (s)']
y_max = time_values.mean() + 4 * time_values.std()

# Wide figure holding a single axes laid out through a 1x1 GridSpec
fig = plt.figure(figsize=(24, 8))
gs = GridSpec(nrows=1, ncols=1, figure=fig)
ax1 = fig.add_subplot(gs[0, 0])

# Bars are aggregated by seaborn's default estimator; ci=None disables error bars.
# NOTE(review): newer seaborn releases spell this `errorbar=None` - confirm
# against the installed version before changing it.
sns.barplot(
    data=insultos_df,
    x='Prompt',
    y='Time (s)',
    hue='Model',
    palette='flare',
    alpha=0.7,
    ci=None,
    ax=ax1,
)

# Titles, axis labels and tick sizes
ax1.set_title('Average Performance Time for Different Prompts', fontsize=30)
ax1.set_xlabel('', fontsize=18)
ax1.set_ylabel('Average Time (s)', fontsize=18)
for axis_name in ('x', 'y'):
    ax1.tick_params(axis=axis_name, labelsize=18)
ax1.set_ylim(0, y_max)

# Horizontal legend anchored at the top centre of the axes
ax1.legend(loc='upper center', bbox_to_anchor=(0.5, 0.99), ncol=4, fontsize=15)

# Grid lines for readability
ax1.grid(True)

# Render
plt.tight_layout()
plt.show()

In [None]:
# Grouped bar chart of "Time (s)" per context, one bar colour per model,
# for the rows held in insultos_df.
from matplotlib.gridspec import GridSpec

sns.set(style="whitegrid")

# Upper y-limit derived from the data: mean plus four standard deviations
time_values = insultos_df['Time (s)']
y_max = time_values.mean() + 4 * time_values.std()

# Wide figure holding a single axes laid out through a 1x1 GridSpec
fig = plt.figure(figsize=(24, 8))
gs = GridSpec(nrows=1, ncols=1, figure=fig)
ax1 = fig.add_subplot(gs[0, 0])

# Bars are aggregated by seaborn's default estimator; ci=None disables error bars.
# NOTE(review): newer seaborn releases spell this `errorbar=None` - confirm
# against the installed version before changing it.
sns.barplot(
    data=insultos_df,
    x='Context',
    y='Time (s)',
    hue='Model',
    palette='flare',
    alpha=0.7,
    ci=None,
    ax=ax1,
)

# Titles, axis labels and tick sizes
ax1.set_title('Average Performance Time for Different Contexts', fontsize=30)
ax1.set_xlabel('', fontsize=18)
ax1.set_ylabel('Average Time (s)', fontsize=18)
for axis_name in ('x', 'y'):
    ax1.tick_params(axis=axis_name, labelsize=18)
ax1.set_ylim(0, y_max)

# Horizontal legend anchored at the top centre of the axes
ax1.legend(loc='upper center', bbox_to_anchor=(0.5, 0.99), ncol=5, fontsize=15)

# Grid lines for readability
ax1.grid(True)

# Render
plt.tight_layout()
plt.show()

In [None]:
# Heatmap of the pairwise correlations between the performance metrics
# stored in insultos_df.
sns.set(style="whitegrid")

# Columns to correlate: weighted F1, the F1 of the first three classes in
# `labels`, global accuracy, its standard deviation and the runtime
metric_columns = (
    ['weighted avg_f1-score']
    + [f'{class_name}_f1-score' for class_name in labels[:3]]
    + ['Accuracy_Global', 'Std_Global', 'Time (s)']
)
correlation_matrix = insultos_df[metric_columns].corr()

# Annotated heatmap with two-decimal cell values and black cell borders
plt.figure(figsize=(12, 10))
heatmap = sns.heatmap(
    correlation_matrix,
    annot=True,
    fmt='.2f',
    cmap='flare',
    linewidths=0.5,
    linecolor='black',
)

plt.title('Correlation Matrix of Performance Metrics', fontsize=20)

# Render
plt.tight_layout()
plt.show()


### Confusion Matrix - Best Model

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion-matrix data for model '50764c95-6e1e-4d74-8bfc-1100950944d0'.
# prediction_counts[i][j] is the number of samples whose true class is
# class_order[i] and whose predicted class is class_order[j].
class_order = ['Sexistas/misóginos', 'Genéricos', 'Deseo de Dañar']
prediction_counts = [
    [4, 5, 3],
    [43, 94, 52],
    [1, 1, 3],
]

# Expand the count table into parallel true/predicted label lists
y_true = [
    true_class
    for true_class, row in zip(class_order, prediction_counts)
    for _ in range(sum(row))
]
y_pred = [
    predicted_class
    for row in prediction_counts
    for predicted_class, count in zip(class_order, row)
    for _ in range(count)
]

# Build the matrix with rows/columns ordered as in `labels`
cm = confusion_matrix(y_true, y_pred, labels=labels)

# Draw the matrix as an annotated heatmap without a colour bar
plt.figure(figsize=(24, 22))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='flare',
    cbar=False,
    annot_kws={"size": 52},
    xticklabels=labels,
    yticklabels=labels,
)

plt.xlabel('Predicted label', fontsize=30)
plt.ylabel('True label', fontsize=30)
plt.xticks(rotation=45, fontsize=46)
plt.yticks(rotation=0, fontsize=46)

plt.tight_layout()

# Export the figure as PDF and as a PNG with transparent background
plt.savefig("../../../IMAGES/Insultos/GenAI/GenAI_Experiments in _Insultos_ best_model.pdf", format='pdf')
plt.savefig("../../../IMAGES/Insultos/GenAI/GenAI_Experiments in _Insultos_ best_model.png", format='png', transparent=True)

plt.show()


## Performance

In [None]:
# Summary table with one row per task: total and mean runtime in hours and
# the number of 'Time (s)' entries.

# Display names for the raw values of the 'Type' column
task_name_mapping = {
    "analisis_general": "Análisis General",
    "contenido_negativo": "Contenido Negativo",
    "insultos": "Insultos",
}

# 'Time (s)' grouped by task type; sum, mean and count are taken from the
# same grouping
time_by_type = df.groupby(['Type'])['Time (s)']
df_grouped_sum = time_by_type.sum().reset_index()
df_grouped_mean = time_by_type.mean().reset_index()
df_grouped_count = time_by_type.count().reset_index()

# Join the three aggregates on 'Type' into a single table
df_grouped = (
    df_grouped_sum
    .merge(df_grouped_mean, on='Type', suffixes=('_Total', '_Mean'))
    .merge(df_grouped_count, on='Type')
)

# Positional rename: type, sum, mean, count
df_grouped.columns = ['Task', 'Total Time (s)', 'Mean Time (s)', 'Number of Experiments']

# Seconds -> hours, rounded to three decimals
seconds_per_hour = 3600
df_grouped['Total Time (h)'] = (df_grouped['Total Time (s)'] / seconds_per_hour).round(3)
df_grouped['Mean Time (h)'] = (df_grouped['Mean Time (s)'] / seconds_per_hour).round(3)

# Swap the raw type identifiers for their display names
df_grouped['Task'] = df_grouped['Task'].replace(task_name_mapping)

# Keep only the display columns (the per-second columns are dropped)
display_columns = ['Task', 'Total Time (h)', 'Mean Time (h)', 'Number of Experiments']
df_grouped = df_grouped[display_columns]

df_grouped


In [None]:
# Bar chart of the number of experiments per classification task,
# read from df_grouped.
sns.set(style="whitegrid")

# Fixed upper limit of the y-axis (per the original note, a maximum shared
# between the ML, DL and GenAI notebooks)
y_max = 750

# Single wide axes
fig, ax = plt.subplots(figsize=(24, 8))

# One bar per task, bar height = number of experiments
sns.barplot(ax=ax, x='Task', y='Number of Experiments', data=df_grouped, palette='bone', alpha=0.9)

# Title and axis labels. The y-axis shows a count, so it is labelled as such
# (it was previously labelled 'Time (h)', which did not match the data).
ax.set_title('Total Number of Experiments per Classification Task in GenAI Experiments', fontsize=30)
ax.set_xlabel('', fontsize=18)
ax.set_ylabel('Number of Experiments', fontsize=18)
ax.tick_params(axis='x', labelsize=18)
ax.tick_params(axis='y', labelsize=18)
ax.set_ylim(0, y_max)

# Grid lines for readability
ax.grid(True)

# Render
plt.tight_layout()
plt.show()

In [None]:
# Bar chart of the total runtime in hours per classification task,
# read from df_grouped.
sns.set(style="whitegrid")

# Fixed upper limit of the y-axis (per the original note, a maximum shared
# between the ML, DL and GenAI notebooks)
y_max = 75

fig, ax = plt.subplots(figsize=(24, 8))

# One bar per task, bar height = 'Total Time (h)'
sns.barplot(
    data=df_grouped,
    x='Task',
    y='Total Time (h)',
    palette='bone',
    alpha=0.9,
    ax=ax,
)

# Title, axis labels, tick sizes and y-range
ax.set_title('Total Performance Time per Classification Task in GenAI Experiments', fontsize=30)
ax.set_xlabel('', fontsize=18)
ax.set_ylabel('Time (h)', fontsize=18)
for axis_name in ('x', 'y'):
    ax.tick_params(axis=axis_name, labelsize=18)
ax.set_ylim(0, y_max)

# Grid lines for readability
ax.grid(True)

# Render
plt.tight_layout()
plt.show()

In [None]:
# Publication version of the total-runtime chart: one colour per task, the
# value printed above each bar, exported as PDF and PNG.
sns.set(style="whitegrid")

# Fixed upper limit of the y-axis (per the original note, a maximum shared
# between the ML, DL and GenAI notebooks)
y_max = 75

# Colour assigned to each task display name
task_colors = {
    'Análisis General': '#8E809E',
    'Contenido Negativo': '#6AA6D4',
    'Insultos': '#E38A83',
}

# Colours in the row order of df_grouped; tasks without an entry fall back
# to light grey
fallback_color = '#CCCCCC'
bar_colors = [task_colors.get(task, fallback_color) for task in df_grouped['Task']]

fig, ax = plt.subplots(figsize=(24, 8))

# One bar per task, coloured through the list built above
sns.barplot(
    data=df_grouped,
    x='Task',
    y='Total Time (h)',
    palette=bar_colors,
    alpha=0.9,
    ax=ax,
)

# Axis labels, tick sizes and y-range (this figure carries no title)
ax.set_xlabel('', fontsize=20)
ax.set_ylabel('Time (h)', fontsize=30)
for axis_name in ('x', 'y'):
    ax.tick_params(axis=axis_name, labelsize=34)
ax.set_ylim(0, y_max)

# Print each bar's height just above it, in the colour paired with that bar
for patch, patch_color in zip(ax.patches, bar_colors):
    bar_height = patch.get_height()
    bar_center = patch.get_x() + patch.get_width() / 2
    ax.text(
        bar_center,
        bar_height + 1,
        f'{bar_height:.3f}',
        ha='center',
        va='bottom',
        fontsize=34,
        fontweight='bold',
        color=patch_color,
    )

# Dashed horizontal grid lines only
ax.grid(axis='y', linestyle='--', alpha=0.7)

# Hide all four spines for a clean look
for side in ('top', 'right', 'left', 'bottom'):
    ax.spines[side].set_visible(False)

# Fit labels inside the figure before exporting
plt.tight_layout()

# Export the figure as PDF and as a PNG with transparent background
plt.savefig("../../../IMAGES/GenAI_total_performance_time.pdf", format='pdf')
plt.savefig("../../../IMAGES/GenAI_total_performance_time.png", format='png', transparent=True)

plt.show()

In [None]:
# Bar chart of the mean runtime in hours per classification task,
# read from df_grouped.
sns.set(style="whitegrid")

# Fixed upper limit of the y-axis (per the original note, a maximum shared
# between the ML, DL and GenAI notebooks)
y_max = 6

fig, ax = plt.subplots(figsize=(24, 8))

# One bar per task, bar height = 'Mean Time (h)'
sns.barplot(
    data=df_grouped,
    x='Task',
    y='Mean Time (h)',
    palette='bone',
    alpha=0.9,
    ax=ax,
)

# Title, axis labels, tick sizes and y-range
ax.set_title('Mean Performance Time per Classification Task in GenAI Experiments', fontsize=30)
ax.set_xlabel('', fontsize=18)
ax.set_ylabel('Time (h)', fontsize=18)
for axis_name in ('x', 'y'):
    ax.tick_params(axis=axis_name, labelsize=18)
ax.set_ylim(0, y_max)

# Grid lines for readability
ax.grid(True)

# Render
plt.tight_layout()
plt.show()