<a href="https://colab.research.google.com/github/rmaacario/spatial-semantics-translation/blob/main/Statistics_and_Graphs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#%load_ext cudf.pandas

In [None]:
import os
import pandas as pd
import numpy as np
import scipy.stats as stats
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
import scipy.stats as stats
from matplotlib.colors import LinearSegmentedColormap
import plotnine as p
from plotnine import aes
from IPython.display import display
from plotnine import stat_summary, ggplot, aes, geom_tile, scale_fill_gradientn, labs
from scipy.stats import mannwhitneyu

In [None]:
def compute_statistics(scores):
    """Compute box-plot style summary statistics for a collection of scores.

    Non-numeric entries are discarded. The median, quartiles and whiskers are
    computed from every numeric value; values falling outside the whisker
    range are reported as outliers and excluded from the mean, standard
    deviation and confidence interval.

    Args:
        scores: Iterable of raw score values. Python ints/floats and NumPy
            integer/floating scalars are kept; anything else (strings,
            ``None``, ...) is ignored.

    Returns:
        A 9-tuple ``(mean_score, median, q1, q3, whisker_low, whisker_high,
        outliers, std_dev, ci)``. ``outliers`` is a list of floats and ``ci``
        is the ``(low, high)`` pair returned by ``stats.t.interval`` at the
        0.95 level.

    Raises:
        ValueError: If ``scores`` contains no numeric value.
    """
    # np.integer / np.floating cover NumPy scalars (e.g. np.int64), which are
    # not instances of the built-in int and would otherwise be dropped.
    numeric_scores = [
        float(score)
        for score in scores
        if isinstance(score, (int, float, np.integer, np.floating))
    ]
    if not numeric_scores:
        raise ValueError("compute_statistics() requires at least one numeric score")

    median = np.median(numeric_scores)
    q1, q3 = np.percentile(numeric_scores, [25, 75])
    iqr = q3 - q1
    # Whiskers are the 1.5 * IQR fences, clipped to the observed data range.
    whisker_low = max(np.min(numeric_scores), q1 - 1.5 * iqr)
    whisker_high = min(np.max(numeric_scores), q3 + 1.5 * iqr)

    # Single pass: each value is either inside the whiskers or an outlier.
    filtered_scores = []
    outliers = []
    for score in numeric_scores:
        if whisker_low <= score <= whisker_high:
            filtered_scores.append(score)
        else:
            outliers.append(score)

    mean_score = np.mean(filtered_scores)
    # np.std defaults to the population formula (ddof=0), whereas stats.sem
    # below uses ddof=1.
    std_dev = np.std(filtered_scores)
    ci = stats.t.interval(
        0.95,
        len(filtered_scores) - 1,
        loc=mean_score,
        scale=stats.sem(filtered_scores),
    )

    return mean_score, median, q1, q3, whisker_low, whisker_high, outliers, std_dev, ci


def analyze_scores(folder_path, score_labels, model_labels):
    """Collect per-model summary statistics for each metric from a folder of CSVs.

    Every ``*.csv`` file directly inside ``folder_path`` is read. The header of
    the fourth column (after dropping an ``'Unnamed: 0'`` column, if present)
    is taken as the model identifier. For each requested score column found
    in the file, ``compute_statistics`` is applied to its non-missing values.

    Args:
        folder_path: Directory containing the CSV result files.
        score_labels: Iterable (typically a dict) whose keys are the score
            column names to look for.
        model_labels: Mapping from model identifier to display label; an
            identifier without an entry is used as-is.

    Returns:
        Dict mapping each score column name to a list of tuples
        ``(model_label, mean, median, q1, q3, whisker_low, whisker_high,
        outliers, std_dev, ci)``, one per file that provided that score.
        Files are processed in sorted filename order.
    """
    scores_dict = {score_name: [] for score_name in score_labels}

    # os.listdir() returns entries in arbitrary order; sort so that the rows
    # (and every plot built from them) come out in a reproducible order.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith('.csv'):
            continue

        file_path = os.path.join(folder_path, filename)
        df = pd.read_csv(file_path).drop(columns=['Unnamed: 0'], errors='ignore')

        # The model identifier is read from the header of the fourth column.
        model_name = df.columns[3] if len(df.columns) > 3 else None
        if not model_name:
            print(f"Skipping file {filename}: Model name not found.")
            continue

        model_name_label = model_labels.get(model_name, model_name)

        for score_name in score_labels:
            if score_name not in df.columns:
                print(f"Skipping score {score_name} in file {filename}: Score column not found.")
                continue

            scores = df[score_name].dropna().tolist()
            if not scores:
                # A column with only missing values has nothing to summarise;
                # skip it instead of letting compute_statistics fail.
                print(f"Skipping score {score_name} in file {filename}: Score column is empty.")
                continue

            statistics = compute_statistics(scores)
            scores_dict[score_name].append((model_name_label, *statistics))

    return scores_dict

In [None]:
import pandas as pd
from scipy.stats import ttest_ind

def compare_groups(scores_dict, score_labels, model_labels, llms, nmt):
    """Run Welch's t-test on the 'Mean' values of every (LLM, NMT) model pair.

    Args:
        scores_dict: Mapping of score name to a list of 10-field rows
            ``(Model, Mean, Median, Q1, Q3, Whisker Low, Whisker High,
            Outliers, Standard Deviation, Confidence Interval)``.
        score_labels: Not used by this function.
        model_labels: Not used by this function.
        llms: Model labels used as the first member of each pair.
        nmt: Model labels used as the second member of each pair.

    Returns:
        ``{score_name: {(llm_model, nmt_model): p_value}}``. The p-value is
        ``None`` when either model has fewer than two rows, when either
        model's means span no more than 1e-5, or when the computation raised.

    NOTE(review): the samples compared are the 'Mean' entries of the rows
    carrying each model label, so a model that contributes a single row can
    only ever produce ``None`` - confirm this is the intended comparison.
    """
    stat_columns = ['Model', 'Mean', 'Median', 'Q1', 'Q3',
                    'Whisker Low', 'Whisker High', 'Outliers',
                    'Standard Deviation', 'Confidence Interval']
    p_values = {}

    for score_name, score_data in scores_dict.items():
        summary = pd.DataFrame(score_data, columns=stat_columns)

        def means_for(model):
            # All 'Mean' entries recorded for one model label, in row order.
            return summary.loc[summary['Model'] == model, 'Mean'].tolist()

        pair_results = {}
        p_values[score_name] = pair_results

        for llm_model in llms:
            for nmt_model in nmt:
                llm_means = means_for(llm_model)
                nmt_means = means_for(nmt_model)

                try:
                    # Both samples need at least two points and a non-trivial spread.
                    testable = (
                        len(llm_means) > 1
                        and len(nmt_means) > 1
                        and max(llm_means) - min(llm_means) > 1e-5
                        and max(nmt_means) - min(nmt_means) > 1e-5
                    )
                    if testable:
                        _, outcome = ttest_ind(llm_means, nmt_means, equal_var=False)
                    else:
                        outcome = None
                except Exception as e:
                    print(f"Error computing p-value for {score_name}: {e}")
                    outcome = None

                pair_results[(llm_model, nmt_model)] = outcome

    return p_values


In [None]:
import pandas as pd
import numpy as np
import plotnine as p

def plot_boxplots(scores_dict, score_labels, model_labels, max_outliers=3, max_confidence_intervals=3):
    """Draw one plotnine box plot per score and return the plotted data.

    For each score, the per-model summary rows are melted to long form and a
    box plot of ``Value`` by ``Model`` is displayed. Note that each box is
    therefore drawn over the seven summary values listed in
    ``plotted_metrics`` for that model, not over the raw scores.

    Args:
        scores_dict: Mapping of score name to a list of 10-field rows
            ``(Model, Mean, Median, Q1, Q3, Whisker Low, Whisker High,
            Outliers, Standard Deviation, Confidence Interval)``.
        score_labels: Mapping of score name to the y-axis label; must contain
            every key of ``scores_dict``.
        model_labels: Not used by this function.
        max_outliers: Not used by this function.
        max_confidence_intervals: Not used by this function.

    Returns:
        A DataFrame with columns ``Model``, ``Metric`` and ``Value``
        concatenating the long-form data of every score. If ``scores_dict``
        is empty, an empty DataFrame with those columns is returned.
    """
    stat_columns = ['Model', 'Mean', 'Median', 'Q1', 'Q3',
                    'Whisker Low', 'Whisker High', 'Outliers',
                    'Standard Deviation', 'Confidence Interval']
    plotted_metrics = ['Mean', 'Median', 'Q1', 'Q3',
                       'Whisker Low', 'Whisker High',
                       'Standard Deviation']
    # Display labels coloured as LLMs; every other model is drawn in gray.
    llm_names = ['Gemma-7B', 'LLaMA-2-7B', 'LLaMA-2-13B', 'LLaMA-3-8B', 'Mistral-7B', 'Mixtral-8x7B']

    melted_dfs = []  # Long-form frame of every score, concatenated at the end
    for score_name, score_data in scores_dict.items():
        print(f"Processing score: {score_name}")
        print("Score data:")
        print(score_data)

        df = pd.DataFrame(score_data, columns=stat_columns)
        print("DataFrame:")
        print(df)

        # Melt the DataFrame to have all metrics in one column
        melted_df = pd.melt(df, id_vars=['Model'], value_vars=plotted_metrics,
                            var_name='Metric', value_name='Value')

        print("Melted DataFrame:")
        print(melted_df)

        melted_dfs.append(melted_df)

        colors = {model: 'lightblue' if model in llm_names else 'lightgray'
                  for model in melted_df['Model'].unique()}

        # Pad the y-axis by 10% of the value range on each side
        y_min = melted_df['Value'].min()
        y_max = melted_df['Value'].max()
        y_padding = (y_max - y_min) * 0.1

        # NOTE(review): override_aes below is given colour -> label pairs and
        # the boxplot layer sets show_legend=False; confirm the guide has any
        # effect on the rendered figure.
        plot = (
          p.ggplot(melted_df, p.aes(x='Model', y='Value', fill='factor(Model)')) +
          p.geom_boxplot(stat='boxplot', colour="black", show_legend=False) +
          p.stat_summary(geom="point", shape='o', size=3, color="red", fill="white") +
          p.labs(x="Model", y=score_labels[score_name]) +
          p.theme_gray() +
          p.scale_fill_manual(values=colors) +
          p.theme(axis_text_x=p.element_text(angle=45, vjust=1, hjust=-1, size=8)) +
          p.ylim(y_min - y_padding, y_max + y_padding) +  # Set y-axis limits
          p.guides(fill=p.guide_legend(title="Models:", override_aes={'lightblue': 'LLMs', 'lightgray': 'NMT'}))
        )

        display(plot)

    if not melted_dfs:
        print("No melted DataFrames found. Check input data.")
        # pd.concat() raises ValueError on an empty list, so return an empty
        # frame with the same columns instead of crashing after the warning.
        return pd.DataFrame(columns=['Model', 'Metric', 'Value'])

    # Concatenate all melted DataFrames into a single DataFrame
    return pd.concat(melted_dfs, ignore_index=True)

In [None]:
import numpy as np
import pandas as pd
from plotnine import *

def plot_heatmap(p_values, llm_models, nmt_models, metric):
    """Render an LLM-by-NMT grid of p-values as a plotnine tile heat map.

    Args:
        p_values: Mapping keyed by ``(llm_model, nmt_model)`` tuples; pairs
            that are missing are plotted as NaN.
        llm_models: Model labels placed on the y-axis.
        nmt_models: Model labels placed on the x-axis.
        metric: Name inserted into the plot title.

    NOTE(review): a later cell defines another ``plot_heatmap`` with the same
    signature (seaborn-based); once both cells have run, the later definition
    is the one bound to this name.
    """
    grid = np.full((len(llm_models), len(nmt_models)), np.nan)
    for row, llm_model in enumerate(llm_models):
        for col, nmt_model in enumerate(nmt_models):
            grid[row, col] = p_values.get((llm_model, nmt_model), np.nan)

    wide = pd.DataFrame(grid, index=llm_models, columns=nmt_models)
    # Long form: one row per cell with columns 'index', 'variable', 'value'.
    long_form = wide.melt(ignore_index=False).reset_index()

    # Colour stops spread evenly over the [0, 1] p-value range
    palette = ["blue", "green", "yellow", "orange", "red"]
    fill_scale = scale_fill_gradientn(
        colors=palette,
        limits=(0, 1),
        breaks=np.linspace(0, 1, len(palette)),
        labels=["0", "0.25", "0.5", "0.75", "1"],
        na_value='gray',
    )

    plot = (
        ggplot(long_form, aes(x="variable", y="index", fill="value"))
        + geom_tile()
        + fill_scale
        + labs(title=f"P-values for {metric}", x="NMT Models", y="LLM Models")
    )
    print(plot)


In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

def plot_heatmap(p_values, llm_models, nmt_models, metric):
    """Show an annotated seaborn heat map of p-values, LLMs by NMT systems.

    Args:
        p_values: Mapping keyed by ``(llm_model, nmt_model)`` tuples; pairs
            that are missing are left as NaN.
        llm_models: Model labels used as heat map rows.
        nmt_models: Model labels used as heat map columns.
        metric: Name inserted into the plot title.

    NOTE(review): an earlier cell defines a plotnine-based ``plot_heatmap``
    with the same signature; running this cell rebinds the name to this
    version.
    """
    grid = np.full((len(llm_models), len(nmt_models)), np.nan)
    for row, llm_model in enumerate(llm_models):
        for col, nmt_model in enumerate(nmt_models):
            grid[row, col] = p_values.get((llm_model, nmt_model), np.nan)

    table = pd.DataFrame(grid, index=llm_models, columns=nmt_models)

    # Colours handed to seaborn as the colormap
    palette = ["blue", "green", "yellow", "orange", "red"]

    _, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(table, cmap=palette, vmin=0, vmax=1, annot=True, fmt=".2f",
                linewidths=.5, ax=ax)
    ax.set_title(f"P-values for {metric}")
    ax.set_xlabel("NMT Models")
    ax.set_ylabel("LLM Models")
    plt.show()

In [None]:
# Notebook-level configuration. These names live at cell scope (rather than
# inside main()) because later cells read `folder_path`, `score_labels` and
# `model_labels` directly.
folder_path = '/content/'

# Score column name in the CSV files -> label shown on the plots
score_labels = {
    'bleu_score': 'BLEU',
    'meteor_score': 'METEOR',
    'bert_score': 'BERTScore',
    'comet_score': 'COMET',
    'ter_score': 'TER'
}

# Model identifier found in the CSV header -> display label
model_labels = {
    'gemma:7b': 'Gemma-7B',
    'llama2:7b': 'LLaMA-2-7B',
    'llama2:13b': 'LLaMA-2-13B',
    'llama3': 'LLaMA-3-8B',
    'deepl': 'DeepL',
    'mistral': 'Mistral-7B',
    'amazon-stock': 'Amazon (Stock)',
    'amazon-custom': 'Amazon (Custom)',
    'googletrans': 'Google',
    'mixtral': 'Mixtral-8x7B'
}

# Display labels of the two model groups
llms = ['Gemma-7B', 'LLaMA-2-7B', 'LLaMA-2-13B', 'LLaMA-3-8B', 'Mistral-7B', 'Mixtral-8x7B']
nmt = ['DeepL', 'Amazon (Stock)', 'Amazon (Custom)', 'Google']


def main():
    """Compute summary statistics for every results file and draw the box plots.

    Reads the notebook-level ``folder_path``, ``score_labels`` and
    ``model_labels`` configuration.

    Returns:
        The dictionary produced by ``analyze_scores``.
    """
    scores_dict = analyze_scores(folder_path, score_labels, model_labels)

    plot_boxplots(scores_dict, score_labels, model_labels)

    #plot_means_confidence_intervals(scores_dict, score_labels)


    #for score_label, score_name in score_labels.items():
    #    p_values = compare_groups(scores_dict, score_labels, model_labels, llms, nmt)
    #    print(f"P-values for {score_name}:")
    #    print(p_values)
    #    plot_heatmap(p_values, llms, nmt, score_name)

    return scores_dict


if __name__ == "__main__":
    # Bind the result at notebook scope: later cells display `scores_dict`.
    scores_dict = main()

In [None]:
# Display the per-score summary rows; expects a notebook-level `scores_dict`.
scores_dict

In [None]:
# Preview one results file. The frame is only displayed here; it is not bound
# to any name.
pd.read_csv("/content/TEDTalks.en_pt-br.llama3.csv")

In [None]:
# Frequency table of the annotated senses: one row per distinct
# 'spatial_sense' value with its number of occurrences in 'quantity'.
# NOTE(review): `df` is not assigned in any cell visible here - confirm which
# annotated file it is expected to hold before running this cell.
spatial_df = (
    df['spatial_sense']
    .value_counts()
    .rename_axis('spatial_sense')
    .reset_index(name='quantity')
)

In [None]:
# Display the sense frequency table.
spatial_df

In [None]:
# Rows annotated with the sense 'Into(5)'.
# NOTE(review): the variable is called `across` although the filter selects
# 'Into(5)' rows - confirm the name/filter pairing is intentional.
into_5_mask = df['spatial_sense'] == 'Into(5)'
across = df[into_5_mask]
across

In [None]:
#across['source'][292]
#df.loc[df['inner_id'] == 98716, 'spatial_sense']  = 'Into(1)'

In [None]:
#df.to_csv('NMT ANALYSIS/TEDTalks.en_pt-br.DeepL.ANALYZED.csv')

In [None]:
# Description of the numbered senses of each preposition, shown next to the chart.
# NOTE(review): `go` is used below but no visible cell imports it (presumably
# plotly.graph_objects) - confirm the import exists before this cell runs.
legend_mapping = dict(
    Across='(1) Perpendicular position; (2) Movement over a surface; (3) Opposite location; (4) Distribution; (5) Non-spatial',
    Through='(1) Movement within a passage; (2) Movement within an open area; (3) Movement penetrating a barrier; (4) Part of a route; (5) Non-spatial',
    Into='(1) Movement/direction leading to enclosure; (2) Movement resulting in physical contact/collision; (3) Non-spatial',
    Onto='(1) Movement to a location on a surface; (2) Sense of attachment; (3) Non-spatial',
)

# One bar per sense, with the bar height given by its count
fig = go.Figure(
    data=[
        go.Bar(
            x=spatial_df['spatial_sense'],
            y=spatial_df['quantity'],
        )
    ]
)

# One text annotation per preposition, stacked downwards to the right of the
# plotting area (paper coordinates), in a small font
legend_annotations = [
    {
        'x': 1.1,
        'y': 1 - 0.1 * position,
        'xref': "paper",
        'yref': "paper",
        'text': f"{preposition}: {senses}",
        'showarrow': False,
        'font': {'size': 10},
    }
    for position, (preposition, senses) in enumerate(legend_mapping.items())
]

# Titles, axis labels and the hand-made legend
fig.update_layout(
    annotations=legend_annotations,
    title='Histogram of Spatial Sense Quantity',
    xaxis_title='Meaning',
    yaxis_title='Quantity',
    legend_tracegroupgap=50,  # Adjust spacing between legend items
)

# White backdrop rectangle drawn below the legend annotations
fig.add_shape(
    type="rect",
    xref="paper", yref="paper",
    x0=1.05, x1=1.25,
    y0=1, y1=0.5,
    fillcolor="white",
    opacity=1,
    line_width=0,
    layer="below",
)

# Render the figure
fig.show()

In [None]:
# NOTE(review): `total_spatial` is not assigned in any cell visible here -
# confirm where it is computed.
total_spatial

In [None]:
# NOTE(review): `total_non_spatial` is not assigned in any cell visible here -
# confirm where it is computed.
total_non_spatial

In [None]:
# Define the classification function
def classify_sense(sense):
    """Label a sense code as 'Spatial' or 'Non-Spatial'.

    A code is non-spatial when it ends with '(5)' or is exactly 'Into(3)' or
    'Onto(3)'; every other code is spatial.
    """
    is_non_spatial = sense.endswith('(5)') or sense in ('Into(3)', 'Onto(3)')
    return 'Non-Spatial' if is_non_spatial else 'Spatial'

# Tag every sense in the frequency table as 'Spatial' or 'Non-Spatial'
spatial_df['sense_type'] = spatial_df['spatial_sense'].apply(classify_sense)

# Most frequent senses first
spatial_df = spatial_df.sort_values(by='quantity', ascending=False)

# Split the table by sense type
spatial_senses = spatial_df.loc[spatial_df['sense_type'].eq('Spatial')]
non_spatial_senses = spatial_df.loc[spatial_df['sense_type'].eq('Non-Spatial')]

# One bar trace per sense type, each with its own colour
trace_spatial = go.Bar(
    name='Spatial',
    x=spatial_senses['spatial_sense'],
    y=spatial_senses['quantity'],
    marker={'color': 'darkblue'},
)
trace_non_spatial = go.Bar(
    name='Non-Spatial',
    x=non_spatial_senses['spatial_sense'],
    y=non_spatial_senses['quantity'],
    marker={'color': 'royalblue'},
)

# Assemble the figure from both traces
fig = go.Figure(data=[trace_spatial, trace_non_spatial])

# Titles, axis labels and legend settings
fig.update_layout(
    title='Spatial vs Non-Spatial Meaning Frequency',
    xaxis_title='Meanings',
    yaxis_title='Frequency',
    legend_title='Meaning:',
    legend_traceorder='reversed',
    legend_tracegroupgap=50,  # Adjust spacing between legend items
)

# Render the figure
fig.show()

In [None]:
# Display the per-score summary rows; expects a notebook-level `scores_dict`.
scores_dict

In [None]:
# Recompute the summary statistics and run pairwise t-tests for one metric.
# NOTE(review): `pairwise_ttest` is not defined in any cell visible here -
# confirm where it comes from.
# NOTE(review): analyze_scores() keys its result by the keys of `score_labels`;
# verify that 'BLEU' (rather than a column name such as 'bleu_score') is the
# key `pairwise_ttest` expects.
scores_dict = analyze_scores(folder_path, score_labels, model_labels)
p_values = pairwise_ttest(scores_dict, 'BLEU')  # or 'METEOR'
print(p_values)