# Notebook for the analysis and creation of plots and tables for the report

In [2]:
import pandas as pd
import numpy as np

# Load results.csv 

- Tables for delta sarc ratio with confidence intervals and marginals. 
- The same table but just for up and down results.
- Something with the flip rate 
- Plots: 
  - Heatmap input to second round. Let's make it according to claims. 
  - Label distribution: Adjust so that we can see the small percentage. 
  - Valid-output distribution for both rounds. 

### Helper functions

In [3]:
def ci95(x):
    '''
    Helper function to return the 95% Confidence Interval around the mean. 
    '''
    n = x.count()
    if n <= 1:
        return np.nan
    se = x.std(ddof=1) / np.sqrt(n)
    return 1.96 * se

In [4]:
def fmt(m, c):
    if pd.isna(m) or pd.isna(c):
        return "NaN"
    return f"{m:.3f} ± {c:.3f}"

## EDA Sarcasm Ratio

## Delta sarc ratio table

In [32]:
def make_delta_table(df, filter = None):
    columns = ['delta_sarc', 'model_receiver', 'model_sender']

    for c in columns:
        if c not in df.columns:
            raise ValueError(f'Missing required column: {c}')
    
    if filter is None:
        df_filter = df[columns].copy()
    elif filter == 'up':
        df_filter = df[df['flip_direction'] == 'up'].copy()
        df_filter = df_filter[columns].copy()
    elif filter == 'down':
        df_filter = df[df['flip_direction'] == 'down'].copy()
        df_filter = df_filter[columns].copy()
    else:
        raise ValueError('Filter not defined')
    
    df_filter['abs_delta_sarc'] = df_filter['delta_sarc'].abs()

    df_filter.drop('delta_sarc', inplace = True, axis = 1)

    df_sender_receiver = (df_filter.groupby(['model_receiver', 'model_sender'])['abs_delta_sarc'].agg(mean = 'mean', count = 'count', ci95=ci95).reset_index())

    df_sender = (df_filter.groupby('model_sender')['abs_delta_sarc'].agg(mean = 'mean', count = 'count', ci95=ci95).reset_index())
    df_receiver = (df_filter.groupby('model_receiver')['abs_delta_sarc'].agg(mean = 'mean', count = 'count', ci95=ci95).reset_index())

    # align marginals with main table
    sender_stats = df_sender.set_index('model_sender')
    receiver_stats = df_receiver.set_index('model_receiver')


    pivot_mean = df_sender_receiver.pivot(
        index='model_receiver',
        columns = 'model_sender',
        values = 'mean'
    )
    
    pivot_ci = df_sender_receiver.pivot(
        index='model_receiver',
        columns='model_sender',
        values='ci95'
    )

    df_table = pivot_mean.copy().astype(object)
    for r in df_table.index:
        for c in df_table.columns:
            m = pivot_mean.loc[r, c]
            ci = pivot_ci.loc[r, c]
            df_table.loc[r, c] = fmt(m, ci)


    receiver_col = []
    for r in df_table.index:
        m = receiver_stats.loc[r, 'mean']
        ci = receiver_stats.loc[r, 'ci95']
        receiver_col.append(fmt(m, ci))
    df_table['receiver_marg'] = receiver_col


    # global_mean = df_filter['abs_delta_sarc'].mean()
    # global_ci = ci95(df_filter['abs_delta_sarc'])

    sender_row = {}
    for s in df_table.columns[:-1]:
        if s in sender_stats.index:
            m = sender_stats.loc[s, 'mean']
            ci = sender_stats.loc[s, 'ci95']
            sender_row[s] = fmt(m, ci)
        else:
            sender_row[s] = "--"

    # sender_row['receiver_marg'] = fmt(global_mean, global_ci)

    # append sender_marg row
    df_table.loc['sender_marg'] = sender_row

    # ---- LaTeX table (now with mean ± ci95 strings) ----
    table = df_table.to_latex(
        index=True,
        caption="Abs delta sarcasm by sender/receiver",
        label="tab:abs_delta_sarc",
        bold_rows=False,
        na_rep="--",
        escape=False
    )

    return df_table, table
    

In [33]:
results = pd.read_csv('results/results.csv')

In [34]:
print(results.head())

   id model_receiver    model_sender  valid_json_count_r2  round2_sarc_ratio  \
0   1   llama-3.2-3b  mistral-0.2-7b                   10                1.0   
1   3   llama-3.2-3b  mistral-0.2-7b                   10                0.8   
2  32   llama-3.2-3b  mistral-0.2-7b                   10                1.0   
3  33   llama-3.2-3b  mistral-0.2-7b                   10                0.5   
4  38   llama-3.2-3b  mistral-0.2-7b                   10                0.3   

   round1_sarc_ratio  delta_sarc flip_direction  flip  
0                1.0         0.0           none     0  
1                1.0        -0.2           none     0  
2                1.0         0.0           none     0  
3                0.9        -0.4           none     0  
4                0.9        -0.6           down     1  


In [35]:
df, table = make_delta_table(results)

In [36]:
print(table)

\begin{table}
\caption{Abs delta sarcasm by sender/receiver}
\label{tab:abs_delta_sarc}
\begin{tabular}{lllllll}
\toprule
model_sender & llama-3.2-3b & mistral-0.2-7b & mistral-0.3-7b & qwen-2.5-1.5b & qwen-2.5-7b & receiver_marg \\
model_receiver &  &  &  &  &  &  \\
\midrule
llama-3.2-3b & NaN & 0.206 ± 0.005 & 0.887 ± 0.007 & 0.048 ± 0.001 & 0.347 ± 0.011 & 0.104 ± 0.001 \\
mistral-0.2-7b & 0.892 ± 0.003 & NaN & 0.915 ± 0.003 & 0.372 ± 0.003 & 0.868 ± 0.005 & 0.550 ± 0.003 \\
mistral-0.3-7b & 0.246 ± 0.012 & 0.179 ± 0.004 & NaN & 0.061 ± 0.001 & 0.204 ± 0.008 & 0.088 ± 0.001 \\
qwen-2.5-1.5b & 0.957 ± 0.001 & 0.972 ± 0.000 & 0.975 ± 0.000 & NaN & 0.973 ± 0.000 & 0.970 ± 0.000 \\
qwen-2.5-7b & 0.705 ± 0.010 & 0.353 ± 0.007 & 0.949 ± 0.004 & 0.079 ± 0.001 & NaN & 0.193 ± 0.002 \\
sender_marg & 0.917 ± 0.001 & 0.724 ± 0.002 & 0.964 ± 0.001 & 0.133 ± 0.001 & 0.885 ± 0.002 & -- \\
\bottomrule
\end{tabular}
\end{table}



### The same tabel, just split into up and down flips.

In [37]:
up_df, up_table = make_delta_table(results, filter='up')
down_df, down_table = make_delta_table(results, filter='down')

In [39]:
print(up_table)

\begin{table}
\caption{Abs delta sarcasm by sender/receiver}
\label{tab:abs_delta_sarc}
\begin{tabular}{lllllll}
\toprule
model_sender & llama-3.2-3b & mistral-0.2-7b & mistral-0.3-7b & qwen-2.5-1.5b & qwen-2.5-7b & receiver_marg \\
model_receiver &  &  &  &  &  &  \\
\midrule
llama-3.2-3b & NaN & 0.906 ± 0.006 & 0.911 ± 0.004 & NaN & 0.909 ± 0.005 & 0.910 ± 0.003 \\
mistral-0.2-7b & 0.924 ± 0.002 & NaN & 0.951 ± 0.002 & 0.839 ± 0.023 & 0.949 ± 0.002 & 0.942 ± 0.001 \\
mistral-0.3-7b & 0.887 ± 0.023 & 0.980 ± 0.039 & NaN & NaN & 0.912 ± 0.019 & 0.903 ± 0.015 \\
qwen-2.5-1.5b & 0.958 ± 0.001 & 0.973 ± 0.000 & 0.975 ± 0.000 & NaN & 0.974 ± 0.000 & 0.970 ± 0.000 \\
qwen-2.5-7b & 0.942 ± 0.003 & 0.973 ± 0.003 & 0.969 ± 0.002 & 0.938 ± 0.022 & NaN & 0.961 ± 0.002 \\
sender_marg & 0.953 ± 0.001 & 0.972 ± 0.000 & 0.970 ± 0.000 & 0.877 ± 0.017 & 0.970 ± 0.000 & -- \\
\bottomrule
\end{tabular}
\end{table}



# Plots 