In [None]:
import pandas as pd

import torch
import numpy as np
from numpy import pad
import pickle, os
from scipy.stats import spearmanr, pearsonr
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances

import voyageai
from voyageai import get_embeddings
import google.generativeai as genai
from openai import OpenAI
from huggingface_hub import login
import openai
from sentence_transformers import SentenceTransformer


In [None]:
# Runtime configuration shared by the cells below.
device = "cpu"
# Starts empty; presumably filled with loaded SentenceTransformer models by the
# helper notebooks -- TODO confirm where it is populated.
sentence_transformer_models_dict = dict()

In [None]:
# Execute the sibling utils notebook in this kernel. embed_content and
# compute_max_cosine_sim_machine_human are used below but not defined in this
# notebook; presumably they come from here -- TODO confirm.
%run ./utils.ipynb

In [None]:
# Execute the embedding configuration notebook in this kernel. EVAL_MODEL_LIST is
# used below but not defined in this notebook; presumably it is set here -- TODO confirm.
%run ./config_embed.ipynb

In [None]:
# Directory (relative to the notebook) that holds the input pickles and receives all results.
BASEPATH = 'STS_summary'

In [None]:
# Suffix of the anonymized-data pickle to load (part of the input file name).
REPLACEMENT_OR_REMOVAL = "REMOVAL"

In [None]:
# Show the full list of embedding models before it is narrowed down in the next cell.
EVAL_MODEL_LIST

In [None]:
# Keep only the first model of the list.
# NOTE(review): this looks like a debugging shortcut -- remove it to evaluate every model.
EVAL_MODEL_LIST = EVAL_MODEL_LIST[:1]

In [None]:
# LLMs whose anonymized summaries are evaluated, in processing order.
LLM_FOR_ANONYMIZATION = [
    'gemini',
    'sonnet',
]

In [None]:
# For every anonymizing LLM and every embedding model: embed the original and the
# anonymized summaries, score machine-vs-human similarity, and correlate those
# scores with the 'relevance' column. Results are pickled under BASEPATH.
#
# embed_content and compute_max_cosine_sim_machine_human are not defined in this
# notebook; presumably they come from the %run'd helper notebooks -- TODO confirm.


def _embed_column(series, model_name, to_list):
    """Embed every cell of `series` with `model_name`; each result is stored as an ndarray.

    to_list -- call `.tolist()` on the cell before embedding. The original
    summaries are passed that way, the anonymized ones are passed through as-is.
    """
    return series.apply(
        lambda summaries: np.array(
            embed_content(summaries.tolist() if to_list else summaries, model=model_name)
        )
    )


# The per-model checkpoints go to BASEPATH/temp; create it up front so the first
# write cannot fail on a missing directory.
# NOTE(review): a model name containing '/' would still point into a missing sub-directory.
os.makedirs(f'{BASEPATH}/temp', exist_ok=True)

for llm_used in LLM_FOR_ANONYMIZATION:

    sc_col = 'SC_cosine_sim_score_machine_human'
    pc_col = 'PC_cosine_sim_score_machine_human'
    sc_anon_col = f'SC_cosine_sim_score_machine_human_anonymized_llm_used_{llm_used}'
    pc_anon_col = f'PC_cosine_sim_score_machine_human_anonymized_llm_used_{llm_used}'
    result_columns = ['model', sc_col, sc_anon_col, pc_col, pc_anon_col]

    # One dict per evaluated model; the frame is rebuilt from this list because
    # DataFrame.append no longer exists in pandas >= 2.0.
    correlation_rows = []
    correlation_df_llm_wise = pd.DataFrame(columns=result_columns)

    # REPLACEMENT_OR_REMOVAL is taken from the configuration cell and is not
    # re-assigned here, so every LLM reads the same kind of input file.
    df_cop = pd.read_pickle(f'{BASEPATH}/hum_mac_summ_data_anonymized_LLM_used_{llm_used}_{REPLACEMENT_OR_REMOVAL}.pkl')
    print('llm_used ', llm_used)

    for model_name in EVAL_MODEL_LIST:

        # Work on a copy so the embedding columns of one model do not leak into the next.
        df_model = df_cop.copy()
        print('model_name ', model_name)

        print('machine summary')
        df_model[f'embedding_machine_summaries_{model_name}'] = _embed_column(
            df_model['machine_summaries'], model_name, to_list=True
        )

        print('human summary')
        df_model[f'embedding_human_summaries_{model_name}'] = _embed_column(
            df_model['human_summaries'], model_name, to_list=True
        )

        print('machine summary anonymized')
        df_model[f'embedding_machine_summaries_anonymized_{model_name}_llm_used_{llm_used}'] = _embed_column(
            df_model[f'machine_summaries_anonymized_{llm_used}'], model_name, to_list=False
        )

        print('human summary anonymized')
        df_model[f'embedding_human_summaries_anonymized_{model_name}_llm_used_{llm_used}'] = _embed_column(
            df_model[f'human_summaries_anonymized_{llm_used}'], model_name, to_list=False
        )

        sim_anon_col = f'sim_machine_human_best_anonymized_llm_used_{llm_used}'
        df_model[sim_anon_col] = df_model.apply(
            lambda row: compute_max_cosine_sim_machine_human(row, model_name, anon=True, llm_used=llm_used),
            axis=1,
        )
        df_model['sim_machine_human_best'] = df_model.apply(
            lambda row: compute_max_cosine_sim_machine_human(row, model_name, anon=False),
            axis=1,
        )

        print('getting correlation')

        # explode() flattens the per-row sequences but leaves object dtype; cast to
        # float so the scipy routines receive plain numeric arrays.
        cosine_sim_score_anon_machine_human = df_model[sim_anon_col].explode().astype(float).to_numpy()
        cosine_sim_score_machine_human = df_model['sim_machine_human_best'].explode().astype(float).to_numpy()
        relevance_machine_human = df_model['relevance'].explode().astype(float).to_numpy()

        # Spearman and Pearson correlation between the anonymized similarity and relevance
        spearman_corr_anon_relevance_machine_human, _ = spearmanr(cosine_sim_score_anon_machine_human, relevance_machine_human)
        pearson_corr_anon_relevance_machine_human, _ = pearsonr(cosine_sim_score_anon_machine_human, relevance_machine_human)

        # Spearman and Pearson correlation between the original similarity and relevance
        spearman_corr_cosine_relevance_machine_human, _ = spearmanr(cosine_sim_score_machine_human, relevance_machine_human)
        pearson_corr_cosine_relevance_machine_human, _ = pearsonr(cosine_sim_score_machine_human, relevance_machine_human)

        correlation_rows.append({
            'model': model_name,
            sc_col: spearman_corr_cosine_relevance_machine_human,
            sc_anon_col: spearman_corr_anon_relevance_machine_human,
            pc_col: pearson_corr_cosine_relevance_machine_human,
            pc_anon_col: pearson_corr_anon_relevance_machine_human,
        })
        correlation_df_llm_wise = pd.DataFrame(correlation_rows, columns=result_columns)

        print('llm used', llm_used)
        print('->>>model ', model_name)
        print('::spearman_corr_cosine_relevance_machine_human ', spearman_corr_cosine_relevance_machine_human)
        print('spearman_corr_anonymized_relevance_machine_human ', spearman_corr_anon_relevance_machine_human)

        print('pearson_corr_cosine_relevance_machine_human ', pearson_corr_cosine_relevance_machine_human)
        print('pearson_corr_anonymized_relevance_machine_human ', pearson_corr_anon_relevance_machine_human)

        # Per-model checkpoints: the correlation rows so far plus the frame with all embeddings.
        correlation_df_llm_wise.to_pickle(f'{BASEPATH}/temp/correlation_results_{model_name}_llm_used_{llm_used}.pkl')
        correlation_df_llm_wise.to_csv(f'{BASEPATH}/temp/correlation_results_{model_name}_llm_used_{llm_used}.csv', index=False)
        df_model.to_pickle(f'{BASEPATH}/temp/df_{model_name}_llm_used_{llm_used}.pkl')

        # Overwritten after every model, so this file always holds the rows computed
        # so far. Values are stored at full precision for every row; rounding is left
        # to the cells that format the table.
        correlation_df_llm_wise.to_pickle(f'{BASEPATH}/correlation_results_llm_used_{llm_used}.pkl')




In [None]:
# Show the directory the result pickles are read from in the next cell.
BASEPATH

In [None]:
# Load the per-LLM correlation tables written by the evaluation loop.
# df1 keeps every column of the sonnet run (including the non-anonymized baselines);
# df2 keeps only the model name and the gemini-specific anonymized correlations.
# NOTE(review): 'sonnet' and 'gemini' are hard-coded here rather than taken from
# LLM_FOR_ANONYMIZATION.
df1 = pd.read_pickle(f'{BASEPATH}/correlation_results_llm_used_sonnet.pkl')
df2 = pd.read_pickle(f'{BASEPATH}/correlation_results_llm_used_gemini.pkl')[
    [
        'model',
        'SC_cosine_sim_score_machine_human_anonymized_llm_used_gemini',
        'PC_cosine_sim_score_machine_human_anonymized_llm_used_gemini',
    ]
]


In [None]:
# Correlation table loaded from the sonnet result file.
display(df1)

In [None]:
# Gemini-specific anonymized correlation columns (plus the model name).
display(df2)

In [None]:



# Combine the sonnet table (df1) with the gemini columns (df2), one row per model.
# merged_df was used below without ever being created, which raised NameError on a
# fresh kernel. The inner join keeps only models present in both result files.
merged_df = df1.merge(df2, on='model', how='inner')


def std_err(x):
    """Return the standard error of the mean of `x`: sample std (ddof=1) / sqrt(len(x))."""
    return x.std(ddof=1) / np.sqrt(len(x))


def _anonymized_columns(prefix):
    """Names of the sonnet and gemini anonymized-correlation columns for 'SC' or 'PC'."""
    return [
        f'{prefix}_cosine_sim_score_machine_human_anonymized_llm_used_sonnet',
        f'{prefix}_cosine_sim_score_machine_human_anonymized_llm_used_gemini',
    ]


# Mean over the two anonymizing LLMs, for Spearman (SC) and Pearson (PC).
# astype(float) guards against object-dtype columns coming out of the pickles.
for prefix in ('SC', 'PC'):
    merged_df[f'mean_{prefix}_cosine_sim_score_machine_human_anonymized'] = (
        merged_df[_anonymized_columns(prefix)].astype(float).mean(axis=1)
    )

# Standard error of that mean. The columns are named 'std_*' but hold std_err values.
for prefix in ('SC', 'PC'):
    merged_df[f'std_{prefix}_cosine_sim_score_machine_human_anonymized'] = (
        merged_df[_anonymized_columns(prefix)].astype(float).apply(std_err, axis=1)
    )

# Round all values to 3 decimal places
merged_df = merged_df.round(3)


display(merged_df[['model','SC_cosine_sim_score_machine_human','PC_cosine_sim_score_machine_human', 'mean_SC_cosine_sim_score_machine_human_anonymized', 'std_SC_cosine_sim_score_machine_human_anonymized', 'mean_PC_cosine_sim_score_machine_human_anonymized', 'std_PC_cosine_sim_score_machine_human_anonymized']])

In [None]:


# The mean_* / std_* columns of merged_df are computed and rounded by the preceding
# cell. This cell used to repeat that computation verbatim, which redefined std_err
# and re-derived the statistics from the already-rounded values (double rounding).
# It now only shows the summary table.
display(merged_df[['model','SC_cosine_sim_score_machine_human','PC_cosine_sim_score_machine_human', 'mean_SC_cosine_sim_score_machine_human_anonymized', 'std_SC_cosine_sim_score_machine_human_anonymized', 'mean_PC_cosine_sim_score_machine_human_anonymized', 'std_PC_cosine_sim_score_machine_human_anonymized']])

In [None]:
# Columns that go into the report. .copy() makes output_df independent of merged_df,
# so the columns assigned to it further down do not raise SettingWithCopyWarning.
output_df = merged_df[['model','SC_cosine_sim_score_machine_human','PC_cosine_sim_score_machine_human', 'mean_SC_cosine_sim_score_machine_human_anonymized', 'std_SC_cosine_sim_score_machine_human_anonymized', 'mean_PC_cosine_sim_score_machine_human_anonymized', 'std_PC_cosine_sim_score_machine_human_anonymized']].copy()

In [None]:
# Inspect the column names of the report table.
output_df.columns

In [None]:
# LaTeX rendering of the numeric table (index included).
# NOTE(review): latex_code is reassigned further down before it is ever printed,
# so this value is never shown.
latex_code = output_df.to_latex()

In [None]:

# Build "mean \pm stderr" strings for the LaTeX table.
# The separator is a raw string: '\p' is not a valid escape sequence, and newer
# Python versions warn about it in a normal string literal.
# NOTE(review): astype(str) drops trailing zeros (0.5, not 0.500) -- confirm that is acceptable.
PM_SEPARATOR = r' \pm '

mean_std_concat_pc = output_df[['mean_PC_cosine_sim_score_machine_human_anonymized', 'std_PC_cosine_sim_score_machine_human_anonymized']].astype(str).agg(PM_SEPARATOR.join, axis=1)

mean_std_concat_sc = output_df[['mean_SC_cosine_sim_score_machine_human_anonymized', 'std_SC_cosine_sim_score_machine_human_anonymized']].astype(str).agg(PM_SEPARATOR.join, axis=1)

# assign() returns a new frame instead of writing into what may be a slice of
# merged_df, which avoids SettingWithCopyWarning. Column order matches the
# original assignments: Pearson first, then Spearman.
output_df = output_df.assign(
    Pearson_Correlation_Anonymized=mean_std_concat_pc,
    Spearman_Correlation_Anonymized=mean_std_concat_sc,
)

In [None]:
# Report table including the two combined "mean \pm stderr" string columns.
display(output_df)

In [None]:
# Final column layout of the report: for each correlation type, the baseline value
# followed by the combined anonymized "mean \pm stderr" string.
output_df = output_df[
    [
        'model',
        'SC_cosine_sim_score_machine_human',
        'Spearman_Correlation_Anonymized',
        'PC_cosine_sim_score_machine_human',
        'Pearson_Correlation_Anonymized',
    ]
]

In [None]:
# Render the final report table as LaTeX without the index column.
# NOTE(review): whether the backslash of '\pm' and the underscores in names are
# escaped depends on to_latex's `escape` default in the installed pandas -- check the output.
latex_code = output_df.to_latex(index=False)

In [None]:
# Print the LaTeX source of the final table as plain text.
print(latex_code)

In [None]:
# Prints the same latex_code again; this cell is identical to the previous one.
print(latex_code)