In [38]:
import pandas as pd
import numpy as np

## Load the pseudo-pair lists and the linguistic analysis results for each corpus

In [20]:
# Load the pre-computed pseudo-pair lists (NumPy .npy files), one per corpus.
# NOTE(review): none of these arrays is referenced in the visible cells below —
# confirm whether they are still needed.
pseudo_pairs_unfam_numpy = np.load("../data/pseudo pairs/pseudo_pairs_list_unfam_freeConv.npy")
# NOTE(review): "basline" looks like a typo for "baseline", and this variable is
# loaded from the "fam" file — presumably baseline == familiar; confirm before renaming.
pseudo_pairs_basline_numpy = np.load ("../data/pseudo pairs/pseudo_pairs_list_fam_freeConv.npy")
pseudo_pairs_unimodal_numpy = np.load("../data/pseudo pairs/pseudo_pairs_list_uni_freeConv.npy")

In [40]:
# Linguistic analysis results, one CSV per corpus. Each frame is later passed to
# MakePseudoPairs, which needs "PairNo" and "LabName" columns.
baseline_df = pd.read_csv("../results/baseline_linguistic_analysis_248.csv")
unimodal_df = pd.read_csv("../results/unimodal_linguistic_analysis_248.csv")
unfamiliar_df = pd.read_csv("../results/unfamiliar_linguistic_analysis_248.csv")
# Results for the whole corpus and for each third of it (per the file names).
whole_df = pd.read_csv("../results/linguistic_analysis_condition_mordor_gondor_248.csv")
first_third = pd.read_csv("../results/linguistic_analysis_condition_mordor_gondor_first_third_248.csv")
second_third = pd.read_csv("../results/linguistic_analysis_condition_mordor_gondor_second_third_248.csv")
third_third = pd.read_csv("../results/linguistic_analysis_condition_mordor_gondor_third_third_248.csv")

## Create pseudo pairs from the linguistic analysis results

In [34]:
def MakePseudoPairs(df):
    """Pair every "Mordor" row with every "Gondor" row from a different pair.

    The rows of ``df`` are split on ``LabName`` into a Mordor frame and a
    Gondor frame, the two are cross-joined, and combinations that share the
    same ``PairNo`` (i.e. the real pairs) are dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``"PairNo"`` and ``"LabName"``. Every other
        column is carried into the result once per role.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: ``PairNo_mordor``, ``PairNo_gondor``, then
        ``<col>_mordor`` for every other input column, then ``<col>_gondor``
        for every other input column. The index is the row position in the
        unfiltered cross join, so it is not contiguous.

    Raises
    ------
    KeyError
        If ``"PairNo"`` or ``"LabName"`` is missing from ``df``.
    """
    id_columns = ["PairNo", "LabName"]
    missing = [col for col in id_columns if col not in df.columns]
    if missing:
        raise KeyError(f"MakePseudoPairs requires column(s) {missing} in df")

    mordor_df = df[df["LabName"] == "Mordor"].reset_index(drop=True)
    gondor_df = df[df["LabName"] == "Gondor"].reset_index(drop=True)

    # A native cross join avoids adding a helper "key" column, which would
    # overwrite (and then break on) an input column that is itself named "key".
    # Both sides have identical column names, so every column gets a suffix.
    cross_joined = pd.merge(
        mordor_df, gondor_df, how="cross", suffixes=("_mordor", "_gondor")
    )

    # Keep only combinations of speakers that come from different real pairs.
    is_pseudo = cross_joined["PairNo_mordor"] != cross_joined["PairNo_gondor"]

    columns_to_compare = [col for col in df.columns if col not in id_columns]
    ordered_columns = (
        ["PairNo_mordor", "PairNo_gondor"]
        + [f"{col}_mordor" for col in columns_to_compare]
        + [f"{col}_gondor" for col in columns_to_compare]
    )

    # .loc with a boolean mask and a column list returns a new frame, so the
    # caller never receives a view onto the intermediate cross join.
    return cross_joined.loc[is_pseudo, ordered_columns]

In [35]:
# Build the pseudo-pair frame for each corpus-level result table.
(
    transformed_baseline_df,
    transformed_unimodal_df,
    transformed_unfamiliar_df,
    transformed_whole_df,
) = [
    MakePseudoPairs(corpus_df)
    for corpus_df in (baseline_df, unimodal_df, unfamiliar_df, whole_df)
]

In [41]:
# Build the pseudo-pair frame for each third of the whole corpus.
(
    transformed_first_third_df,
    transformed_second_third_df,
    transformed_third_third_df,
) = [
    MakePseudoPairs(third_df)
    for third_df in (first_third, second_third, third_third)
]

## Save the pseudo-pair DataFrames to CSV files

In [36]:
# Write each corpus-level pseudo-pair frame to its CSV file, without the index.
for pseudo_df, csv_path in (
    (transformed_baseline_df, "../results/pseudo_pairs_baseline_linguistic_analysis.csv"),
    (transformed_unimodal_df, "../results/pseudo_pairs_unimodal_linguistic_analysis.csv"),
    (transformed_unfamiliar_df, "../results/pseudo_pairs_unfamiliar_linguistic_analysis.csv"),
):
    pseudo_df.to_csv(csv_path, index=False)

In [42]:
# Write the whole-corpus and per-third pseudo-pair frames to CSV, without the index.
for pseudo_df, csv_path in (
    (transformed_whole_df, "../results/pseudo_pairs_whole_corpus_linguistic_analysis.csv"),
    (transformed_first_third_df, "../results/pseudo_pairs_first_third_linguistic_analysis.csv"),
    (transformed_second_third_df, "../results/pseudo_pairs_second_third_linguistic_analysis.csv"),
    (transformed_third_third_df, "../results/pseudo_pairs_third_third_linguistic_analysis.csv"),
):
    pseudo_df.to_csv(csv_path, index=False)