In [1]:
import matplotlib.pyplot as plt

%matplotlib inline
# Default figure size (width, height), in inches, for every figure created after this point.
plt.rcParams["figure.figsize"] = (12, 16)

In [2]:
import pandas as pd

In [3]:
# Root folder for the tables this notebook writes; the per-model CSV inputs
# are read from its `raw` subfolder.
OUT_PATH: str = '../results/manifolds'
IN_PATH: str = f'{OUT_PATH}/raw'

In [4]:
# Dataset splits to analyse; each name is the first component of an input
# file name and the key under which that split's results are stored.
DATASETS: list = [
    'train',
    'test',
]

In [5]:
# Model identifiers; each one selects an input file and becomes the
# top-level column label of that model's metrics.
MODELS: list = [
    'base',
    'textattack',
    'fabriceyhc',
    'wakaka',
]

In [6]:
# Summary statistics kept from `DataFrame.describe()` when exporting tables.
COLS_OF_INTEREST: list = [
    'mean',
    'std',
    'min',
    'max',
]

In [7]:
### Load Datasets into memory

In [8]:
def convert_to_multi_col(df: pd.DataFrame, meta_col: str) -> pd.DataFrame:
    """Nest the columns of ``df`` under a single top-level label.

    Parameters
    ----------
    df : pd.DataFrame
        Frame whose existing columns become the second column level.
    meta_col : str
        Label placed on the first column level, above every column.

    Returns
    -------
    pd.DataFrame
        A copy of ``df`` whose columns are a two-level ``MultiIndex`` with
        level names ``'model'`` and ``'metrics'``. The input frame is left
        untouched.
    """
    # Work on a copy: assigning to `df.columns` directly would relabel the
    # caller's frame as well, making repeated calls on the same frame stack
    # additional levels.
    nested = df.copy()
    nested.columns = pd.MultiIndex.from_product(
        [[meta_col], df.columns],
        names=['model', 'metrics'],
    )

    return nested

In [9]:
def _load_split(split: str) -> pd.DataFrame:
    """Read every model's CSV for one split and join them side by side.

    Each file ``{IN_PATH}/{split}.{model}.csv`` is read with its first column
    as the index, re-indexed by its ``dim`` column, and has its columns nested
    under the model name before the per-model frames are concatenated
    column-wise.
    """
    per_model = []
    for model in MODELS:
        raw = pd.read_csv(f'{IN_PATH}/{split}.{model}.csv', index_col=0)
        per_model.append(convert_to_multi_col(raw.set_index(['dim']), model))
    return pd.concat(per_model, axis=1)


# One wide frame per dataset split, keyed by split name.
analysis: dict = {split: _load_split(split) for split in DATASETS}

In [10]:
# Show each split's combined frame, preceded by the split name.
for split_name, frame in analysis.items():
    display(split_name)
    display(frame)

'train'

model,base,base,base,base,base,textattack,textattack,textattack,textattack,textattack,fabriceyhc,fabriceyhc,fabriceyhc,fabriceyhc,fabriceyhc,wakaka,wakaka,wakaka,wakaka,wakaka
metrics,centroid_point_distances_negative,intra_distance_negative,centroid_point_distances_positive,intra_distance_positive,extra_distance,centroid_point_distances_negative,intra_distance_negative,centroid_point_distances_positive,intra_distance_positive,extra_distance,centroid_point_distances_negative,intra_distance_negative,centroid_point_distances_positive,intra_distance_positive,extra_distance,centroid_point_distances_negative,intra_distance_negative,centroid_point_distances_positive,intra_distance_positive,extra_distance
dim,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
768,"[[5.992325561399737, 5.203575077395552, 3.5995...",5.764313,"[[6.28652306558972, 3.8467728310963367, 5.1201...",6.138071,1.259246,"[[6.751666973990453, 7.309737450435016, 5.0174...",6.738079,"[[8.802257554520567, 5.346788559260816, 6.4295...",7.117969,7.820493,"[[4.967599522866965, 5.136100990019949, 7.8128...",7.644175,"[[4.750496262880372, 5.36116840102162, 4.42993...",6.040889,22.864886,"[[8.912249447423662, 5.991112258898744, 6.4723...",7.437458,"[[11.313891151375156, 10.330851402943722, 5.53...",8.561553,11.483087


'test'

model,base,base,base,base,base,textattack,textattack,textattack,textattack,textattack,fabriceyhc,fabriceyhc,fabriceyhc,fabriceyhc,fabriceyhc,wakaka,wakaka,wakaka,wakaka,wakaka
metrics,centroid_point_distances_negative,intra_distance_negative,centroid_point_distances_positive,intra_distance_positive,extra_distance,centroid_point_distances_negative,intra_distance_negative,centroid_point_distances_positive,intra_distance_positive,extra_distance,centroid_point_distances_negative,intra_distance_negative,centroid_point_distances_positive,intra_distance_positive,extra_distance,centroid_point_distances_negative,intra_distance_negative,centroid_point_distances_positive,intra_distance_positive,extra_distance
dim,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
768,"[[4.832424433221461, 9.807014103979695, 3.7882...",5.798742,"[[5.229993597663103, 4.427183632815254, 5.6675...",6.084546,1.229414,"[[6.785032081639631, 9.407629650697947, 6.6619...",6.935167,"[[5.874596543022215, 7.184561450019481, 6.6260...",7.183807,7.312816,"[[9.073984610715168, 10.79574947060581, 10.667...",9.273344,"[[3.9066403335518403, 9.816295601628925, 11.72...",7.438157,19.964325,"[[6.595880875307386, 8.794097214610463, 7.2356...",7.581062,"[[6.3813950679018925, 9.885993432500822, 6.399...",8.446871,11.398275


In [11]:
### Calculate Centroid Distance and Cluster Dispersion

In [12]:
# Per split: describe() statistics of every column whose label matches
# `extra_distance`, transposed so each (model, metric) pair is one row.
# NOTE: in the recorded outputs each frame holds a single row, so `std` is NaN
# and mean/min/max all equal that one value.
distances: dict = {
    split: frame.filter(regex=".*extra_distance").describe().transpose()
    for split, frame in analysis.items()
}

In [13]:
# Export the `extra_distance` summary of each split as CSV and LaTeX, then
# show the same rounded table inline.
for split, stats in distances.items():
    table: pd.DataFrame = stats[COLS_OF_INTEREST].round(3)
    table.to_csv(f'{OUT_PATH}/{split}.metric.distance.csv')
    table.style.to_latex(f'{OUT_PATH}/{split}.metric.distance.tex')
    display(split, table)

'train'

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std,min,max
model,metrics,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
base,extra_distance,1.259,,1.259,1.259
textattack,extra_distance,7.82,,7.82,7.82
fabriceyhc,extra_distance,22.865,,22.865,22.865
wakaka,extra_distance,11.483,,11.483,11.483


'test'

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std,min,max
model,metrics,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
base,extra_distance,1.229,,1.229,1.229
textattack,extra_distance,7.313,,7.313,7.313
fabriceyhc,extra_distance,19.964,,19.964,19.964
wakaka,extra_distance,11.398,,11.398,11.398


In [14]:
# Per split: describe() statistics of every column whose label matches
# `intra_distance`, transposed so each (model, metric) pair is one row.
# NOTE: in the recorded outputs each frame holds a single row, so `std` is NaN
# and mean/min/max all equal that one value.
dispersion: dict = {
    split: frame.filter(regex=".*intra_distance").describe().transpose()
    for split, frame in analysis.items()
}

In [15]:
# Export the `intra_distance` summary of each split as CSV and LaTeX, then
# show the same rounded table inline.
for label, data in dispersion.items():
    formatted: pd.DataFrame = (
        data
        # Use the shared constant rather than a second hard-coded list so the
        # dispersion and distance tables always report the same statistics.
        [COLS_OF_INTEREST]
        .round(3)
    )
    formatted.to_csv(f'{OUT_PATH}/{label}.metric.dispersion.csv')
    formatted.style.to_latex(f'{OUT_PATH}/{label}.metric.dispersion.tex')
    display(label, formatted)

'train'

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std,min,max
model,metrics,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
base,intra_distance_negative,5.764,,5.764,5.764
base,intra_distance_positive,6.138,,6.138,6.138
textattack,intra_distance_negative,6.738,,6.738,6.738
textattack,intra_distance_positive,7.118,,7.118,7.118
fabriceyhc,intra_distance_negative,7.644,,7.644,7.644
fabriceyhc,intra_distance_positive,6.041,,6.041,6.041
wakaka,intra_distance_negative,7.437,,7.437,7.437
wakaka,intra_distance_positive,8.562,,8.562,8.562


'test'

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std,min,max
model,metrics,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
base,intra_distance_negative,5.799,,5.799,5.799
base,intra_distance_positive,6.085,,6.085,6.085
textattack,intra_distance_negative,6.935,,6.935,6.935
textattack,intra_distance_positive,7.184,,7.184,7.184
fabriceyhc,intra_distance_negative,9.273,,9.273,9.273
fabriceyhc,intra_distance_positive,7.438,,7.438,7.438
wakaka,intra_distance_negative,7.581,,7.581,7.581
wakaka,intra_distance_positive,8.447,,8.447,8.447
