In [1]:
import os
import numpy as np
import pandas as pd
from itertools import combinations

In [2]:
def _jet_minus_el(row):
    """Return ``boosted_jet - boosted_el``, the numerator shared by every ratio."""
    return row['boosted_jet'] - row['boosted_el']


# Maps each derived-column name to a function of a row (or of a whole frame)
# exposing the 'boosted_jet', 'boosted_el' and 'el_jet' fields.
ratios = {
    # difference relative to boosted_el
    'ratio1': lambda row: _jet_minus_el(row) / row['boosted_el'],
    # difference relative to el_jet
    'ratio2': lambda row: _jet_minus_el(row) / row['el_jet'],
    # plain difference, no normalisation
    'ratio3': lambda row: _jet_minus_el(row),
}

In [3]:
# Load the distances table; the first CSV column becomes the index.
wass_distances = pd.read_csv(
    os.path.join('..', '..', 'data', 'wass_distances.csv'),
    index_col=0,
)
# Append one derived column per entry of `ratios`. Each function only does
# column arithmetic, so it is called once on the whole frame (vectorised)
# instead of once per row through `DataFrame.apply(..., axis=1)`.
for name, func in ratios.items():
    wass_distances[name] = func(wass_distances)
wass_distances.head(10)

Unnamed: 0,name,boosted_el,boosted_jet,el_jet,description,ratio1,ratio2,ratio3
0,reta,0.009634,0.11234,0.106335,fold_0,10.661268,0.965875,0.102706
1,eratio,0.030893,0.570198,0.540972,fold_0,17.457198,0.996918,0.539305
2,f1,0.168429,0.037788,0.131495,fold_0,-0.775644,-0.993509,-0.130641
3,f3,0.005357,0.007369,0.012707,fold_0,0.375631,0.158364,0.002012
4,wstot,0.168632,2.870422,2.806875,fold_0,16.021842,0.962562,2.70179
5,weta2,0.000888,0.00423,0.003354,fold_0,3.766014,0.996634,0.003343
6,rhad,0.007246,0.43224,0.43646,fold_0,58.650032,0.973728,0.424993
7,rhad1,0.005028,0.230405,0.233835,fold_0,44.828784,0.96383,0.225377
8,rphi,0.014843,0.118906,0.107186,fold_0,7.011002,0.970867,0.104063
9,reta,0.009672,0.112294,0.106135,fold_1,10.609704,0.966893,0.102622


In [4]:
def get_order(df: pd.DataFrame, sort_col: str) -> pd.Series:
    """Rank the rows of ``df`` by ``sort_col``, largest value first.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with a ``'name'`` column and the column named by ``sort_col``.
        The caller's frame is left untouched.
    sort_col : str
        Name of the column whose values determine the ranking.

    Returns
    -------
    pd.Series
        Two entries indexed ``'order'`` and ``'values'``: the ``'name'``
        values in descending order of ``sort_col`` and the matching sorted
        ``sort_col`` values, both as arrays.
    """
    # A stable sort keeps rows with equal values in their input order, so the
    # ranking of ties is reproducible rather than depending on the internals
    # of the default (unstable) sorting algorithm.
    ranked = df.sort_values(sort_col, ascending=False, kind='mergesort')
    return pd.Series(
        [ranked['name'].values, ranked[sort_col].values],
        index=['order', 'values']
    )
# For every ratio column, build a table indexed by 'description' whose
# 'order' / 'values' cells hold the ranking returned by get_order for that group.
orders = {
    ratio_name: (
        wass_distances
        .groupby('description')
        .apply(get_order, sort_col=ratio_name)  # type: ignore
    )
    for ratio_name in ratios
}
orders['ratio2']

Unnamed: 0_level_0,order,values
description,Unnamed: 1_level_1,Unnamed: 2_level_1
complete,"[eratio, weta2, rhad, rphi, reta, rhad1, wstot...","[0.998441230854057, 0.997375408176468, 0.97394..."
fold_0,"[eratio, weta2, rhad, rphi, reta, rhad1, wstot...","[0.9969180831973063, 0.9966337317018531, 0.973..."
fold_1,"[eratio, weta2, rhad, rphi, reta, wstot, rhad1...","[0.9986839399103837, 0.9982627673618552, 0.974..."
fold_2,"[eratio, weta2, rhad, rphi, rhad1, reta, wstot...","[0.9990351944112922, 0.9982445610820844, 0.977..."
fold_3,"[eratio, weta2, rhad, rhad1, rphi, reta, wstot...","[0.9995209596886648, 0.9979037609989745, 0.978..."
fold_4,"[eratio, weta2, rhad, rphi, reta, rhad1, wstot...","[0.9972712119801533, 0.9956431382840439, 0.971..."


In [12]:
def get_order_differences(df: pd.DataFrame) -> list:
    """List every pair of index labels whose ``'order'`` arrays differ.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with an ``'order'`` column whose cells are array-likes.

    Returns
    -------
    list
        ``(left, right)`` tuples, in the order produced by
        ``itertools.combinations(df.index, 2)``, for each pair of rows whose
        ``'order'`` cells are not identical. Empty when all rows agree or
        when ``df`` has fewer than two rows.
    """
    # np.array_equal also compares shapes, so orders of different lengths
    # count as different instead of being broadcast or raising.
    return [
        (left, right)
        for left, right in combinations(df.index, 2)
        if not np.array_equal(df.loc[left, 'order'], df.loc[right, 'order'])
    ]

# For each ratio, the pairs of descriptions whose rankings disagree.
order_differences = {
    ratio_name: get_order_differences(orders[ratio_name])
    for ratio_name in orders
}
order_differences

{'ratio1': [('complete', 'fold_3'),
  ('fold_0', 'fold_3'),
  ('fold_1', 'fold_3'),
  ('fold_2', 'fold_3'),
  ('fold_3', 'fold_4')],
 'ratio2': [('complete', 'fold_1'),
  ('complete', 'fold_2'),
  ('complete', 'fold_3'),
  ('fold_0', 'fold_1'),
  ('fold_0', 'fold_2'),
  ('fold_0', 'fold_3'),
  ('fold_1', 'fold_2'),
  ('fold_1', 'fold_3'),
  ('fold_1', 'fold_4'),
  ('fold_2', 'fold_3'),
  ('fold_2', 'fold_4'),
  ('fold_3', 'fold_4')],
 'ratio3': []}

In [14]:
# Show the 'ratio1' rankings of 'complete' and 'fold_3', then their
# element-wise comparison.
complete_order, fold_order = (
    orders['ratio1'].loc[label, 'order'] for label in ('complete', 'fold_3')
)
print(complete_order)
print(fold_order)
print(complete_order == fold_order)

['rhad' 'rhad1' 'eratio' 'wstot' 'reta' 'rphi' 'weta2' 'f3' 'f1']
['rhad1' 'rhad' 'eratio' 'wstot' 'reta' 'rphi' 'weta2' 'f3' 'f1']
[False False  True  True  True  True  True  True  True]


In [15]:
# Show the rankings of 'complete' and of the chosen fold for the chosen ratio,
# then their element-wise comparison.
ratio = 'ratio2'
fold = 'fold_1'
complete_order, fold_order = (
    orders[ratio].loc[label, 'order'] for label in ('complete', fold)
)
print(complete_order)
print(fold_order)
print(complete_order == fold_order)

['eratio' 'weta2' 'rhad' 'rphi' 'reta' 'rhad1' 'wstot' 'f3' 'f1']
['eratio' 'weta2' 'rhad' 'rphi' 'reta' 'wstot' 'rhad1' 'f3' 'f1']
[ True  True  True  True  True False False  True  True]


In [16]:
# Show the rankings of 'complete' and of the chosen fold for the chosen ratio,
# then their element-wise comparison.
ratio = 'ratio2'
fold = 'fold_2'
complete_order, fold_order = (
    orders[ratio].loc[label, 'order'] for label in ('complete', fold)
)
print(complete_order)
print(fold_order)
print(complete_order == fold_order)

['eratio' 'weta2' 'rhad' 'rphi' 'reta' 'rhad1' 'wstot' 'f3' 'f1']
['eratio' 'weta2' 'rhad' 'rphi' 'rhad1' 'reta' 'wstot' 'f3' 'f1']
[ True  True  True  True False False  True  True  True]


In [17]:
# Show the rankings of 'complete' and of the chosen fold for the chosen ratio,
# then their element-wise comparison.
ratio = 'ratio2'
fold = 'fold_3'
complete_order, fold_order = (
    orders[ratio].loc[label, 'order'] for label in ('complete', fold)
)
print(complete_order)
print(fold_order)
print(complete_order == fold_order)

['eratio' 'weta2' 'rhad' 'rphi' 'reta' 'rhad1' 'wstot' 'f3' 'f1']
['eratio' 'weta2' 'rhad' 'rhad1' 'rphi' 'reta' 'wstot' 'f3' 'f1']
[ True  True  True False False False  True  True  True]
