In [1]:
import pandas as pd

# Show floats with two decimals and raise the displayed-row limit to 500.
pd.options.display.float_format = '{:.2f}'.format
pd.options.display.max_rows = 500

def load_data(model):
    """Load both result files for ``model`` and stack them into one frame.

    Reads ``results_{model}.csv`` followed by ``results_{model}_scaled.csv``
    (first CSV column becomes the index in each) and concatenates them
    row-wise in that order. Index labels are kept as read, so they may repeat.
    """
    paths = (f'results_{model}.csv', f'results_{model}_scaled.csv')
    frames = [pd.read_csv(path, index_col=0) for path in paths]
    return pd.concat(frames)

def prep_data(data, model_name, agg_type='minmax', seed=1):
    """Reduce a raw transfer-results frame to the ``tau`` series for one table.

    The caller's frame is never modified: the helper columns are added to a
    copy, and the index is built on a new object instead of in place.

    Args:
        data: DataFrame with at least the columns ``dataset``, ``seed``,
            ``use_cifar``, ``use_unseen``, ``train_current``,
            ``remove_zero_acc``, ``limit``, ``agg_type`` and ``tau``.
        model_name: Model tag stored in a new ``model`` column; ``run_type``
            reads it to build the ``run_type`` label.
        agg_type: Value of the ``agg_type`` column to keep. Missing values in
            that column are treated as ``'none'``. Defaults to ``'minmax'``.
        seed: Value of the ``seed`` column to keep. Defaults to ``1``.

    Returns:
        The ``tau`` column as a Series indexed by ``model``, ``dataset``,
        ``seed``, ``use_cifar``, ``use_unseen``, ``train_current``,
        ``remove_zero_acc``, ``limit``, ``agg_type`` and ``run_type``.
    """
    index_cols = ['model', 'dataset', 'seed', 'use_cifar', 'use_unseen',
                  'train_current', 'remove_zero_acc', 'limit', 'agg_type',
                  'run_type']

    # Copy first so the columns written below do not leak back to the caller.
    data = data.copy()
    data['model'] = model_name
    data['run_type'] = data.apply(run_type, axis=1)
    data['agg_type'] = data['agg_type'].fillna('none')

    data = data[data['agg_type'] == agg_type]
    data = data[data['remove_zero_acc']]  # only rows with remove_zero_acc set
    data = data[data['seed'] == seed]
    # Keep rows with use_cifar set, plus rows where neither use_cifar nor
    # use_unseen is set.
    data = data[data['use_cifar'] | (~data['use_unseen'] & ~data['use_cifar'])]

    return data.set_index(index_cols)['tau']

def run_type(row):
    """Build the display label for one result row.

    The label is the upper-cased ``model`` value, followed by ``' + Cifar10'``
    when ``use_cifar`` is set without ``use_unseen``, or ``' + 8 Others'``
    when both flags are set. Rows without ``use_cifar`` get the bare model name.
    """
    if row['use_cifar']:
        suffix = ' + 8 Others' if row['use_unseen'] else ' + Cifar10'
    else:
        suffix = ''
    return row['model'].upper() + suffix

# Filtered tau series for both models.
xdata = prep_data(pd.read_csv('results_transfer_xgb.csv'), 'xgb')
tdata = prep_data(pd.read_csv('results_transfer_rf.csv'), 'rf')

# One column per limit; swap xdata for tdata to build the RF table instead.
table = xdata.unstack('limit')
table['AVG'] = table.mean(axis=1)

# Drop the index levels that are not shown as row labels, then order the
# remaining ones as dataset / run_type / train_current.
hidden_levels = ['model', 'seed', 'use_cifar', 'use_unseen', 'remove_zero_acc', 'agg_type']
visible_index = table.index.droplevel(hidden_levels)
table.index = visible_index.swaplevel(2, 1)

latex_source = table.to_latex(float_format='%.2f')
print(latex_source)
table

\begin{tabular}{lllrrrrrrrrrr}
\toprule
 &  & limit & 100 & 200 & 300 & 400 & 500 & 600 & 700 & 800 & 900 & AVG \\
dataset & run_type & train_current &  &  &  &  &  &  &  &  &  &  \\
\midrule
\multirow[t]{5}{*}{addnist} & XGB & True & 0.34 & 0.42 & 0.24 & 0.11 & -0.02 & 0.26 & 0.23 & 0.37 & 0.26 & 0.24 \\
\cline{2-13}
 & \multirow[t]{2}{*}{XGB + Cifar10} & False & -0.13 & 0.50 & 0.11 & 0.22 & 0.35 & 0.26 & 0.32 & 0.26 & 0.24 & 0.24 \\
 &  & True & 0.37 & 0.45 & 0.27 & 0.21 & 0.09 & 0.25 & 0.34 & 0.37 & 0.27 & 0.29 \\
\cline{2-13}
 & \multirow[t]{2}{*}{XGB + 8 Others} & False & 0.26 & 0.29 & 0.16 & 0.28 & 0.39 & 0.28 & 0.22 & 0.29 & 0.22 & 0.26 \\
 &  & True & 0.24 & 0.36 & 0.28 & 0.21 & 0.19 & 0.22 & 0.31 & 0.38 & 0.29 & 0.28 \\
\cline{1-13} \cline{2-13}
\multirow[t]{5}{*}{chesseract} & XGB & True & 0.52 & 0.51 & 0.50 & 0.50 & 0.48 & 0.52 & 0.60 & 0.60 & 0.65 & 0.54 \\
\cline{2-13}
 & \multirow[t]{2}{*}{XGB + Cifar10} & False & 0.50 & 0.33 & 0.17 & 0.17 & 0.25 & 0.21 & 0.27 & 0.36 & 0.

Unnamed: 0_level_0,Unnamed: 1_level_0,limit,100,200,300,400,500,600,700,800,900,AVG
dataset,run_type,train_current,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
addnist,XGB,True,0.34,0.42,0.24,0.11,-0.02,0.26,0.23,0.37,0.26,0.24
addnist,XGB + Cifar10,False,-0.13,0.5,0.11,0.22,0.35,0.26,0.32,0.26,0.24,0.24
addnist,XGB + Cifar10,True,0.37,0.45,0.27,0.21,0.09,0.25,0.34,0.37,0.27,0.29
addnist,XGB + 8 Others,False,0.26,0.29,0.16,0.28,0.39,0.28,0.22,0.29,0.22,0.26
addnist,XGB + 8 Others,True,0.24,0.36,0.28,0.21,0.19,0.22,0.31,0.38,0.29,0.28
chesseract,XGB,True,0.52,0.51,0.5,0.5,0.48,0.52,0.6,0.6,0.65,0.54
chesseract,XGB + Cifar10,False,0.5,0.33,0.17,0.17,0.25,0.21,0.27,0.36,0.35,0.29
chesseract,XGB + Cifar10,True,0.41,0.38,0.49,0.39,0.42,0.5,0.59,0.59,0.6,0.48
chesseract,XGB + 8 Others,False,0.54,0.29,0.31,0.17,0.22,0.35,0.31,0.35,0.52,0.34
chesseract,XGB + 8 Others,True,0.54,0.43,0.52,0.37,0.44,0.5,0.44,0.57,0.56,0.49


In [2]:
import pandas as pd

# Show floats with two decimals and raise the displayed-row limit to 500.
pd.options.display.float_format = '{:.2f}'.format
pd.options.display.max_rows = 500

def load_data(model):
    """Return the rows of ``results_{model}.csv`` and ``results_{model}_scaled.csv``.

    Each file is read with its first column as the index; the two frames are
    concatenated row-wise, the ``_scaled`` file last.
    """
    plain = pd.read_csv(f'results_{model}.csv', index_col=0)
    scaled = pd.read_csv(f'results_{model}_scaled.csv', index_col=0)
    return pd.concat([plain, scaled])

def prep_data(data, model_name):
    """Return the ``tau`` series for rows with neither ``use_cifar`` nor ``use_unseen``.

    The caller's frame is left unchanged: missing ``agg_type`` values are
    filled on a new frame, and the index is built without ``inplace``.

    Args:
        data: DataFrame with at least the columns ``dataset``, ``seed``,
            ``use_cifar``, ``use_unseen``, ``train_current``,
            ``remove_zero_acc``, ``limit``, ``agg_type`` and ``tau``.
        model_name: Not used by this variant; kept so the call signature
            matches the other cells.

    Returns:
        The ``tau`` column as a Series indexed by ``dataset``, ``seed``,
        ``use_cifar``, ``use_unseen``, ``train_current``,
        ``remove_zero_acc``, ``limit`` and ``agg_type``.
    """
    index_cols = ['dataset', 'seed', 'use_cifar', 'use_unseen',
                  'train_current', 'remove_zero_acc', 'limit', 'agg_type']

    # assign() returns a new frame, so the fill never touches the input.
    filled = data.assign(agg_type=data['agg_type'].fillna('none'))
    selected = filled[~filled['use_cifar'] & ~filled['use_unseen']]
    return selected.set_index(index_cols)['tau']

def run_type(row):
    """Return the row label: upper-cased model name plus an optional suffix.

    Without ``use_cifar`` the label is the model name alone. With it, the
    suffix is ``Cifar10`` when ``use_unseen`` is unset and ``8 Others``
    otherwise.
    """
    if not row['use_cifar']:
        return row['model'].upper()
    extra = 'Cifar10' if not row['use_unseen'] else '8 Others'
    return f'{row["model"].upper()} + {extra}'

# tau series for both models, as filtered by this cell's prep_data.
xdata = prep_data(pd.read_csv('results_transfer_xgb.csv'), 'xgb')
tdata = prep_data(pd.read_csv('results_transfer_rf.csv'), 'rf')

# Average over the remaining index levels for every
# (limit, remove_zero_acc, agg_type) combination, then spread the limits
# across columns. Use tdata instead of xdata for the RF table.
mean_per_setting = xdata.unstack(['limit', 'remove_zero_acc', 'agg_type']).mean()
table = mean_per_setting.unstack(['limit'])
print(table.to_latex(float_format='{%.2f}'))
table

\begin{tabular}{llrrrrrrrrr}
\toprule
 & limit & 100 & 200 & 300 & 400 & 500 & 600 & 700 & 800 & 900 \\
remove_zero_acc & agg_type &  &  &  &  &  &  &  &  &  \\
\midrule
\multirow[t]{3}{*}{False} & minmax & {0.37} & {0.41} & {0.44} & {0.48} & {0.47} & {0.44} & {0.46} & {0.52} & {0.50} \\
 & none & {0.37} & {0.41} & {0.44} & {0.48} & {0.47} & {0.44} & {0.46} & {0.52} & {0.50} \\
 & percentile & {0.35} & {0.40} & {0.44} & {0.50} & {0.47} & {0.45} & {0.48} & {0.55} & {0.52} \\
\cline{1-11}
\multirow[t]{3}{*}{True} & minmax & {0.37} & {0.41} & {0.45} & {0.48} & {0.45} & {0.44} & {0.46} & {0.54} & {0.51} \\
 & none & {0.37} & {0.41} & {0.45} & {0.48} & {0.45} & {0.44} & {0.46} & {0.54} & {0.51} \\
 & percentile & {0.36} & {0.42} & {0.43} & {0.49} & {0.45} & {0.46} & {0.47} & {0.55} & {0.51} \\
\cline{1-11}
\bottomrule
\end{tabular}



Unnamed: 0_level_0,limit,100,200,300,400,500,600,700,800,900
remove_zero_acc,agg_type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
False,minmax,0.37,0.41,0.44,0.48,0.47,0.44,0.46,0.52,0.5
False,none,0.37,0.41,0.44,0.48,0.47,0.44,0.46,0.52,0.5
False,percentile,0.35,0.4,0.44,0.5,0.47,0.45,0.48,0.55,0.52
True,minmax,0.37,0.41,0.45,0.48,0.45,0.44,0.46,0.54,0.51
True,none,0.37,0.41,0.45,0.48,0.45,0.44,0.46,0.54,0.51
True,percentile,0.36,0.42,0.43,0.49,0.45,0.46,0.47,0.55,0.51


In [3]:
import pandas as pd

# Show floats with two decimals and raise the displayed-row limit to 500.
pd.options.display.float_format = '{:.2f}'.format
pd.options.display.max_rows = 500

def load_data(model):
    """Load both result files for ``model`` and stack them into one frame.

    Reads ``results_{model}.csv`` followed by ``results_{model}_scaled.csv``
    (first CSV column becomes the index in each) and concatenates them
    row-wise in that order.
    """
    paths = (f'results_{model}.csv', f'results_{model}_scaled.csv')
    frames = [pd.read_csv(path, index_col=0) for path in paths]
    return pd.concat(frames)

def prep_data(data, model_name):
    """Return the ``tau`` series for rows with ``use_cifar`` or ``use_unseen`` set.

    The caller's frame is left unchanged: missing ``agg_type`` values are
    filled on a new frame, and the index is built without ``inplace``.

    Args:
        data: DataFrame with at least the columns ``dataset``, ``seed``,
            ``use_cifar``, ``use_unseen``, ``train_current``,
            ``remove_zero_acc``, ``limit``, ``agg_type`` and ``tau``.
        model_name: Not used by this variant; kept so the call signature
            matches the other cells.

    Returns:
        The ``tau`` column as a Series indexed by ``dataset``, ``seed``,
        ``use_cifar``, ``use_unseen``, ``train_current``,
        ``remove_zero_acc``, ``limit`` and ``agg_type``.
    """
    index_cols = ['dataset', 'seed', 'use_cifar', 'use_unseen',
                  'train_current', 'remove_zero_acc', 'limit', 'agg_type']

    # assign() returns a new frame, so the fill never touches the input.
    filled = data.assign(agg_type=data['agg_type'].fillna('none'))
    selected = filled[filled['use_cifar'] | filled['use_unseen']]
    return selected.set_index(index_cols)['tau']

def run_type(row):
    """Build the display label for one result row.

    The label is the upper-cased ``model`` value, followed by ``' + Cifar10'``
    when ``use_cifar`` is set without ``use_unseen``, or ``' + 8 Others'``
    when both flags are set. Rows without ``use_cifar`` get the bare model name.
    """
    if row['use_cifar']:
        suffix = ' + 8 Others' if row['use_unseen'] else ' + Cifar10'
    else:
        suffix = ''
    return row['model'].upper() + suffix

# tau series for both models, as filtered by this cell's prep_data.
xdata = prep_data(pd.read_csv('results_transfer_xgb.csv'), 'xgb')
tdata = prep_data(pd.read_csv('results_transfer_rf.csv'), 'rf')

# Average over the remaining index levels for every
# (limit, remove_zero_acc, agg_type) combination, then spread the limits
# across columns. Use tdata instead of xdata for the RF table.
mean_per_setting = xdata.unstack(['limit', 'remove_zero_acc', 'agg_type']).mean()
table = mean_per_setting.unstack(['limit'])
print(table.to_latex(float_format='{%.2f}'))
table

\begin{tabular}{llrrrrrrrrr}
\toprule
 & limit & 100 & 200 & 300 & 400 & 500 & 600 & 700 & 800 & 900 \\
remove_zero_acc & agg_type &  &  &  &  &  &  &  &  &  \\
\midrule
\multirow[t]{3}{*}{False} & minmax & {0.29} & {0.31} & {0.29} & {0.32} & {0.32} & {0.31} & {0.33} & {0.35} & {0.35} \\
 & none & {0.29} & {0.31} & {0.26} & {0.31} & {0.31} & {0.30} & {0.31} & {0.33} & {0.33} \\
 & percentile & {0.32} & {0.34} & {0.31} & {0.36} & {0.37} & {0.35} & {0.36} & {0.40} & {0.40} \\
\cline{1-11}
\multirow[t]{3}{*}{True} & minmax & {0.35} & {0.35} & {0.31} & {0.35} & {0.34} & {0.34} & {0.35} & {0.38} & {0.40} \\
 & none & {0.32} & {0.32} & {0.27} & {0.32} & {0.32} & {0.31} & {0.31} & {0.34} & {0.36} \\
 & percentile & {0.34} & {0.35} & {0.31} & {0.36} & {0.37} & {0.35} & {0.36} & {0.41} & {0.40} \\
\cline{1-11}
\bottomrule
\end{tabular}



Unnamed: 0_level_0,limit,100,200,300,400,500,600,700,800,900
remove_zero_acc,agg_type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
False,minmax,0.29,0.31,0.29,0.32,0.32,0.31,0.33,0.35,0.35
False,none,0.29,0.31,0.26,0.31,0.31,0.3,0.31,0.33,0.33
False,percentile,0.32,0.34,0.31,0.36,0.37,0.35,0.36,0.4,0.4
True,minmax,0.35,0.35,0.31,0.35,0.34,0.34,0.35,0.38,0.4
True,none,0.32,0.32,0.27,0.32,0.32,0.31,0.31,0.34,0.36
True,percentile,0.34,0.35,0.31,0.36,0.37,0.35,0.36,0.41,0.4


In [4]:
import pandas as pd

# Show floats with two decimals and raise the displayed-row limit to 500.
pd.options.display.float_format = '{:.2f}'.format
pd.options.display.max_rows = 500

def load_data(model):
    """Return the rows of ``results_{model}.csv`` and ``results_{model}_scaled.csv``.

    Each file is read with its first column as the index; the two frames are
    concatenated row-wise, the ``_scaled`` file last.
    """
    plain = pd.read_csv(f'results_{model}.csv', index_col=0)
    scaled = pd.read_csv(f'results_{model}_scaled.csv', index_col=0)
    return pd.concat([plain, scaled])

def prep_data(data, model_name, agg_type='percentile'):
    """Reduce a raw transfer-results frame to the ``tau`` series for the summary tables.

    The caller's frame is never modified: the helper columns are added to a
    copy, and the index is built on a new object instead of in place.

    Args:
        data: DataFrame with at least the columns ``dataset``, ``seed``,
            ``use_cifar``, ``use_unseen``, ``train_current``,
            ``remove_zero_acc``, ``limit``, ``agg_type`` and ``tau``.
        model_name: Model tag stored in a new ``model`` column; ``run_type``
            reads it to build the ``run_type`` label.
        agg_type: Value of the ``agg_type`` column to keep. Missing values in
            that column are treated as ``'none'``. Defaults to
            ``'percentile'``.

    Returns:
        The ``tau`` column as a Series indexed by ``model``, ``dataset``,
        ``seed``, ``use_cifar``, ``use_unseen``, ``train_current``,
        ``remove_zero_acc``, ``limit``, ``agg_type`` and ``run_type``.
        All seeds are kept.
    """
    index_cols = ['model', 'dataset', 'seed', 'use_cifar', 'use_unseen',
                  'train_current', 'remove_zero_acc', 'limit', 'agg_type',
                  'run_type']

    # Copy first so the columns written below do not leak back to the caller.
    data = data.copy()
    data['model'] = model_name
    data['run_type'] = data.apply(run_type, axis=1)
    data['agg_type'] = data['agg_type'].fillna('none')

    data = data[data['agg_type'] == agg_type]
    data = data[data['remove_zero_acc']]  # only rows with remove_zero_acc set
    # Keep rows with use_cifar set, plus rows where neither use_cifar nor
    # use_unseen is set.
    data = data[data['use_cifar'] | (~data['use_unseen'] & ~data['use_cifar'])]

    return data.set_index(index_cols)['tau']

def run_type(row):
    """Return the row label: upper-cased model name plus an optional suffix.

    Without ``use_cifar`` the label is the model name alone. With it, the
    suffix is ``Cifar10`` when ``use_unseen`` is unset and ``8 Others``
    otherwise.
    """
    if not row['use_cifar']:
        return row['model'].upper()
    extra = 'Cifar10' if not row['use_unseen'] else '8 Others'
    return f'{row["model"].upper()} + {extra}'

def _dataset_table(series, row_order):
    """Average ``series`` over limits and seeds into a run-type x dataset table.

    ``series`` is a tau series as returned by this cell's ``prep_data``.
    The result has one column per dataset plus an ``avg`` column, an index of
    (run_type, train_current), and its rows arranged as listed in ``row_order``.
    """
    wide = series.unstack('limit')
    wide.index = wide.index.droplevel(['model', 'use_cifar', 'use_unseen', 'remove_zero_acc','agg_type'])
    # Mean over limits, then mean over seeds, then one column per dataset.
    summary = wide.mean(axis=1).unstack('seed').mean(axis=1).unstack('dataset')
    summary['avg'] = summary.mean(axis=1)
    summary.index = summary.index.swaplevel(0,1)
    return summary.loc[row_order]


xdata = prep_data(pd.read_csv('results_transfer_xgb.csv'), 'xgb')
tdata = prep_data(pd.read_csv('results_transfer_rf.csv'), 'rf')

# Column order used for both printed tables.
cols = "AddNIST Language MultNIST CIFARTile Gutenberg Isabella GeoClassing Chesseract".lower().split() + ['avg']

# Random-forest table.
table = _dataset_table(tdata, [
    ('RF', True),
    ('RF + Cifar10', True),
    ('RF + Cifar10', False),
    ('RF + 8 Others', True),
    ('RF + 8 Others', False),
])
print(table[cols].to_latex(float_format='{%.3f}'))

# XGBoost table.
table = _dataset_table(xdata, [
    ('XGB', True),
    ('XGB + Cifar10', True),
    ('XGB + Cifar10', False),
    ('XGB + 8 Others', True),
    ('XGB + 8 Others', False),
])
print(table[cols].to_latex(float_format='{%.3f}'))

\begin{tabular}{llrrrrrrrrr}
\toprule
 & dataset & addnist & language & multnist & cifartile & gutenberg & isabella & geoclassing & chesseract & avg \\
run_type & train_current &  &  &  &  &  &  &  &  &  \\
\midrule
RF & True & {0.246} & {0.551} & {0.393} & {0.360} & {0.550} & {0.472} & {0.521} & {0.561} & {0.457} \\
\cline{1-11}
\multirow[t]{2}{*}{RF + Cifar10} & True & {0.250} & {0.494} & {0.367} & {0.383} & {0.480} & {0.461} & {0.489} & {0.500} & {0.428} \\
 & False & {0.234} & {0.272} & {0.280} & {0.271} & {0.327} & {0.032} & {0.291} & {0.332} & {0.255} \\
\cline{1-11}
\multirow[t]{2}{*}{RF + 8 Others} & True & {0.263} & {0.437} & {0.362} & {0.368} & {0.471} & {0.480} & {0.492} & {0.526} & {0.425} \\
 & False & {0.246} & {0.286} & {0.304} & {0.297} & {0.397} & {0.187} & {0.312} & {0.373} & {0.300} \\
\cline{1-11}
\bottomrule
\end{tabular}

\begin{tabular}{llrrrrrrrrr}
\toprule
 & dataset & addnist & language & multnist & cifartile & gutenberg & isabella & geoclassing & chesseract &

In [5]:
# Read the leave-one-out results and move agg_type and model into the columns.
data = (
    pd.read_csv('leave_one_out_transfer.csv', index_col=0)
    .set_index(['model', 'target', 'agg_type'])
    .unstack('agg_type')
    .unstack('model')
)
# Put model on the outermost column level before agg_type goes back to the rows.
data.columns = data.columns.swaplevel(2,0)
data = data.stack('agg_type', future_stack=True).sort_index(level=1)
data = data[sorted(data.columns)]

# Rows whose agg_type (index level 1) is 'percentile'.
table = data.xs('percentile', level=1)
print(table.to_latex(float_format='{%.3f}'))
table

\begin{tabular}{lrrrr}
\toprule
model & \multicolumn{2}{r}{rf} & \multicolumn{2}{r}{xgb} \\
 & rho & tau & rho & tau \\
target &  &  &  &  \\
\midrule
addnist & {0.671} & {0.500} & {0.551} & {0.409} \\
chesseract & {0.599} & {0.423} & {0.626} & {0.445} \\
cifartile & {0.558} & {0.374} & {0.557} & {0.388} \\
geoclassing & {0.693} & {0.504} & {0.667} & {0.475} \\
gutenberg & {0.816} & {0.643} & {0.839} & {0.669} \\
isabella & {0.308} & {0.211} & {0.278} & {0.187} \\
language & {0.623} & {0.450} & {0.622} & {0.456} \\
multnist & {0.722} & {0.533} & {0.749} & {0.561} \\
\bottomrule
\end{tabular}



model,rf,rf,xgb,xgb
Unnamed: 0_level_1,rho,tau,rho,tau
target,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
addnist,0.67,0.5,0.55,0.41
chesseract,0.6,0.42,0.63,0.44
cifartile,0.56,0.37,0.56,0.39
geoclassing,0.69,0.5,0.67,0.48
gutenberg,0.82,0.64,0.84,0.67
isabella,0.31,0.21,0.28,0.19
language,0.62,0.45,0.62,0.46
multnist,0.72,0.53,0.75,0.56


In [6]:
# (target, agg_type) rows to print, in display order.
selected_rows = [('addnist', 'minmax'), ('cifartile', 'minmax'), ('addnist', 'percentile'), ('cifartile', 'percentile')]
print(data.loc[selected_rows].to_latex(float_format='{%.3f}'))

\begin{tabular}{llrrrr}
\toprule
 & model & \multicolumn{2}{r}{rf} & \multicolumn{2}{r}{xgb} \\
 &  & rho & tau & rho & tau \\
target & agg_type &  &  &  &  \\
\midrule
addnist & minmax & {0.599} & {0.433} & {0.541} & {0.397} \\
\cline{1-6}
cifartile & minmax & {0.494} & {0.327} & {0.563} & {0.393} \\
\cline{1-6}
addnist & percentile & {0.671} & {0.500} & {0.551} & {0.409} \\
\cline{1-6}
cifartile & percentile & {0.558} & {0.374} & {0.557} & {0.388} \\
\cline{1-6}
\bottomrule
\end{tabular}



In [7]:
# Lower-cased dataset names; they fix the column order and, after
# 'cifar10all', the row order of the table.
cols = 'CIFAR10 AddNIST Language MultNIST CIFARTile Gutenberg Isabella GeoClassing Chesseract'.lower().split()
print(cols)
pd.set_option('display.float_format', '{:.3f}'.format)

# tau for every (source, target) pair, reshaped to source rows x target columns.
tau_by_pair = pd.read_csv('one_to_one_transfer_xgb.csv', index_col=0).set_index(['source', 'target'])['tau']
data = tau_by_pair.unstack('target')[cols].loc[['cifar10all']+cols]
data['avg'] = data.mean(axis=1)
print(data.to_latex(float_format='{%.3f}'))

['cifar10', 'addnist', 'language', 'multnist', 'cifartile', 'gutenberg', 'isabella', 'geoclassing', 'chesseract']
\begin{tabular}{lrrrrrrrrrr}
\toprule
target & cifar10 & addnist & language & multnist & cifartile & gutenberg & isabella & geoclassing & chesseract & avg \\
source &  &  &  &  &  &  &  &  &  &  \\
\midrule
cifar10all & {0.639} & {0.466} & {0.349} & {0.459} & {0.381} & {0.599} & {0.281} & {0.174} & {0.405} & {0.417} \\
cifar10 & {0.535} & {0.187} & {0.365} & {0.409} & {0.391} & {0.506} & {0.254} & {0.354} & {0.332} & {0.370} \\
addnist & {0.488} & {0.596} & {0.337} & {0.489} & {0.423} & {0.503} & {0.248} & {0.261} & {0.421} & {0.418} \\
language & {0.443} & {0.125} & {0.277} & {0.216} & {0.087} & {0.205} & {0.255} & {0.306} & {0.386} & {0.255} \\
multnist & {0.495} & {0.330} & {0.366} & {0.416} & {0.301} & {0.450} & {0.183} & {0.153} & {0.304} & {0.333} \\
cifartile & {0.523} & {0.508} & {0.247} & {0.368} & {0.280} & {0.437} & {-0.082} & {0.105} & {0.222} & {0.290} \\
guten

In [8]:
# Lower-cased dataset names; they fix the column order and, after
# 'cifar10all', the row order of the table.
cols = 'CIFAR10 AddNIST Language MultNIST CIFARTile Gutenberg Isabella GeoClassing Chesseract'.lower().split()
print(cols)
pd.set_option('display.float_format', '{:.3f}'.format)

# tau for every (source, target) pair, reshaped to source rows x target columns.
tau_by_pair = pd.read_csv('one_to_one_transfer_rf.csv', index_col=0).set_index(['source', 'target'])['tau']
data = tau_by_pair.unstack('target')[cols].loc[['cifar10all']+cols]
data['avg'] = data.mean(axis=1)
print(data.to_latex(float_format='{%.3f}'))

['cifar10', 'addnist', 'language', 'multnist', 'cifartile', 'gutenberg', 'isabella', 'geoclassing', 'chesseract']
\begin{tabular}{lrrrrrrrrrr}
\toprule
target & cifar10 & addnist & language & multnist & cifartile & gutenberg & isabella & geoclassing & chesseract & avg \\
source &  &  &  &  &  &  &  &  &  &  \\
\midrule
cifar10all & {0.648} & {0.516} & {0.326} & {0.447} & {0.374} & {0.525} & {0.239} & {0.284} & {0.424} & {0.420} \\
cifar10 & {0.582} & {0.370} & {0.380} & {0.434} & {0.379} & {0.586} & {0.236} & {0.375} & {0.414} & {0.417} \\
addnist & {0.526} & {0.577} & {0.342} & {0.478} & {0.336} & {0.577} & {0.163} & {0.230} & {0.412} & {0.405} \\
language & {0.486} & {0.229} & {0.200} & {0.128} & {0.126} & {0.133} & {0.268} & {0.219} & {0.430} & {0.246} \\
multnist & {0.497} & {0.280} & {0.401} & {0.445} & {0.307} & {0.500} & {0.241} & {0.307} & {0.335} & {0.368} \\
cifartile & {0.553} & {0.575} & {0.236} & {0.365} & {0.332} & {0.495} & {-0.056} & {0.219} & {0.257} & {0.331} \\
guten