In [None]:
import pandas as pd
import os
import numpy as np
import itertools
from utils.hp_functions import get_search_space, get_search_space_with_radius
import matplotlib.pyplot as plt

In [None]:
def collect_results(logs_folder, method, dset, net, task, seed, search_space):
    """Collect the final metrics of every run of a hyper-parameter grid.

    The grid is the Cartesian product of the value lists in `search_space`.
    Each combination is looked up at
    ``<logs_folder>/<method>/<net>/<dset>/<task>/<key>_<value>/.../seed_<seed>/run_0/results.npy``
    (one ``<key>_<value>`` directory per hyper-parameter, in `search_space` order).

    Args:
        logs_folder: root folder of the logs.
        method: method name; for 'ar' only the combinations whose second value
            is the opposite of the third one are considered.
        dset: dataset name (path component).
        net: network name (path component).
        task: task name (path component).
        seed: seed of the runs to collect.
        search_space: mapping from hyper-parameter name to an iterable of values.

    Returns:
        A list of one-row DataFrames, one per combination whose results file
        exists. Each row holds the hyper-parameter values, the last value of the
        accuracy metrics found in the file (times 100), the mean of the last 100
        'total_loss' values ('final_loss'), and the last value of 's_acc'
        (times 100), 'ent', 'dev_svm' and 'snd'.

    Side effects:
        Prints ``<method> <completed>/<total> <percentage>``.
    """
    accuracy_metrics = ['t_acc', '1shot_acc', '1shot_10crop_acc', '3shot_acc', '3shot_10crop_acc', '25random_acc',
                        '50random_acc', '50random_10crop_acc', '100random_acc', '100random_10crop_acc']
    hp_names = list(search_space.keys())
    entries = []
    total = 0
    completed = 0

    for hp_params in itertools.product(*[search_space[key] for key in hp_names]):
        # For 'ar', combinations where the 2nd value is not the opposite of the 3rd
        # are not part of the grid.
        if method == 'ar' and hp_params[1] != -hp_params[2]:
            continue
        total += 1
        path_results = os.path.join(logs_folder, method, net, dset, task)
        for key, value in zip(hp_names, hp_params):
            path_results = os.path.join(path_results, f'{key}_{value}')
        path_results = os.path.join(path_results, f'seed_{seed}', 'run_0', 'results.npy')
        if os.path.exists(path_results):
            completed += 1
            # NOTE: allow_pickle=True unpickles the file; only load logs you produced yourself.
            results = np.load(path_results, allow_pickle=True).item()
            temp = dict(zip(hp_names, hp_params))

            # Accuracy metrics are optional: keep only those stored for this run.
            for metric in accuracy_metrics:
                if metric in results:
                    temp[metric] = np.array(results[metric])[-1]*100

            temp['final_loss'] = np.mean(np.array(results['total_loss'])[-100:])
            temp['s_acc'] = np.array(results['s_acc'])[-1]*100
            temp['ent'] = np.array(results['ent'])[-1]
            temp['dev_svm'] = np.array(results['dev_svm'])[-1]
            temp['snd'] = np.array(results['snd'])[-1]

            entries.append(pd.DataFrame(temp, index=[0]))
        elif os.path.exists(os.path.dirname(path_results)):
            # The run directory exists but holds no results file: it still counts
            # as completed, although it contributes no entry.
            completed += 1
    # An empty grid (no combination at all) must not crash the progress report.
    percentage = completed/total*100 if total else 0.0
    print(f'   {method} {completed}/{total} {percentage:1.2f}')
    return entries

In [None]:
# Collect the results of the main hyper-parameter search for every dataset/task
# pair and every method, and store them as {dataset: {method: DataFrame}}.
logs_folder = 'logs_hp_search'
net = 'ResNet50'
seed = 2020

results = {}
for dset, task in zip(['office-home', 'visda'], ['AC', 'TV']):
    print(dset)
    results[dset] = {}
    for method in ['ba3us', 'pada', 'safn', 'ar', 'jumbot', 'mpot']:
        search_space = get_search_space(method)
        entries = collect_results(logs_folder, method, dset, net, task, seed, search_space)
        # Methods without any finished run are left out of the dictionary.
        if len(entries)>0:
            results[dset][method] = pd.concat(entries, ignore_index=True)
# Save once, after everything has been collected, instead of rewriting the file
# after every method. NOTE: the 'results' folder must already exist.
np.save('results/data_hp_search.npy', results)

In [None]:
# Collect the runs stored under 'logs_hp_search_nonlinear' (safn on the
# office-home AC task only) and store them as {dataset: {method: DataFrame}}.
logs_folder = 'logs_hp_search_nonlinear'
net = 'ResNet50'
seed = 2020

results = {}
for dset, task in [('office-home', 'AC')]:
    print(dset)
    results[dset] = {}
    for method in ('safn',):
        search_space = get_search_space(method)
        entries = collect_results(logs_folder, method, dset, net, task, seed, search_space)
        # Only keep methods for which at least one run was found.
        if entries:
            results[dset][method] = pd.concat(entries, ignore_index=True)
np.save('results/data_hp_search_nonlinear.npy', results)

In [None]:
# Collect the runs stored under 'logs_hp_search_with_radius', whose search space
# comes from get_search_space_with_radius, as {dataset: {method: DataFrame}}.
logs_folder = 'logs_hp_search_with_radius'
net = 'ResNet50'
seed = 2020

results = {}
for dset, task in [('office-home', 'AC'), ('visda', 'TV')]:
    print(dset)
    results[dset] = {}
    for method in ('source_only_plus', 'ar'):
        search_space = get_search_space_with_radius(method)
        entries = collect_results(logs_folder, method, dset, net, task, seed, search_space)
        # Only keep methods for which at least one run was found.
        if entries:
            results[dset][method] = pd.concat(entries, ignore_index=True)
np.save('results/data_hp_search_radius.npy', results)

#### Counting how many models are trained for each method

In [None]:
# Count the hyper-parameter combinations of each method's search space.
total = 0
for method in ['pada', 'safn', 'ba3us', 'ar', 'jumbot', 'mpot']:
    space = get_search_space(method)
    count = 0
    for hp_params in itertools.product(*space.values()):
        # Apply the same filter as collect_results: for 'ar', combinations whose
        # 2nd value is not the opposite of the 3rd are skipped there, so counting
        # the full product would over-count the 'ar' models.
        # NOTE(review): assumes the skipped combinations are never trained - confirm.
        if method == 'ar' and hp_params[1] != -hp_params[2]:
            continue
        count += 1
    print(f'For {method} {count} models are trained.')
    total += count
print(f'A total of {total} models are trained.')

In [None]:
def get_source_only_acc(logs_folder, dataset):
    """Return the mean final source accuracy of the 'source_only_plus' runs.

    For each seed in (2020, 2021, 2022) the file
    ``<logs_folder>/source_only_plus/ResNet50/<dataset>/<task>/seed_<seed>/run_0/results.npy``
    is loaded and the last value of its 's_acc' entry is kept.

    Args:
        logs_folder: root folder of the logs.
        dataset: 'office-home' (task 'AC') or 'visda' (task 'TV').

    Returns:
        The mean over the seeds of the last 's_acc' value, multiplied by 100.

    Raises:
        ValueError: if `dataset` is not one of the supported datasets.
    """
    tasks = {'office-home': 'AC', 'visda': 'TV'}
    if dataset not in tasks:
        # Previously an unknown dataset surfaced as an UnboundLocalError on `task`.
        raise ValueError(f'Unknown dataset {dataset!r}; expected one of {sorted(tasks)}')
    task = tasks[dataset]
    net = 'ResNet50'
    seeds = [2020, 2021, 2022]

    s_acc = []
    for seed in seeds:
        output_dir = os.path.join(logs_folder, 'source_only_plus', net, dataset, task)
        path_results = os.path.join(output_dir, f"seed_{seed}", 'run_0', 'results.npy')
        # NOTE: allow_pickle=True unpickles the file; only load logs you produced yourself.
        results = np.load(path_results, allow_pickle=True).item()
        s_acc.append(np.array(results['s_acc'])[-1])
    return np.mean(s_acc)*100

In [None]:
def gather_hp_heuristic(df, method, s_acc_thr, radius=False):
    """Select the hyper-parameters of `method` for each model selection strategy.

    For ent, dev_svm and snd, first keep the runs whose source accuracy
    ('s_acc') is strictly above `s_acc_thr`, then select the one with the best
    metric (smallest for ent and dev_svm, largest for snd).

    Args:
        df: object indexable by method name, yielding a DataFrame with one row
            per run (hyper-parameter columns plus metric columns).
        method: method name, used both as key of `df` and as argument of get_hp.
        s_acc_thr: source accuracy threshold, compared against the 's_acc' column.
        radius: when True, 'radius' is added to the hyper-parameter names.

    Returns:
        A tuple ``(r, acc)``: ``r[strategy]`` maps each hyper-parameter name to
        its value in the selected run, ``acc[strategy]`` is the 't_acc' of that
        run. Strategies: 'oracle', '1shot_acc', '25random_acc', '50random_acc',
        '100random_acc', 's_acc', 'ent', 'dev_svm' and 'snd'.
    """
    hp = get_hp(method)
    if radius:
        hp.append('radius')
    runs = df[method]
    r = {}
    acc = {}

    # Oracle selection: the run with the highest target accuracy.
    chosen = runs.sort_values('t_acc').iloc[-1]
    r['oracle'] = {key: chosen[key] for key in hp}
    acc['oracle'] = chosen['t_acc']

    # Accuracy-based selection (bigger is better); ties on the metric are broken
    # by the highest source accuracy.
    for metric in ['1shot_acc', '25random_acc', '50random_acc', '100random_acc', 's_acc']:
        tied = runs[runs[metric].max() == runs[metric]]
        chosen = tied.iloc[tied['s_acc'].argmax()]
        r[metric] = {key: chosen[key] for key in hp}
        acc[metric] = chosen['t_acc']

    # Ent and dev_svm selection (smaller is better) among the runs above the threshold.
    for metric in ['ent', 'dev_svm']:
        chosen = runs[runs['s_acc'] > s_acc_thr].sort_values(metric).iloc[0]
        r[metric] = {key: chosen[key] for key in hp}
        acc[metric] = chosen['t_acc']

    # SND selection (bigger is better) among the runs above the threshold.
    for metric in ['snd']:
        chosen = runs[runs['s_acc'] > s_acc_thr].sort_values(metric).iloc[-1]
        r[metric] = {key: chosen[key] for key in hp}
        acc[metric] = chosen['t_acc']
    return r, acc

def gather_hp_best(df, method, radius=False):
    """Select the hyper-parameters of `method` solely based on the best metric.

    For ent, dev_svm and snd, runs whose 'final_loss' is >= 5 are removed before
    the selection; the oracle and accuracy-based strategies use every run.

    Args:
        df: object indexable by method name, yielding a DataFrame with one row
            per run (hyper-parameter columns plus metric columns).
        method: method name, used both as key of `df` and as argument of get_hp.
        radius: when True, 'radius' is added to the hyper-parameter names.

    Returns:
        A tuple ``(r, acc)``: ``r[strategy]`` maps each hyper-parameter name to
        its value in the selected run, ``acc[strategy]`` is the 't_acc' of that
        run. When the method has no hyper-parameter at all, only ``r`` is
        returned (every strategy mapped to an empty dict) and `df` is not read.
    """
    hp = get_hp(method)
    if radius:
        hp.append('radius')
    if not hp:
        # Nothing to select: a bare dict (not a tuple) is returned on this path.
        return {metric: {} for metric in ['oracle', '1shot_acc', '50random_acc', '100random_acc',
                                          's_acc', 'ent', 'dev_svm', 'snd']}
    runs = df[method]
    r = {}
    acc = {}

    # Oracle selection: the run with the highest target accuracy.
    chosen = runs.sort_values('t_acc').iloc[-1]
    r['oracle'] = {key: chosen[key] for key in hp}
    acc['oracle'] = chosen['t_acc']

    # Accuracy-based selection (bigger is better); ties on the metric are broken
    # by the highest source accuracy.
    for metric in ['1shot_acc', '50random_acc', '100random_acc', 's_acc']:
        tied = runs[runs[metric].max() == runs[metric]]
        chosen = tied.iloc[tied['s_acc'].argmax()]
        r[metric] = {key: chosen[key] for key in hp}
        acc[metric] = chosen['t_acc']

    # Ent and dev_svm selection (smaller is better) among the runs with final loss < 5.
    for metric in ['ent', 'dev_svm']:
        ranked = runs.sort_values(metric)
        chosen = ranked[ranked['final_loss'] < 5].iloc[0]
        r[metric] = {key: chosen[key] for key in hp}
        acc[metric] = chosen['t_acc']

    # SND selection (bigger is better) among the runs with final loss < 5.
    for metric in ['snd']:
        ranked = runs.sort_values(metric)
        chosen = ranked[ranked['final_loss'] < 5].iloc[-1]
        r[metric] = {key: chosen[key] for key in hp}
        acc[metric] = chosen['t_acc']

    return r, acc

In [None]:
def get_hp(method):
    """Return the names of the hyper-parameters tuned for `method`.

    Args:
        method: one of 'jumbot', 'mixunbot', 'ar', 'ba3us', 'mpot', 'pada',
            'safn' or 'source_only_plus'.

    Returns:
        A new list of hyper-parameter names (empty for 'source_only_plus').
        A fresh list is returned on every call, so callers may extend it in place.

    Raises:
        ValueError: if `method` is not a known method.
    """
    hp_by_method = {
        'jumbot': ['tau', 'eta_1', 'eta_2', 'eta_3'],
        'mixunbot': ['tau', 'eta_1', 'eta_2', 'eta_3'],
        'ar': ['rho0', 'up', 'low', 'ent_weight'],
        'ba3us': ['cot_weight', 'ent_weight'],
        'mpot': ['epsilon', 'eta_1', 'eta_2', 'mass'],
        'pada': ['lambda'],
        'safn': ['lambda', 'delta_r'],
        'source_only_plus': [],
    }
    if method not in hp_by_method:
        # Previously an unknown method surfaced as an UnboundLocalError on `hp`.
        raise ValueError(f'Unknown method {method!r}; expected one of {sorted(hp_by_method)}')
    return list(hp_by_method[method])

In [None]:
# Select the hyper-parameters of every method for each model selection strategy
# and save them as {dataset: {method: {strategy: {hp_name: value}}}}.
df = np.load('results/data_hp_search.npy', allow_pickle=True).item()
hp = {}
acc = {}
for dset in ['office-home', 'visda']:
    s_acc = get_source_only_acc('logs_hp_chosen', dset)
    thr = 0.9
    # NOTE(review): the printed value is the threshold s_acc*thr, not s_acc itself.
    if dset != 'office-home':
        print(f'Accuracy on TV task for S. Only: {s_acc*thr:.2f}')
    else:
        print(f'Accuracy on AC task for S. Only: {s_acc*thr:.2f}')
    hp[dset] = {}
    acc[dset] = {}
    for method in df[dset]:
        # These methods use the source-accuracy threshold heuristic; the others
        # are selected on the best metric alone.
        if method not in ('jumbot', 'mpot', 'ba3us', 'safn'):
            hp[dset][method], _ = gather_hp_best(df[dset], method)
        else:
            hp[dset][method], _ = gather_hp_heuristic(df[dset], method, s_acc*thr)
    # source_only_plus has no hyper-parameter: gather_hp_best then returns the
    # dictionary alone, hence no tuple unpacking here.
    hp[dset]['source_only_plus'] = gather_hp_best(df[dset], 'source_only_plus')
np.save('results/hp_chosen.npy', hp)

In [None]:
# Select the hyper-parameters (including 'radius') from the radius search on
# office-home and save them as {dataset: {method: {strategy: {hp_name: value}}}}.
df = np.load('results/data_hp_search_radius.npy', allow_pickle=True).item()
hp = {}
acc = {}
for dset in ['office-home']:
    s_acc = get_source_only_acc('logs_hp_chosen', dset)
    thr = 0.9
    # NOTE(review): the printed value is the threshold s_acc*thr, not s_acc itself.
    if dset != 'office-home':
        print(f'Accuracy on TV task for S. Only: {s_acc*thr:.2f}')
    else:
        print(f'Accuracy on AC task for S. Only: {s_acc*thr:.2f}')
    hp[dset] = {}
    acc[dset] = {}
    for method in df[dset]:
        # These methods use the source-accuracy threshold heuristic; the others
        # are selected on the best metric alone.
        if method not in ('jumbot', 'mpot', 'ba3us', 'safn'):
            hp[dset][method], _ = gather_hp_best(df[dset], method, radius=True)
        else:
            hp[dset][method], _ = gather_hp_heuristic(df[dset], method, s_acc*thr, radius=True)
np.save('results/hp_chosen_radius.npy', hp)

In [None]:
# Select the hyper-parameters from the nonlinear search on office-home and save
# them as {dataset: {method: {strategy: {hp_name: value}}}}.
df = np.load('results/data_hp_search_nonlinear.npy', allow_pickle=True).item()
hp = {}
acc = {}
for dset in ['office-home']:
    s_acc = get_source_only_acc('logs_hp_chosen', dset)
    thr = 0.9
    # NOTE(review): the printed value is the threshold s_acc*thr, not s_acc itself.
    if dset != 'office-home':
        print(f'Accuracy on TV task for S. Only: {s_acc*thr:.2f}')
    else:
        print(f'Accuracy on AC task for S. Only: {s_acc*thr:.2f}')
    hp[dset] = {}
    acc[dset] = {}
    for method in df[dset]:
        # These methods use the source-accuracy threshold heuristic; the others
        # are selected on the best metric alone.
        if method not in ('jumbot', 'mpot', 'ba3us', 'safn'):
            hp[dset][method], _ = gather_hp_best(df[dset], method)
        else:
            hp[dset][method], _ = gather_hp_heuristic(df[dset], method, s_acc*thr)
    # source_only_plus has no hyper-parameter: gather_hp_best then returns the
    # dictionary alone, hence no tuple unpacking here.
    hp[dset]['source_only_plus'] = gather_hp_best(df[dset], 'source_only_plus')
np.save('results/hp_chosen_nonlinear.npy', hp)

# Table 4

In [None]:
# Target accuracy reached by each selection strategy, with the heuristic
# (source-accuracy threshold) and with the naive (best metric only) selection.
# Both dictionaries are laid out as {dataset: {method: {strategy: t_acc}}}.
df = np.load('results/data_hp_search.npy', allow_pickle=True).item()
acc_heuristic = {}
acc_best = {}
for dataset in ['office-home', 'visda']:
    s_acc = get_source_only_acc('logs_hp_chosen', dataset)
    thr = 0.9
    acc_heuristic[dataset] = {}
    acc_best[dataset] = {}
    for method in df[dataset]:
        # Only the accuracies are kept; the selected hyper-parameters are discarded.
        _, acc_heuristic[dataset][method] = gather_hp_heuristic(df[dataset], method, s_acc*thr)
        _, acc_best[dataset][method] = gather_hp_best(df[dataset], method)

In [None]:
# Build and print the LaTeX source of the table comparing the naive and the
# heuristic model selection (reads acc_best and acc_heuristic).
methods = ['ba3us', 'jumbot', 'mpot', 'safn']
metrics = ['ent', 'dev_svm', 'snd']
# Display names of the selection strategies (also read by the Table 10 cell).
hp_metrics = {'s_acc': 's-acc', 'ent': 'ent', 'dev_svm': 'dev', 'snd': 'snd', '1shot_acc': '1-shot',
              '25random_acc': '25-rnd', '50random_acc': '50-rnd', '100random_acc': '100-rnd', 'oracle': 'oracle'}
table = ['\\begin{table}[h]\\centering']
table.append('\\resizebox{\\textwidth}{!}{')
# Column specification: dataset column, then the variant column and one column
# per (method, metric) pair. Backslashes are doubled everywhere: '\h' is an
# invalid escape sequence and triggers a warning on recent Python versions.
temp = '\\begin{tabular}{c'
for i in range(len(methods)*len(metrics)+1):
    temp += '@{\\hskip 2pt}|@{\\hskip 2pt}c'
temp += '}'
table.append(temp)
# First header row: one multi-column cell per method, spanning its metrics.
temp = '\\multirow{2}{*}{Dataset} & \\multirow{2}{*}{Variant}'
for method in methods:
    temp += ' & \\multicolumn{'+str(len(metrics))+'}{|@{\\hskip 2pt}c}{\\textsc{'+method+'}}'
temp += '\\\\'
table.append(temp)

# Second header row: the metric names, repeated for every method.
temp = ' &'
for method in methods:
    for metric in metrics:
        temp += ' & \\textsc{'+hp_metrics[metric]+'}'
temp += '\\\\'
table.append(temp)
table.append('\\midrule')

# Two rows per dataset (naive, then heuristic); the better of the two values is
# set in bold (both are bold on a tie).
for ix, dataset in enumerate(['office-home', 'visda']):
    if ix >0:
        table.append('\\midrule\\midrule')
    temp = '\\multirow{2}{*}{\\textsc{'+dataset+'}}'
    temp += ' & Naive'
    for method in methods:
        for metric in metrics:
            if acc_best[dataset][method][metric] >= acc_heuristic[dataset][method][metric]:
                temp += ' & \\textbf{'+f'{acc_best[dataset][method][metric]:.2f}'+'}'
            else:
                temp += f' & {acc_best[dataset][method][metric]:.2f}'
    temp += '\\\\'
    table.append(temp)
    temp = ' & Heuristic'
    for method in methods:
        for metric in metrics:
            if acc_heuristic[dataset][method][metric] >= acc_best[dataset][method][metric]:
                temp += ' & \\textbf{'+f'{acc_heuristic[dataset][method][metric]:.2f}'+'}'
            else:
                temp += f' & {acc_heuristic[dataset][method][metric]:.2f}'
    temp += '\\\\'
    table.append(temp)
table.append('\\end{tabular}}')
table.append('\\caption{Comparison between the naive model selection strategy and our heuristic approach. Accuracy on AC task for Office-Home and SR task for VisDA. Best results in \\textbf{bold}.}')
table.append('\\label{table:model_selection_heuristic}')
table.append('\\end{table}')
print('\n'.join(table))
print('\n')

# Table 10

In [None]:
# Build and print the LaTeX source of the table listing the hyper-parameters
# selected by each model selection strategy.
# NOTE: relies on `hp_metrics`, which is defined in the Table 4 cell.
hp = np.load('results/hp_chosen.npy', allow_pickle=True).item()
metrics = ['oracle', '1shot_acc', '50random_acc', '100random_acc', 's_acc', 'ent', 'dev_svm', 'snd']
# LaTeX symbol of every hyper-parameter. Backslashes are doubled everywhere:
# '\D', '\l', '\e' and '\h' are invalid escape sequences and trigger a warning
# on recent Python versions.
dict_hp = {'lambda': '$\\lambda$', 'delta_r': '$\\Delta r$', 'cot_weight': '$\\lambda_{wce}$', 'ent_weight': '$\\lambda_{ent}$',
 'rho0': '$\\rho_0$', 'up': '$A_{up}$', 'low': '$A_{low}$', 'tau': '$\\tau$', 'epsilon': '$\\epsilon$', 'eta_1': '$\\eta_1$',
 'eta_2': '$\\eta_2$', 'eta_3': '$\\eta_3$', 'mass': '$m$'}

table = ['\\begin{table}[h]\\centering', '\\resizebox{\\textwidth}{!}{']
# Column specification: method, dataset, hyper-parameter name, then one column
# per selection strategy.
temp = '\\begin{tabular}{c'
for i in range(len(metrics)+2):
    temp += '@{\\hskip 2pt}|@{\\hskip 2pt}c'
temp += '}'
table.append(temp)
temp = 'Method & Dataset & HP'
for metric in metrics:
    temp += ' & \\textsc{'+hp_metrics[metric]+'}'
temp += '\\\\'
table.append(temp)

datasets = ['office-home', 'visda']
methods = ['pada', 'safn', 'ba3us', 'ar', 'jumbot', 'mpot']
for method in methods:
    table.append('\\midrule\\midrule')
    for ix, dataset in enumerate(datasets):
        hp_names = hp[dataset][method]['oracle'].keys()
        # One row per hyper-parameter of the method.
        for jx, name in enumerate(hp_names):
            # The method name is written once, on the first row of its first dataset.
            if ix == 0 and jx == 0:
                temp = '\\multirow{'+str(len(datasets)*len(hp_names))+'}{*}{\\textsc{'+method+'}}'
            else:
                temp = ''
            if jx == 0:
                # A partial rule separates the datasets of a same method.
                if ix != 0:
                    temp += '\\cmidrule[0.5pt](l{-0.5ex}){2-11}\n'
                # The dataset name is written once, on the first row of the dataset.
                if len(hp_names)>1:
                    temp += ' & \\multirow{'+str(len(hp_names))+'}{*}{\\textsc{'+dataset+'}} & '
                else:
                    temp += ' & \\textsc{'+dataset+'} & '
            else:
                temp += ' & & '
            temp += dict_hp[name]
            for metric in metrics:
                temp += ' & '+str(hp[dataset][method][metric][name])
            temp += '\\\\'
            table.append(temp)
table.append('\\end{tabular}}')
table.append('\\caption{Hyper-parameters selected for the different methods for each model selection strategy on both \\textsc{office-home} and \\textsc{visda}.}')
table.append('\\label{table:hp_chosen}')
table.append('\\end{table}')
print('\n'.join(table))
print('\n')