In [16]:
%load_ext autoreload
%autoreload 2
from typing import Set, List, Callable
import sys 
sys.path.append('.')
import pickle
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
from tqdm import tqdm

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
# mpl.rc('font', size=14)

# Count Analysis: 
## How many bad models (on target) are there in our experiments based on a specific measure?

**Measure**: Count how often a model's target accuracy is less than X percent of the target accuracy of the source-only baseline (column `0`).

**Best result:** We have lots of bad models in our experiment and our method IWA performs well in this setting.  


**Procedure:**
1. load results table


### Load preprocessed results of all experiments
To perform this analysis, we must first load all results. 
For an explanation of how to best preprocess, store and load experiment results, have a look at this repo: [https://github.com/maximilianmbeck/viz](https://github.com/maximilianmbeck/viz).

In [18]:
# Path of the pickle file holding the preprocessed results of all experiments.
results_file = './data/iclr_all_results_dfs.p'
# NOTE(review): unpickling can execute arbitrary code; only load result files from a trusted source.
with open(results_file, mode='rb') as results_fh:
    # The file holds one nested dictionary with all results.
    results_dict = pickle.load(results_fh)
# Show the top-level keys of the loaded dictionary as the cell output.
results_dict.keys()

dict_keys(['MINI_DOMAIN_NET', 'WISDM', 'HAR', 'HHAR_SA', 'AMAZON_REVIEWS', 'EEG', 'MOONS'])

### Combine all accuracy tables into one big table

In [19]:
# Walk the nested results once and pair every (dataset, da_method) key with its accuracy table.
# tqdm wraps the inner iteration so that one progress bar is printed per dataset.
labelled_tables = [
    ((dataset, da_method), result_df_dict['acc_df'])
    for dataset, da_methods_dict in results_dict.items()
    for da_method, result_df_dict in tqdm(da_methods_dict.items(), desc=f'CA for {dataset}')
]
index_tuples = [label for label, _ in labelled_tables]
acc_df_list = [table for _, table in labelled_tables]

# Stack all tables row-wise; the (dataset, da_method) keys become the two outermost index levels.
acc_index = pd.MultiIndex.from_tuples(index_tuples, names=['dataset', 'da_method'])
acc_df = pd.concat(acc_df_list, axis=0, keys=acc_index)
# Put the columns into sorted order.
acc_df = acc_df.reindex(columns=sorted(acc_df.columns))

CA for MINI_DOMAIN_NET: 100%|██████████| 11/11 [00:00<00:00, 182361.04it/s]
CA for WISDM: 100%|██████████| 11/11 [00:00<00:00, 200597.15it/s]
CA for HAR: 100%|██████████| 11/11 [00:00<00:00, 152268.46it/s]
CA for HHAR_SA: 100%|██████████| 11/11 [00:00<00:00, 66194.18it/s]
CA for AMAZON_REVIEWS: 100%|██████████| 11/11 [00:00<00:00, 111712.70it/s]
CA for EEG: 100%|██████████| 11/11 [00:00<00:00, 138550.58it/s]
CA for MOONS: 100%|██████████| 11/11 [00:00<00:00, 172154.27it/s]


In [20]:
# Display the combined accuracy table (source and target rows for every experiment).
acc_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,0,0.0001,0.001,0.01,0.05,0.1,0.25,0.5,0.75,1,...,10,2,5,agg,dev,iwv,source_reg,target_confidence_reg,target_majority_reg,target_majority_vote
dataset,da_method,domains,seed,domain,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
MINI_DOMAIN_NET,HoMM,real_src-clipart_tgt,1,source,0.971487,0.973523,0.947047,0.926680,,0.947047,,,,0.942974,...,0.977597,,0.965377,0.967413,0.965377,0.965377,0.981670,0.969450,0.963340,0.959267
MINI_DOMAIN_NET,HoMM,real_src-clipart_tgt,1,target,0.557377,0.573770,0.565574,0.663934,,0.606557,,,,0.565574,...,0.631148,,0.540984,0.606557,0.540984,0.540984,0.549180,0.581967,0.573770,0.598361
MINI_DOMAIN_NET,HoMM,real_src-clipart_tgt,2,source,0.979633,0.963340,0.955193,0.955193,,0.967413,,,,0.963340,...,0.957230,,0.928717,0.965377,0.967413,0.967413,0.973523,0.971487,0.973523,0.963340
MINI_DOMAIN_NET,HoMM,real_src-clipart_tgt,2,target,0.590164,0.540984,0.590164,0.540984,,0.565574,,,,0.565574,...,0.590164,,0.581967,0.590164,0.565574,0.565574,0.565574,0.590164,0.581967,0.598361
MINI_DOMAIN_NET,HoMM,real_src-clipart_tgt,3,source,0.963340,0.957230,0.967413,0.973523,,0.963340,,,,0.973523,...,0.926680,,0.959267,0.971487,0.973523,0.963340,0.973523,0.965377,0.965377,0.965377
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
MOONS,DSAN,0_src-1_tgt,1,target,1.000000,1.000000,0.977143,0.994286,0.982857,1.000000,1.0,1.0,1.0,1.000000,...,0.977143,1.0,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
MOONS,DSAN,0_src-1_tgt,2,source,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.0,1.0,1.0,1.000000,...,0.994286,1.0,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
MOONS,DSAN,0_src-1_tgt,2,target,0.988571,1.000000,1.000000,1.000000,1.000000,1.000000,1.0,1.0,1.0,1.000000,...,1.000000,1.0,1.000000,1.000000,0.988571,0.988571,1.000000,1.000000,1.000000,1.000000
MOONS,DSAN,0_src-1_tgt,3,source,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.0,1.0,1.0,1.000000,...,1.000000,1.0,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000


In [21]:
# Display only the rows without any NaN entry; the result is not assigned, so acc_df itself is unchanged.
acc_df.dropna()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,0,0.0001,0.001,0.01,0.05,0.1,0.25,0.5,0.75,1,...,10,2,5,agg,dev,iwv,source_reg,target_confidence_reg,target_majority_reg,target_majority_vote
dataset,da_method,domains,seed,domain,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
WISDM,HoMM,18_src-23_tgt,1,source,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,0.981132,0.981132,0.981132,1.0,1.000000,1.000000,1.000000,1.000000,1.000000,1.0
WISDM,HoMM,18_src-23_tgt,1,target,0.800000,0.766667,0.633333,0.700000,0.733333,0.733333,0.600000,0.666667,0.566667,0.666667,...,0.666667,0.566667,0.566667,0.7,0.800000,0.800000,0.766667,0.800000,0.766667,0.7
WISDM,HoMM,18_src-23_tgt,2,source,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,1.000000,1.000000,1.000000,1.0,1.000000,1.000000,1.000000,1.000000,1.000000,1.0
WISDM,HoMM,18_src-23_tgt,2,target,0.700000,0.800000,0.666667,0.766667,0.733333,0.666667,0.566667,0.566667,0.600000,0.600000,...,0.633333,0.533333,0.633333,0.7,0.700000,0.700000,0.733333,0.766667,0.766667,0.7
WISDM,HoMM,18_src-23_tgt,3,source,1.000000,0.981132,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,1.000000,0.981132,0.981132,1.0,1.000000,1.000000,1.000000,1.000000,1.000000,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
MOONS,DSAN,0_src-1_tgt,1,target,1.000000,1.000000,0.977143,0.994286,0.982857,1.000000,1.000000,1.000000,1.000000,1.000000,...,0.977143,1.000000,1.000000,1.0,1.000000,1.000000,1.000000,1.000000,1.000000,1.0
MOONS,DSAN,0_src-1_tgt,2,source,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,0.994286,1.000000,1.000000,1.0,1.000000,1.000000,1.000000,1.000000,1.000000,1.0
MOONS,DSAN,0_src-1_tgt,2,target,0.988571,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,1.000000,1.000000,1.000000,1.0,0.988571,0.988571,1.000000,1.000000,1.000000,1.0
MOONS,DSAN,0_src-1_tgt,3,source,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,1.000000,1.000000,1.000000,1.0,1.000000,1.000000,1.000000,1.000000,1.000000,1.0


In [22]:
# Inspect the column labels and the row MultiIndex of the combined table.
acc_df.columns, acc_df.index

(Index(['0', '0.0001', '0.001', '0.01', '0.05', '0.1', '0.25', '0.5', '0.75',
        '1', '1.5', '10', '2', '5', 'agg', 'dev', 'iwv', 'source_reg',
        'target_confidence_reg', 'target_majority_reg', 'target_majority_vote'],
       dtype='object'),
 MultiIndex([('MINI_DOMAIN_NET', 'HoMM',   'real_src-clipart_tgt', '1', ...),
             ('MINI_DOMAIN_NET', 'HoMM',   'real_src-clipart_tgt', '1', ...),
             ('MINI_DOMAIN_NET', 'HoMM',   'real_src-clipart_tgt', '2', ...),
             ('MINI_DOMAIN_NET', 'HoMM',   'real_src-clipart_tgt', '2', ...),
             ('MINI_DOMAIN_NET', 'HoMM',   'real_src-clipart_tgt', '3', ...),
             ('MINI_DOMAIN_NET', 'HoMM',   'real_src-clipart_tgt', '3', ...),
             ('MINI_DOMAIN_NET', 'HoMM', 'real_src-infograph_tgt', '1', ...),
             ('MINI_DOMAIN_NET', 'HoMM', 'real_src-infograph_tgt', '1', ...),
             ('MINI_DOMAIN_NET', 'HoMM', 'real_src-infograph_tgt', '2', ...),
             ('MINI_DOMAIN_NET', 'HoMM', 're

In [23]:
# acc_df.style

### Count number of bad models
**Criterion**: A model is bad if its target accuracy is less than X percent of the target accuracy of the source-only baseline (column `0`).

**Procedure**: 
For counting the bad models, we must iterate over every experiment (2 rows at once, as each experiment yields a source and a target accuracy row).
1. Iterate over experiments (grouped by seed)
2. Take all individual model results (skip NaN values)
3. Apply bad model criterion, column-wise (pass all rows). Assign 1 if model is bad, 0 otherwise.
4. Keep target accuracy of baselines as columns

In [24]:
# parameters
individual_models = ['0', '0.0001', '0.001', '0.01', '0.05', '0.1', '0.25', '0.5', '0.75', '1', '1.5', '10', '2', '5']
aggregation_methods = [
    'agg', 'dev', 'iwv', 'source_reg', 'target_confidence_reg', 'target_majority_reg', 'target_majority_vote'
]
model_counts = ['n_bad_models', 'n_models', 'frac_bad_models']

aggregation_specifier = {
    'n_bad_models': 'sum',
    'n_models': 'sum',
    'frac_bad_models': 'mean',
    'agg': 'mean',
    'dev': 'mean',
    'iwv': 'mean',
    'source_reg': 'mean',
    'target_confidence_reg': 'mean',
    'target_majority_reg': 'mean',
    'target_majority_vote': 'mean'
}

In [25]:
def count_models_based_on_treshold_criterion(
    acc_df: pd.DataFrame,
    frac_threshold: float,
    individual_models: List[str] = [
        '0', '0.0001', '0.001', '0.01', '0.05', '0.1', '0.25', '0.5', '0.75', '1', '1.5', '10', '2', '5'
    ],
    aggregation_methods: List[str] = [
        'agg', 'dev', 'iwv', 'source_reg', 'target_confidence_reg', 'target_majority_reg', 'target_majority_vote'
    ]
) -> pd.DataFrame:
    """Flag and count "bad" individual models for every experiment in `acc_df`.

    An experiment is one group of rows sharing the index levels
    (dataset, da_method, domains, seed). Each experiment must consist of exactly
    two rows, distinguished by the `domain` index level; the target row is
    selected by its label `'target'`, so the order of the two rows is irrelevant.

    A model is bad if its target accuracy is strictly below
    `frac_threshold` times the target accuracy found in column `'0'`
    (the reference, called the source-only target accuracy here).

    Args:
        acc_df (pd.DataFrame): Accuracy table with the index levels `dataset`, `da_method`,
            `domains`, `seed` and `domain`. It must contain the column `'0'` as well as all
            columns listed in `individual_models` and `aggregation_methods`.
        frac_threshold (float): Fraction of the reference target accuracy below which a model is bad.
        individual_models (List[str], optional): Columns holding the individual models to be judged.
            The default list is only read, never modified.
        aggregation_methods (List[str], optional): Columns whose target accuracy is copied unchanged
            into the result. The default list is only read, never modified.

    Returns:
        pd.DataFrame: One row per experiment, indexed by (dataset, da_method, domains, seed), with
            - one column per individual model: 1.0 if bad, 0.0 otherwise, NaN if one of the
              experiment's two accuracies for that model is NaN,
            - `n_bad_models`: number of bad models,
            - `n_models`: number of models that have both accuracies,
            - `frac_bad_models`: `n_bad_models / n_models` (NaN if `n_models` is 0),
            - one column per aggregation method holding its target accuracy.

    Raises:
        ValueError: If an experiment does not have exactly one target row.
        AssertionError: If an experiment does not consist of exactly two rows.
    """
    group_levels = ['dataset', 'da_method', 'domains', 'seed']

    index_tuples = []
    data_rows = []
    for index, df in acc_df.groupby(level=group_levels):
        # Select the target row by label instead of by position, so the result
        # does not depend on the row order inside an experiment.
        target_rows = df.xs(key='target', level='domain')
        # `.item()` enforces that there is exactly one target row.
        source_only_target_accuracy = target_rows['0'].item()
        assert len(df) == 2, 'each experiment must consist of exactly two rows'

        model_acc = df[individual_models]
        target_acc = target_rows[individual_models].iloc[0]
        # find bad models: 1.0 = bad, 0.0 = not bad ...
        is_bad = target_acc < frac_threshold * source_only_target_accuracy
        # ... and NaN for models that lack one of their two accuracies.
        bad_models = is_bad.astype(float).where(model_acc.notna().all(axis=0))

        # count bad models (NaN entries are skipped by sum)
        n_bad_models = bad_models.sum()
        # count total models (num non NaN)
        n_models = bad_models.notna().sum()
        counts = pd.Series({
            'n_bad_models': n_bad_models,
            'n_models': n_models,
            # explicit guard: no judged model means no defined fraction
            'frac_bad_models': n_bad_models / n_models if n_models else float('NaN')
        })
        # select target accuracy of aggregation methods
        target_acc_agg = target_rows[aggregation_methods].iloc[0]

        index_tuples.append(index)
        data_rows.append(pd.concat([bad_models, counts, target_acc_agg]))

    index = pd.MultiIndex.from_tuples(index_tuples, names=group_levels)
    return pd.DataFrame(data=data_rows, index=index)

In [26]:
# Per-experiment bad-model flags and counts for a threshold of 0.6.
res_df = count_models_based_on_treshold_criterion(acc_df=acc_df, frac_threshold=0.6)

In [27]:
# Threshold 0.6: flag bad models per experiment ...
res_df = count_models_based_on_treshold_criterion(acc_df=acc_df, frac_threshold=0.6)
# ... summarise per dataset (rendered through the pandas Styler) ...
display(res_df.groupby(level=['dataset']).agg(func=aggregation_specifier).style)
# ... and over all experiments; as the last expression this becomes the cell output.
res_df.agg(func=aggregation_specifier)

Unnamed: 0_level_0,n_bad_models,n_models,frac_bad_models,agg,dev,iwv,source_reg,target_confidence_reg,target_majority_reg,target_majority_vote
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AMAZON_REVIEWS,0.0,5544.0,0.0,0.787881,0.764435,0.772056,0.78934,0.785694,0.786152,0.787037
EEG,42.0,2310.0,0.018182,0.736567,0.660194,0.700137,0.717225,0.725043,0.728996,0.729223
HAR,74.0,2310.0,0.032035,0.835101,0.764646,0.773801,0.769129,0.832449,0.833333,0.840341
HHAR_SA,55.0,2310.0,0.02381,0.786932,0.721751,0.74579,0.721612,0.771226,0.768314,0.770606
MINI_DOMAIN_NET,13.0,1320.0,0.009848,0.530699,0.514537,0.513236,0.517864,0.525897,0.525289,0.526479
MOONS,6.0,462.0,0.012987,0.997229,0.980606,0.989264,0.988918,0.997403,0.980779,0.979567
WISDM,107.0,2310.0,0.04632,0.763716,0.725823,0.735742,0.737171,0.76464,0.767785,0.768284


n_bad_models               297.000000
n_models                 16566.000000
frac_bad_models              0.017473
agg                          0.755707
dev                          0.712857
iwv                          0.726249
source_reg                   0.730947
target_confidence_reg        0.750579
target_majority_reg          0.750873
target_majority_vote         0.752597
dtype: float64

In [28]:
# Threshold 0.7: flag bad models per experiment ...
res_df = count_models_based_on_treshold_criterion(acc_df=acc_df, frac_threshold=0.7)
# ... summarise per dataset (rendered through the pandas Styler) ...
display(res_df.groupby(level=['dataset']).agg(func=aggregation_specifier).style)
# ... and over all experiments; as the last expression this becomes the cell output.
res_df.agg(func=aggregation_specifier)

Unnamed: 0_level_0,n_bad_models,n_models,frac_bad_models,agg,dev,iwv,source_reg,target_confidence_reg,target_majority_reg,target_majority_vote
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AMAZON_REVIEWS,14.0,5544.0,0.002525,0.787881,0.764435,0.772056,0.78934,0.785694,0.786152,0.787037
EEG,106.0,2310.0,0.045887,0.736567,0.660194,0.700137,0.717225,0.725043,0.728996,0.729223
HAR,93.0,2310.0,0.04026,0.835101,0.764646,0.773801,0.769129,0.832449,0.833333,0.840341
HHAR_SA,124.0,2310.0,0.05368,0.786932,0.721751,0.74579,0.721612,0.771226,0.768314,0.770606
MINI_DOMAIN_NET,24.0,1320.0,0.018182,0.530699,0.514537,0.513236,0.517864,0.525897,0.525289,0.526479
MOONS,11.0,462.0,0.02381,0.997229,0.980606,0.989264,0.988918,0.997403,0.980779,0.979567
WISDM,179.0,2310.0,0.077489,0.763716,0.725823,0.735742,0.737171,0.76464,0.767785,0.768284


n_bad_models               551.000000
n_models                 16566.000000
frac_bad_models              0.032411
agg                          0.755707
dev                          0.712857
iwv                          0.726249
source_reg                   0.730947
target_confidence_reg        0.750579
target_majority_reg          0.750873
target_majority_vote         0.752597
dtype: float64

In [29]:
# Threshold 0.8: flag bad models per experiment, then summarise.
frac_threshold = 0.8
res_df = count_models_based_on_treshold_criterion(acc_df, frac_threshold=frac_threshold)
# The f-string previously had an empty replacement field (`{}`), which is a SyntaxError;
# it now prints the threshold that was actually used.
print(f'Frac threshold: {frac_threshold}')
# Per-dataset summary, rendered through the pandas Styler.
display(res_df.groupby(level=['dataset']).agg(aggregation_specifier).style)
# Summary over all experiments; as the last expression this becomes the cell output.
res_df.agg(aggregation_specifier)

Unnamed: 0_level_0,n_bad_models,n_models,frac_bad_models,agg,dev,iwv,source_reg,target_confidence_reg,target_majority_reg,target_majority_vote
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AMAZON_REVIEWS,81.0,5544.0,0.01461,0.787881,0.764435,0.772056,0.78934,0.785694,0.786152,0.787037
EEG,185.0,2310.0,0.080087,0.736567,0.660194,0.700137,0.717225,0.725043,0.728996,0.729223
HAR,156.0,2310.0,0.067532,0.835101,0.764646,0.773801,0.769129,0.832449,0.833333,0.840341
HHAR_SA,229.0,2310.0,0.099134,0.786932,0.721751,0.74579,0.721612,0.771226,0.768314,0.770606
MINI_DOMAIN_NET,47.0,1320.0,0.035606,0.530699,0.514537,0.513236,0.517864,0.525897,0.525289,0.526479
MOONS,23.0,462.0,0.049784,0.997229,0.980606,0.989264,0.988918,0.997403,0.980779,0.979567
WISDM,317.0,2310.0,0.137229,0.763716,0.725823,0.735742,0.737171,0.76464,0.767785,0.768284


n_bad_models              1038.000000
n_models                 16566.000000
frac_bad_models              0.061133
agg                          0.755707
dev                          0.712857
iwv                          0.726249
source_reg                   0.730947
target_confidence_reg        0.750579
target_majority_reg          0.750873
target_majority_vote         0.752597
dtype: float64

In [30]:
# Threshold 0.85: flag bad models per experiment ...
res_df = count_models_based_on_treshold_criterion(acc_df=acc_df, frac_threshold=0.85)
# ... summarise per dataset (rendered through the pandas Styler) ...
display(res_df.groupby(level=['dataset']).agg(func=aggregation_specifier).style)
# ... and over all experiments; as the last expression this becomes the cell output.
res_df.agg(func=aggregation_specifier)

Unnamed: 0_level_0,n_bad_models,n_models,frac_bad_models,agg,dev,iwv,source_reg,target_confidence_reg,target_majority_reg,target_majority_vote
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AMAZON_REVIEWS,123.0,5544.0,0.022186,0.787881,0.764435,0.772056,0.78934,0.785694,0.786152,0.787037
EEG,278.0,2310.0,0.120346,0.736567,0.660194,0.700137,0.717225,0.725043,0.728996,0.729223
HAR,207.0,2310.0,0.08961,0.835101,0.764646,0.773801,0.769129,0.832449,0.833333,0.840341
HHAR_SA,308.0,2310.0,0.133333,0.786932,0.721751,0.74579,0.721612,0.771226,0.768314,0.770606
MINI_DOMAIN_NET,69.0,1320.0,0.052273,0.530699,0.514537,0.513236,0.517864,0.525897,0.525289,0.526479
MOONS,30.0,462.0,0.064935,0.997229,0.980606,0.989264,0.988918,0.997403,0.980779,0.979567
WISDM,422.0,2310.0,0.182684,0.763716,0.725823,0.735742,0.737171,0.76464,0.767785,0.768284


n_bad_models              1437.000000
n_models                 16566.000000
frac_bad_models              0.084800
agg                          0.755707
dev                          0.712857
iwv                          0.726249
source_reg                   0.730947
target_confidence_reg        0.750579
target_majority_reg          0.750873
target_majority_vote         0.752597
dtype: float64

In [31]:
# Threshold 0.9: flag bad models per experiment ...
res_df = count_models_based_on_treshold_criterion(acc_df=acc_df, frac_threshold=0.9)
# ... summarise per dataset (rendered through the pandas Styler) ...
display(res_df.groupby(level=['dataset']).agg(func=aggregation_specifier).style)
# ... and over all experiments; as the last expression this becomes the cell output.
res_df.agg(func=aggregation_specifier)

Unnamed: 0_level_0,n_bad_models,n_models,frac_bad_models,agg,dev,iwv,source_reg,target_confidence_reg,target_majority_reg,target_majority_vote
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AMAZON_REVIEWS,185.0,5544.0,0.033369,0.787881,0.764435,0.772056,0.78934,0.785694,0.786152,0.787037
EEG,387.0,2310.0,0.167532,0.736567,0.660194,0.700137,0.717225,0.725043,0.728996,0.729223
HAR,281.0,2310.0,0.121645,0.835101,0.764646,0.773801,0.769129,0.832449,0.833333,0.840341
HHAR_SA,415.0,2310.0,0.179654,0.786932,0.721751,0.74579,0.721612,0.771226,0.768314,0.770606
MINI_DOMAIN_NET,106.0,1320.0,0.080303,0.530699,0.514537,0.513236,0.517864,0.525897,0.525289,0.526479
MOONS,40.0,462.0,0.08658,0.997229,0.980606,0.989264,0.988918,0.997403,0.980779,0.979567
WISDM,613.0,2310.0,0.265368,0.763716,0.725823,0.735742,0.737171,0.76464,0.767785,0.768284


n_bad_models              2027.000000
n_models                 16566.000000
frac_bad_models              0.119987
agg                          0.755707
dev                          0.712857
iwv                          0.726249
source_reg                   0.730947
target_confidence_reg        0.750579
target_majority_reg          0.750873
target_majority_vote         0.752597
dtype: float64

In [32]:
# save table
# import dataframe_image as dfi
# dfi.export(mean_corr_per_dataset_dam, 'mean_corr_per_dataset_dam.png', dpi=300)