# Imports

In [None]:
import numpy as np
import pandas as pd
import json

# Create random debug data

## Detailed score data as CSV

In [None]:
# Synthetic score table: one uniform-random column per BLEU order and
# n_samples_per_val * valorations rows in total.
metrics = [f'bleu{order}' for order in range(1, 5)]
n_samples_per_val = 100
valorations = 4
data = np.random.random((valorations * n_samples_per_val, len(metrics)))
df = pd.DataFrame(data=data, columns=metrics)
df.head(n=2)

In [None]:
# Label columns covering the four (gt, gen) pairings, n consecutive rows each:
# gt runs pos, pos, neg, neg while gen alternates pos, neg, pos, neg.
n = n_samples_per_val
gt = [label for label in ('positive', 'negative') for _ in range(2 * n)]
gen = (['positive'] * n + ['negative'] * n) * 2

In [None]:
# Attach the ground-truth and generated labels as two extra columns
df = df.assign(gt=gt, gen=gen)
df.head(n=2)

In [None]:
# Persist the debug table to `~/debug-data.csv`; index=False leaves the row index out of the file
df.to_csv('~/debug-data.csv', index=False)

In [None]:
# Preview the first rows of the table that was just written
df.head()

# Real examples to CSV

In [None]:
from itertools import product

In [None]:
# Run the helper scripts so their definitions land in the notebook namespace.
# `-n` keeps __name__ from being set to '__main__', so code guarded by
# `if __name__ == "__main__":` in the first script is not executed.
# NOTE(review): names used below but never defined in this notebook (os,
# load_experiments, _EXP_FOLDER, ABN_SHORTCUTS, CHEXPERT_DISEASES) presumably
# come from these two scripts — confirm.
%run -n nlp_in_chexpert_groups.py
%run ../../datasets/common/constants.py

In [None]:
# Destination folder for the CSV files written further down:
# the `csv_exports` subfolder of _EXP_FOLDER
outdir = os.path.join(_EXP_FOLDER, 'csv_exports')

In [None]:
def abn_to_writable(x):
    """Return the ABN_SHORTCUTS entry for abnormality `x`.

    The result is what gets written to the `abnormality` column of the exports.
    NOTE(review): ABN_SHORTCUTS is not defined in this notebook; a lookup
    failure for an unknown `x` propagates unchanged.
    """
    return ABN_SHORTCUTS[x]

In [None]:
# Metric columns that every exported table starts with (see experiments_to_df)
NLP_METRICS = ['bleu1', 'bleu2', 'bleu3', 'bleu4', 'rouge', 'cider']
# Short names written to the `gt` / `gen` columns for each integer valuation code
VAL_TO_NAME = {-2: 'none', -1: 'unc', 0: 'neg', 1: 'pos'}

In [None]:
# Dataset whose experiments are exported; the name also ends up in the output file names
dataset_name = 'mimic'
# NOTE(review): load_experiments is not defined in this notebook; its result is
# indexed by abnormality name in experiments_to_df
exp_by_abn = load_experiments(dataset_name)
len(exp_by_abn)

In [None]:
groups2 = (0, 1)
groups4 = (-2, -1, 0, 1)

def create_base_df(abnormality, groups):
    """Build the skeleton table with one row per (gt, gen) valuation pair.

    `groups` may be the shorthand 2 or 4 (expanded to `groups2` / `groups4`);
    any other value is used directly as the collection of valuation codes.
    The returned DataFrame has the columns 'abnormality', 'gt' and 'gen', with
    valuations written through VAL_TO_NAME.
    """
    if groups == 2:
        valuations = groups2
    elif groups == 4:
        valuations = groups4
    else:
        valuations = groups

    rows = []
    for g_gt, g_gen in product(valuations, valuations):
        row = (abn_to_writable(abnormality), VAL_TO_NAME[g_gt], VAL_TO_NAME[g_gen])
        rows.append(row)

    return pd.DataFrame(rows, columns=['abnormality', 'gt', 'gen'])

In [None]:
def get_metric_col_name(metric_name, metric_i):
    """Translate a result's metric name into the CSV column name.

    'bleu' expands to 'bleu1', 'bleu2', ... from the zero-based `metric_i`;
    'cider-IDF' is exported as 'cider'; every other name is returned unchanged.
    """
    if metric_name == 'bleu':
        return f'bleu{metric_i + 1}'
    return 'cider' if metric_name == 'cider-IDF' else metric_name

In [None]:
def create_summary_df(exp, abnormality):
    """Build the per-(gt, gen) summary tables of metric values for one abnormality.

    Args:
        exp: experiment whose `results` are iterated. Each result is read for
            `groups`, `cube` (indexed as [metric, gt, gen]), `dists` and `metric`.
        abnormality: abnormality handed to `create_base_df` to label the rows.

    Returns:
        Dict keyed by number of groups (2 and 4). Each value is the base table
        from `create_base_df` with one extra column per metric found in the
        results that use that many groups.
    """
    abn_dfs = {
        2: create_base_df(abnormality, 2),  # Chex-2
        4: create_base_df(abnormality, 4),  # Chex-4
    }

    for result in exp.results:
        groups = result.groups
        n_groups = len(groups)

        # No target table for this group count: skip it (same policy as
        # create_samples_df) instead of failing with a KeyError further down.
        if n_groups not in abn_dfs:
            continue

        n_metrics, n, m = result.cube.shape
        l = len(result.dists.keys())
        assert n == m and m*m == l and m == n_groups, (n, m, l, groups)

        for metric_i in range(n_metrics):
            # shape: n_groups, n_groups. Values are exported as float16, which
            # keeps roughly three significant decimal digits.
            cube = result.cube[metric_i].astype(np.float16)

            # One row per pair: (metric value, gt valuation, generated valuation)
            df_data = [
                (cube[j, i], VAL_TO_NAME[group_gt], VAL_TO_NAME[group_gen])
                for (i, group_gen), (j, group_gt)
                in product(enumerate(groups), enumerate(groups))
            ]

            metric_name = get_metric_col_name(result.metric, metric_i)
            right_df = pd.DataFrame(df_data, columns=[metric_name, 'gt', 'gen'])

            # Left-merge so the base table keeps its rows and gains the metric column
            abn_dfs[n_groups] = abn_dfs[n_groups].merge(
                right_df, how='left', on=['gt', 'gen'],
            )
    return abn_dfs

In [None]:
def experiments_to_df(df_extractor):
    """Stack the per-abnormality tables produced by `df_extractor`.

    `df_extractor(exp, abnormality)` must return a dict with the keys 2 and 4,
    each holding a DataFrame. It is called for every entry of
    CHEXPERT_DISEASES except the first one, using the matching experiment
    from the global `exp_by_abn`.

    Returns a dict with the keys 2 and 4, each holding the row-wise
    concatenation of all tables for that key. The column order starts with
    'abnormality', 'gt', 'gen' followed by NLP_METRICS.
    """
    base_columns = ['abnormality', 'gt', 'gen'] + NLP_METRICS
    main_dfs = {chex_k: pd.DataFrame(columns=base_columns) for chex_k in (2, 4)}

    for abnormality in CHEXPERT_DISEASES[1:]:
        abn_dfs = df_extractor(exp_by_abn[abnormality], abnormality)

        for chex_k in sorted(main_dfs):
            accumulated = main_dfs[chex_k]
            incoming = abn_dfs[chex_k]
            rows_before, rows_added = len(accumulated), len(incoming)

            stacked = pd.concat([accumulated, incoming], axis=0, ignore_index=True)
            main_dfs[chex_k] = stacked

            # Sanity check: concatenation must neither drop nor duplicate rows
            rows_after = len(stacked)
            assert rows_after == rows_before + rows_added, (rows_before, rows_added, rows_after)

    return main_dfs

## Concat summary scores

In [None]:
# Build the Chex-2 and Chex-4 summary tables over every abnormality iterated by
# experiments_to_df, then show their row counts
main_dfs = experiments_to_df(create_summary_df)
len(main_dfs[2]), len(main_dfs[4])

In [None]:
# Preview the first 20 rows of the Chex-2 summary table
main_dfs[2].head(20)

In [None]:
# DataFrame.to_csv does not create missing folders, so make sure the export
# folder exists before writing one summaries file per Chex-k table.
os.makedirs(outdir, exist_ok=True)
for chex_k, df in main_dfs.items():
    fname = os.path.join(outdir, f'summaries-{dataset_name}-chex{chex_k}.csv')
    df.to_csv(fname, index=False)

## Sample scores

In [None]:
class ResultsByGroup:
    """Accumulate per-sample metric scores for every (gt, gen) valuation pair.

    The keys are all ordered pairs of valuation codes: drawn from (0, 1) when
    `n_groups` is 2 and from (-2, -1, 0, 1) for any other value.
    """

    def __init__(self, n_groups):
        groups = (0, 1) if n_groups == 2 else (-2, -1, 0, 1)

        self._keys = list(product(groups, groups))
        # Stacked scores per key, one row per metric; None until the first add_item
        self.scores_arr = {k: None for k in self._keys}
        # Metric names per key, in the same order as the rows of scores_arr[key]
        self.metrics = {k: [] for k in self._keys}

    def add_metrics(self, metrics):
        # metrics shape: n_metrics
        # NOTE(review): unfinished stub. It replaces the per-key dict with a
        # plain list, after which add_item / to_df can no longer index by key.
        # It is not called anywhere in this notebook.
        self.metrics = [] # TODO

    def add_item(self, key, scores, metrics):
        """Append the scores of one or more metrics to the pair `key`.

        Args:
            key: (gt, gen) pair, one of the keys created in __init__.
            scores: array of shape (n_metrics, n_samples); a 1-D array is
                treated as a single metric.
            metrics: list with one metric name per row of `scores`.

        Raises:
            AssertionError: if the number of names does not match the number of
                score rows, or a metric was already added for this key.
        """
        if scores.ndim == 1:
            scores = np.expand_dims(scores, 0)

        assert len(metrics) == scores.shape[0], (len(metrics), scores.shape)

        prev_arr = self.scores_arr[key]
        if prev_arr is None:
            self.scores_arr[key] = scores
        else:
            # New metrics become additional rows below the existing ones
            self.scores_arr[key] = np.concatenate((prev_arr, scores), axis=0)

        seen_metrics = self.metrics[key]
        assert all(m not in seen_metrics for m in metrics), (metrics, seen_metrics)
        self.metrics[key] += metrics

    def to_df(self, key):
        """Return the scores of pair `key` as a DataFrame with one row per sample.

        Columns are the metric names added for the key plus 'gt' and 'gen'
        (valuation names from VAL_TO_NAME). If nothing was ever added for the
        key, an empty DataFrame with those columns is returned.
        """
        columns = self.metrics[key]
        scores = self.scores_arr[key]
        if scores is None:
            # No scores recorded for this pair: return an empty, correctly
            # labelled frame instead of failing on None.transpose()
            return pd.DataFrame(columns=[*columns, 'gt', 'gen'])

        # shape: n_samples, n_metrics. Exported as float16, which keeps roughly
        # three significant decimal digits.
        scores = scores.transpose().astype(np.float16)

        df = pd.DataFrame(scores, columns=columns)

        gt, gen = key
        gt, gen = VAL_TO_NAME[gt], VAL_TO_NAME[gen]
        df['gt'] = gt
        df['gen'] = gen

        return df

    def iter_dfs(self):
        """Return a list with the `to_df` table of every key, in key order."""
        return [
            self.to_df(key)
            for key in self._keys
        ]
            

def create_samples_df(exp, abnormality):
    """Build the per-sample score tables (Chex-2 and Chex-4) for one abnormality.

    Args:
        exp: experiment whose `results` are iterated. Each result is read for
            `groups`, `cube` (only its shape), `dists` and `metric`;
            `dists[(gt, gen)]` is passed to ResultsByGroup.add_item as the
            scores of that pair.
        abnormality: abnormality written (through `abn_to_writable`) to the
            'abnormality' column.

    Returns:
        Dict keyed by number of groups (2 and 4), each holding one DataFrame
        with a row per sample, the metric columns, 'gt', 'gen' and 'abnormality'.
        Results with any other number of groups are ignored.
    """
    scores_by_group = {
        2: ResultsByGroup(2),
        4: ResultsByGroup(4),
    }

    # results = sorted(exp.results, key=sort_metrics) # bleu, rouge, CIDEr, always in that order
    for result in exp.results:
        groups = result.groups

        collector = scores_by_group.get(len(groups))
        if collector is None:
            continue

        n_metrics, n, m = result.cube.shape
        l = len(result.dists.keys())
        assert n == m and m*m == l and m == len(groups), (n, m, l, groups)

        # The column names depend only on the result, so build them once
        metrics = [
            get_metric_col_name(result.metric, metric_i)
            for metric_i in range(n_metrics)
        ]

        for group_gen, group_gt in product(groups, groups):
            key = (group_gt, group_gen)
            samples_square = result.dists[key]  # shape: n_metrics, n_samples
            collector.add_item(key, samples_square, metrics)

    abn_name = abn_to_writable(abnormality)
    sample_dfs = {}
    for n_groups, collector in scores_by_group.items():
        df = pd.concat(collector.iter_dfs(), axis=0, ignore_index=True)
        df['abnormality'] = abn_name
        sample_dfs[n_groups] = df

    return sample_dfs

In [None]:
# Build the Chex-2 and Chex-4 per-sample tables over every abnormality iterated
# by experiments_to_df, then show their row counts
sample_dfs = experiments_to_df(create_samples_df)
len(sample_dfs[2]), len(sample_dfs[4])

In [None]:
# DataFrame.to_csv does not create missing folders, so make sure the export
# folder exists before writing one samples file per Chex-k table.
os.makedirs(outdir, exist_ok=True)
for chex_k, df in sample_dfs.items():
    fname = os.path.join(outdir, f'samples-{dataset_name}-chex{chex_k}.csv')
    df.to_csv(fname, index=False)