In [1]:
%load_ext autoreload
%autoreload 2
DEVICE = "cuda"
from domino import explore, DominoSlicer
import meerkat as mk
import pandas as pd
import numpy as np
from sklearn.metrics import balanced_accuracy_score, roc_auc_score, confusion_matrix


In [2]:
def run_domino_l3_isic24(arch="clip", dataset="imagenet", n_slices=15, weight=1, seed=42):
    """Fit a Domino slicer on the ISIC24 validation features and slice val + test.

    Args:
        arch: Encoder tag; part of the feature file names and of the embedding
            column name.
        dataset: Encoder pre-training tag; used the same way as ``arch``.
        n_slices: Forwarded to ``DominoSlicer`` as both ``n_slices`` and
            ``n_mixture_components``.
        weight: Forwarded as ``y_hat_log_likelihood_weight``.
        seed: Forwarded as ``random_state``.

    Returns:
        pandas.DataFrame with the validation rows followed by the test rows.
        Added columns: ``domino_slices``, ``argmax_domino``, ``group`` and
        ``split`` ('val' / 'test'). The ``img`` and embedding columns are
        removed when the drop succeeds.
    """
    if arch == "clip" and dataset == "imagenet":
        model_name = "clip"
    else:
        model_name = "{}_{}".format(arch, dataset)
    # Name of the embedding column in the feature tables, e.g. "clip(img)".
    embedding_col = "{}(img)".format(model_name)

    df_val = mk.read("../extracted_features/l3_isic24_ERM_val_{}_{}.mk".format(arch, dataset))
    df_test = mk.read("../extracted_features/l3_isic24_ERM_test_{}_{}.mk".format(arch, dataset))

    domino = DominoSlicer(
        y_log_likelihood_weight=0,
        y_hat_log_likelihood_weight=weight,
        n_mixture_components=n_slices,
        n_slices=n_slices,
        confusion_noise=0.001,
        random_state=seed,
    )

    # The slicer is fit on the validation split only; both splits are then scored.
    slicer_inputs = dict(embeddings=embedding_col, targets=None, pred_probs="pred_1")
    domino.fit(data=df_val, **slicer_inputs)
    df_val["domino_slices"] = domino.predict(data=df_val, **slicer_inputs)
    df_test["domino_slices"] = domino.predict(data=df_test, **slicer_inputs)

    # Hard assignment: index of the highest-scoring slice for every sample.
    df_val['argmax_domino'] = df_val['domino_slices'].to_numpy().argmax(axis=1)
    df_test['argmax_domino'] = df_test['domino_slices'].to_numpy().argmax(axis=1)

    # NOTE(review): 'group' is forced to the constant 0 on both splits,
    # presumably because this dataset has no ground-truth subgroup labels -- confirm.
    df_val['group'] = 0
    df_test['group'] = 0

    # Remove the image and embedding columns before converting to pandas.
    # Only ordinary errors are tolerated; a bare `except:` would also swallow
    # KeyboardInterrupt and SystemExit.
    try:
        df_val = df_val.drop(['img', embedding_col])
    except Exception:
        print('columns were already deleted')
    df_val['split'] = 'val'

    try:
        df_test = df_test.drop(['img', embedding_col])
    except Exception:
        print('columns were already deleted')
    df_test['split'] = 'test'

    # pd.concat already returns a new frame, so no extra copy is needed.
    return pd.concat([df_val.to_pandas(), df_test.to_pandas()])


In [2]:
def run_domino_l3(arch="clip", dataset="imagenet", n_slices=10, weight=20, seed=42):
    """Fit a Domino slicer on the l3 subgroup validation features and slice val + test.

    Args:
        arch: Encoder tag; part of the feature file names and of the embedding
            column name.
        dataset: Encoder pre-training tag; used the same way as ``arch``.
        n_slices: Forwarded to ``DominoSlicer`` as both ``n_slices`` and
            ``n_mixture_components``.
        weight: Forwarded as ``y_hat_log_likelihood_weight``.
        seed: Forwarded as ``random_state``.

    Returns:
        pandas.DataFrame with the validation rows followed by the test rows.
        Added columns: ``domino_slices``, ``argmax_domino``, ``group`` and
        ``split`` ('val' / 'test'). The ``img`` and embedding columns are
        removed when the drop succeeds.
    """
    if arch == "clip" and dataset == "imagenet":
        model_name = "clip"
    else:
        model_name = "{}_{}".format(arch, dataset)
    # Name of the embedding column in the feature tables, e.g. "clip(img)".
    embedding_col = "{}(img)".format(model_name)

    df_val = mk.read("../extracted_features/l3_subgroup_ERM_val_{}_{}.mk".format(arch, dataset))
    df_test = mk.read("../extracted_features/l3_subgroup_ERM_test_{}_{}.mk".format(arch, dataset))

    domino = DominoSlicer(
        y_log_likelihood_weight=0,
        y_hat_log_likelihood_weight=weight,
        n_mixture_components=n_slices,
        n_slices=n_slices,
        confusion_noise=0.001,
        random_state=seed,
    )

    # The slicer is fit on the validation split only; both splits are then scored.
    slicer_inputs = dict(embeddings=embedding_col, targets=None, pred_probs="pred_1")
    domino.fit(data=df_val, **slicer_inputs)
    df_val["domino_slices"] = domino.predict(data=df_val, **slicer_inputs)
    df_test["domino_slices"] = domino.predict(data=df_test, **slicer_inputs)

    # Hard assignment: index of the highest-scoring slice for every sample.
    df_val['argmax_domino'] = df_val['domino_slices'].to_numpy().argmax(axis=1)
    df_test['argmax_domino'] = df_test['domino_slices'].to_numpy().argmax(axis=1)

    # NOTE(review): 'group' is forced to the constant 0 on both splits,
    # presumably because no ground-truth subgroup labels are used here -- confirm.
    df_val['group'] = 0
    df_test['group'] = 0

    # Remove the image and embedding columns before converting to pandas.
    # Only ordinary errors are tolerated; a bare `except:` would also swallow
    # KeyboardInterrupt and SystemExit.
    try:
        df_val = df_val.drop(['img', embedding_col])
    except Exception:
        print('columns were already deleted')
    df_val['split'] = 'val'

    try:
        df_test = df_test.drop(['img', embedding_col])
    except Exception:
        print('columns were already deleted')
    df_test['split'] = 'test'

    # pd.concat already returns a new frame, so no extra copy is needed.
    return pd.concat([df_val.to_pandas(), df_test.to_pandas()])


In [7]:
def run_domino_2artifacts(arch="clip", dataset="imagenet", n_slices=10, weight=20, bias_level="0.6", seed=42):
    """Fit a Domino slicer on the hypertag validation features and slice val + test.

    Args:
        arch: Encoder tag; part of the feature file names and of the embedding
            column name.
        dataset: Encoder pre-training tag; used the same way as ``arch``.
        n_slices: Forwarded to ``DominoSlicer`` as both ``n_slices`` and
            ``n_mixture_components``.
        weight: Forwarded as ``y_hat_log_likelihood_weight``.
        bias_level: String inserted into the feature file names
            (``domino_hypertag_<split>_<bias_level>_<arch>_<dataset>.mk``).
        seed: Forwarded as ``random_state``. Defaults to 42, the value that was
            previously hard-coded, so existing calls behave as before.

    Returns:
        pandas.DataFrame with the validation rows followed by the test rows.
        Added columns: ``domino_slices``, ``argmax_domino`` and ``split``
        ('val' / 'test'). The ``img`` and embedding columns are removed when
        the drop succeeds.
    """
    if arch == "clip" and dataset == "imagenet":
        model_name = "clip"
    else:
        model_name = "{}_{}".format(arch, dataset)
    # Name of the embedding column in the feature tables, e.g. "clip(img)".
    embedding_col = "{}(img)".format(model_name)

    df_val = mk.read("../extracted_features/domino_hypertag_{}_{}_{}_{}.mk".format("val", bias_level, arch, dataset))
    df_test = mk.read("../extracted_features/domino_hypertag_{}_{}_{}_{}.mk".format("test", bias_level, arch, dataset))

    domino = DominoSlicer(
        y_log_likelihood_weight=0,
        y_hat_log_likelihood_weight=weight,
        n_mixture_components=n_slices,
        n_slices=n_slices,
        confusion_noise=0.001,
        random_state=seed,
    )

    # The slicer is fit on the validation split only; both splits are then scored.
    slicer_inputs = dict(embeddings=embedding_col, targets=None, pred_probs="pred_1")
    domino.fit(data=df_val, **slicer_inputs)
    df_val["domino_slices"] = domino.predict(data=df_val, **slicer_inputs)
    df_test["domino_slices"] = domino.predict(data=df_test, **slicer_inputs)

    # Hard assignment: index of the highest-scoring slice for every sample.
    df_val['argmax_domino'] = df_val['domino_slices'].to_numpy().argmax(axis=1)
    df_test['argmax_domino'] = df_test['domino_slices'].to_numpy().argmax(axis=1)

    # Remove the image and embedding columns before converting to pandas.
    # Only ordinary errors are tolerated; a bare `except:` would also swallow
    # KeyboardInterrupt and SystemExit.
    try:
        df_val = df_val.drop(['img', embedding_col])
    except Exception:
        print('columns were already deleted')
    df_val['split'] = 'val'

    try:
        df_test = df_test.drop(['img', embedding_col])
    except Exception:
        print('columns were already deleted')
    df_test['split'] = 'test'

    # pd.concat already returns a new frame, so no extra copy is needed.
    return pd.concat([df_val.to_pandas(), df_test.to_pandas()])

In [3]:


def eval_subgroup_statistics(df):
    """Summarise every (discovered slice, split) pair of a per-sample frame.

    Reads the columns ``argmax_domino``, ``split``, ``gt``, ``pred_1`` and
    ``group`` of ``df``.

    Returns:
        pandas.DataFrame with one row per (``argmax_domino``, ``split``) pair:
        ``subgroup``, ``split``, ``num_samples``, ``num_positives``,
        ``num_negatives``, ``class_ratio`` (positives / negatives, ``inf`` when
        there are no negatives), ``balanced_accuracy`` (``pred_1`` thresholded
        at 0.5), ``auc`` (``None`` when only one class is present),
        ``avg_scores`` (mean of ``pred_1``), ``sg_gt`` (most frequent value of
        ``group``) and ``sg_gt_prob`` (its relative frequency).
    """
    rows = []

    for (slice_idx, split_name), members in df.groupby(['argmax_domino', 'split']):
        labels = members['gt']
        scores = members['pred_1']

        # Class counts; summing 'gt' assumes labels are encoded as 0/1 -- TODO confirm.
        n_total = len(members)
        n_pos = labels.sum()
        n_neg = n_total - n_pos
        if n_neg > 0:
            pos_neg_ratio = n_pos / n_neg
        else:
            pos_neg_ratio = float('inf')

        # Dominant value of 'group' inside this slice and the share of samples it covers.
        group_shares = members['group'].value_counts(normalize=True).to_dict()
        majority_group = max(group_shares, key=group_shares.get)
        majority_share = group_shares[majority_group]

        # ROC AUC is undefined when the slice contains a single class.
        has_both_classes = len(labels.unique()) > 1
        roc_auc = roc_auc_score(labels, scores) if has_both_classes else None

        rows.append({
            'subgroup': slice_idx,
            'split': split_name,
            'num_samples': n_total,
            'num_positives': n_pos,
            'num_negatives': n_neg,
            'class_ratio': pos_neg_ratio,
            'balanced_accuracy': balanced_accuracy_score(labels, scores >= 0.5),
            'auc': roc_auc,
            'avg_scores': np.mean(scores),
            'sg_gt': majority_group,
            'sg_gt_prob': majority_share,
        })

    return pd.DataFrame(rows)


In [10]:
# Disabled experiment: the code below is wrapped in a triple-quoted string, so
# none of it runs. Because the string is the only expression in the cell, the
# notebook simply echoes it back as the cell output.
"""
# Get max contamination, with the corresponding num_positives and num_negatives
best_subgroups = []
for slices in [15]:
    for weights in [1]:
        df = run_domino_2artifacts(n_slices=slices, weight=weights)
        results_df = eval_subgroup_statistics(df)
        # print top 6 subgroups with highest contamination
        top_k = results_df.nlargest(6, 'sg_gt')
        print(top_k)
        best_subgroups.extend(top_k.to_dict('records'))
        #best_subgroups.append(best)
        #print("Slices: {}, Weight: {}, Contamination: {}, Positives: {}, Negatives: {}".format(slices, weights, best["contamination"], best["num_positives"], best["num_negatives"]))

best_sgs = pd.DataFrame(best_subgroups)
print(best_sgs)
"""

'\n# Get max contamination, with the corresponding num_positives and num_negatives\nbest_subgroups = []\nfor slices in [15]:\n    for weights in [1]:\n        df = run_domino_2artifacts(n_slices=slices, weight=weights)\n        results_df = eval_subgroup_statistics(df)\n        # print top 6 subgroups with highest contamination\n        top_k = results_df.nlargest(6, \'sg_gt\')\n        print(top_k)\n        best_subgroups.extend(top_k.to_dict(\'records\'))\n        #best_subgroups.append(best)\n        #print("Slices: {}, Weight: {}, Contamination: {}, Positives: {}, Negatives: {}".format(slices, weights, best["contamination"], best["num_positives"], best["num_negatives"]))\n\nbest_sgs = pd.DataFrame(best_subgroups)\nprint(best_sgs)\n'

In [None]:
# Single exploratory run of the hypertag pipeline with one fixed configuration.
# Open question kept from the original cell: which configuration are we actually using?
slices = 15
weights = 1
arch = "clip"
dataset = "cxr"

df = run_domino_2artifacts(
    arch=arch,
    dataset=dataset,
    n_slices=slices,
    weight=weights,
)
results_df = eval_subgroup_statistics(df)
print(results_df)

# Generate CSV with per subgroup statistics. 
### Uses `eval_subgroup_statistics()` to compute the per-subgroup statistics

In [11]:
n_slices = 15
weight = 10
arch = "clip"
dataset = "imagenet"

# Column of `df` -> column name in the per-sample CSV.
# The dict order is also the column order of the CSV.
persample_columns = {
    'name': 'name',
    'argmax_domino': 'discovered_subgroup_idx',
    'split': 'split',
    'group': 'true_subgroup_idx',
    'gt': 'y_true',
    'pred_1': 'y_pred',
}

# One Domino run per bias level; each run writes a per-subgroup and a per-sample CSV.
for bias_level in ["0.6", "0.7", "0.8"]:
    df = run_domino_2artifacts(arch=arch, dataset=dataset, n_slices=n_slices, weight=weight, bias_level=bias_level)

    # Per-subgroup statistics (kept under its own name so it is not confused
    # with the per-sample table written below).
    subgroup_stats_df = eval_subgroup_statistics(df)
    subgroup_stats_df.to_csv('../results/results_hypertag_valtest_{}_{}_{}_nslices{}_weight{}.csv'.format(bias_level, arch, dataset, n_slices, weight), index=False)

    # Per-sample table: a plain column selection + rename. This replaces the
    # former row-by-row `iterrows()` loop, which was slow and converted every
    # row to an object-dtype Series; the vectorised form keeps column dtypes.
    results_df = (
        df[list(persample_columns)]
        .rename(columns=persample_columns)
        .reset_index(drop=True)
    )

    results_df.to_csv('../results/results_persample_hypertag_valtest_{}_{}_{}_nslices{}_weight{}.csv'.format(bias_level, arch, dataset, n_slices, weight), index=False)

 21%|[38;2;241;122;74m██        [0m| 21/100 [00:28<01:47,  1.36s/it]
 38%|[38;2;241;122;74m███▊      [0m| 38/100 [00:52<01:26,  1.39s/it]
 25%|[38;2;241;122;74m██▌       [0m| 25/100 [00:35<01:47,  1.43s/it]


## Generate CSV with per-sample discovered_subgroup_idx, true_subgroup_idx (contamination no/yes), y_pred, y_true, y_pred_score?

In [79]:
from sklearn.metrics import balanced_accuracy_score, roc_auc_score,confusion_matrix
import numpy as np
import pandas as pd

# CLIP IMAGENET: 15 slices, Weight: 1. For some reason, with fewer slices it cannot find the contaminated subgroup...

# NOTE: this cell does not create `df`, `arch`, `dataset`, `n_slices` or `weight`;
# it uses whatever an earlier cell left in the kernel.
# NOTE(review): the output file name says "l3" -- verify that `df` really comes
# from an l3 run before trusting the file.

# Per-sample table: a plain column selection + rename (dict order = CSV column
# order). This replaces the former row-by-row `iterrows()` loop, which was slow
# and converted every row to an object-dtype Series.
persample_columns = {
    'name': 'name',
    'argmax_domino': 'discovered_subgroup_idx',
    'split': 'split',
    'group': 'true_subgroup_idx',
    'gt': 'y_true',
    'pred_1': 'y_pred',
}
results_df = (
    df[list(persample_columns)]
    .rename(columns=persample_columns)
    .reset_index(drop=True)
)

# Save the DataFrame to a CSV file
results_df.to_csv('../results/results_persample_l3_valtest_{}_{}_nslices{}_weight{}.csv'.format(arch, dataset, n_slices, weight), index=False) #l3

# Print the DataFrame
print(results_df)


                                               name  discovered_subgroup_idx  \
0       train/patient42231/study2/view1_frontal.png                        9   
1      train/patient18257/study22/view1_frontal.png                       14   
2       train/patient13108/study8/view1_frontal.png                       14   
3       train/patient14142/study3/view1_frontal.png                        5   
4       train/patient36982/study4/view1_frontal.png                       14   
...                                             ...                      ...   
44539  train/patient35480/study14/view1_frontal.png                        0   
44540   train/patient34885/study1/view1_frontal.png                        7   
44541   train/patient50614/study1/view1_frontal.png                        0   
44542   train/patient35196/study1/view1_frontal.png                       10   
44543   train/patient47946/study3/view1_frontal.png                        8   

      split  true_subgroup_idx  y_true 

# Generate a single file containing domino runs using multiple hyperparameters

In [5]:
from sklearn.metrics import balanced_accuracy_score, roc_auc_score,confusion_matrix
import numpy as np
import pandas as pd

# CLIP IMAGENET: 15 slices, Weight: 1. For some reason, with fewer slices it cannot find the contaminated subgroup...

arch = "clip"
dataset = "imagenet"
bias_level = "0.7"  # not used by the ISIC24 sweep below; only relevant for a hypertag (run_domino_2artifacts) variant

# Column of `df` -> column name in the per-sample CSV (dict order = CSV column order).
persample_columns = {
    'name': 'name',
    'argmax_domino': 'discovered_subgroup_idx',
    'split': 'split',
    'group': 'true_subgroup_idx',
    'gt': 'y_true',
    'pred_1': 'y_pred',
}

# One per-sample frame per (seed, n_slices, weight) run, tagged with its hyperparameters.
run_frames = []
for seed in [42]: #range(20):
    for n_slices in [15]:
        for weight in [0.001, 0.01, 0.1, 1, 5, 10, 50, 100, 500, 1000]:
            df = run_domino_l3_isic24(arch=arch, dataset=dataset, n_slices=n_slices, weight=weight, seed=seed)

            # Vectorised select + rename instead of a row-by-row `iterrows()`
            # loop. The per-subgroup statistics used to be recomputed here on
            # every run and then thrown away, so that call was removed.
            run_frames.append(
                df[list(persample_columns)]
                .rename(columns=persample_columns)
                .assign(weight=weight, seed=seed, n_slices=n_slices)
            )

# Stack all runs into a single table with a fresh 0..N-1 index.
results_df = pd.concat(run_frames, ignore_index=True)
# Save the DataFrame to a CSV file
results_df.to_csv('../results/results_persample_l3_isic24_valtest_{}_{}_multiple.csv'.format(arch, dataset), index=False) #l3



 65%|[38;2;241;122;74m██████▌   [0m| 65/100 [00:08<00:04,  8.05it/s]
 65%|[38;2;241;122;74m██████▌   [0m| 65/100 [00:08<00:04,  8.03it/s]
 65%|[38;2;241;122;74m██████▌   [0m| 65/100 [00:08<00:04,  8.12it/s]
 64%|[38;2;241;122;74m██████▍   [0m| 64/100 [00:07<00:04,  8.03it/s]
 51%|[38;2;241;122;74m█████     [0m| 51/100 [00:06<00:06,  8.06it/s]
 48%|[38;2;241;122;74m████▊     [0m| 48/100 [00:05<00:06,  8.11it/s]
 26%|[38;2;241;122;74m██▌       [0m| 26/100 [00:03<00:09,  7.93it/s]
 85%|[38;2;241;122;74m████████▌ [0m| 85/100 [00:10<00:01,  8.24it/s]
 33%|[38;2;241;122;74m███▎      [0m| 33/100 [00:04<00:08,  8.23it/s]
  9%|[38;2;241;122;74m▉         [0m| 9/100 [00:01<00:12,  7.56it/s]
