In [1]:
import pandas as pd
import re

In [2]:
"""
Test parameters

input: 64 engineered features extracted from lidc max-slice images

model layer sizes = (64, 32, 32, 2)
groupdro_eta = 0.1
lr = 0.001
weight_decay = 0.005

batch_size = 40
proportional = True
epochs = 40
N = 120

"""

'\nTest parameters\n\ninput: 64 engineered features extracted from lidc max-slice images\n\nmodel layer sizes = (64, 32, 32, 2)\ngroupdro_eta = 0.1\nlr = 0.001\nweight_decay = 0.005\n\nbatch_size = 40\nproportional = True\nepochs = 40\nN = 120\n\n'

In [3]:
# Load the saved results table, discard its first column (presumably a saved
# index - confirm against the CSV), and transpose it so that the two remaining
# series end up as columns.
results = (
    pd.read_csv("../results.csv")
    .iloc[:, 1:]
    .T
)
# After the transpose the two columns are labelled by training method.
results.columns = ["ERM", "GDRO"]

In [4]:
# Show the raw table. Each cell is still a string of the form
# "(<float>, array([...]))"; it is parsed into numbers in the next cell.
results

Unnamed: 0,ERM,GDRO
0,"(0.851037851037851, array([0.88208617, 0.90476...","(0.8461538461538461, array([0.85260771, 0.7936..."
1,"(0.8547008547008547, array([0.87528345, 0.9047...","(0.7667887667887668, array([0.70975057, 0.6190..."
2,"(0.8070818070818071, array([0.81405896, 0.7777...","(0.8070818070818071, array([0.81179138, 0.8095..."
3,"(0.8437118437118437, array([0.84807256, 0.8253...","(0.8241758241758241, array([0.83219955, 0.7301..."
4,"(0.8351648351648352, array([0.861678 , 0.9047...","(0.8083028083028083, array([0.78684807, 0.6666..."
5,"(0.8266178266178266, array([0.82086168, 0.8253...","(0.8034188034188035, array([0.78004535, 0.7460..."
6,"(0.8473748473748474, array([0.84126984, 0.7777...","(0.8241758241758241, array([0.79365079, 0.7936..."
7,"(0.8547008547008547, array([0.84580499, 0.8888...","(0.8046398046398047, array([0.79591837, 0.8888..."
8,"(0.8388278388278388, array([0.85941043, 0.7777...","(0.7973137973137974, array([0.7845805 , 0.7777..."
9,"(0.8449328449328449, array([0.85487528, 0.7936...","(0.811965811965812, array([0.80725624, 0.76190..."


In [5]:
def _parse_scores(cell):
    """Convert one stringified result cell into a flat list of floats.

    Each cell of ``results`` holds text shaped like
    ``"(0.85, array([0.88, 0.90, 0.78, 0.69]))"``. The ``array`` token, the
    brackets/parentheses and the spaces are removed, and the remaining
    comma-separated tokens are converted with ``float``.

    Only the literal word ``array`` is stripped (rather than every lowercase
    letter) so that a value printed in scientific notation, e.g. ``1e-05``,
    keeps its exponent marker and still parses.

    Args:
        cell: the string stored in one cell of ``results``.

    Returns:
        list[float]: the leading scalar followed by the array entries.

    Raises:
        ValueError: if any remaining token is not a valid float literal.
    """
    # Raw string on purpose: "\(" and "\[" are invalid escape sequences in a
    # normal string literal and trigger a warning on recent Python versions.
    cleaned = re.sub(r"array|[()\[\] ]", "", cell)
    return [float(token) for token in cleaned.split(",")]


# split[row][col][k]: row follows the rows of `results`, col is 0 for "ERM" and
# 1 for "GDRO", and k indexes the numbers parsed out of that cell.
split = [[_parse_scores(v) for v in row] for row in results.values]
# Sanity check: first number of the first ERM cell.
split[0][0][0]

0.851037851037851

In [6]:
# Regroup the parsed numbers: data_clean[k] is a DataFrame holding the k-th
# parsed value of every row of `split`, with one column per method.
data_clean = [
    pd.DataFrame(
        [[scores[position] for scores in trial] for trial in split],
        columns=["ERM", "GDRO"],
    )
    for position in range(5)
]

In [7]:
from scipy.stats import ttest_ind
import statistics

# Display label for each entry of data_clean, in order.
# Commented-out alternatives from earlier versions of this cell (presumably
# for other subgroupings - confirm before reuse):
#   ["Overall", "GE MEDICAL SYSTEMS", "Unknown", "SIEMENS", "TOSHIBA", "Philips"]
#   ["Overall", "0benign", "1benign", "0malignant", "1malignant"]
group_names = [
    "Overall",
    "unmarked_benign",
    "marked_benign",
    "marked_malignant",
    "unmarked_malignant",
]

# For every group: report the mean accuracy of each method, then run a
# two-sample t-test and state which method is higher when p < 0.05.
# NOTE(review): ttest_ind treats the two columns as independent samples with
# equal variances (its defaults); if ERM and GDRO rows come from the same
# trials a paired or Welch test may be more appropriate - confirm.
for group_name, scores in zip(group_names, data_clean):
    print(group_name)

    erm_scores = scores["ERM"]
    print(f"Mean ERM accuracy: {statistics.mean(erm_scores)}")
    gdro_scores = scores["GDRO"]
    print(f"Mean GDRO accuracy: {statistics.mean(gdro_scores)}")

    stat, p = ttest_ind(erm_scores, gdro_scores)
    if p < 0.05:
        # A positive statistic means the first sample (ERM) has the larger mean.
        if stat > 0:
            winner = "ERM"
        else:
            winner = "GDRO"
        print(f"{winner} higher accuracy (p = {p})")
    else:
        print(f"No statistically significant difference (p = {p})")
    print()
    

Overall
Mean ERM accuracy: 0.8396214896214896
Mean GDRO accuracy: 0.8092592592592592
ERM higher accuracy (p = 7.004659238558251e-17)

unmarked_benign
Mean ERM accuracy: 0.8585411938333334
Mean GDRO accuracy: 0.7895313678333333
ERM higher accuracy (p = 1.4276995761687367e-22)

marked_benign
Mean ERM accuracy: 0.8272486771666666
Mean GDRO accuracy: 0.7722222223333334
ERM higher accuracy (p = 1.0084849821442235e-05)

marked_malignant
Mean ERM accuracy: 0.8971781308333333
Mean GDRO accuracy: 0.8808641968333333
ERM higher accuracy (p = 0.01181317513867486)

unmarked_malignant
Mean ERM accuracy: 0.693253968
Mean GDRO accuracy: 0.7894179891666666
GDRO higher accuracy (p = 1.566950045975758e-17)



In [31]:
# Old results (flawed malignancy labels + CNN subtypes).
# NOTE(review): `data_clean0` is not assigned in any cell visible here, and this
# cell's execution count (31) is out of sequence with the cells above - confirm
# the notebook still runs under Restart Kernel -> Run All.
data_clean0

[          ERM      GDRO
 0    0.809211  0.812500
 1    0.822368  0.812500
 2    0.792763  0.812500
 3    0.792763  0.812500
 4    0.786184  0.809211
 ..        ...       ...
 115  0.812500  0.825658
 116  0.812500  0.786184
 117  0.809211  0.851974
 118  0.812500  0.786184
 119  0.805921  0.835526
 
 [120 rows x 2 columns],
           ERM      GDRO
 0    0.843750  0.864583
 1    0.833333  0.916667
 2    0.833333  0.770833
 3    0.968750  0.843750
 4    0.833333  0.760417
 ..        ...       ...
 115  0.895833  0.843750
 116  0.906250  0.791667
 117  0.916667  0.947917
 118  0.802083  0.864583
 119  0.864583  0.885417
 
 [120 rows x 2 columns],
           ERM      GDRO
 0    0.895833  0.864583
 1    0.854167  0.885417
 2    0.854167  0.864583
 3    0.875000  0.916667
 4    0.927083  0.854167
 ..        ...       ...
 115  0.875000  0.864583
 116  0.937500  0.791667
 117  0.875000  0.802083
 118  0.875000  0.781250
 119  0.906250  0.916667
 
 [120 rows x 2 columns],
          ERM     G