In [1]:
import pandas as pd
from mlcog.utils import io 

In [7]:
# Test-set feature table; its 'label' column is the ground truth that the
# evaluation cells hand to io.groups.
# NOTE(review): read_pickle can execute arbitrary code while loading -- only
# point these paths at files you trust.
ling_test = pd.read_pickle('../data/features/ling_test.pkl')
y_test = ling_test['label']

# Stored evaluation outputs, presumably keyed by model name (TODO confirm);
# only the 'Random Forest' entry is used in this notebook.
lingprobs = pd.read_pickle('../data/test_eval_probs/ling_probs_classif.pkl')['Random Forest']

# Per-participant grouping table (gender, dx, age, mmse and their splits).
test_groups = pd.read_csv('../data/test_groups.csv')
test_groups

Unnamed: 0,adressfname,gender,dx,age,mmse,pid,mmse_split,age_split
0,adrso108,male,ProbableAD,65,19.0,1,ad-mod,60-69
1,adrso171,female,Control,57,27.0,10,cn,50-59
2,adrso184,female,Control,78,30.0,11,cn,70-80
3,adrso293,female,Control,57,30.0,12,cn,50-59
4,adrso113,female,ProbableAD,69,20.0,13,ad-mod,60-69
...,...,...,...,...,...,...,...,...
66,adrso135,male,ProbableAD,72,24.0,7,ad-mil,70-80
67,adrso214,male,ProbableAD,56,18.0,70,ad-mod,50-59
68,adrso083,male,ProbableAD,78,24.0,71,ad-mil,70-80
69,adrso037,male,ProbableAD,77,10.0,8,ad-mod,70-80


In [4]:
# Normalise the join key on both frames (cast to str, trim whitespace) so the
# keys compare equal regardless of how each file stored them.
ling_test['pid']   = ling_test['pid'].astype(str).str.strip()
test_groups['pid'] = test_groups['pid'].astype(str).str.strip()

# Left-join on index: keep every ling_test row, in ling_test order, and attach
# the grouping columns; clashing column names from test_groups get '_grp'.
df = (
    ling_test.set_index('pid')
        .join(test_groups.set_index('pid'), how='left', rsuffix='_grp')
        .reset_index()
)

# The split cells pass df's 0..n-1 row labels to io.groups next to
# y_test / lingprobs, so df must keep exactly one row per ling_test row.
# A left join never drops rows, but a duplicated pid in test_groups would
# silently add rows and shift every label after it.
assert len(df) == len(ling_test), (
    f'join changed the row count ({len(ling_test)} -> {len(df)}); '
    'check test_groups for duplicated pid values'
)

# Short gender codes; rows without a match in test_groups (or with an
# unexpected value) become the string 'NA' and fall outside the F/M splits.
df['gender'] = df['gender'].map({'female': 'F', 'male': 'M'}).fillna('NA')
df.head()


Unnamed: 0,pid,label,data,adressfname,gender,dx,age,mmse,mmse_split,age_split
0,58,0,"[57, 47.02, 94.48, 1.0, 5.65, 8.14, 14.04, 89....",adrso013,F,Control,70,29.0,cn,70-80
1,64,1,"[57, 62.72, 70.59, 14.46, 74.93, 5.7, 15.79, 9...",adrso038,F,ProbableAD,65,24.0,ad-mil,60-69
2,70,1,"[62, 95.94, 97.31, 25.64, 6.37, 4.43, 6.45, 95...",adrso214,M,ProbableAD,56,18.0,ad-mod,50-59
3,71,1,"[156, 1.0, 99.0, 1.31, 49.66, 19.5, 13.46, 97....",adrso083,M,ProbableAD,78,24.0,ad-mil,70-80
4,65,0,"[107, 88.19, 94.26, 10.52, 48.73, 15.29, 12.15...",adrso166,F,Control,58,30.0,cn,50-59


In [5]:
# Sanity check: shape of the feature vector stored in the first row's 'data' cell.
df.loc[0, 'data'].shape

(100,)

In [6]:
# Independent (deep) copy of the joined frame, used by the subgroup cells.
split = df.copy(deep=True)

##### *Female/male split*

In [9]:
# Row labels of the joined frame for each gender code.
male_idx = split.index[split['gender'] == 'M'].tolist()
female_idx = split.index[split['gender'] == 'F'].tolist()

# io.groups yields the (accuracy, likelihood-of-positive) summary strings
# shown in the output; report female first, then male.
for heading, rows in (('Female', female_idx), ('Male', male_idx)):
    print(heading)
    print(io.groups(y_test, lingprobs, rows))

Female
('Mean Accuracy: 76.4 (74.6 - 78.1)', 'Mean Likelihood of positive: 42.3 (38.6 - 46.0)')
Male
('Mean Accuracy: 76.7 (72.2 - 81.2)', 'Mean Likelihood of positive: 43.3 (37.9 - 48.8)')


##### *Age split*

In [11]:
# Row labels of df for each age band (used by the io.groups report cell).
age_1_idx = df.index[df.age_split == '50-59'].tolist()
age_2_idx = df.index[df.age_split == '60-69'].tolist()
age_3_idx = df.index[df.age_split == '70-80'].tolist()

# Print the group sizes: a bare expression in the middle of a cell is
# evaluated and thrown away, so it was never actually displayed.
print('Rows per age band (50-59, 60-69, 70-80):',
      len(age_1_idx), len(age_2_idx), len(age_3_idx))

# Reusable masks; in the rows displayed above label 1 pairs with ProbableAD
# and label 0 with Control.
label_is_1 = split.label == 1
label_is_0 = split.label == 0
in_50_59 = split.age_split == '50-59'
in_60_69 = split.age_split == '60-69'
in_70_80 = split.age_split == '70-80'

# Per-band class counts (<band>_<label>).
age_1_1 = int((in_50_59 & label_is_1).sum())
age_1_0 = int((in_50_59 & label_is_0).sum())
age_2_1 = int((in_60_69 & label_is_1).sum())
age_2_0 = int((in_60_69 & label_is_0).sum())
age_3_1 = int((in_70_80 & label_is_1).sum())
age_3_0 = int((in_70_80 & label_is_0).sum())

# Ratio of label-0 to label-1 rows per band. A band with no label-1 rows
# yields NaN instead of aborting the cell with ZeroDivisionError.
tuple(
    round(n_label_0 / n_label_1, 1) if n_label_1 else float('nan')
    for n_label_0, n_label_1 in (
        (age_1_0, age_1_1),
        (age_2_0, age_2_1),
        (age_3_0, age_3_1),
    )
)

(1.3, 1.2, 0.8)

In [12]:
# One io.groups summary per age band, in ascending age order.
for heading, rows in (
    ('Age 50-59', age_1_idx),
    ('Age 60-69', age_2_idx),
    ('Age 70-80', age_3_idx),
):
    print(heading)
    print(io.groups(y_test, lingprobs, rows))

Age 50-59
('Mean Accuracy: 83.6 (78.2 - 89.0)', 'Mean Likelihood of positive: 42.1 (34.7 - 49.5)')
Age 60-69
('Mean Accuracy: 75.9 (72.6 - 79.3)', 'Mean Likelihood of positive: 37.4 (31.5 - 43.3)')
Age 70-80
('Mean Accuracy: 73.7 (71.0 - 76.3)', 'Mean Likelihood of positive: 47.7 (44.7 - 50.7)')


##### *MMSE split*

In [13]:
# Row labels of df for each MMSE group (used by the io.groups report cell).
mmse_1_idx = df.index[df.mmse_split == 'cn'].tolist()
mmse_2_idx = df.index[df.mmse_split == 'ad-mil'].tolist()
mmse_3_idx = df.index[df.mmse_split == 'ad-mod'].tolist()
mmse_4_idx = df.index[df.mmse_split == 'ad-sev'].tolist()

# Print the group sizes: a bare expression in the middle of a cell is
# evaluated and thrown away, so it was never actually displayed.
print('Rows per MMSE group (cn, ad-mil, ad-mod, ad-sev):',
      len(mmse_1_idx), len(mmse_2_idx), len(mmse_3_idx), len(mmse_4_idx))

# Reusable masks; in the rows displayed above label 1 pairs with ProbableAD
# and label 0 with Control.
label_is_1 = split.label == 1
label_is_0 = split.label == 0
in_cn = split.mmse_split == 'cn'
in_mild = split.mmse_split == 'ad-mil'
in_moderate = split.mmse_split == 'ad-mod'
in_severe = split.mmse_split == 'ad-sev'

# Per-group class counts (<group>_<label>).
mmse_1_1 = int((in_cn & label_is_1).sum())
mmse_1_0 = int((in_cn & label_is_0).sum())
mmse_2_1 = int((in_mild & label_is_1).sum())
mmse_2_0 = int((in_mild & label_is_0).sum())
mmse_3_1 = int((in_moderate & label_is_1).sum())
mmse_3_0 = int((in_moderate & label_is_0).sum())
mmse_4_1 = int((in_severe & label_is_1).sum())
mmse_4_0 = int((in_severe & label_is_0).sum())

# Ratio of label-0 to label-1 rows per group. These groups are close to
# single-class, so a group with no label-1 rows is plausible; it yields NaN
# instead of aborting the cell with ZeroDivisionError.
tuple(
    round(n_label_0 / n_label_1, 1) if n_label_1 else float('nan')
    for n_label_0, n_label_1 in (
        (mmse_1_0, mmse_1_1),
        (mmse_2_0, mmse_2_1),
        (mmse_3_0, mmse_3_1),
        (mmse_4_0, mmse_4_1),
    )
)

(17.0, 0.1, 0.0, 0.0)

In [14]:
# One io.groups summary per MMSE group, from 'cn' through 'ad-sev'.
for heading, rows in (
    ('CN', mmse_1_idx),
    ('Mild', mmse_2_idx),
    ('Moderate', mmse_3_idx),
    ('Severe', mmse_4_idx),
):
    print(heading)
    print(io.groups(y_test, lingprobs, rows))

CN
('Mean Accuracy: 81.9 (76.8 - 87.1)', 'Mean Likelihood of positive: 20.8 (15.0 - 26.6)')
Mild
('Mean Accuracy: 48.2 (43.8 - 52.6)', 'Mean Likelihood of positive: 39.1 (34.7 - 43.5)')
Moderate
('Mean Accuracy: 78.6 (73.7 - 83.5)', 'Mean Likelihood of positive: 78.6 (73.7 - 83.5)')
Severe
('Mean Accuracy: 100.0 (100.0 - 100.0)', 'Mean Likelihood of positive: 100.0 (100.0 - 100.0)')
