In [1]:
import pandas as pd
import numpy as np
import sys
import ast
sys.path.append('../')
from modules import utils, constants
import matplotlib.pyplot as plt
%matplotlib inline

#### Computing score functions

In [2]:
# Points contributed by each individual criterion when it is present
criteria_weights = {
    # constitutional
    'fever': 2,
    # hematologic
    'leukopenia': 3,
    'thrombocytopenia': 4,
    'auto_immune_hemolysis': 4,
    # neuropsychiatric
    'delirium': 2,
    'psychosis': 3,
    'seizure': 5,
    # mucocutaneous (the three cutaneous lupus subtypes share one encoded feature)
    'non_scarring_alopecia': 2,
    'oral_ulcers': 2,
    'subacute_cutaneous_lupus': 4,
    'discoid_lupus': 4,
    'acute_cutaneous_lupus': 6,
    # serosal
    'pleural_effusion': 5,
    'pericardial_effusion': 5,
    'acute_pericarditis': 6,
    # musculoskeletal
    'joint_involvement': 6,
    # renal
    'proteinuria': 4,
    # antiphospholipid antibodies
    'anti_cardioliphin_antibodies': 2,
    'anti_β2gp1_antibodies': 2,
    'lupus_anti_coagulant': 2,
    # complement proteins (single values and the combined and/or entries)
    'low_c3': 3,
    'low_c4': 3,
    'low_c3_and_low_c4': 4,
    'low_c3_or_low_c4': 3,
    # SLE-specific antibodies
    'anti_dsdna_antibody': 6,
    'anti_smith_antibody': 6,
}

# Dictionary mapping each domain to the features that belong to it
domains_feat_dict = {
    'constitutional': ['fever'],
    'hematologic': ['leukopenia', 'thrombocytopenia', 'auto_immune_hemolysis'],
    'neuropsychiatric': ['delirium', 'psychosis', 'seizure'],
    'mucocutaneous': ['non_scarring_alopecia', 'oral_ulcers', 'cutaneous_lupus'],
    'serosal': ['pleural_effusion', 'pericardial_effusion', 'acute_pericarditis'],
    'musculoskeletal': ['joint_involvement'],
    'renal': ['proteinuria'],
    'antiphospholipid_antibodies': ['anti_cardioliphin_antibodies', 'anti_β2gp1_antibodies', 'lupus_anti_coagulant'],
    'complement_proteins': ['low_c3', 'low_c4'],
    'sle_specific_antibodies': ['anti_dsdna_antibody', 'anti_smith_antibody'],
}

# Dictionary with the maximum possible score for each domain
domains_max_scores_dict = {
    'constitutional': 2,
    'hematologic': 4,
    'neuropsychiatric': 5,
    'mucocutaneous': 6,
    'serosal': 6,
    'musculoskeletal': 6,
    'renal': 4,
    'antiphospholipid_antibodies': 2,
    'complement_proteins': 4,
    'sle_specific_antibodies': 6,
}



def get_cutaneous_lupus_score(cutaneous_type):
    '''
    Translates the encoded cutaneous lupus value into its criterion weight.

    cutaneous_type: 0 = no cutaneous lupus, 1 = subacute, 2 = acute, 3 = discoid
    Returns 0 for code 0, otherwise the matching entry of criteria_weights.
    Raises Exception for any other value.
    '''
    if cutaneous_type == 0:  # negative for any form of cutaneous lupus
        return 0
    subtype_weight_keys = (
        (1, 'subacute_cutaneous_lupus'),
        (2, 'acute_cutaneous_lupus'),
        (3, 'discoid_lupus'),
    )
    for subtype_code, weight_key in subtype_weight_keys:
        if cutaneous_type == subtype_code:
            return criteria_weights[weight_key]
    raise Exception('Unknown cutaneous lupus type!')

def get_feat_score(row, feat):
    '''
    Returns the score a single feature contributes for the given row.

    'cutaneous_lupus' is delegated to get_cutaneous_lupus_score; any other feature
    scores 0 when its value is <= 0 and criteria_weights[feat] otherwise.
    '''
    if feat == 'cutaneous_lupus':
        return get_cutaneous_lupus_score(row[feat])
    feat_value = row[feat]
    # NOTE(review): a NaN value is not <= 0, so it would be scored as present — confirm inputs have no NaN
    return 0 if feat_value <= 0 else criteria_weights[feat]


def get_c3_c4_score(c3, c4):
    '''
    Returns the complement proteins domain score from the two complement flags.

    c3, c4: 1 means low, 0 means not low
    Both low -> 'low_c3_and_low_c4' weight; exactly one low -> 'low_c3_or_low_c4' weight; else 0.
    '''
    c3_is_low = c3 == 1
    c4_is_low = c4 == 1
    if c3_is_low & c4_is_low:
        return criteria_weights['low_c3_and_low_c4']
    if c3_is_low | c4_is_low:
        return criteria_weights['low_c3_or_low_c4']
    return 0

def get_domain_score(row, domain):
    '''
    Returns the score of one domain for the given row.

    The complement proteins domain is scored jointly from low_c3 and low_c4; every other
    domain takes the highest score among its features (0 when none scores).
    Raises Exception if the result exceeds the domain's entry in domains_max_scores_dict.
    '''
    feats_in_domain = domains_feat_dict[domain]  # a list of features in that domain
    if domain == 'complement_proteins':
        score = get_c3_c4_score(row.low_c3, row.low_c4)
    else:
        # only the single highest-weighted feature counts within a domain
        score = max([0] + [get_feat_score(row, feat_name) for feat_name in feats_in_domain])
    if score > domains_max_scores_dict[domain]:
        raise Exception('The score is too large for this domain!')
    return score

def compute_score(row):
    '''
    Returns the total score of a dataframe row: the sum of all domain scores.

    A row whose 'ana' value is 0 (negative) always scores 0.
    '''
    if row['ana'] == 0:  # 0=negative, 1=positive
        return 0
    return sum(get_domain_score(row, domain_name) for domain_name in domains_feat_dict)

In [3]:
# Load the training set from the 'missingness/0' data folder and preview it
training_df = pd.read_csv(filepath_or_buffer='../data/missingness/0/training_set.csv')
training_df.head(n=5)

Unnamed: 0,ana,fever,leukopenia,thrombocytopenia,auto_immune_hemolysis,delirium,psychosis,seizure,non_scarring_alopecia,oral_ulcers,...,joint_involvement,proteinuria,anti_cardioliphin_antibodies,anti_β2gp1_antibodies,lupus_anti_coagulant,low_c3,low_c4,anti_dsdna_antibody,anti_smith_antibody,label
0,0,1,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,1
3,1,0,0,0,0,0,0,1,1,0,...,0,1,0,0,0,0,0,1,1,1
4,1,0,0,1,1,1,0,0,0,0,...,0,1,0,0,0,1,0,0,1,1


In [4]:
# Load the testing set from the 'missingness/0' data folder and preview it
testing_df = pd.read_csv(filepath_or_buffer='../data/missingness/0/testing_set.csv')
testing_df.head(n=5)

Unnamed: 0,ana,fever,leukopenia,thrombocytopenia,auto_immune_hemolysis,delirium,psychosis,seizure,non_scarring_alopecia,oral_ulcers,...,joint_involvement,proteinuria,anti_cardioliphin_antibodies,anti_β2gp1_antibodies,lupus_anti_coagulant,low_c3,low_c4,anti_dsdna_antibody,anti_smith_antibody,label
0,1,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
1,1,1,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,1,0,1
2,1,0,0,0,0,0,1,0,0,0,...,1,0,0,0,1,1,0,0,0,1
3,1,1,0,0,0,0,0,0,1,0,...,0,0,0,0,1,0,0,1,0,1
4,1,0,0,0,0,0,0,0,1,0,...,1,0,0,0,0,1,0,0,0,1


In [5]:
# Per-episode results saved under test_dfs/negative_reward; the 'index' column is kept (dropping it is disabled)
model_test_df = pd.read_csv(filepath_or_buffer='../test_dfs/negative_reward/step_24_correct_1_incorrect_-1.csv')
model_test_df.head(n=5)

Unnamed: 0,index,episode_length,reward,y_pred,y_actual,trajectory,terminated,is_success
0,0.0,6.0,0.861111,1.0,1.0,"['ana', 'proteinuria', 'cutaneous_lupus', 'ant...",0.0,1.0
1,1.0,14.0,0.638889,1.0,1.0,"['ana', 'proteinuria', 'non_scarring_alopecia'...",0.0,1.0
2,2.0,12.0,0.694444,1.0,1.0,"['ana', 'proteinuria', 'cutaneous_lupus', 'per...",0.0,1.0
3,3.0,18.0,0.527778,1.0,1.0,"['ana', 'proteinuria', 'cutaneous_lupus', 'per...",0.0,1.0
4,4.0,14.0,0.638889,1.0,1.0,"['ana', 'proteinuria', 'cutaneous_lupus', 'per...",0.0,1.0


In [6]:
# Element 0 of utils.success_rate's result — presumably the success percentage (TODO confirm in modules/utils)
utils.success_rate(model_test_df)[0]

94.85

In [7]:
# Trajectory of the first episode (stored as a string representation of a list)
model_test_df['trajectory'].iloc[0]

"['ana', 'proteinuria', 'cutaneous_lupus', 'anti_smith_antibody', 'joint_involvement', 'Lupus']"

In [8]:
# Number of episodes that stop with 'No lupus' right after querying ana
model_test_df[model_test_df['trajectory'] == "['ana', 'No lupus']"].shape[0]

3954

In [9]:
# Number of test patients with a negative ana
testing_df[testing_df['ana'] == 0].shape[0]

3954

#### Episode length == 2

In [None]:
# Episodes of length 2 and how often each trajectory occurs among them
length_2 = model_test_df.loc[model_test_df['episode_length'] == 2]
length_2['trajectory'].value_counts()

#### Episode length == 3

In [None]:
# Episodes of length 3 and how often each trajectory occurs among them
length_3 = model_test_df.loc[model_test_df['episode_length'] == 3]
length_3['trajectory'].value_counts()

In [None]:
# NOTE(review): the variable is named length_4 but the filter keeps episodes of length 7 — confirm which is intended
length_4 = model_test_df.loc[model_test_df['episode_length'] == 7]
length_4['trajectory'].value_counts()

In [None]:
# Test-set rows at the index labels of the episodes in length_4, and their leukopenia distribution
sampling = testing_df.loc[length_4.index, :]
sampling['leukopenia'].value_counts()

#### Misdiagnosed episodes

In [None]:
# Episodes whose predicted label differs from the actual label
misdiag_df = model_test_df.loc[model_test_df['y_actual'].ne(model_test_df['y_pred'])]
misdiag_df.head(n=5)

In [None]:
# Split the misdiagnosed episodes by predicted label
misdiag_0_df = misdiag_df.loc[misdiag_df['y_pred'] == 0]  # predicted 0 while the actual label differs
misdiag_1_df = misdiag_df.loc[misdiag_df['y_pred'] == 1]  # predicted 1 while the actual label differs

In [None]:
# Five most frequent trajectories among episodes wrongly predicted as 0
misdiag_0_df['trajectory'].value_counts().iloc[:5]

In [None]:
# Five most frequent trajectories among episodes wrongly predicted as 1
misdiag_1_df['trajectory'].value_counts().iloc[:5]

#### ANA = 1 Diagnosis = 0

In [None]:
# Test patients with a positive ana whose label is 0
ana_1_df = testing_df.loc[(testing_df['ana'] == 1) & (testing_df['label'] == 0)]
ana_1_df.shape[0]

In [None]:
# Model episodes at the same index labels as the patients in ana_1_df
ana_1_model_df = model_test_df.loc[ana_1_df.index, :]

In [None]:
# Five most frequent trajectories for those episodes
ana_1_model_df['trajectory'].value_counts().iloc[:5]

In [None]:
# Actual vs. predicted labels for the episodes selected through ana_1_df's index
utils.plot_confusion_matrix(ana_1_model_df['y_actual'], ana_1_model_df['y_pred'])

## Analyzing pathways vs my data

### Pathways that end in a No lupus diagnosis

In [10]:
# Work on an independent (deep) copy so the loaded results stay untouched.
# Parsing the trajectories into lists is left disabled, so they remain strings:
model_copy_df = model_test_df.copy()
# model_copy_df['trajectory'] = model_copy_df.trajectory.apply(ast.literal_eval)

In [11]:
# Number of episodes per predicted label
model_copy_df['y_pred'].value_counts()

0.0    7818
1.0    6182
Name: y_pred, dtype: int64

In [12]:
# Episodes predicted as 0 (No lupus) and their five most frequent trajectories
pred_no_lupus = model_copy_df.loc[model_copy_df['y_pred'] == 0]
pred_no_lupus['trajectory'].value_counts().iloc[:5]

['ana', 'No lupus']                                                                                                                                                                                                                             3954
['ana', 'proteinuria', 'cutaneous_lupus', 'pericardial_effusion', 'low_c3', 'anti_smith_antibody', 'non_scarring_alopecia', 'low_c4', 'leukopenia', 'joint_involvement', 'No lupus']                                                             743
['ana', 'proteinuria', 'cutaneous_lupus', 'pericardial_effusion', 'low_c3', 'anti_smith_antibody', 'non_scarring_alopecia', 'joint_involvement', 'low_c4', 'leukopenia', 'thrombocytopenia', 'anti_dsdna_antibody', 'No lupus']                  349
['ana', 'proteinuria', 'cutaneous_lupus', 'pericardial_effusion', 'low_c3', 'anti_smith_antibody', 'non_scarring_alopecia', 'low_c4', 'leukopenia', 'psychosis', 'joint_involvement', 'thrombocytopenia', 'anti_dsdna_antibody', 'No lupus']     188
['ana', 'proteinuria

In [None]:
# Mean episode length of No lupus predictions, leaving out the length-2 episodes
pred_no_lupus.loc[pred_no_lupus['episode_length'] != 2, 'episode_length'].mean()

#### Pathway 0

In [None]:
# Most frequent trajectory among the No lupus predictions
no_lupus_pathway_0 = pred_no_lupus['trajectory'].value_counts().index[0]
no_lupus_pathway_0

In [None]:
# Training patients with a negative ana
no_lupus_pathway_0_training_df = training_df.loc[training_df['ana'] == 0]
no_lupus_pathway_0_training_df.head(n=5)

In [None]:
# Number of matching training rows
no_lupus_pathway_0_training_df.shape[0]

In [None]:
# Percentage of the matching training rows whose label is 0
no_lupus_pathway_0_training_df[no_lupus_pathway_0_training_df['label'] == 0].shape[0] * 100 / no_lupus_pathway_0_training_df.shape[0]

#### Pathway 1

In [None]:
# Second most frequent trajectory among the No lupus predictions
no_lupus_pathway_1 = pred_no_lupus['trajectory'].value_counts().index[1]
no_lupus_pathway_1

In [None]:
# Training rows matching a fixed set of feature values.
# NOTE(review): the conditions are hard-coded — confirm they match the trajectory shown for this pathway
no_lupus_pathway_1_training_df = training_df[
    (training_df['ana'] == 1)
    & (training_df['leukopenia'] == 0)
    & (training_df['joint_involvement'] == 0)
    & (training_df['pericardial_effusion'] == 0)
    & (training_df['proteinuria'] == 0)
    & (training_df['anti_dsdna_antibody'] == 0)
]
no_lupus_pathway_1_training_df.head(n=5)

In [None]:
# Label distribution of the matching training rows
no_lupus_pathway_1_training_df['label'].value_counts()

In [None]:
# Percentage of the matching training rows whose label is 0
no_lupus_pathway_1_training_df[no_lupus_pathway_1_training_df['label'] == 0].shape[0] * 100 / no_lupus_pathway_1_training_df.shape[0]

#### Pathway 2

In [None]:
# Third most frequent trajectory among the No lupus predictions
no_lupus_pathway_2 = pred_no_lupus['trajectory'].value_counts().index[2]
no_lupus_pathway_2

In [None]:
# Training rows matching a fixed set of feature values.
# NOTE(review): the conditions are hard-coded — confirm they match the trajectory shown for this pathway
no_lupus_pathway_2_training_df = training_df[
    (training_df['ana'] == 1)
    & (training_df['leukopenia'] == 1)
    & (training_df['psychosis'] == 0)
    & (training_df['cutaneous_lupus'] == 0)
    & (training_df['auto_immune_hemolysis'] == 0)
    & (training_df['proteinuria'] == 0)
    & (training_df['anti_β2gp1_antibodies'] == 0)
    & (training_df['seizure'] == 0)
    & (training_df['anti_dsdna_antibody'] == 0)
    & (training_df['pericardial_effusion'] == 0)
    & (training_df['joint_involvement'] == 0)
]
no_lupus_pathway_2_training_df.head(n=5)

In [None]:
# Percentage of the matching training rows whose label is 0
no_lupus_pathway_2_training_df[no_lupus_pathway_2_training_df['label'] == 0].shape[0] * 100 / no_lupus_pathway_2_training_df.shape[0]

#### Pathway 3

In [None]:
# Fourth most frequent trajectory among the No lupus predictions
no_lupus_pathway_3 = pred_no_lupus['trajectory'].value_counts().index[3]
no_lupus_pathway_3

In [None]:
# Training rows matching a fixed set of feature values.
# NOTE(review): the conditions are hard-coded — confirm they match the trajectory shown for this pathway
no_lupus_pathway_3_training_df = training_df[
    (training_df['ana'] == 1)
    & (training_df['leukopenia'] == 0)
    & (training_df['joint_involvement'] == 0)
    & (training_df['psychosis'] == 0)
    & (training_df['pericardial_effusion'] == 0)
    & (training_df['proteinuria'] == 1)
    & (training_df['cutaneous_lupus'] == 0)
    & (training_df['seizure'] == 0)
    & (training_df['pleural_effusion'] == 0)
    & (training_df['anti_dsdna_antibody'] == 0)
    & (training_df['non_scarring_alopecia'] == 0)
]
no_lupus_pathway_3_training_df.head(n=5)

In [None]:
# Number of matching training rows
no_lupus_pathway_3_training_df.shape[0]

In [None]:
# Percentage of the matching training rows whose label is 0
no_lupus_pathway_3_training_df[no_lupus_pathway_3_training_df['label'] == 0].shape[0] * 100 / no_lupus_pathway_3_training_df.shape[0]

#### Pathway 4

In [None]:
# Fifth most frequent trajectory among the No lupus predictions
no_lupus_pathway_4 = pred_no_lupus['trajectory'].value_counts().index[4]
no_lupus_pathway_4

In [None]:
# Training rows matching a fixed set of feature values.
# NOTE(review): the conditions are hard-coded — confirm they match the trajectory shown for this pathway
no_lupus_pathway_4_training_df = training_df[
    (training_df['ana'] == 1)
    & (training_df['leukopenia'] == 0)
    & (training_df['joint_involvement'] == 0)
    & (training_df['psychosis'] == 0)
    & (training_df['pericardial_effusion'] == 1)
    & (training_df['seizure'] == 0)
    & (training_df['auto_immune_hemolysis'] == 0)
    & (training_df['delirium'] == 0)
    & (training_df['anti_dsdna_antibody'] == 0)
    & (training_df['low_c4'] == 0)
    & (training_df['proteinuria'] == 0)
    & (training_df['cutaneous_lupus'] == 0)
    & (training_df['low_c3'] == 0)
]
no_lupus_pathway_4_training_df.head(n=5)

In [None]:
# Number of matching training rows
no_lupus_pathway_4_training_df.shape[0]

In [None]:
# Percentage of the matching training rows whose label is 0
no_lupus_pathway_4_training_df[no_lupus_pathway_4_training_df['label'] == 0].shape[0] * 100 / no_lupus_pathway_4_training_df.shape[0]

#### The shortest No lupus pathway (episode length == 11)

In [None]:
# Distinct episode lengths among the No lupus predictions
pred_no_lupus['episode_length'].unique()

In [None]:
# NOTE(review): 11 is hard-coded as the shortest length — verify against the unique lengths of the current run
pred_no_lupus.loc[pred_no_lupus['episode_length'] == 11, 'trajectory'].unique()

#### The longest No lupus pathway

In [None]:
# Distinct trajectories of the longest No lupus episodes
pred_no_lupus.loc[pred_no_lupus['episode_length'] == pred_no_lupus['episode_length'].max(), 'trajectory'].unique()

### Pathways that end in a Lupus diagnosis

In [13]:
# Episodes predicted as 1 (Lupus)
pred_lupus = model_copy_df.loc[model_copy_df['y_pred'] == 1]

In [14]:
# Mean reward over the Lupus predictions
pred_lupus['reward'].mean()

0.5258411517308185

#### Pathway 0

In [None]:
# Most frequent trajectory among the Lupus predictions
lupus_pathway_0 = pred_lupus['trajectory'].value_counts().index[0]
lupus_pathway_0

In [None]:
# Training rows matching a fixed set of feature values.
# NOTE(review): the conditions are hard-coded — confirm they match the trajectory shown for this pathway
lupus_pathway_0_training_df = training_df[
    (training_df['ana'] == 1)
    & (training_df['leukopenia'] == 0)
    & (training_df['joint_involvement'] == 1)
    & (training_df['seizure'] == 0)
    & (training_df['delirium'] == 1)
    & (training_df['psychosis'] == 0)
    & (training_df['anti_β2gp1_antibodies'] == 0)
]
lupus_pathway_0_training_df.head(n=5)

In [None]:
# Number of matching training rows
lupus_pathway_0_training_df.shape[0]

In [None]:
# Percentage of the matching training rows whose label is 1
lupus_pathway_0_training_df[lupus_pathway_0_training_df['label'] == 1].shape[0] * 100 / lupus_pathway_0_training_df.shape[0]

#### Pathway 1

In [None]:
# Second most frequent trajectory among the Lupus predictions
lupus_pathway_1 = pred_lupus['trajectory'].value_counts().index[1]
lupus_pathway_1

In [None]:
# Training rows matching a fixed set of feature values.
# NOTE(review): the conditions are hard-coded — confirm they match the trajectory shown for this pathway
lupus_pathway_1_training_df = training_df[
    (training_df['ana'] == 1)
    & (training_df['leukopenia'] == 1)
    & (training_df['psychosis'] == 0)
    & (training_df['cutaneous_lupus'] == 0)
    & (training_df['auto_immune_hemolysis'] == 0)
    & (training_df['proteinuria'] == 0)
    & (training_df['anti_β2gp1_antibodies'] == 0)
    & (training_df['seizure'] == 0)
    & (training_df['anti_dsdna_antibody'] == 1)
    & (training_df['pericardial_effusion'] == 1)
]
lupus_pathway_1_training_df.head(n=5)

In [None]:
# Number of matching training rows
lupus_pathway_1_training_df.shape[0]

In [None]:
# Percentage of the matching training rows whose label is 1
lupus_pathway_1_training_df[lupus_pathway_1_training_df['label'] == 1].shape[0] * 100 / lupus_pathway_1_training_df.shape[0]

#### Pathway 2

In [None]:
# Third most frequent trajectory among the Lupus predictions
lupus_pathway_2 = pred_lupus['trajectory'].value_counts().index[2]
lupus_pathway_2

In [None]:
# Training rows matching a fixed set of feature values.
# NOTE(review): the conditions are hard-coded — confirm they match the trajectory shown for this pathway
lupus_pathway_2_training_df = training_df[
    (training_df['ana'] == 1)
    & (training_df['leukopenia'] == 0)
    & (training_df['joint_involvement'] == 1)
    & (training_df['seizure'] == 0)
    & (training_df['delirium'] == 0)
    & (training_df['psychosis'] == 0)
    & (training_df['pericardial_effusion'] == 1)
]
lupus_pathway_2_training_df.head(n=5)

In [None]:
# Number of matching training rows
lupus_pathway_2_training_df.shape[0]

In [None]:
# Percentage of the matching training rows whose label is 1
lupus_pathway_2_training_df[lupus_pathway_2_training_df['label'] == 1].shape[0] * 100 / lupus_pathway_2_training_df.shape[0]

#### Pathway 3

In [None]:
# Fourth most frequent trajectory among the Lupus predictions
lupus_pathway_3 = pred_lupus['trajectory'].value_counts().index[3]
lupus_pathway_3

In [None]:
# Training rows matching a fixed set of feature values.
# NOTE(review): the conditions are hard-coded — confirm they match the trajectory shown for this pathway
lupus_pathway_3_training_df = training_df[
    (training_df['ana'] == 1)
    & (training_df['leukopenia'] == 0)
    & (training_df['joint_involvement'] == 1)
    & (training_df['seizure'] == 0)
    & (training_df['delirium'] == 0)
    & (training_df['psychosis'] == 0)
    & (training_df['pericardial_effusion'] == 0)
    & (training_df['cutaneous_lupus'] == 0)
    & (training_df['auto_immune_hemolysis'] == 0)
    & (training_df['fever'] == 0)
    & (training_df['non_scarring_alopecia'] == 1)
    & (training_df['lupus_anti_coagulant'] == 0)
    & (training_df['anti_β2gp1_antibodies'] == 1)
]
lupus_pathway_3_training_df.head(n=5)

In [None]:
# Number of matching training rows
lupus_pathway_3_training_df.shape[0]

In [None]:
# Percentage of the matching training rows whose label is 1
lupus_pathway_3_training_df[lupus_pathway_3_training_df['label'] == 1].shape[0] * 100 / lupus_pathway_3_training_df.shape[0]

#### Pathway 4

In [None]:
# Fifth most frequent trajectory among the Lupus predictions
lupus_pathway_4 = pred_lupus['trajectory'].value_counts().index[4]
lupus_pathway_4

In [None]:
# Training rows matching a fixed set of feature values.
# NOTE(review): the conditions are hard-coded — confirm they match the trajectory shown for this pathway
lupus_pathway_4_training_df = training_df[
    (training_df['ana'] == 1)
    & (training_df['leukopenia'] == 0)
    & (training_df['joint_involvement'] == 0)
    & (training_df['psychosis'] == 0)
    & (training_df['pericardial_effusion'] == 0)
    & (training_df['proteinuria'] == 1)
    & (training_df['seizure'] == 0)
    & (training_df['pleural_effusion'] == 0)
    & (training_df['anti_dsdna_antibody'] == 1)
]
lupus_pathway_4_training_df.head(n=5)

In [None]:
# Number of matching training rows
lupus_pathway_4_training_df.shape[0]

In [None]:
# Percentage of the matching training rows whose label is 1
lupus_pathway_4_training_df[lupus_pathway_4_training_df['label'] == 1].shape[0] * 100 / lupus_pathway_4_training_df.shape[0]

#### Longest lupus pathway - only one episode

In [None]:
# Length of the longest episode among the Lupus predictions
pred_lupus['episode_length'].max()

In [None]:
# Trajectory of the first episode that reaches the maximum length
longest_lupus_pathway = pred_lupus.loc[pred_lupus['episode_length'] == pred_lupus['episode_length'].max(), 'trajectory'].iloc[0]
longest_lupus_pathway

In [None]:
# Training rows matching a fixed set of feature values.
# NOTE(review): the '_4' in the variable name looks copied from the pathway-4 cells; the conditions are
# hard-coded — confirm they match the longest trajectory shown for this run
longest_lupus_pathway_4_training_df = training_df[
    (training_df['ana'] == 1)
    & (training_df['seizure'] == 0)
    & (training_df['anti_dsdna_antibody'] == 0)
    & (training_df['cutaneous_lupus'] == 0)
    & (training_df['non_scarring_alopecia'] == 0)
    & (training_df['proteinuria'] == 0)
    & (training_df['anti_smith_antibody'] == 0)
    & (training_df['thrombocytopenia'] == 0)
    & (training_df['pericardial_effusion'] == 0)
    & (training_df['lupus_anti_coagulant'] == 0)
    & (training_df['pleural_effusion'] == 0)
    & (training_df['joint_involvement'] == 1)
    & (training_df['low_c3'] == 0)
    & (training_df['psychosis'] == 0)
    & (training_df['leukopenia'] == 0)
    & (training_df['low_c4'] == 0)
]
longest_lupus_pathway_4_training_df.head(n=5)

In [None]:
# no_lupus_pathway_2_indices = pred_no_lupus[pred_no_lupus.trajectory == no_lupus_pathway_2].index
# no_lupus_pathway_2_indices[:5]

In [None]:
# no_lupus_pathway_2_testing_df = testing_df.loc[no_lupus_pathway_2_indices]
# no_lupus_pathway_2_testing_df['score'] = no_lupus_pathway_2_testing_df.apply(lambda row: compute_score(row), axis=1)
# no_lupus_pathway_2_testing_df.head()

In [None]:
# The cells that used to build no_lupus_pathway_2_testing_df are commented out, so on a fresh
# kernel the name would be undefined; rebuild it here from the test rows of pathway-2 episodes.
no_lupus_pathway_2_indices = pred_no_lupus[pred_no_lupus.trajectory == no_lupus_pathway_2].index
no_lupus_pathway_2_testing_df = testing_df.loc[no_lupus_pathway_2_indices].copy()  # copy: avoid writing into a slice of testing_df
no_lupus_pathway_2_testing_df['score'] = [
    compute_score(patient_row) for _, patient_row in no_lupus_pathway_2_testing_df.iterrows()
]
# Episodes on this No lupus pathway whose full-record score is 10 or more
len(no_lupus_pathway_2_testing_df[no_lupus_pathway_2_testing_df.score>=10]) #misdiagnosed episodes

In [None]:
# Number of episodes on pathway 2 whose prediction differs from the actual label
model_copy_df[
    (model_copy_df['trajectory'] == no_lupus_pathway_2)
    & model_copy_df['y_actual'].ne(model_copy_df['y_pred'])
].shape[0]

#### Pathway 3

In [None]:
# Pathway k is the trajectory at position k of the frequency ranking (same convention as the
# earlier "Pathway 3" cell); index[2] here would overwrite this name with pathway 2's trajectory.
no_lupus_pathway_3 = pred_no_lupus.trajectory.value_counts().index[3]
no_lupus_pathway_3

#### Computing the scores of the predicted pathways for the test data

In [15]:
# NOTE(review): these three dictionaries repeat the ones in the "Computing score functions" cell
# Dictionary with the weights of the different features
criteria_weights = {
    # constitutional
    'fever': 2,
    # hematologic
    'leukopenia': 3,
    'thrombocytopenia': 4,
    'auto_immune_hemolysis': 4,
    # neuropsychiatric
    'delirium': 2,
    'psychosis': 3,
    'seizure': 5,
    # mucocutaneous (the three cutaneous lupus subtypes share one encoded feature)
    'non_scarring_alopecia': 2,
    'oral_ulcers': 2,
    'subacute_cutaneous_lupus': 4,
    'discoid_lupus': 4,
    'acute_cutaneous_lupus': 6,
    # serosal
    'pleural_effusion': 5,
    'pericardial_effusion': 5,
    'acute_pericarditis': 6,
    # musculoskeletal
    'joint_involvement': 6,
    # renal
    'proteinuria': 4,
    # antiphospholipid antibodies
    'anti_cardioliphin_antibodies': 2,
    'anti_β2gp1_antibodies': 2,
    'lupus_anti_coagulant': 2,
    # complement proteins (single values and the combined and/or entries)
    'low_c3': 3,
    'low_c4': 3,
    'low_c3_and_low_c4': 4,
    'low_c3_or_low_c4': 3,
    # SLE-specific antibodies
    'anti_dsdna_antibody': 6,
    'anti_smith_antibody': 6,
}

# Dictionary mapping each domain to the features that belong to it
domains_feat_dict = {
    'constitutional': ['fever'],
    'hematologic': ['leukopenia', 'thrombocytopenia', 'auto_immune_hemolysis'],
    'neuropsychiatric': ['delirium', 'psychosis', 'seizure'],
    'mucocutaneous': ['non_scarring_alopecia', 'oral_ulcers', 'cutaneous_lupus'],
    'serosal': ['pleural_effusion', 'pericardial_effusion', 'acute_pericarditis'],
    'musculoskeletal': ['joint_involvement'],
    'renal': ['proteinuria'],
    'antiphospholipid_antibodies': ['anti_cardioliphin_antibodies', 'anti_β2gp1_antibodies', 'lupus_anti_coagulant'],
    'complement_proteins': ['low_c3', 'low_c4'],
    'sle_specific_antibodies': ['anti_dsdna_antibody', 'anti_smith_antibody'],
}

# Dictionary with the maximum possible score for each domain
domains_max_scores_dict = {
    'constitutional': 2,
    'hematologic': 4,
    'neuropsychiatric': 5,
    'mucocutaneous': 6,
    'serosal': 6,
    'musculoskeletal': 6,
    'renal': 4,
    'antiphospholipid_antibodies': 2,
    'complement_proteins': 4,
    'sle_specific_antibodies': 6,
}



# NOTE(review): repeats the definition from the "Computing score functions" cell
def get_cutaneous_lupus_score(cutaneous_type):
    '''
    Translates the encoded cutaneous lupus value into its criterion weight.

    cutaneous_type: 0 = no cutaneous lupus, 1 = subacute, 2 = acute, 3 = discoid
    Returns 0 for code 0, otherwise the matching entry of criteria_weights.
    Raises Exception for any other value.
    '''
    if cutaneous_type == 0:  # negative for any form of cutaneous lupus
        return 0
    subtype_weight_keys = (
        (1, 'subacute_cutaneous_lupus'),
        (2, 'acute_cutaneous_lupus'),
        (3, 'discoid_lupus'),
    )
    for subtype_code, weight_key in subtype_weight_keys:
        if cutaneous_type == subtype_code:
            return criteria_weights[weight_key]
    raise Exception('Unknown cutaneous lupus type!')

# NOTE(review): repeats the definition from the "Computing score functions" cell
def get_feat_score(row, feat):
    '''
    Returns the score a single feature contributes for the given row.

    'cutaneous_lupus' is delegated to get_cutaneous_lupus_score; any other feature
    scores 0 when its value is <= 0 and criteria_weights[feat] otherwise.
    '''
    if feat == 'cutaneous_lupus':
        return get_cutaneous_lupus_score(row[feat])
    feat_value = row[feat]
    # NOTE(review): a NaN value is not <= 0, so it would be scored as present — confirm inputs have no NaN
    return 0 if feat_value <= 0 else criteria_weights[feat]


# NOTE(review): repeats the definition from the "Computing score functions" cell
def get_c3_c4_score(c3, c4):
    '''
    Returns the complement proteins domain score from the two complement flags.

    c3, c4: 1 means low, 0 means not low
    Both low -> 'low_c3_and_low_c4' weight; exactly one low -> 'low_c3_or_low_c4' weight; else 0.
    '''
    c3_is_low = c3 == 1
    c4_is_low = c4 == 1
    if c3_is_low & c4_is_low:
        return criteria_weights['low_c3_and_low_c4']
    if c3_is_low | c4_is_low:
        return criteria_weights['low_c3_or_low_c4']
    return 0

# NOTE(review): repeats the definition from the "Computing score functions" cell
def get_domain_score(row, domain):
    '''
    Returns the score of one domain for the given row.

    The complement proteins domain is scored jointly from low_c3 and low_c4; every other
    domain takes the highest score among its features (0 when none scores).
    Raises Exception if the result exceeds the domain's entry in domains_max_scores_dict.
    '''
    feats_in_domain = domains_feat_dict[domain]  # a list of features in that domain
    if domain == 'complement_proteins':
        score = get_c3_c4_score(row.low_c3, row.low_c4)
    else:
        # only the single highest-weighted feature counts within a domain
        score = max([0] + [get_feat_score(row, feat_name) for feat_name in feats_in_domain])
    if score > domains_max_scores_dict[domain]:
        raise Exception('The score is too large for this domain!')
    return score

# NOTE(review): repeats the definition from the "Computing score functions" cell
def compute_score(row):
    '''
    Returns the total score of a dataframe row: the sum of all domain scores.

    A row whose 'ana' value is 0 (negative) always scores 0.
    '''
    if row['ana'] == 0:  # 0=negative, 1=positive
        return 0
    return sum(get_domain_score(row, domain_name) for domain_name in domains_feat_dict)

In [16]:
import itertools
# 'ana' followed by every feature of every domain, in the order the domains are declared
features = ['ana'] + list(itertools.chain(*domains_feat_dict.values()))
features[0:5]

['ana', 'fever', 'leukopenia', 'thrombocytopenia', 'auto_immune_hemolysis']

In [17]:
# Number of entries in features: 'ana' plus every feature listed in domains_feat_dict
len(features)

23

In [18]:
def create_df_with_score(df): #df is a test df or a subset of a test df
    '''
    Builds a dataframe holding, for every episode, the feature values seen along its trajectory and their score.

    For each row of df, every name in the global `features` takes its value from the matching
    row of the global `testing_df` when the name occurs in the row's trajectory, and 0 otherwise.
    The `score` column is compute_score applied to each resulting row.

    df: dataframe with a `trajectory` column; its index labels are used as row positions in testing_df
    Returns a new dataframe with a fresh 0..n-1 index, one column per feature (in `features` order)
    and a final `score` column. An empty df yields an empty dataframe with those columns.
    '''
    records = []
    for i, row in df.iterrows():
        patient = testing_df.iloc[i]  # fetched once per episode instead of once per feature
        # `in` is a substring test while trajectory is a string, a membership test once it is a list
        records.append({feat: patient[feat] if feat in row.trajectory else 0 for feat in features})
    # Build the frame in one go: DataFrame.append was removed in pandas 2.0 and
    # re-copies the whole frame on every call when used inside a loop
    new_df = pd.DataFrame(records, columns=features)
    # a plain list keeps the assignment valid even when there are no rows
    new_df['score'] = [compute_score(record) for _, record in new_df.iterrows()]
    return new_df

In [19]:
# Score the No lupus predictions using only the features queried along each trajectory
pred_no_lupus_with_score = create_df_with_score(df=pred_no_lupus)
pred_no_lupus_with_score.head(n=5)

Unnamed: 0,ana,fever,leukopenia,thrombocytopenia,auto_immune_hemolysis,delirium,psychosis,seizure,non_scarring_alopecia,oral_ulcers,...,joint_involvement,proteinuria,anti_cardioliphin_antibodies,anti_β2gp1_antibodies,lupus_anti_coagulant,low_c3,low_c4,anti_dsdna_antibody,anti_smith_antibody,score
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,9
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0


In [20]:
# Mean score over the No lupus predictions
pred_no_lupus_with_score['score'].mean()

2.318495778971604

In [21]:
# Score the Lupus predictions using only the features queried along each trajectory
pred_lupus_with_score = create_df_with_score(df=pred_lupus)
pred_lupus_with_score.head(n=5)

Unnamed: 0,ana,fever,leukopenia,thrombocytopenia,auto_immune_hemolysis,delirium,psychosis,seizure,non_scarring_alopecia,oral_ulcers,...,joint_involvement,proteinuria,anti_cardioliphin_antibodies,anti_β2gp1_antibodies,lupus_anti_coagulant,low_c3,low_c4,anti_dsdna_antibody,anti_smith_antibody,score
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,10
2,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,17
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,10
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,16


In [22]:
# Mean score over the Lupus predictions
pred_lupus_with_score['score'].mean()

12.03655774830152

#### computing actual scores of original test_df