In [55]:
import pandas as pd
import pprint
# Shared pretty-printer with a 4-space indent; result_log uses it to print long texts.
pp = pprint.PrettyPrinter(indent=4)

# Compare validation and test sets

In [56]:
# Load the AXA table and the two split tables (the mimic_axa and mimiciv_icd10 files).
axa_data, mimic_axa_split, mimic_split = [
    pd.read_feather(feather_path)
    for feather_path in (
        'files/data/axa_icd10/axa_icd10.feather',
        'files/data/mimic_axa/mimic_axa_icd10_split.feather',
        'files/data/mimiciv_icd10/mimiciv_icd10_split.feather',
    )
]

In [57]:
# Validation overlap: share of the 'val' ids of `mimic_split` that are also
# 'val' ids of `mimic_axa_split`.
val_mimic_split_val = set(
    mimic_split.loc[mimic_split['split'] == 'val', '_id'].unique()
)
val_mimic_axa_split_val = set(
    mimic_axa_split.loc[mimic_axa_split['split'] == 'val', '_id'].unique()
)

nb_val = len(val_mimic_split_val)
common = len(val_mimic_split_val & val_mimic_axa_split_val)
print(common / nb_val)

0.5846912841979369


In [58]:
# Test overlap: share of the 'test' ids of `mimic_split` that are also
# 'test' ids of `mimic_axa_split`.
test_mimic_split_val = set(
    mimic_split.loc[mimic_split['split'] == 'test', '_id'].unique()
)
test_mimic_axa_split_val = set(
    mimic_axa_split.loc[mimic_axa_split['split'] == 'test', '_id'].unique()
)

nb_test = len(test_mimic_split_val)
common = len(test_mimic_split_val & test_mimic_axa_split_val)
print(common / nb_test)

0.9247045752954247


In [59]:
# Check how many AXA ids fall in the val and test sets of `mimic_axa_split`.
axa_id = set(axa_data['_id'].unique())
print(len(val_mimic_axa_split_val & axa_id))
print(len(test_mimic_axa_split_val & axa_id))

46
99


# Analyse MIMIC-AXA

In [60]:
# Read the AXA table again and collect its `_id` values as a plain list
# (order and duplicates are kept).
axa_data = pd.read_feather('files/data/axa_icd10/axa_icd10.feather')
ids_axa = axa_data['_id'].tolist()

In [61]:
# Load the validation predictions and keep only the rows whose `_id` is in `ids_axa`.
val_results = pd.read_feather('files/mimic_axa/predictions_val.feather')
val_results_axa = val_results.loc[val_results['_id'].isin(ids_axa)]

In [62]:
def select_top_k(k, df):
    """Return, for every row of ``df``, its ``k`` highest-scoring columns.

    Every column of ``df`` other than ``'_id'`` and ``'target'`` is treated
    as a score column. For each row the score columns are sorted in
    descending order and the first ``k`` (label, value) pairs are kept.

    Parameters
    ----------
    k : int
        Number of top columns to report per row.
    df : pandas.DataFrame
        Frame containing ``'_id'``, ``'target'`` and the score columns.
        It is not modified.

    Returns
    -------
    pandas.DataFrame
        A frame indexed 0..n-1 with columns ``_id``, ``target`` followed by
        ``top1_column``, ``top1_value``, ..., ``top{k}_column``,
        ``top{k}_value``. If ``k`` is larger than the number of score
        columns, the surplus ``top*`` columns are filled with NaN instead of
        raising.
    """
    meta_columns = ['_id', 'target']
    scores = df.loc[:, ~df.columns.isin(meta_columns)]

    # Never ask for more ranks than there are score columns.
    n_ranked = min(k, scores.shape[1])

    # Build one record per row and create the frame once at the end; growing
    # a DataFrame row by row with .loc re-allocates on every insertion.
    records = []
    for _, row in scores.iterrows():
        # mergesort is stable, so tied scores are ordered deterministically.
        best = row.sort_values(ascending=False, kind='mergesort').iloc[:n_ranked]
        record = {}
        for rank, (label, value) in enumerate(zip(best.index, best.values), start=1):
            record['top{}_column'.format(rank)] = label
            record['top{}_value'.format(rank)] = value
        records.append(record)

    top_columns = [
        name
        for rank in range(1, k + 1)
        for name in ('top{}_column'.format(rank), 'top{}_value'.format(rank))
    ]
    # dtype=object stores each scalar as-is, so the values print exactly as
    # they appear in the input frame.
    top_df = pd.DataFrame(records, columns=top_columns, dtype=object)

    # reset_index returns a new frame, so the caller's data is left untouched
    # and both parts align on a 0..n-1 index for the column-wise concat.
    meta = df[meta_columns].reset_index(drop=True)
    return pd.concat([meta, top_df], axis=1)

In [63]:
# Top-5 predicted columns (and their values) for each AXA validation row.
val_results_axa_top_5 = select_top_k(5, val_results_axa)

In [64]:
# Display the top-5 prediction table built from the AXA validation rows.
val_results_axa_top_5

Unnamed: 0,_id,target,top1_column,top1_value,top2_column,top2_value,top3_column,top3_value,top4_column,top4_value,top5_column,top5_value
0,13117700,[Z71.8],Z32.1,0.269132,Z71.8,0.049279,S93,0.037659,M51.1,0.02275,U07.1,0.022151
1,13285005,[Z71.8],Z32.1,0.269132,Z71.8,0.049279,S93,0.037659,M51.1,0.02275,U07.1,0.022151
2,13067682,[J93.1],Z32.1,0.175665,Z71.8,0.075976,S93,0.055621,S01,0.043133,U07.1,0.033669
3,13230779,[A09],Z32.1,0.055442,Z71.8,0.045934,S93,0.035508,Z23.,0.02606,U07.1,0.018586
4,13430163,[M51.1],M51.1,0.708517,M48.061,0.064162,Z71.8,0.05899,Z32.1,0.057656,S93,0.047867
5,13199485,[Z32.1],Z32.1,0.524967,Z71.8,0.066648,S93,0.057682,Z23.,0.029644,M51.1,0.029338
6,12935394,[Z32.1],Z32.1,0.859061,S93,0.109357,Z71.8,0.088751,M51.1,0.0556,S01,0.032615
7,13100091,[Z32.1],Z32.1,0.807568,S93,0.058492,Z71.8,0.05571,Z23.,0.034882,U07.1,0.024374
8,13224756,[Q40.1],Z32.1,0.125357,S93,0.092911,Z71.8,0.078219,K21.9,0.06146,K44.9,0.059905
9,13386965,[Z32.1],Z32.1,0.816483,Z71.8,0.04666,S93,0.041094,Z23.,0.021045,U07.1,0.017104


In [65]:
# Attach the `text` column of `axa_data` to the top-5 table, matching on `_id`.
# A 'text' column left over from a previous run of this cell is dropped first:
# otherwise re-running the cell would produce 'text_x'/'text_y' columns and
# result_log, which reads the 'text' column, would fail.
val_results_axa_top_5 = (
    val_results_axa_top_5
    .drop(columns='text', errors='ignore')
    .merge(axa_data[['_id', 'text']], on='_id')
)

# Examples

In [66]:
def result_log(line):
    """Print the text, top-1 predicted code and its confidence for one row.

    ``line`` is used as a ``.loc`` label on the module-level
    ``val_results_axa_top_5`` frame; the text is pretty-printed with ``pp``.
    """
    print('Text:')
    record = val_results_axa_top_5.loc[line]
    pp.pprint(record["text"])
    for heading, field in (
        ('Code predicted:', "top1_column"),
        ('Confidence:', "top1_value"),
    ):
        print(heading)
        print(record[field])

In [67]:
# Print the text, top-1 predicted code and confidence for row 45.
result_log(45)

Text:
('current disease ailment the patient comes to vyveces par having presented '
 'care with a truma in the left hand for which he started candola and '
 'imitation e f edens it equiiny functional hand dollar to the pulp without '
 'distal neurocinelatin compromise ailment the patient comes to vyveces par '
 'having presented care with a truma in the left hand for which he started '
 'candola and imitation e f edens it equiiny functional hand dollar to the '
 'pulp without distal neurocinelatin compromise ailment the patient comes to '
 'vyveces par having presented care with a truma in the left hand for which he '
 'started candola and imitation e f edens it equiiny functional hand dollar to '
 'the pulp without distal neurocinelatin compromise cause of disease time '
 'manner and place where the injury occurred loss of full sustenance it is '
 'related to another condition yestime manner and place where the injury '
 'occurred loss of full sustenance it is related to another condi

In [68]:
# Print the text, top-1 predicted code and confidence for row 37.
result_log(37)

Text:
('current disease it began in with hypoesthesia in the lower limbs and '
 'stabbing pain in the medial aspect of the right thigh a thoracic lumbar '
 'spine mri was performed finding a hyperintense spinal cord lesion on t2 '
 'sequence and stir at the level of the ninth and tenth thoracic vertebrae of '
 'demyelinating characteristics it was complemented with mri of the skull for '
 'periventricular lesions the diagnosis of multiple sclerosis was made cause '
 'of disease relapsing remitting multiple sclerosis diagnostic relapsing '
 'remitting multiple sclerosis pain in the left leg that limits walking '
 'persistent pain in the proximal and posterolateral region of the thigh and '
 'leg of a neuropathic radicular type treatment teriflunomide mg dia')
Code predicted:
G35.
Confidence:
0.88612944


In [69]:
# Print the text, top-1 predicted code and confidence for row 38.
result_log(38)

Text:
('current disease a year old male patient presents docon casteldia aby dominac '
 'burgclismos pyrosis liquid evaluations with the presence of sonore despite '
 'having attended to the general physician clausultation there is no mejuria '
 'he is a prisoner20 year old male patient commonly epigastric mesogastrium '
 'borgmismotireregurgitations pyrosis docor vente in cord colic acute dock tor '
 'distensind abdul inac with nocardiac ref with severe gastroesophageal '
 'causing nocturnal hitogos of mocta mooscopy with cos findings it is '
 'determined cause of disease acode to castroein outpatient office te mologia '
 'uonte la situation of fish symptomatology se medimca endoscopy cooking '
 'diagnostic ci therapeutic diagnostic itiatc hernia type severe cardiofiatal '
 'incontinence gastrito erojive iemorrhagic erosive duodenitis seanexan images '
 'treatment')
Code predicted:
K29.80
Confidence:
0.5020244
