<a href="https://colab.research.google.com/github/samantamrityunjay/ICDcoding/blob/master/ICDCoding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


This notebook deals with the description of ICD codes and their conversion to ICD10 codes from tables of MIMIC III database. Both types of codes are described:
- [ICD9 Diagnosis codes](#icd9)
- [ICD9 Procedures codes](#icdpcs)
> The working aim and summaries are mentioned in the [google slide](https://docs.google.com/presentation/d/19tE0L4fJ9JjG6qf05m8iwDG0PH1cGIKFhiyBTqZ6Ofk/edit?usp=sharing)<br>
> The work with text and extracting relevant sections are in this [notebook](./Discharge%20summary.ipynb)

In [10]:
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
from collections import Counter

# <a id='icd9'>ICD9 Diagnosis Codes</a>
Tables used are:
- ADMISSIONS.csv: This has all patient records of the hospital
- D_ICD_DIAGNOSES.csv: This contain the ICD9 diagnosi code and definitions
- DIAGNOSES_ICD.csv: contains ICD codes corresponting to different admission ids

In [3]:
admissions=pd.read_csv('/content/drive/MyDrive/AlgoIntern/Data/ADMISSIONS.csv')
admissions.head()


Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,ADMITTIME,DISCHTIME,DEATHTIME,ADMISSION_TYPE,ADMISSION_LOCATION,DISCHARGE_LOCATION,INSURANCE,LANGUAGE,RELIGION,MARITAL_STATUS,ETHNICITY,EDREGTIME,EDOUTTIME,DIAGNOSIS,HOSPITAL_EXPIRE_FLAG,HAS_CHARTEVENTS_DATA
0,21,22,165315,2196-04-09 12:26:00,2196-04-10 15:54:00,,EMERGENCY,EMERGENCY ROOM ADMIT,DISC-TRAN CANCER/CHLDRN H,Private,,UNOBTAINABLE,MARRIED,WHITE,2196-04-09 10:06:00,2196-04-09 13:24:00,BENZODIAZEPINE OVERDOSE,0,1
1,22,23,152223,2153-09-03 07:15:00,2153-09-08 19:10:00,,ELECTIVE,PHYS REFERRAL/NORMAL DELI,HOME HEALTH CARE,Medicare,,CATHOLIC,MARRIED,WHITE,,,CORONARY ARTERY DISEASE\CORONARY ARTERY BYPASS...,0,1
2,23,23,124321,2157-10-18 19:34:00,2157-10-25 14:00:00,,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,HOME HEALTH CARE,Medicare,ENGL,CATHOLIC,MARRIED,WHITE,,,BRAIN MASS,0,1
3,24,24,161859,2139-06-06 16:14:00,2139-06-09 12:48:00,,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,HOME,Private,,PROTESTANT QUAKER,SINGLE,WHITE,,,INTERIOR MYOCARDIAL INFARCTION,0,1
4,25,25,129635,2160-11-02 02:06:00,2160-11-05 14:55:00,,EMERGENCY,EMERGENCY ROOM ADMIT,HOME,Private,,UNOBTAINABLE,MARRIED,WHITE,2160-11-02 01:01:00,2160-11-02 04:27:00,ACUTE CORONARY SYNDROME,0,1


In [4]:
admissions.HADM_ID.nunique()

58976

In [5]:
icd9_def=pd.read_csv('/content/drive/MyDrive/AlgoIntern/Data/D_ICD_DIAGNOSES.csv')
icd9_def.head()

Unnamed: 0,ROW_ID,ICD9_CODE,SHORT_TITLE,LONG_TITLE
0,174,1166,TB pneumonia-oth test,"Tuberculous pneumonia [any form], tubercle bac..."
1,175,1170,TB pneumothorax-unspec,"Tuberculous pneumothorax, unspecified"
2,176,1171,TB pneumothorax-no exam,"Tuberculous pneumothorax, bacteriological or h..."
3,177,1172,TB pneumothorx-exam unkn,"Tuberculous pneumothorax, bacteriological or h..."
4,178,1173,TB pneumothorax-micro dx,"Tuberculous pneumothorax, tubercle bacilli fou..."


In [6]:
diag=pd.read_csv('/content/drive/MyDrive/AlgoIntern/Data/DIAGNOSES_ICD.csv')
diag.head()

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,SEQ_NUM,ICD9_CODE
0,1297,109,172335,1.0,40301
1,1298,109,172335,2.0,486
2,1299,109,172335,3.0,58281
3,1300,109,172335,4.0,5855
4,1301,109,172335,5.0,4254


In [7]:
print("Number of Admission IDs that are null %d" %diag[diag.HADM_ID.isnull()].ROW_ID.count())
print("Number of Admission Ids that dont have ICD9 codes %d" %diag[diag.ICD9_CODE.isnull()].HADM_ID.count())

Number of Admission IDs that are null 0
Number of Admission Ids that dont have ICD9 codes 47


In [8]:
print("Total number of unique patients is %d" %diag.SUBJECT_ID.nunique())
print("Total number of unique patient admissions is %d " %diag.HADM_ID.nunique())
print("Total number of unique ICD codes is %d" %diag.ICD9_CODE.nunique())
print("Total number of admissions assigned ICD codes is %d " %diag[diag.ICD9_CODE.notnull()].HADM_ID.nunique())

Total number of unique patients is 46520
Total number of unique patient admissions is 58976 
Total number of unique ICD codes is 6984
Total number of admissions assigned ICD codes is 58929 


In [9]:
diag.dropna(subset=["ICD9_CODE"], inplace=True) #Dropped all those admids.Id that don't contain ICD codes
print("Total number of unique patient admissions is %d " %diag.HADM_ID.nunique())

Total number of unique patient admissions is 58929 


In [13]:
icd9_freq=sorted(dict(Counter(diag.ICD9_CODE.to_list())).items(),key=lambda x:x[1],reverse=True)

In [14]:
print(icd9_freq)

[('4019', 20703), ('4280', 13111), ('42731', 12891), ('41401', 12429), ('5849', 9119), ('25000', 9058), ('2724', 8690), ('51881', 7497), ('5990', 6555), ('53081', 6326), ('2720', 5930), ('V053', 5779), ('V290', 5519), ('2859', 5406), ('2449', 4917), ('486', 4839), ('2851', 4552), ('2762', 4528), ('496', 4431), ('99592', 3912), ('V5861', 3806), ('0389', 3725), ('5070', 3680), ('V3000', 3566), ('5859', 3435), ('311', 3431), ('40390', 3421), ('3051', 3358), ('412', 3278), ('2875', 3065), ('V4581', 3056), ('41071', 3055), ('2761', 3039), ('4240', 2926), ('V1582', 2811), ('V3001', 2758), ('5119', 2734), ('V4582', 2725), ('40391', 2630), ('78552', 2586), ('4241', 2550), ('V5867', 2538), ('42789', 2453), ('32723', 2380), ('9971', 2343), ('5845', 2287), ('2760', 2272), ('7742', 2264), ('49390', 2195), ('2767', 2169), ('5180', 2165), ('4168', 2148), ('45829', 2121), ('2749', 2082), ('4589', 2051), ('V502', 2016), ('73300', 1947), ('78039', 1934), ('5856', 1926), ('4271', 1811), ('5185', 1807), 

In [15]:
print(icd9_freq[:100])

[('4019', 20703), ('4280', 13111), ('42731', 12891), ('41401', 12429), ('5849', 9119), ('25000', 9058), ('2724', 8690), ('51881', 7497), ('5990', 6555), ('53081', 6326), ('2720', 5930), ('V053', 5779), ('V290', 5519), ('2859', 5406), ('2449', 4917), ('486', 4839), ('2851', 4552), ('2762', 4528), ('496', 4431), ('99592', 3912), ('V5861', 3806), ('0389', 3725), ('5070', 3680), ('V3000', 3566), ('5859', 3435), ('311', 3431), ('40390', 3421), ('3051', 3358), ('412', 3278), ('2875', 3065), ('V4581', 3056), ('41071', 3055), ('2761', 3039), ('4240', 2926), ('V1582', 2811), ('V3001', 2758), ('5119', 2734), ('V4582', 2725), ('40391', 2630), ('78552', 2586), ('4241', 2550), ('V5867', 2538), ('42789', 2453), ('32723', 2380), ('9971', 2343), ('5845', 2287), ('2760', 2272), ('7742', 2264), ('49390', 2195), ('2767', 2169), ('5180', 2165), ('4168', 2148), ('45829', 2121), ('2749', 2082), ('4589', 2051), ('V502', 2016), ('73300', 1947), ('78039', 1934), ('5856', 1926), ('4271', 1811), ('5185', 1807), 

In [17]:
icd9_def[icd9_def.ICD9_CODE=='2800']

Unnamed: 0,ROW_ID,ICD9_CODE,SHORT_TITLE,LONG_TITLE
2494,2218,2800,Chr blood loss anemia,Iron deficiency anemia secondary to blood loss...


> The below dictionary of ICD9 codes to ICD10 codes is made through <http://www.icd10codesearch.com/>

In [23]:
icd10_dict={'ICD9_CODE':['4019','4280','42731','41401','5849','25000','2724','2724','51881','51881','5990','53081','2720','V053','V290','2859','2449','486','2851','2762','496','99592','V5861','0389','5070','V3000','5859','311','40390','3051','412','2875','V4581','41071','2761','4240','4240','V1582','V3001','5119','V4582','40391','78552','4241','4241','4241','4241','4241','V5867','42789','42789','32723','9971','9971','9971','9971','5845','2760','7742','49390','49390','2767','5180','5180','4168','4168','45829','45829','2749','4589','V502'
                   ,'73300','78039','5856','4271','4254','4254','4111','V1251','3572','3572','3572','3572','3572','30000','27800','E8798','41400','60000','7907','2930','00845','2768','4439','5789','V4501','27651','28521','27652','431','4275','E8788','V4986','769','79902','70703','70703','70703','5715','5715','5715','V103','99591','2639','42832','42833','07054','42732','E8782','V1046'],
            'ICD10':['I10','I509','I4891','I2510','N179','E119','E784','E785','J9600','J9690','N390','K219','E780','Z23','P002','D649','E039','J189','D62','E872','J449','R6520','Z7901','A419','J690','Z3800','N189','F329','I129','F17200','I252','D696','Z951','I214','E871','I340','I348','Z87891','Z3801','J918','Z9861','I120','R6521','I350','I351','I352','I358','I359','Z794','I498','R001','G4733','I97710','I97790','I9788','I9789','N170','E870','P590','J45909','J45998','E875','J9811','J9819','I272','I2789','I952','I9581','M109','I959','Z412'
                    ,'M810','R569','N186','I472','I425','I428','I200','Z86718','E0842','E0942','E1042','E1142','E1342','F419','E669','Y848','I2510','N400','R7881','F05','A047','E876','I739','K922','Z950','E860','D631','E861','I619','I469','Y838','Z66','P220','R0902','L89139','L89149','L89159','K740','K7460','K7469','Z853','A419','E46','I5032','I5033','B182','I4892','Y832','Z8546'],
            'long_title':['ESSENTIAL (PRIMARY) HYPERTENSION','HEART FAILURE, UNSPECIFIED','UNSPECIFIED ATRIAL FIBRILLATION','ATHEROSCLEROTIC HEART DISEASE OF NATIVE CORONARY ARTERY WITHOUT ANGINA PECTORIS'
                         ,'ACUTE KIDNEY FAILURE, UNSPECIFIED','TYPE 2 DIABETES MELLITUS WITHOUT COMPLICATIONS','OTHER HYPERLIPIDEMIA','HYPERLIPIDEMIA, UNSPECIFIED',
                         'ACUTE RESPIRATORY FAILURE, UNSPECIFIED WHETHER WITH HYPOXIA OR HYPERCAPNIA','RESPIRATORY FAILURE, UNSPECIFIED, UNSPECIFIED WHETHER WITH HYPOXIA OR HYPERCAPNIA'
                         ,'URINARY TRACT INFECTION, SITE NOT SPECIFIED','GASTRO-ESOPHAGEAL REFLUX DISEASE WITHOUT ESOPHAGITIS','PURE HYPERCHOLESTEROLEMIA','ENCOUNTER FOR IMMUNIZATION'
                         ,'NEWBORN (SUSPECTED TO BE) AFFECTED BY MATERNAL INFECTIOUS AND PARASITIC DISEASES','ANEMIA, UNSPECIFIED','HYPOTHYROIDISM, UNSPECIFIED','PNEUMONIA, UNSPECIFIED ORGANISM'
                         ,'ACUTE POSTHEMORRHAGIC ANEMIA','ACIDOSIS','CHRONIC OBSTRUCTIVE PULMONARY DISEASE, UNSPECIFIED','SEVERE SEPSIS WITHOUT SEPTIC SHOCK','LONG TERM (CURRENT) USE OF ANTICOAGULANTS'
                         ,'SEPSIS, UNSPECIFIED ORGANISM','PNEUMONITIS DUE TO INHALATION OF FOOD AND VOMIT','SINGLE LIVEBORN INFANT, DELIVERED VAGINALLY','CHRONIC KIDNEY DISEASE, UNSPECIFIED','MAJOR DEPRESSIVE DISORDER, SINGLE EPISODE, UNSPECIFIED'
                         ,'HYPERTENSIVE CHRONIC KIDNEY DISEASE WITH STAGE 1 THROUGH STAGE 4 CHRONIC KIDNEY DISEASE, OR UNSPECIFIED CHRONIC KIDNEY DISEASE','NICOTINE DEPENDENCE, UNSPECIFIED, UNCOMPLICATED','OLD MYOCARDIAL INFARCTION','THROMBOCYTOPENIA, UNSPECIFIED'
                         ,'PRESENCE OF AORTOCORONARY BYPASS GRAFT','NON-ST ELEVATION (NSTEMI) MYOCARDIAL INFARCTION','HYPO-OSMOLALITY AND HYPONATREMIA','NONRHEUMATIC MITRAL (VALVE) INSUFFICIENCY','OTHER NONRHEUMATIC MITRAL VALVE DISORDERS'
                         ,'PERSONAL HISTORY OF NICOTINE DEPENDENCE','SINGLE LIVEBORN INFANT, DELIVERED BY CESAREAN','PLEURAL EFFUSION IN OTHER CONDITIONS CLASSIFIED ELSEWHERE','CORONARY ANGIOPLASTY STATUS','HYPERTENSIVE CHRONIC KIDNEY DISEASE WITH STAGE 5 CHRONIC KIDNEY DISEASE OR END STAGE RENAL DISEASE','SEVERE SEPSIS WITH SEPTIC SHOCK'
                         ,'NONRHEUMATIC AORTIC (VALVE) STENOSIS','NONRHEUMATIC AORTIC (VALVE) INSUFFICIENCY','NONRHEUMATIC AORTIC (VALVE) STENOSIS WITH INSUFFICIENCY','OTHER NONRHEUMATIC AORTIC VALVE DISORDERS','ONRHEUMATIC AORTIC VALVE DISORDER, UNSPECIFIED','LONG TERM (CURRENT) USE OF INSULIN','OTHER SPECIFIED CARDIAC ARRHYTHMIAS','BRADYCARDIA, UNSPECIFIED'
                         ,'OBSTRUCTIVE SLEEP APNEA (ADULT) (PEDIATRIC)','INTRAOPERATIVE CARDIAC ARREST DURING CARDIAC SURGERY','OTHER INTRAOPERATIVE CARDIAC FUNCTIONAL DISTURBANCES DURING CARDIAC SURGERY','OTHER INTRAOPERATIVE COMPLICATIONS OF THE CIRCULATORY SYSTEM, NOT ELSEWHERE CLASSIFIED','OTHER POSTPROCEDURAL COMPLICATIONS AND DISORDERS OF THE CIRCULATORY SYSTEM, NOT ELSEWHERE CLASSIFIED'
                         ,'ACUTE KIDNEY FAILURE WITH TUBULAR NECROSIS','HYPEROSMOLALITY AND HYPERNATREMIA','NEONATAL JAUNDICE ASSOCIATED WITH PRETERM DELIVERY','UNSPECIFIED ASTHMA, UNCOMPLICATED','OTHER ASTHMA','HYPERKALEMIA','ATELECTASIS','OTHER PULMONARY COLLAPSE','OTHER SECONDARY PULMONARY HYPERTENSION','OTHER SPECIFIED PULMONARY HEART DISEASES'
                         ,'HYPOTENSION DUE TO DRUGS','POSTPROCEDURAL HYPOTENSION','GOUT, UNSPECIFIED','HYPOTENSION, UNSPECIFIED','ENCOUNTER FOR ROUTINE AND RITUAL MALE CIRCUMCISION'
                         ,'AGE-RELATED OSTEOPOROSIS WITHOUT CURRENT PATHOLOGICAL FRACTURE','UNSPECIFIED CONVULSIONS','END STAGE RENAL DISEASE','VENTRICULAR TACHYCARDIA','OTHER RESTRICTIVE CARDIOMYOPATHY','OTHER CARDIOMYOPATHIES','UNSTABLE ANGINA','PERSONAL HISTORY OF OTHER VENOUS THROMBOSIS AND EMBOLISM'
                         ,'DIABETES MELLITUS DUE TO UNDERLYING CONDITION WITH DIABETIC POLYNEUROPATHY','DRUG OR CHEMICAL INDUCED DIABETES MELLITUS WITH NEUROLOGICAL COMPLICATIONS WITH DIABETIC POLYNEUROPATHY','TYPE 1 DIABETES MELLITUS WITH DIABETIC POLYNEUROPATHY','TYPE 2 DIABETES MELLITUS WITH DIABETIC POLYNEUROPATHY','OTHER SPECIFIED DIABETES MELLITUS WITH DIABETIC POLYNEUROPATHY'
                         ,'ANXIETY DISORDER, UNSPECIFIED','OBESITY, UNSPECIFIED','OTHER MEDICAL PROCEDURES AS THE CAUSE OF ABNORMAL REACTION OF THE PATIENT, OR OF LATER COMPLICATION, WITHOUT MENTION OF MISADVENTURE AT THE TIME OF THE PROCEDURE','ATHEROSCLEROTIC HEART DISEASE OF NATIVE CORONARY ARTERY WITHOUT ANGINA PECTORIS'
                         ,'ENLARGED PROSTATE WITHOUT LOWER URINARY TRACT SYMPTOMS','BACTEREMIA','DELIRIUM DUE TO KNOWN PHYSIOLOGICAL CONDITION','ENTEROCOLITIS DUE TO CLOSTRIDIUM DIFFICILE','HYPOKALEMIA','PERIPHERAL VASCULAR DISEASE, UNSPECIFIED','GASTROINTESTINAL HEMORRHAGE, UNSPECIFIED','PRESENCE OF CARDIAC PACEMAKER','DEHYDRATION','ANEMIA IN CHRONIC KIDNEY DISEASE'
                         ,'HYPOVOLEMIA','NONTRAUMATIC INTRACEREBRAL HEMORRHAGE, UNSPECIFIED','CARDIAC ARREST, CAUSE UNSPECIFIED','OTHER SURGICAL PROCEDURES AS THE CAUSE OF ABNORMAL REACTION OF THE PATIENT, OR OF LATER COMPLICATION, WITHOUT MENTION OF MISADVENTURE AT THE TIME OF THE PROCEDURE','DO NOT RESUSCITATE','RESPIRATORY DISTRESS SYNDROME OF NEWBORN','HYPOXEMIA',
                          'PRESSURE ULCER OF RIGHT LOWER BACK, UNSPECIFIED STAGE','PRESSURE ULCER OF LEFT LOWER BACK, UNSPECIFIED STAGE','PRESSURE ULCER OF SACRAL REGION, UNSPECIFIED STAGE','HEPATIC FIBROSIS','UNSPECIFIED CIRRHOSIS OF LIVER','OTHER CIRRHOSIS OF LIVER','PERSONAL HISTORY OF MALIGNANT NEOPLASM OF BREAST','SEPSIS, UNSPECIFIED ORGANISM','UNSPECIFIED PROTEIN-CALORIE MALNUTRITION','CHRONIC DIASTOLIC (CONGESTIVE) HEART FAILURE'
                         ,'ACUTE ON CHRONIC DIASTOLIC (CONGESTIVE) HEART FAILURE','CHRONIC VIRAL HEPATITIS C','UNSPECIFIED ATRIAL FLUTTER','SURGICAL OPERATION WITH ANASTOMOSIS, BYPASS OR GRAFT AS THE CAUSE OF ABNORMAL REACTION OF THE PATIENT, OR OF LATER COMPLICATION, WITHOUT MENTION OF MISADVENTURE AT THE TIME OF THE PROCEDURE','PERSONAL HISTORY OF MALIGNANT NEOPLASM OF PROSTATE']}

In [24]:
icd10= pd.DataFrame.from_dict(icd10_dict)

In [25]:
icd10

Unnamed: 0,ICD9_CODE,ICD10,long_title
0,4019,I10,ESSENTIAL (PRIMARY) HYPERTENSION
1,4280,I509,"HEART FAILURE, UNSPECIFIED"
2,42731,I4891,UNSPECIFIED ATRIAL FIBRILLATION
3,41401,I2510,ATHEROSCLEROTIC HEART DISEASE OF NATIVE CORONA...
4,5849,N179,"ACUTE KIDNEY FAILURE, UNSPECIFIED"
...,...,...,...
115,42833,I5033,ACUTE ON CHRONIC DIASTOLIC (CONGESTIVE) HEART ...
116,07054,B182,CHRONIC VIRAL HEPATITIS C
117,42732,I4892,UNSPECIFIED ATRIAL FLUTTER
118,E8782,Y832,"SURGICAL OPERATION WITH ANASTOMOSIS, BYPASS OR..."


In [38]:
icd10.ICD10.nunique()

118

In [28]:
frequent_icd9code = [code[0] for code in icd9_freq[:100]]

In [29]:
print(frequent_icd9code)

['4019', '4280', '42731', '41401', '5849', '25000', '2724', '51881', '5990', '53081', '2720', 'V053', 'V290', '2859', '2449', '486', '2851', '2762', '496', '99592', 'V5861', '0389', '5070', 'V3000', '5859', '311', '40390', '3051', '412', '2875', 'V4581', '41071', '2761', '4240', 'V1582', 'V3001', '5119', 'V4582', '40391', '78552', '4241', 'V5867', '42789', '32723', '9971', '5845', '2760', '7742', '49390', '2767', '5180', '4168', '45829', '2749', '4589', 'V502', '73300', '78039', '5856', '4271', '5185', '4254', '4111', 'V1251', '3572', '30000', '99811', '27800', 'E8798', '41400', '60000', '7907', '2930', '00845', '2768', '4439', '5789', 'V4501', '27651', '28521', '27652', '431', '4275', 'E8788', '2765', 'V4986', '769', '79902', '70703', '5715', 'V103', '99591', '2639', '42832', '99812', '42833', '07054', '42732', 'E8782', 'V1046']


> ICD10 codes that correspond to more than 1 ICD9 codes

In [31]:
icd10[icd10.duplicated(['ICD10'],keep=False)] 


Unnamed: 0,ICD9_CODE,ICD10,long_title
3,41401,I2510,ATHEROSCLEROTIC HEART DISEASE OF NATIVE CORONA...
23,389,A419,"SEPSIS, UNSPECIFIED ORGANISM"
87,41400,I2510,ATHEROSCLEROTIC HEART DISEASE OF NATIVE CORONA...
112,99591,A419,"SEPSIS, UNSPECIFIED ORGANISM"


> ICD9 codes that have correspond to more than 5 ICD10 codes have been leftout

In [33]:
print("Number of unique ICD9 codes that have been converted to ICD10 :%d" %icd10.ICD9_CODE.nunique())
left_out_ICD9=[item for item in frequent_icd9code if item not in icd10.ICD9_CODE.to_list()]
print(left_out_ICD9)

Number of unique ICD9 codes that have been converted to ICD10 :96
['5185', '99811', '2765', '99812']


In [34]:
# finding the index of lefout ICD9 codes to find their frequency
for item in left_out_ICD9:
  print("frequency ranking of {} is {}".format(item,frequent_icd9code.index(item)))

frequency ranking of 5185 is 60
frequency ranking of 99811 is 66
frequency ranking of 2765 is 84
frequency ranking of 99812 is 94


In [35]:
admid_icd=pd.merge(diag[['HADM_ID','ICD9_CODE']],icd10[['ICD9_CODE','ICD10']],how='left',on='ICD9_CODE')

In [37]:
admid_icd['ICD10']=admid_icd['ICD10'].fillna('Misc')
admid_icd

Unnamed: 0,HADM_ID,ICD9_CODE,ICD10
0,172335,40301,Misc
1,172335,486,J189
2,172335,58281,Misc
3,172335,5855,Misc
4,172335,4254,I425
...,...,...,...
711616,188195,20280,Misc
711617,188195,V5869,Misc
711618,188195,V1279,Misc
711619,188195,5275,Misc


In [61]:
admid_icd_agg=admid_icd.groupby('HADM_ID',as_index=False).agg({'ICD9_CODE':lambda x: ','.join(x),'ICD10':lambda y: ','.join(y)})

In [62]:
admid_icd_agg

Unnamed: 0,HADM_ID,ICD9_CODE,ICD10
0,100001,"25013,3371,5849,5780,V5867,25063,5363,4580,250...","Misc,Misc,N179,Misc,Z794,Misc,Misc,Misc,Misc,I..."
1,100003,"53100,2851,07054,5715,5715,5715,45621,53789,40...","Misc,D62,B182,K740,K7460,K7469,Misc,Misc,I10,M..."
2,100006,"49320,51881,51881,486,20300,2761,7850,3090,V12...","Misc,J9600,J9690,J189,Misc,E871,Misc,Misc,Z867..."
3,100007,56081557099734864019,"Misc,Misc,Misc,J189,I10"
4,100009,"41401,99604,4142,25000,27800,V8535,4148,4111,V...","I2510,Misc,Misc,E119,E669,Misc,Misc,I200,Z9861..."
...,...,...,...
58924,199993,"41031,42821,42731,4271,5180,5180,4240,4240,276...","Misc,Misc,I4891,I472,J9811,J9819,I340,I348,E87..."
58925,199994,4864280518815188139704964169585427322762,"J189,I509,J9600,J9690,Misc,J449,Misc,Misc,I489..."
58926,199995,"4210,7464,42971,30401,4412,44284,V1259,04111,3...","Misc,Misc,Misc,Misc,Misc,Misc,Misc,Misc,Misc,F..."
58927,199998,"41401,9971,9971,9971,9971,9975,42731,78820,411...","I2510,I97710,I97790,I9788,I9789,Misc,I4891,Mis..."


In [63]:
admid_icd_agg[(admid_icd_agg.HADM_ID==189797) |(admid_icd_agg.HADM_ID==100003)|(admid_icd_agg.HADM_ID==100006)|(admid_icd_agg.HADM_ID==100007) ].reset_index(drop=True)

Unnamed: 0,HADM_ID,ICD9_CODE,ICD10
0,100003,"53100,2851,07054,5715,5715,5715,45621,53789,40...","Misc,D62,B182,K740,K7460,K7469,Misc,Misc,I10,M..."
1,100006,"49320,51881,51881,486,20300,2761,7850,3090,V12...","Misc,J9600,J9690,J189,Misc,E871,Misc,Misc,Z867..."
2,100007,56081557099734864019,"Misc,Misc,Misc,J189,I10"
3,189797,"V3100,76518,7756,7706,V290,V053","Misc,Misc,Misc,Misc,P002,Z23"


In [64]:
def split_tolist(x):
  list_codes=list(set(re.split(',',x)))
  return ','.join(list_codes)


In [65]:
admid_icd_agg['ICD9_CODE']=admid_icd_agg['ICD9_CODE'].apply(split_tolist)
admid_icd_agg['ICD10'] = admid_icd_agg['ICD10'].apply(split_tolist)

In [66]:
admid_icd_agg

Unnamed: 0,HADM_ID,ICD9_CODE,ICD10
0,100001,"5363,5853,36201,7078,V5867,25013,5849,25083,25...","N179,I129,Misc,Z794"
1,100003,53789401928515715456215310078230705453550,"I10,K740,K7460,K7469,Misc,D62,B182"
2,100006,"V1251,7850,20300,3090,2761,486,51881,V1582,49320","J189,Z86718,J9600,Z87891,Misc,J9690,E871"
3,100007,40199973557048656081,"J189,I10,Misc"
4,100009,"V153,4019,V4502,99604,41401,27800,4111,V5867,2...","I200,I2510,Z794,D649,I10,Z9861,E669,Z87891,Mis..."
...,...,...,...
58924,199993,518042821427314240511942715184276041031,"J9819,I340,I4891,J918,Misc,J9811,E870,I472,I348"
58925,199994,42805853970427322762486518814964169,"J189,E872,J9600,I4892,Misc,I509,J449,J9690"
58926,199995,"4210,04111,3051,V1259,4412,44284,7464,30503,30...","F17200,Misc"
58927,199998,"4019,42731,41401,4111,4439,4400,9975,V4589,997...","I200,I2510,I10,I97710,I9788,Z9861,I4891,I739,I..."


In [67]:
icd10

Unnamed: 0,ICD9_CODE,ICD10,long_title
0,4019,I10,ESSENTIAL (PRIMARY) HYPERTENSION
1,4280,I509,"HEART FAILURE, UNSPECIFIED"
2,42731,I4891,UNSPECIFIED ATRIAL FIBRILLATION
3,41401,I2510,ATHEROSCLEROTIC HEART DISEASE OF NATIVE CORONA...
4,5849,N179,"ACUTE KIDNEY FAILURE, UNSPECIFIED"
...,...,...,...
115,42833,I5033,ACUTE ON CHRONIC DIASTOLIC (CONGESTIVE) HEART ...
116,07054,B182,CHRONIC VIRAL HEPATITIS C
117,42732,I4892,UNSPECIFIED ATRIAL FLUTTER
118,E8782,Y832,"SURGICAL OPERATION WITH ANASTOMOSIS, BYPASS OR..."


In [68]:
#icd10.to_csv('/content/drive/MyDrive/AlgoIntern/Data/Data1.0/icd10_def.csv',index=False)

In [69]:
admid_icd_agg

Unnamed: 0,HADM_ID,ICD9_CODE,ICD10
0,100001,"5363,5853,36201,7078,V5867,25013,5849,25083,25...","N179,I129,Misc,Z794"
1,100003,53789401928515715456215310078230705453550,"I10,K740,K7460,K7469,Misc,D62,B182"
2,100006,"V1251,7850,20300,3090,2761,486,51881,V1582,49320","J189,Z86718,J9600,Z87891,Misc,J9690,E871"
3,100007,40199973557048656081,"J189,I10,Misc"
4,100009,"V153,4019,V4502,99604,41401,27800,4111,V5867,2...","I200,I2510,Z794,D649,I10,Z9861,E669,Z87891,Mis..."
...,...,...,...
58924,199993,518042821427314240511942715184276041031,"J9819,I340,I4891,J918,Misc,J9811,E870,I472,I348"
58925,199994,42805853970427322762486518814964169,"J189,E872,J9600,I4892,Misc,I509,J449,J9690"
58926,199995,"4210,04111,3051,V1259,4412,44284,7464,30503,30...","F17200,Misc"
58927,199998,"4019,42731,41401,4111,4439,4400,9975,V4589,997...","I200,I2510,I10,I97710,I9788,Z9861,I4891,I739,I..."


In [70]:
#admid_icd_agg.to_csv('/content/drive/MyDrive/AlgoIntern/Data/Data1.0/adm_icd.csv',index=False)

# <a id="icdpcs">ICD PCS CODES</a>
 Table used are:
 - PROCEDURES_ICD.csv: contains ICD9 codes corresponding to AdmID
 - D_ICD_PROCEDURES.csv: contains ICD9 PCS definitions

In [74]:
procedure=pd.read_csv('/content/drive/MyDrive/AlgoIntern/Data/PROCEDURES_ICD.csv')

In [75]:
procedure

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,SEQ_NUM,ICD9_CODE
0,944,62641,154460,3,3404
1,945,2592,130856,1,9671
2,946,2592,130856,2,3893
3,947,55357,119355,1,9672
4,948,55357,119355,2,331
...,...,...,...,...,...
240090,228330,67415,150871,5,3736
240091,228331,67415,150871,6,3893
240092,228332,67415,150871,7,8872
240093,228333,67415,150871,8,3893


In [76]:
print("Number of records that dont have AdmIds :%d" %procedure[procedure.HADM_ID.isnull()].ROW_ID.count())
print("Number of admIds that dont have ICD codes :%d" %procedure[procedure.ICD9_CODE.isnull()].HADM_ID.count())

Number of records that dont have AdmIds :0
Number of admIds that dont have ICD codes :0


In [77]:
# number of unique admids with ICD9PCS
procedure.HADM_ID.nunique()

52243

In [81]:
icd9pcs_freq = sorted(dict(Counter(procedure.ICD9_CODE.to_list())).items(),key=lambda x: x[1],reverse=True)

In [82]:
print(icd9pcs_freq)

[(3893, 14731), (9604, 10333), (966, 9300), (9671, 9100), (9904, 7244), (3961, 6838), (9672, 6048), (9955, 5842), (8856, 5337), (3891, 4737), (3615, 4401), (9915, 4244), (8872, 3548), (3722, 3311), (3324, 3269), (3995, 3254), (4513, 2919), (9390, 2727), (3723, 2711), (9983, 2402), (5491, 2217), (331, 2208), (640, 2123), (8853, 2113), (9907, 2028), (3612, 1903), (3491, 1834), (3895, 1825), (311, 1776), (9920, 1740), (3404, 1717), (3897, 1681), (40, 1656), (8841, 1579), (4311, 1536), (3613, 1504), (3521, 1419), (66, 1330), (3322, 1264), (3606, 1261), (9905, 1235), (3607, 1185), (3323, 1173), (3601, 1111), (8964, 1088), (3761, 1028), (3611, 953), (45, 916), (8855, 854), (17, 832), (9962, 819), (4523, 794), (8842, 764), (387, 761), (9605, 759), (3950, 757), (14, 741), (8847, 721), (8659, 719), (9910, 715), (8622, 698), (5459, 695), (4443, 672), (159, 645), (9960, 643), (8848, 624), (4516, 603), (3721, 571), (9607, 561), (8852, 561), (9925, 529), (3845, 525), (3522, 510), (9929, 510), (131,

In [83]:
len(icd9pcs_freq)

2009

In [84]:
icd9pcs_def=pd.read_csv('/content/drive/MyDrive/AlgoIntern/Data/D_ICD_PROCEDURES.csv') 

In [85]:
icd9pcs_def

Unnamed: 0,ROW_ID,ICD9_CODE,SHORT_TITLE,LONG_TITLE
0,264,851,Canthotomy,Canthotomy
1,265,852,Blepharorrhaphy,Blepharorrhaphy
2,266,859,Adjust lid position NEC,Other adjustment of lid position
3,267,861,Lid reconst w skin graft,Reconstruction of eyelid with skin flap or graft
4,268,862,Lid reconst w muc graft,Reconstruction of eyelid with mucous membrane ...
...,...,...,...,...
3877,3344,9959,Vaccination/innocula NEC,Other vaccination and inoculation
3878,3345,9960,Cardiopulm resuscita NOS,"Cardiopulmonary resuscitation, not otherwise s..."
3879,3346,9961,Atrial cardioversion,Atrial cardioversion
3880,3347,9962,Heart countershock NEC,Other electric countershock of heart


In [86]:
icd9pcs_def.ICD9_CODE.nunique()

3809

In [88]:
icd9pcs_def[icd9pcs_def.ICD9_CODE.duplicated()]

Unnamed: 0,ROW_ID,ICD9_CODE,SHORT_TITLE,LONG_TITLE
274,86,64,Perc ins extracran stent,Percutaneous insertion of other extracranial a...
276,88,66,PTCA,Percutaneous transluminal coronary angioplasty...
277,89,67,Intravas msmnt thorc art,Intravascular pressure measurement of intratho...
283,95,73,Rev hip repl-liner/head,"Revision of hip replacement, acetabular liner ..."
298,110,93,Transplant cadaver donor,Transplant from cadaver
...,...,...,...,...
3092,2820,719,Other female genital ops,Other operations on female genital organs
3127,2855,721,Low forceps w episiotomy,Low forceps operation with episiotomy
3235,2963,729,Instrument delivery NOS,Unspecified instrumental delivery
3399,3479,863,Other local destruc skin,Other local excision or destruction of lesion ...


In [95]:
frequent_icd9pcs=[code[0] for code in icd9pcs_freq[:100]]

In [96]:
print(frequent_icd9pcs)

[3893, 9604, 966, 9671, 9904, 3961, 9672, 9955, 8856, 3891, 3615, 9915, 8872, 3722, 3324, 3995, 4513, 9390, 3723, 9983, 5491, 331, 640, 8853, 9907, 3612, 3491, 3895, 311, 9920, 3404, 3897, 40, 8841, 4311, 3613, 3521, 66, 3322, 3606, 9905, 3607, 3323, 3601, 8964, 3761, 3611, 45, 8855, 17, 9962, 4523, 8842, 387, 9605, 3950, 14, 8847, 8659, 9910, 8622, 5459, 4443, 159, 9960, 8848, 4516, 3721, 9607, 8852, 9925, 3845, 3522, 9929, 131, 5187, 3990, 3321, 22, 309, 3778, 4639, 3783, 5122, 3772, 5185, 4562, 3734, 46, 3614, 3726, 370, 3512, 3979, 4131, 3972, 13, 3892, 8162, 9359]


> Below dictionary of ICD9 PCS codes to ICD10 codes is converted through <https://icd.codes/convert/icd9-to-icd10-pcs>

In [90]:
icd10pcs_dict={'ICD9_CODE':[9604,9604,966,966,966,9671,9671,3961,9672,9955,9955,9915,9915,9915,9915,3722,3722,3995,3995,4513,9390,9390,9390,3723,3723,9983,640,640,640,3491,3491,3895,3895,3404,3404,3404,3404,4311,4311,3322,3322,3322,9905,9905,9905,9905,3323,3323,3323,3761,3761,9962,4523,9605,9605,9605,9605,9910,9910,9910,9910,9910,4443,159,159,159,159,9960,3721,3721,9607,9607,9925,9925,9925,9925,3522,3522,3321,3778,3778,3783,3783,3783,3783,5122,3722,3722,5185,5185,3726,3726,3726,3512,3512,3512,3512,3512,3892],
                'icd10_pcs':['0BH.17EZ','0BH.18EZ','0DH.67UZ','0DH.68UZ','3E0.G36Z','5A1.935Z','5A1.945Z','5A1.221Z','5A1.955Z','3E0.134Z','3E0.234Z','3E0.336Z','3E0.436Z','3E0.536Z','3E0.636Z','4A0.20N7','4A0.23N7','5A1.D00Z','5A1.D60Z','0DJ.08ZZ','5A0.9357','5A0.9457','5A0.9557','4A0.20N8','4A0.23N8','6A6.00ZZ','0VT.T0ZZ','0VT.T4ZZ','0VT.TXZZ','0W9.93ZZ','0W9.B3ZZ','05H.Y33Z','06H.Y33Z','0W9.930Z','0W9.940Z','0W9.B30Z','0W9.B40Z','0DH.63UZ','0DH.64UZ','0BJ.08ZZ','0BJ.K8ZZ','0BJ.L8ZZ','302.33R1','302.43R1','302.53R1','302.63R1','0BJ.08ZZ','0BJ.K8ZZ','0BJ.L8ZZ',
                            '5A0.2110','5A0.2210','5A2.204Z','0DJ.D8ZZ','0B7.17DZ','0B7.18DZ','0BH.07DZ','0WH.Q7YZ','3E0.3317','3E0.4317','3E0.5317','3E0.6317','3E0.8317','0W3.P8ZZ','08Q.L0ZZ','08Q.L3ZZ','08Q.M0ZZ','08Q.M3ZZ','5A1.2012','4A0.20N6','4A0.23N6','0D9.670Z','0D9.680Z','3E0.3305','3E0.4305','XW0.3351','XW0.4351','02R.F0JZ','02R.F4JZ','0BJ.08ZZ','5A1.213Z','5A1.223Z','0JH.606Z','0JH.636Z','0JH.806Z','0JH.836Z','0FT.40ZZ','4A0.20N7','4A0.23N7','0F9.C80Z','0F9.C8ZZ','3E0.53KZ','3E0.63KZ','4A0.23FZ','027.G04Z','027.G0DZ','027.G0ZZ','02N.G0ZZ','02Q.G0ZZ','06H.033T'],
                'long_title':['Insertion of Endotracheal Airway into Trachea, Via Natural or Artificial Opening','Insertion of Endotracheal Airway into Trachea, Via Natural or Artificial Opening Endoscopic','Insertion of Feeding Device into Stomach, Via Natural or Artificial Opening','Insertion of Feeding Device into Stomach, Via Natural or Artificial Opening Endoscopic','Introduction of Nutritional Substance into Upper GI, Percutaneous Approach','Respiratory Ventilation, Less than 24 Consecutive Hours','Respiratory Ventilation, 24-96 Consecutive Hours',
                             'Performance of Cardiac Output, Continuous','Respiratory Ventilation, Greater than 96 Consecutive Hours','Introduction of Serum, Toxoid and Vaccine into Subcutaneous Tissue, Percutaneous Approach','Introduction of Serum, Toxoid and Vaccine into Muscle, Percutaneous Approach','Introduction of Nutritional Substance into Peripheral Vein, Percutaneous Approach','Introduction of Nutritional Substance into Central Vein, Percutaneous Approach','Introduction of Nutritional Substance into Peripheral Artery, Percutaneous Approach','Introduction of Nutritional Substance into Central Artery, Percutaneous Approach',
                             'Measurement of Cardiac Sampling and Pressure, Left Heart, Open Approach','Measurement of Cardiac Sampling and Pressure, Left Heart, Percutaneous Approach','Performance of Urinary Filtration, Single','Performance of Urinary Filtration, Multiple','Inspection of Upper Intestinal Tract, Via Natural or Artificial Opening Endoscopic','Assistance with Respiratory Ventilation, Less than 24 Consecutive Hours, Continuous Positive Airway Pressure','Assistance with Respiratory Ventilation, 24-96 Consecutive Hours, Continuous Positive Airway Pressure',
                             'Assistance with Respiratory Ventilation, Greater than 96 Consecutive Hours, Continuous Positive Airway Pressure','Measurement of Cardiac Sampling and Pressure, Bilateral, Open Approach','Measurement of Cardiac Sampling and Pressure, Bilateral, Percutaneous Approach','Phototherapy of Skin, Single','Resection of Prepuce, Open Approach','Resection of Prepuce, Percutaneous Endoscopic Approach','Resection of Prepuce, External Approach','Drainage of Right Pleural Cavity, Percutaneous Approach','Drainage of Left Pleural Cavity, Percutaneous Approach','Insertion of Infusion Device into Upper Vein, Percutaneous Approach',
                             'Insertion of Infusion Device into Lower Vein, Percutaneous Approach','Drainage of Right Pleural Cavity with Drainage Device, Percutaneous Approach','Drainage of Right Pleural Cavity with Drainage Device, Percutaneous Endoscopic Approach','Drainage of Left Pleural Cavity with Drainage Device, Percutaneous Approach',' Drainage of Left Pleural Cavity with Drainage Device, Percutaneous Endoscopic Approach','nsertion of Feeding Device into Stomach, Percutaneous Approach','Insertion of Feeding Device into Stomach, Percutaneous Endoscopic Approach','Inspection of Tracheobronchial Tree, Via Natural or Artificial Opening Endoscopic',
                             'Inspection of Right Lung, Via Natural or Artificial Opening Endoscopic','Inspection of Left Lung, Via Natural or Artificial Opening Endoscopic','Transfusion of Nonautologous Platelets into Peripheral Vein, Percutaneous Approach','Transfusion of Nonautologous Platelets into Central Vein, Percutaneous Approach','Transfusion of Nonautologous Platelets into Peripheral Artery, Percutaneous Approach','Transfusion of Nonautologous Platelets into Central Artery, Percutaneous Approach','Inspection of Tracheobronchial Tree, Via Natural or Artificial Opening Endoscopic',
                             'Inspection of Right Lung, Via Natural or Artificial Opening Endoscopic','Inspection of Left Lung, Via Natural or Artificial Opening Endoscopic','Assistance with Cardiac Output using Balloon Pump, Intermittent','Assistance with Cardiac Output using Balloon Pump, Continuous','Restoration of Cardiac Rhythm, Single','Inspection of Lower Intestinal Tract, Via Natural or Artificial Opening Endoscopic','Dilation of Trachea with Intraluminal Device, Via Natural or Artificial Opening','Dilation of Trachea with Intraluminal Device, Via Natural or Artificial Opening Endoscopic',
                             'Insertion of Intraluminal Device into Tracheobronchial Tree, Via Natural or Artificial Opening','Insertion of Other Device into Respiratory Tract, Via Natural or Artificial Opening','Introduction of Other Thrombolytic into Peripheral Vein, Percutaneous Approach','Introduction of Other Thrombolytic into Central Vein, Percutaneous Approach','Introduction of Other Thrombolytic into Peripheral Artery, Percutaneous Approach','Introduction of Other Thrombolytic into Central Artery, Percutaneous Approach','Introduction of Other Thrombolytic into Heart, Percutaneous Approach',
                             'Control Bleeding in Gastrointestinal Tract, Via Natural or Artificial Opening Endoscopic','Repair Right Extraocular Muscle, Open Approach','Repair Right Extraocular Muscle, Percutaneous Approach','Repair Left Extraocular Muscle, Open Approach','Repair Left Extraocular Muscle, Percutaneous Approach','Performance of Cardiac Output, Single, Manual','Measurement of Cardiac Sampling and Pressure, Right Heart, Open Approach','Measurement of Cardiac Sampling and Pressure, Right Heart, Percutaneous Approach','Drainage of Stomach with Drainage Device, Via Natural or Artificial Opening',
                             'Drainage of Stomach with Drainage Device, Via Natural or Artificial Opening Endoscopic','Introduction of Other Antineoplastic into Peripheral Vein, Percutaneous Approach','Introduction of Other Antineoplastic into Central Vein, Percutaneous Approach','Introduction of Blinatumomab Antineoplastic Immunotherapy into Peripheral Vein, Percutaneous Approach, New Technology Group 1','Introduction of Blinatumomab Antineoplastic Immunotherapy into Central Vein, Percutaneous Approach, New Technology Group 1','Replacement of Aortic Valve with Synthetic Substitute, Open Approach',
                             'Replacement of Aortic Valve with Synthetic Substitute, Percutaneous Endoscopic Approach','Inspection of Tracheobronchial Tree, Via Natural or Artificial Opening Endoscopic','Performance of Cardiac Pacing, Intermittent','Performance of Cardiac Pacing, Continuous','Insertion of Pacemaker, Dual Chamber into Chest Subcutaneous Tissue and Fascia, Open Approach','Insertion of Pacemaker, Dual Chamber into Chest Subcutaneous Tissue and Fascia, Percutaneous Approach','Insertion of Pacemaker, Dual Chamber into Abdomen Subcutaneous Tissue and Fascia, Open Approach','Insertion of Pacemaker, Dual Chamber into Abdomen Subcutaneous Tissue and Fascia, Percutaneous Approach',
                             'Resection of Gallbladder, Open Approach','Measurement of Cardiac Sampling and Pressure, Left Heart, Open Approach','Measurement of Cardiac Sampling and Pressure, Left Heart, Percutaneous Approach','Drainage of Ampulla of Vater with Drainage Device, Via Natural or Artificial Opening Endoscopic','Drainage of Ampulla of Vater, Via Natural or Artificial Opening Endoscopic','Introduction of Other Diagnostic Substance into Peripheral Artery, Percutaneous Approach','Introduction of Other Diagnostic Substance into Central Artery, Percutaneous Approach','Measurement of Cardiac Rhythm, Percutaneous Approach',
                             'Dilation of Mitral Valve with Drug-eluting Intraluminal Device, Open Approach','Dilation of Mitral Valve with Intraluminal Device, Open Approach','Dilation of Mitral Valve, Open Approach','Release Mitral Valve, Open Approach','Repair Mitral Valve, Open Approach','Insertion of Infusion Device, Via Umbilical Vein, into Inferior Vena Cava, Percutaneous Approach']}

In [91]:
icd10pcs=pd.DataFrame.from_dict(icd10pcs_dict)

In [92]:
icd10pcs

Unnamed: 0,ICD9_CODE,icd10_pcs,long_title
0,9604,0BH.17EZ,"Insertion of Endotracheal Airway into Trachea,..."
1,9604,0BH.18EZ,"Insertion of Endotracheal Airway into Trachea,..."
2,966,0DH.67UZ,"Insertion of Feeding Device into Stomach, Via ..."
3,966,0DH.68UZ,"Insertion of Feeding Device into Stomach, Via ..."
4,966,3E0.G36Z,Introduction of Nutritional Substance into Upp...
...,...,...,...
94,3512,027.G0DZ,Dilation of Mitral Valve with Intraluminal Dev...
95,3512,027.G0ZZ,"Dilation of Mitral Valve, Open Approach"
96,3512,02N.G0ZZ,"Release Mitral Valve, Open Approach"
97,3512,02Q.G0ZZ,"Repair Mitral Valve, Open Approach"


> ICD10 pcs code that have more than 1 ICD9 codes

In [93]:
icd10pcs[icd10pcs.duplicated(['icd10_pcs'],keep=False)]


Unnamed: 0,ICD9_CODE,icd10_pcs,long_title
15,3722,4A0.20N7,"Measurement of Cardiac Sampling and Pressure, ..."
16,3722,4A0.23N7,"Measurement of Cardiac Sampling and Pressure, ..."
39,3322,0BJ.08ZZ,"Inspection of Tracheobronchial Tree, Via Natur..."
40,3322,0BJ.K8ZZ,"Inspection of Right Lung, Via Natural or Artif..."
41,3322,0BJ.L8ZZ,"Inspection of Left Lung, Via Natural or Artifi..."
46,3323,0BJ.08ZZ,"Inspection of Tracheobronchial Tree, Via Natur..."
47,3323,0BJ.K8ZZ,"Inspection of Right Lung, Via Natural or Artif..."
48,3323,0BJ.L8ZZ,"Inspection of Left Lung, Via Natural or Artifi..."
78,3321,0BJ.08ZZ,"Inspection of Tracheobronchial Tree, Via Natur..."
86,3722,4A0.20N7,"Measurement of Cardiac Sampling and Pressure, ..."


> List of ICD9 code that more than 5 ICD10 PCS correspondence and left out

In [97]:
print("Number of ICD9PCS codes that have been mapped to ICD10PCS",icd10pcs.ICD9_CODE.nunique())
left_out_icd9pcs=[item for item in frequent_icd9pcs if item not in icd10pcs.ICD9_CODE.to_list()]
print(left_out_icd9pcs)

Number of ICD9PCS codes that have been mapped to ICD10PCS 41
[3893, 9904, 8856, 3891, 3615, 8872, 3324, 5491, 331, 8853, 9907, 3612, 311, 9920, 3897, 40, 8841, 3613, 3521, 66, 3606, 3607, 3601, 8964, 3611, 45, 8855, 17, 8842, 387, 3950, 14, 8847, 8659, 8622, 5459, 8848, 4516, 8852, 3845, 9929, 131, 5187, 3990, 22, 309, 4639, 3772, 4562, 3734, 46, 3614, 370, 3979, 4131, 3972, 13, 8162, 9359]


In [99]:
len(left_out_icd9pcs)

59

In [98]:
# finding the index of lefout ICD9 PCS codes to find their frequency
for item in left_out_icd9pcs:
  print("frequency ranking of {} is {}".format(item,frequent_icd9pcs.index(item)))

frequency ranking of 3893 is 0
frequency ranking of 9904 is 4
frequency ranking of 8856 is 8
frequency ranking of 3891 is 9
frequency ranking of 3615 is 10
frequency ranking of 8872 is 12
frequency ranking of 3324 is 14
frequency ranking of 5491 is 20
frequency ranking of 331 is 21
frequency ranking of 8853 is 23
frequency ranking of 9907 is 24
frequency ranking of 3612 is 25
frequency ranking of 311 is 28
frequency ranking of 9920 is 29
frequency ranking of 3897 is 31
frequency ranking of 40 is 32
frequency ranking of 8841 is 33
frequency ranking of 3613 is 35
frequency ranking of 3521 is 36
frequency ranking of 66 is 37
frequency ranking of 3606 is 39
frequency ranking of 3607 is 41
frequency ranking of 3601 is 43
frequency ranking of 8964 is 44
frequency ranking of 3611 is 46
frequency ranking of 45 is 47
frequency ranking of 8855 is 48
frequency ranking of 17 is 49
frequency ranking of 8842 is 52
frequency ranking of 387 is 53
frequency ranking of 3950 is 55
frequency ranking of 14

In [100]:
admid_icd_pcs=pd.merge(procedure[['HADM_ID','ICD9_CODE']],icd10pcs[['ICD9_CODE','icd10_pcs']],how='left',on='ICD9_CODE')

In [101]:
admid_icd_pcs

Unnamed: 0,HADM_ID,ICD9_CODE,icd10_pcs
0,154460,3404,0W9.930Z
1,154460,3404,0W9.940Z
2,154460,3404,0W9.B30Z
3,154460,3404,0W9.B40Z
4,130856,9671,5A1.935Z
...,...,...,...
357296,150871,3893,
357297,150871,3722,4A0.20N7
357298,150871,3722,4A0.23N7
357299,150871,3722,4A0.20N7


In [102]:
admid_icd_pcs['icd10_pcs']=admid_icd_pcs['icd10_pcs'].fillna('Misc')

In [103]:
admid_icd_pcs_agg=admid_icd_pcs.groupby('HADM_ID',as_index=False).agg({'ICD9_CODE':lambda x:','.join(map(str,x)),'icd10_pcs':lambda y: ','.join(y)})

In [105]:
admid_icd_pcs_agg.head(5)

Unnamed: 0,HADM_ID,ICD9_CODE,icd10_pcs
0,100003,44439607960799043893,"0W3.P8ZZ,0D9.670Z,0D9.680Z,Misc,Misc"
1,100006,9390939093909925992599259925,"5A0.9357,5A0.9457,5A0.9557,3E0.3305,3E0.4305,X..."
2,100007,45625459,"Misc,Misc"
3,100009,3613361537953961,"Misc,Misc,Misc,5A1.221Z"
4,100010,5551540403,"Misc,Misc,Misc"


In [106]:
admid_icd_pcs_agg['ICD9_CODE']=admid_icd_pcs_agg.ICD9_CODE.apply(split_tolist)
admid_icd_pcs_agg['icd10_pcs']=admid_icd_pcs_agg.icd10_pcs.apply(split_tolist)

In [107]:
admid_icd_pcs_agg

Unnamed: 0,HADM_ID,ICD9_CODE,icd10_pcs
0,100003,3893990496074443,"0D9.670Z,Misc,0W3.P8ZZ,0D9.680Z"
1,100006,99259390,"5A0.9557,3E0.3305,5A0.9457,XW0.3351,3E0.4305,5..."
2,100007,45625459,Misc
3,100009,3615396137953613,"5A1.221Z,Misc"
4,100010,5551540403,Misc
...,...,...,...
52238,199993,"8848,9604,3512,3761,3614,9672,9390,3404,8842,3...","0W9.B30Z,02N.G0ZZ,5A0.9557,02Q.G0ZZ,5A0.9457,0..."
52239,199994,9671399596043891,"5A1.D00Z,5A1.945Z,0BH.18EZ,Misc,5A1.935Z,5A1.D..."
52240,199995,352199298841384535393961369988478872,"5A1.221Z,Misc"
52241,199998,361536123964,Misc


In [108]:
#admid_icd_pcs_agg.to_csv('/content/drive/MyDrive/AlgoIntern/Data/Data1.0/adm_icdpcs.csv',index=False)

In [109]:
icd10pcs

Unnamed: 0,ICD9_CODE,icd10_pcs,long_title
0,9604,0BH.17EZ,"Insertion of Endotracheal Airway into Trachea,..."
1,9604,0BH.18EZ,"Insertion of Endotracheal Airway into Trachea,..."
2,966,0DH.67UZ,"Insertion of Feeding Device into Stomach, Via ..."
3,966,0DH.68UZ,"Insertion of Feeding Device into Stomach, Via ..."
4,966,3E0.G36Z,Introduction of Nutritional Substance into Upp...
...,...,...,...
94,3512,027.G0DZ,Dilation of Mitral Valve with Intraluminal Dev...
95,3512,027.G0ZZ,"Dilation of Mitral Valve, Open Approach"
96,3512,02N.G0ZZ,"Release Mitral Valve, Open Approach"
97,3512,02Q.G0ZZ,"Repair Mitral Valve, Open Approach"


In [110]:
#icd10pcs.to_csv('/content/drive/MyDrive/AlgoIntern/Data/Data1.0/icd10pcs_def.csv',index=False)