# Project: ICD-AIS conversion using Deep Learning utilizing ICD10

This notebook converts the AIS codes observed in the NTDB test set to a standardized format (pre-dot code, severity, chapter, and body region) so that they match the format of the translations.

## Setup

In [1]:
import numpy as np
import pandas as pd
import math

#### Files

In [2]:
# inputs: patient demographics and observed AIS codes for the NTDB test set
test_demo_file = "../Data/NTDB_combine/ntdb_test_demo.csv"
ais_obs_file = "../Data/NTDB_combine/ntdb_test_ais.csv"

# reference table of AIS codes
ais_codes_file = "../Tools/AIS08_codes.csv"

# output: the reformatted observed AIS codes
ais_obs_reformat_file = "../Results/Translations/ais_obs.csv"

## Load data

In [3]:
# read the demographics table (one source of patient incident keys)
test_demo = pd.read_csv(filepath_or_buffer=test_demo_file)

In [4]:
# read the observed AIS codes, then rename the code column to lower case
ais_obs = pd.read_csv(ais_obs_file)
ais_obs = ais_obs.rename(columns={'AISCODE': 'aiscode'})

In [5]:
# get predot code: the integer part of the AIS code (e.g. 210402.1 -> 210402).
# A single vectorized floor replaces the per-row math.floor call; the explicit
# int64 cast keeps the column an integer key for the merge on 'predot'.
ais_obs['ais_predot'] = np.floor(ais_obs['aiscode']).astype('int64')

In [6]:
# preview the first five rows of the observed codes
ais_obs.head(n=5)

Unnamed: 0,inc_key,aiscode,ais_predot
0,190026915434,210402.1,210402
1,190026915434,751351.2,751351
2,190026915434,816011.1,816011
3,190026915434,816015.1,816015
4,190026915434,816019.1,816019


In [7]:
# number of distinct incident keys that have at least one observed AIS code
len(pd.unique(ais_obs['inc_key']))

10000

In [8]:
# number of distinct incident keys in the demographics table
len(pd.unique(test_demo['inc_key']))

10000

## Set up AIS reference

In [9]:
# load AIS codes (first row is the header; file is read as ISO-8859-1)
ais_codes = pd.read_csv(ais_codes_file, header=0, encoding='iso-8859-1')

# get predot code: the integer part of the full code (e.g. 10000.1 -> 10000).
# A single vectorized floor replaces the per-row math.floor call; the explicit
# int64 cast keeps the column an integer key.
ais_codes['predot'] = np.floor(ais_codes['code']).astype('int64')

# make dictionary of codes: predot code -> full code (including the post-dot digit)
ais_dot = pd.Series(ais_codes.code.values, index=ais_codes.predot).to_dict()
# extra entries so the placeholder codes 0 and -1 map to themselves
# (0 is the code given to patients without AIS codes further down;
# -1 presumably marks an unknown code -- TODO confirm)
ais_dot[0] = 0.0
ais_dot[-1] = -1.0

In [10]:
# extract severity: the digit after the dot (e.g. 10004.2 -> 2.0).
# The fractional part of a float is not exact (210402.1 % 1 evaluates to about
# 0.1000000000058, not 0.1), so the scaled value is rounded to the nearest
# whole number; without this the column holds values like 1.0000000000582.
# Assumes a single post-dot digit, as in the codes shown in this notebook.
ais_codes['severity'] = ((ais_codes['code'] % 1) * 10).round()

In [11]:
# chapter number: the code divided by 100,000 and floored, i.e. the leading
# digit of a six-digit pre-dot code (five-digit codes such as 10000.1 give 0.0)
ais_codes['chapter'] = np.floor(ais_codes['code'].div(100_000))

In [12]:
# lookup table from body-region description to its integer id
regions = {
    'head_neck': 1,
    'chest': 2,
    'abd_pelvis': 3,
    'extremity': 4,
    'face': 5,
    'external': 6,
}

In [13]:
# swap each description in the 'region' column for its integer id from `regions`
ais_codes = ais_codes.replace(to_replace={'region': regions})

In [14]:
# preview the first five rows of the AIS reference table
ais_codes.head(n=5)

Unnamed: 0,code,region,Description,to_AIS98,from_AIS98,FCI,predot,severity,chapter
0,10000.1,6,"Hypothermia NFS [primary injury, not treatment...",NONE,NONE,,10000,1.0,0.0
1,10002.1,6,"Hypothermia [primary injury, not treatment-rel...",NONE,NONE,,10002,1.0,0.0
2,10004.2,6,"Hypothermia [primary injury, not treatment-rel...",NONE,NONE,,10004,2.0,0.0
3,10006.3,6,"Hypothermia [primary injury, not treatment-rel...",NONE,NONE,,10006,3.0,0.0
4,10008.4,6,"Hypothermia [primary injury, not treatment-rel...",NONE,NONE,,10008,4.0,0.0


## Merge with AIS dictionary

In [15]:
# left-join the AIS reference onto the observations by pre-dot code, so every
# observed row is kept even when its code has no match in the reference
ais_obs = pd.merge(
    ais_obs,
    ais_codes,
    how='left',
    left_on='ais_predot',
    right_on='predot',
)

In [16]:
# keep only the patient key, the observed code and the attributes from the reference
keep_cols = ['inc_key', 'aiscode', 'ais_predot', 'severity', 'chapter', 'region']
ais_obs = ais_obs[keep_cols]

In [17]:
# preview the first five rows after the merge and column selection
ais_obs.head(n=5)

Unnamed: 0,inc_key,aiscode,ais_predot,severity,chapter,region
0,190026915434,210402.1,210402,1.0,2.0,6.0
1,190026915434,751351.2,751351,2.0,7.0,4.0
2,190026915434,816011.1,816011,1.0,8.0,6.0
3,190026915434,816015.1,816015,1.0,8.0,6.0
4,190026915434,816019.1,816019,1.0,8.0,6.0


## Add rows for patients with no AIS codes

In [18]:
# incident keys present in the demographics but absent from the AIS observations
missing_pt = np.setdiff1d(
    test_demo['inc_key'].unique(),
    ais_obs['inc_key'].unique(),
)

In [19]:
# Add one all-zero placeholder row for every patient that has no AIS codes.
# The rows are built in a single frame and attached with one pd.concat:
# DataFrame.append was removed in pandas 2.0, and appending and re-sorting
# inside a loop repeats the full sort once per missing patient.
# When no patient is missing, ais_obs is left untouched (no sort, no re-index).
if len(missing_pt) > 0:

    # one row per missing patient; the scalar zeros are broadcast to every row
    missing_df = pd.DataFrame({
        'inc_key': missing_pt,
        'aiscode': 0,
        'ais_predot': 0,
        'severity': 0,
        'chapter': 0,
        'region': 0,
    })

    # a stable sort keeps each patient's rows in their existing relative order
    ais_obs = (
        pd.concat([ais_obs, missing_df], ignore_index=True)
        .sort_values('inc_key', kind='stable')
        .reset_index(drop=True)
    )

In [20]:
# preview the last five rows of the final table
ais_obs.tail(n=5)

Unnamed: 0,inc_key,aiscode,ais_predot,severity,chapter,region
33492,190046127989,140693.2,140693,2.0,1.0,1.0
33493,190046127989,210602.1,210602,1.0,2.0,6.0
33494,190046128077,853151.3,853151,3.0,8.0,4.0
33495,190046128321,110600.1,110600,1.0,1.0,6.0
33496,190046128321,853161.3,853161,3.0,8.0,4.0


## Store results

In [21]:
# save the reformatted observations as CSV, leaving out the row index
ais_obs.to_csv(path_or_buf=ais_obs_reformat_file, index=False)