# Project: ICD-AIS conversion using Deep Learning utilizing ICD10

This script reformats the NMT translations so that they can be checked against the actual AIS codes that were recorded in NTDB.

## Setup

In [1]:
import numpy as np
import pandas as pd
import math

#### Files

In [2]:
# --- input files ---

# AIS codes predicted by the NMT model (space-separated codes per line)
nmt_ais_file = '../Results/Translations/NMT_sentence_proc_trans.csv'

# AIS code reference table (AAAM map)
ais_codes_file = '../Tools/AIS08_codes.csv'

# test-set data; supplies the inc_key column
test_demo_file = '../Data/NTDB_combine/ntdb_test_demo.csv'

In [3]:
# --- output file ---
# destination for the reformatted (long-format) translations
nmt_trans_file = '../Results/Translations/nmt_ais.csv'

## Set up AIS reference

In [4]:
# read the AIS reference table (first row is the header, Latin-1 encoded)
ais_codes = pd.read_csv(ais_codes_file, encoding='iso-8859-1', header=0)

# predot = integer part of the code (the digits before the decimal point)
ais_codes['predot'] = ais_codes['code'].map(math.floor)

# lookup table: predot code -> full code
ais_dot = ais_codes.set_index('predot')['code'].to_dict()

# placeholder codes 0 and -1 map to themselves
ais_dot.update({0: 0.0, -1: -1.0})

In [5]:
# extract severity (the single digit after the decimal point of the code).
# `code % 1` carries binary floating-point noise (e.g. 10000.1 % 1 gives
# 0.1000000000003638), so the scaled value is rounded to the nearest whole
# number; otherwise severities come out as 1.0000000000036 or 2.9999999999927
# and break equality / threshold comparisons later on.
ais_codes['severity'] = ((ais_codes.code % 1) * 10).round()

In [6]:
# chapter number = hundred-thousands digit of the code, i.e. the leading digit
# of a six-digit predot code (shorter codes such as 10000.1 give chapter 0)
ais_codes['chapter'] = np.floor(ais_codes['code'].div(100_000))

In [7]:
# lookup table from body-region description to its integer code
regions = {
    'head_neck': 1,
    'chest': 2,
    'abd_pelvis': 3,
    'extremity': 4,
    'face': 5,
    'external': 6,
}

In [8]:
# swap the text labels in the 'region' column for their integer codes;
# the nested dict restricts the replacement to that one column
ais_codes = ais_codes.replace(to_replace={'region': regions})

In [9]:
# preview the first five rows of the AIS reference table
ais_codes.head(n=5)

Unnamed: 0,code,region,Description,to_AIS98,from_AIS98,FCI,predot,severity,chapter
0,10000.1,6,"Hypothermia NFS [primary injury, not treatment...",NONE,NONE,,10000,1.0,0.0
1,10002.1,6,"Hypothermia [primary injury, not treatment-rel...",NONE,NONE,,10002,1.0,0.0
2,10004.2,6,"Hypothermia [primary injury, not treatment-rel...",NONE,NONE,,10004,2.0,0.0
3,10006.3,6,"Hypothermia [primary injury, not treatment-rel...",NONE,NONE,,10006,3.0,0.0
4,10008.4,6,"Hypothermia [primary injury, not treatment-rel...",NONE,NONE,,10008,4.0,0.0


## Process AIS codes 
- Convert to list 
- Remove missing and unknown predicted values
- Convert to numbers

In [10]:
# load translations: a single unnamed column of space-separated AIS codes.
# Blank lines are kept (they become NaN rows) instead of being skipped,
# presumably so rows stay aligned with the test data -- TODO confirm.
# dtype=str keeps every entry as text: without it, type inference may parse
# lines holding a single code as integers, and `.str.split` would then fail
# or silently turn those entries into NaN.
nmt = pd.read_csv(
    nmt_ais_file,
    header=None,
    skip_blank_lines=False,
    names=["AIS05CODE"],
    dtype=str,
)

In [11]:
# preview the last five rows of the raw translations
nmt.tail(n=5)

Unnamed: 0,AIS05CODE
9995,210602 241200
9996,840602
9997,110602 140694 210602
9998,853151
9999,110602 853161


In [12]:
# break each space-separated string into a list of code strings
# (missing entries stay NaN)
nmt['AIS05CODE'] = nmt['AIS05CODE'].str.split(pat=" ")

In [13]:
# replace predicted NaN (no prediction for patient) with ["0"].
# After the split every real prediction is a list, so anything that is not a
# list is a missing entry. This avoids `x is np.NaN`, which depends on NaN
# object identity and on the `np.NaN` alias that NumPy 2.0 removed.
nmt['AIS05CODE'] = nmt['AIS05CODE'].map(
    lambda codes: codes if isinstance(codes, list) else ["0"]
)

# replace <unk> (unknown code encountered) with -1
nmt['AIS05CODE'] = nmt['AIS05CODE'].map(
    lambda codes: ["-1" if code == "<unk>" else code for code in codes]
)

In [14]:
# turn every code string in each list into an integer
nmt['AIS05CODE'] = nmt['AIS05CODE'].map(lambda codes: list(map(int, codes)))

In [15]:
# preview the first five rows of the processed code lists
nmt.head()

Unnamed: 0,AIS05CODE
0,"[210402, 751351, 816002, 816015, 816019, 874089]"
1,"[110402, 240402, 410402]"
2,[640200]
3,"[210202, 810402, 810602, 854455]"
4,"[110402, 110800, 210202, 210402, 810402, 854455]"


## Add patient ID

In [16]:
# read the test-set table that carries the inc_key column
test_demo = pd.read_csv(filepath_or_buffer=test_demo_file)

In [17]:
# attach the key from the test data; rows are matched on the row index,
# which assumes both tables are in the same order -- TODO confirm
nmt['inc_key'] = test_demo['inc_key']

## Convert to Long format

In [18]:
# collapse each list of AIS codes into one comma-separated string
nmt['ais_str'] = [
    ','.join(str(code) for code in codes) for codes in nmt['AIS05CODE']
]

In [19]:
# one column per code position; rows with fewer codes are padded with missing
# values
nmt_ais = nmt['ais_str'].str.split(',', expand=True)

# label the positional columns AIS1, AIS2, ...
nmt_ais.columns = [f"AIS{position + 1}" for position in nmt_ais.columns]

In [20]:
# carry the key over to the wide table (matched on the row index)
nmt_ais['inc_key'] = nmt['inc_key']

In [21]:
# reshape AIS1..AISn into a single 'AIS' column, one row per (inc_key, num)
nmt_long = pd.wide_to_long(nmt_ais, stubnames='AIS', i='inc_key', j='num')

# bring inc_key/num back as columns and discard rows that contain NA
# (the padding left by patients with fewer codes)
nmt_long = nmt_long.reset_index().dropna()

In [22]:
# keep only the key and the code; the position number is no longer needed
nmt_long = nmt_long.loc[:, ['inc_key', 'AIS']]

In [23]:
# the 'AIS' column holds predot codes, so name it accordingly
nmt_long = nmt_long.rename(mapper={'AIS': 'ais_predot'}, axis='columns')

In [24]:
# cast the predot codes to integers
nmt_long = nmt_long.astype({'ais_predot': int})

## Get severity and region

In [25]:
# look up each predicted predot code in the AIS reference table; a left join
# keeps predictions that have no match (their reference columns become NaN)
nmt_long = pd.merge(
    nmt_long,
    ais_codes,
    how='left',
    left_on='ais_predot',
    right_on='predot',
)

In [26]:
# rebuild the full code: predot digits plus severity as the first decimal
nmt_long['aiscode'] = nmt_long['ais_predot'].add(nmt_long['severity'].div(10))

In [27]:
# keep only the columns written to the output file, in this order
nmt_long = nmt_long.loc[
    :,
    ['inc_key', 'aiscode', 'ais_predot', 'severity', 'chapter', 'region'],
]

## Write to file

In [28]:
# sort by key, then by predot code within each key
nmt_long = nmt_long.sort_values(by=['inc_key', 'ais_predot'])

# renumber the rows 0..n-1 after sorting
nmt_long = nmt_long.reset_index(drop=True)

In [29]:
# save the long-format table as CSV, without the row index
nmt_long.to_csv(path_or_buf=nmt_trans_file, index=False)