# Project: ICD-AIS conversion using Deep Learning utilizing ICD10

This script analyzes the get patient demographics and injury information for Table 1.

## Setup

In [1]:
import numpy as np
import pandas as pd
import math
from sklearn import metrics
import seaborn as sb
import matplotlib.pyplot as plt
from scipy.stats import iqr

#### Files

In [2]:
# training
train_demo_file = "../Data/NTDB_combine/ntdb_train_demo.csv"
train_ais_file = "../Data/NTDB_combine/ntdb_train_ais.csv"

# validation
valid_demo_file = "../Data/NTDB_combine/ntdb_valid_demo.csv"
valid_ais_file = "../Data/NTDB_combine/ntdb_valid_ais.csv"

# testing
test_demo_file = "../Data/NTDB_combine/ntdb_test_demo.csv"
test_ais_file = "../Data/NTDB_combine/ntdb_test_ais.csv"

## Load data

In [3]:
# Load demographics files
train_demo = pd.read_csv(train_demo_file)
valid_demo = pd.read_csv(valid_demo_file)
test_demo = pd.read_csv(test_demo_file)

# Load injury files
train_ais = pd.read_csv(train_ais_file)
valid_ais = pd.read_csv(valid_ais_file)
test_ais = pd.read_csv(test_ais_file)


In [4]:
# combine training and validation data
train_demo = train_demo.append(valid_demo)
train_ais = train_ais.append(valid_ais)

## Get number of patients and injuries

In [5]:
# create data frame with number of patients and injuries
res = pd.DataFrame({'num_pts':[len(train_demo), len(test_demo)],\
                         'num_injuries':[len(train_ais), len(test_ais)]})

## Calculate median number of injuries and IQR

In [6]:
res['inj_med'] = [train_ais.groupby('inc_key').count()['AISCODE'].median(), \
                  test_ais.groupby('inc_key').count()['AISCODE'].median()]

In [7]:
res['inj_iqr'] = [[train_ais.groupby('inc_key').count()['AISCODE'].quantile(.25),
                   train_ais.groupby('inc_key').count()['AISCODE'].quantile(.75)],
                  [test_ais.groupby('inc_key').count()['AISCODE'].quantile(.25),
                   test_ais.groupby('inc_key').count()['AISCODE'].quantile(.75)]]

## Calculate median age and IQR

In [8]:
res['age_med'] = [train_demo.AGEYEARS.median(), test_demo.AGEYEARS.median()]

In [9]:
res['age_iqr'] = [[train_demo.AGEYEARS.quantile(0.25),
                   train_demo.AGEYEARS.quantile(0.75)],
                  [test_demo.AGEYEARS.quantile(0.25),
                   test_demo.AGEYEARS.quantile(0.75)],]

In [10]:
res

Unnamed: 0,num_pts,num_injuries,inj_med,inj_iqr,age_med,age_iqr
0,2032246,6973435,2.0,"[1.0, 4.0]",47.0,"[23.0, 68.0]"
1,10000,33497,2.0,"[1.0, 4.0]",49.0,"[24.0, 70.0]"


## Sex of patients

In [16]:
res['sex'] = [len(train_demo[train_demo.SEX==1.0])/len(train_demo), 
                len(test_demo[test_demo.SEX==1.0])/len(test_demo)]

In [17]:
res

Unnamed: 0,num_pts,num_injuries,inj_med,inj_iqr,age_med,age_iqr,sex
0,2032246,6973435,2.0,"[1.0, 4.0]",47.0,"[23.0, 68.0]",0.596977
1,10000,33497,2.0,"[1.0, 4.0]",49.0,"[24.0, 70.0]",0.5808


## Determine mechanism of injury

In [11]:
# get mechanism of injury letter
train_demo['mech_letter'] = train_demo.PRIMARYECODEICD10.astype(str).str[0]
test_demo['mech_letter'] = test_demo.PRIMARYECODEICD10.astype(str).str[0]

In [12]:
# get mechanism of injury number
train_demo['mech_num'] = train_demo.PRIMARYECODEICD10.astype(str).str[1:3].astype(int)
test_demo['mech_num'] = test_demo.PRIMARYECODEICD10.astype(str).str[1:3].astype(int)

In [13]:
def det_moi(row):
    if row.mech_letter=='V':
        return 'MVC'
    elif (row.mech_letter=='W') & (row.mech_num <= 19):
        return 'Fall'
    elif (row.mech_letter=='X') & (row.mech_num >= 92):
        return 'Assault'
    elif (row.mech_letter=='Y') & (row.mech_num <= 9):
        return 'Assault'
    elif (row.mech_letter=='X') & (row.mech_num >=71) & (row.mech_num <=83):
        return 'Self-harm'
    else:
        return 'Other'

In [14]:
# determine training MOI
train_demo['mech'] = train_demo.apply(det_moi, axis=1)
train_demo.groupby('mech').inc_key.count()/len(train_demo)

mech
Assault      0.091755
Fall         0.465897
MVC          0.321852
Other        0.107204
Self-harm    0.013293
Name: inc_key, dtype: float64

In [15]:
# determine testing MOI
test_demo['mech'] = test_demo.apply(det_moi, axis=1)
test_demo.groupby('mech').inc_key.count()/len(test_demo)

mech
Assault      0.0854
Fall         0.4932
MVC          0.3033
Other        0.1060
Self-harm    0.0121
Name: inc_key, dtype: float64

## Calculate median ISS and IQR

In [16]:
res['iss_med'] = [train_demo.ISS_05.median(), test_demo.ISS_05.median()]

In [17]:
res['iss_iqr'] = [[train_demo.ISS_05.quantile(0.25),
                   train_demo.ISS_05.quantile(0.75)],
                  [test_demo.ISS_05.quantile(0.25),
                   test_demo.ISS_05.quantile(0.75)],]

## Calculate percent ISS >= 16

In [18]:
# flag patients with ISS >=16
train_demo['ISS16'] = [1 if iss >= 16 else 0 for iss in train_demo.ISS_05]
test_demo['ISS16'] = [1 if iss >= 16 else 0 for iss in test_demo.ISS_05]

In [19]:
res['iss16'] = [sum(train_demo.ISS16) / len(train_demo),
                sum(test_demo.ISS16) / len(test_demo)]

In [20]:
res

Unnamed: 0,num_pts,num_injuries,inj_med,inj_iqr,age_med,age_iqr,iss_med,iss_iqr,iss16
0,2032246,6973435,2.0,"[1.0, 4.0]",47.0,"[23.0, 68.0]",8.0,"[4.0, 10.0]",0.1576
1,10000,33497,2.0,"[1.0, 4.0]",49.0,"[24.0, 70.0]",8.0,"[4.0, 10.0]",0.1478


## Calculate MAIS

In [21]:
# calculate severity
train_ais['severity'] = ((train_ais.AISCODE % 1) * 10).astype(int)
test_ais['severity'] = ((test_ais.AISCODE % 1) * 10).astype(int)

In [22]:
# fix severities
train_ais['severity'] = train_ais.severity.apply(lambda x: 1 if (x > 6) | (x==0) else x)
test_ais['severity'] = test_ais.severity.apply(lambda x: 1 if (x > 6) | (x==0) else x)

In [23]:
# get mais
mais_train = train_ais.sort_values('severity', ascending=False).drop_duplicates('inc_key').severity
mais_test = test_ais.sort_values('severity', ascending=False).drop_duplicates('inc_key').severity

In [24]:
# find median MAIS
res['mais_med'] = [mais_train.median(), mais_test.median()]

In [25]:
# find MAIS IQR
res['mais_iqr'] = [[mais_train.quantile(0.25), mais_train.quantile(0.75)],
                  [mais_test.quantile(0.25),mais_test.quantile(0.75)],]

## Calculate MAIS 3+ and 2+

In [26]:
# find percent MAIS 3+
res['mais3'] = [mais_train[mais_train >=3].count() / len(mais_train),
                mais_test[mais_test >=3].count() / len(mais_test)]

In [27]:
# find percent MAIS 2+
res['mais2'] = [mais_train[mais_train >=2].count() / len(mais_train),
                mais_test[mais_test >=2].count() / len(mais_test)]

In [28]:
res

Unnamed: 0,num_pts,num_injuries,inj_med,inj_iqr,age_med,age_iqr,iss_med,iss_iqr,iss16,mais_med,mais_iqr,mais3,mais2
0,2032246,6973435,2.0,"[1.0, 4.0]",47.0,"[23.0, 68.0]",8.0,"[4.0, 10.0]",0.1576,2.0,"[1.0, 3.0]",0.310213,0.600094
1,10000,33497,2.0,"[1.0, 4.0]",49.0,"[24.0, 70.0]",8.0,"[4.0, 10.0]",0.1478,2.0,"[1.0, 3.0]",0.3112,0.6015
