In [1]:
# Colab-only: mount Google Drive at /content/drive. The cells below read the
# NTDB CSV files from, and write the filtered cohort to, paths under this mount.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np

In [3]:
#Read all data.

# All NTDB 2021 PUF tables sit in the same Drive folder; naming it once means a
# different year or location needs a single edit instead of six.
NTDB_DIR = "/content/drive/MyDrive/NTDB-PUFs/NTDB_2021"


def read_puf(filename, key):
    """Load one PUF table from NTDB_DIR as a DataFrame indexed by column `key`.

    `low_memory=False` makes pandas parse each file in one pass, so every column
    gets a single inferred dtype instead of chunk-dependent mixed types.
    """
    return pd.read_csv(f"{NTDB_DIR}/{filename}", index_col=key, low_memory=False)


# The incident-key header is spelled 'inc_key' in some files and 'Inc_Key' in
# others, so the key column is passed explicitly for each table.
main_puf = read_puf("PUF_TRAUMA.csv", "inc_key")
preexisting_conditions = read_puf("PUF_PREEXISTINGCONDITIONS_PIVOT.csv", "Inc_Key")
hospital_events = read_puf("PUF_HOSPITALEVENTS_PIVOT.csv", "Inc_Key")
ais = read_puf("PUF_AISDIAGNOSIS_PIVOT.csv", "inc_key")
ecode = read_puf("PUF_ECODE.csv", "inc_key")
interventions = read_puf("PUF_INTERVENTIONS.csv", "Inc_Key")

In [4]:
# Place the six tables side by side, aligned on their incident-key index, then
# upper-case every column label (the cells below refer to columns in upper case).
data = pd.concat(
    [main_puf, preexisting_conditions, hospital_events, ais, ecode, interventions],
    axis='columns',
)
data.columns = [label.upper() for label in data.columns]
data.shape

(1209097, 301)

# Inclusion and Exclusion Criteria

In [5]:
# Define the study population: TOTALGCS of 14 or less AND a VERIFICATIONLEVEL of
# 1 or 2. Rows where either field is missing fail the test and are left out.

data = data.loc[data['TOTALGCS'].le(14) & data['VERIFICATIONLEVEL'].isin([1, 2])]

included = len(data)

print('Number of patients included: ', included)

Number of patients included:  147458


In [6]:
# Exclude pediatric patients: keep only rows with AGEYEARS of at least 16.
# Rows with a missing AGEYEARS fail the comparison and are removed as well.

before = len(data)
data = data.loc[data['AGEYEARS'].ge(16)]
after = len(data)
excluded = before - after

print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  147458
Number of patients after exclusion:  131519
Number of patients excluded with this criteria:  15939


In [7]:
# Exclude neck injuries with AIS severity >= 3 by keeping AISSEVERITY3 below 3.
# NOTE: a missing AISSEVERITY3 also fails the comparison, so those rows are dropped too.

before = len(data)
data = data.loc[data['AISSEVERITY3'].lt(3)]
after = len(data)
excluded = before - after

print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  131519
Number of patients after exclusion:  128468
Number of patients excluded with this criteria:  3051


In [8]:
# Exclude thorax injuries with AIS severity >= 3 by keeping AISSEVERITY4 below 3.
# NOTE: a missing AISSEVERITY4 also fails the comparison, so those rows are dropped too.

before = len(data)
data = data.loc[data['AISSEVERITY4'].lt(3)]
after = len(data)
excluded = before - after

print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  128468
Number of patients after exclusion:  99517
Number of patients excluded with this criteria:  28951


In [9]:
# Exclude abdomen injuries with AIS severity >= 3 by keeping AISSEVERITY5 below 3.
# NOTE: a missing AISSEVERITY5 also fails the comparison, so those rows are dropped too.

before = len(data)
data = data.loc[data['AISSEVERITY5'].lt(3)]
after = len(data)
excluded = before - after

print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  99517
Number of patients after exclusion:  95482
Number of patients excluded with this criteria:  4035


In [10]:
# Exclude spine injuries with AIS severity >= 3 by keeping AISSEVERITY6 below 3.
# NOTE: a missing AISSEVERITY6 also fails the comparison, so those rows are dropped too.

before = len(data)
data = data.loc[data['AISSEVERITY6'].lt(3)]
after = len(data)
excluded = before - after

print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  95482
Number of patients after exclusion:  91498
Number of patients excluded with this criteria:  3984


In [11]:
# Exclude upper-extremity injuries with AIS severity >= 3 by keeping AISSEVERITY7 below 3.
# NOTE: a missing AISSEVERITY7 also fails the comparison, so those rows are dropped too.

before = len(data)
data = data.loc[data['AISSEVERITY7'].lt(3)]
after = len(data)
excluded = before - after

print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  91498
Number of patients after exclusion:  90403
Number of patients excluded with this criteria:  1095


In [12]:
# Exclude lower-extremity injuries with AIS severity >= 3 by keeping AISSEVERITY8 below 3.
# NOTE: a missing AISSEVERITY8 also fails the comparison, so those rows are dropped too.

before = len(data)
data = data.loc[data['AISSEVERITY8'].lt(3)]
after = len(data)
excluded = before - after

print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  90403
Number of patients after exclusion:  78565
Number of patients excluded with this criteria:  11838


In [13]:
# Exclude injuries to unspecified body regions with AIS severity >= 3 by keeping
# AISSEVERITY9 below 3.
# NOTE: a missing AISSEVERITY9 also fails the comparison, so those rows are dropped too.

before = len(data)
data = data.loc[data['AISSEVERITY9'].lt(3)]
after = len(data)
excluded = before - after

print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  78565
Number of patients after exclusion:  78462
Number of patients excluded with this criteria:  103


In [14]:
# Exclude patients dead on arrival, identified here by SBP equal to 0.
# Rows with a missing SBP are not equal to 0, so they stay in the cohort.

before = len(data)
data = data.loc[data['SBP'].ne(0)]
after = len(data)
excluded = before - after

print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  78462
Number of patients after exclusion:  76772
Number of patients excluded with this criteria:  1690


In [15]:
# Exclude patients with an advance directive limiting care (CC_ADLC == 'Yes').
# Any other value, including a missing one, keeps the row.

before = len(data)
data = data.loc[data['CC_ADLC'].ne('Yes')]
after = len(data)
excluded = before - after

print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  76772
Number of patients after exclusion:  72955
Number of patients excluded with this criteria:  3817


In [16]:
# Keep only patients whose GCS is flagged valid (PMGCSQ_VALID equal to 1).
# Every other value, including a missing flag, is dropped.

before = len(data)
data = data.loc[data['PMGCSQ_VALID'].eq(1)]
after = len(data)

excluded = before - after
print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  72955
Number of patients after exclusion:  31123
Number of patients excluded with this criteria:  41832


In [17]:
# Drop patients whose total GCS (TOTALGCS) is missing.

before = len(data)
data = data.dropna(subset=['TOTALGCS'])
after = len(data)

excluded = before - after
print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  31123
Number of patients after exclusion:  31123
Number of patients excluded with this criteria:  0


In [18]:
# Drop patients whose GCS motor component (GCSMOTOR) is missing.

before = len(data)
data = data.dropna(subset=['GCSMOTOR'])
after = len(data)

excluded = before - after
print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  31123
Number of patients after exclusion:  31011
Number of patients excluded with this criteria:  112


In [19]:
# Drop patients whose GCS verbal component (GCSVERBAL) is missing.

before = len(data)
data = data.dropna(subset=['GCSVERBAL'])
after = len(data)

excluded = before - after
print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  31011
Number of patients after exclusion:  31005
Number of patients excluded with this criteria:  6


In [20]:
# Drop patients whose GCS eye component (GCSEYE) is missing.

before = len(data)
data = data.dropna(subset=['GCSEYE'])
after = len(data)

excluded = before - after
print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  31005
Number of patients after exclusion:  31003
Number of patients excluded with this criteria:  2


In [21]:
# Drop patients whose pupillary response (TBIPUPILLARYRESPONSE) is missing.

before = len(data)
data = data.dropna(subset=['TBIPUPILLARYRESPONSE'])
after = len(data)

excluded = before - after
print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  31003
Number of patients after exclusion:  30293
Number of patients excluded with this criteria:  710


In [22]:
# Write the filtered cohort, index column included, to a CSV file on the mounted Drive.

data.to_csv(path_or_buf='/content/drive/MyDrive/TQP-MOST/2021_crash.csv')