In [1]:
from google.colab import drive
#Mount Google Drive at /content/drive; the CSV paths used by later cells live under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np

In [3]:
#Read all data.
#Each 2020 NTDB PUF table is loaded with its incident-key column as the index so the
#tables can be aligned row-by-row when they are concatenated in the next cell.
#The key column is spelled 'inc_key' in some files and 'Inc_Key' in others, hence the
#per-file index_col values below.

#Single place to change when the data set is moved or another year is analysed.
NTDB_DIR = "/content/drive/MyDrive/NTDB-PUFs/NTDB_2020/"

main_puf = pd.read_csv(NTDB_DIR + "PUF_TRAUMA.csv", index_col = 'inc_key', low_memory = False)
preexisting_conditions = pd.read_csv(NTDB_DIR + "PUF_PREEXISTINGCONDITIONS_PIVOT.csv", index_col = 'Inc_Key', low_memory = False)
hospital_events = pd.read_csv(NTDB_DIR + "PUF_HOSPITALEVENTS_PIVOT.csv", index_col = 'Inc_Key', low_memory = False)
ais = pd.read_csv(NTDB_DIR + "PUF_AISDIAGNOSIS_PIVOT.csv", index_col = 'inc_key', low_memory = False)
ecode = pd.read_csv(NTDB_DIR + "PUF_ECODE.csv", index_col = 'inc_key', low_memory = False)
interventions = pd.read_csv(NTDB_DIR + "PUF_INTERVENTIONS.csv", index_col = 'Inc_Key', low_memory = False)

In [4]:
#Place the six tables side by side; pandas aligns the rows on each table's index.
data = pd.concat(
    objs=[main_puf, preexisting_conditions, hospital_events, ais, ecode, interventions],
    axis='columns',
)
#Upper-case every column label so later cells can use a single spelling.
data.columns = [label.upper() for label in data.columns]
data.shape

(1135018, 352)

#Inclusion and Exclusion Criteria

In [5]:
#Identify the patient population: TOTALGCS of 14 or less and VERIFICATIONLEVEL equal to 1 or 2.
#Rows where either value is missing fail the comparison and are dropped as well.

data = data.loc[data['TOTALGCS'].le(14) & data['VERIFICATIONLEVEL'].isin([1, 2])]

included = len(data)

print('Number of patients included: ', included)

Number of patients included:  135923


In [6]:
#Exclude pediatric patients: keep rows with AGEYEARS of 16 or more.
#Rows with a missing AGEYEARS fail the comparison and are dropped too.

before = len(data)
data = data.loc[data['AGEYEARS'].ge(16)]
after = len(data)
excluded = before - after

print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  135923
Number of patients after exclusion:  121438
Number of patients excluded with this criteria:  14485


In [7]:
#Exclude AIS injury severity score ≥ 3 to neck: keep rows with AISSEVERITY3 below 3.
#NOTE(review): a missing AISSEVERITY3 also fails the "< 3" test and is dropped - confirm this is intended.

before = len(data)
data = data.loc[data['AISSEVERITY3'].lt(3)]
after = len(data)
excluded = before - after

print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  121438
Number of patients after exclusion:  118894
Number of patients excluded with this criteria:  2544


In [8]:
#Exclude AIS injury severity score ≥ 3 to thorax: keep rows with AISSEVERITY4 below 3.
#NOTE(review): a missing AISSEVERITY4 also fails the "< 3" test and is dropped - confirm this is intended.

before = len(data)
data = data.loc[data['AISSEVERITY4'].lt(3)]
after = len(data)
excluded = before - after

print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  118894
Number of patients after exclusion:  92226
Number of patients excluded with this criteria:  26668


In [9]:
#Exclude AIS injury severity score ≥ 3 to abdomen: keep rows with AISSEVERITY5 below 3.
#NOTE(review): a missing AISSEVERITY5 also fails the "< 3" test and is dropped - confirm this is intended.

before = len(data)
data = data.loc[data['AISSEVERITY5'].lt(3)]
after = len(data)
excluded = before - after

print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  92226
Number of patients after exclusion:  88571
Number of patients excluded with this criteria:  3655


In [10]:
#Exclude AIS injury severity score ≥ 3 to spine: keep rows with AISSEVERITY6 below 3.
#NOTE(review): a missing AISSEVERITY6 also fails the "< 3" test and is dropped - confirm this is intended.

before = len(data)
data = data.loc[data['AISSEVERITY6'].lt(3)]
after = len(data)
excluded = before - after

print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  88571
Number of patients after exclusion:  84836
Number of patients excluded with this criteria:  3735


In [11]:
#Exclude AIS injury severity score ≥ 3 to upper extremity: keep rows with AISSEVERITY7 below 3.
#NOTE(review): a missing AISSEVERITY7 also fails the "< 3" test and is dropped - confirm this is intended.

before = len(data)
data = data.loc[data['AISSEVERITY7'].lt(3)]
after = len(data)
excluded = before - after

print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  84836
Number of patients after exclusion:  83777
Number of patients excluded with this criteria:  1059


In [12]:
#Exclude AIS injury severity score ≥ 3 to lower extremity: keep rows with AISSEVERITY8 below 3.
#NOTE(review): a missing AISSEVERITY8 also fails the "< 3" test and is dropped - confirm this is intended.

before = len(data)
data = data.loc[data['AISSEVERITY8'].lt(3)]
after = len(data)
excluded = before - after

print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  83777
Number of patients after exclusion:  73055
Number of patients excluded with this criteria:  10722


In [13]:
#Exclude AIS injury severity score ≥ 3 to unspecified body regions: keep rows with AISSEVERITY9 below 3.
#NOTE(review): a missing AISSEVERITY9 also fails the "< 3" test and is dropped - confirm this is intended.

before = len(data)
data = data.loc[data['AISSEVERITY9'].lt(3)]
after = len(data)
excluded = before - after

print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  73055
Number of patients after exclusion:  72743
Number of patients excluded with this criteria:  312


In [14]:
#Exclude patients dead on arrival (SBP=0).
#Only an SBP of exactly 0 is removed; rows with a missing SBP pass the "not equal" test and are kept.

before = len(data)
data = data.loc[data['SBP'].ne(0)]
after = len(data)
excluded = before - after

print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  72743
Number of patients after exclusion:  71269
Number of patients excluded with this criteria:  1474


In [15]:
#Exclude patients with advanced directive limiting care: remove rows whose CC_ADLC is 'Yes'.
#Any other value, including a missing one, is kept.

before = len(data)
data = data.loc[data['CC_ADLC'].ne('Yes')]
after = len(data)
excluded = before - after

print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  71269
Number of patients after exclusion:  67698
Number of patients excluded with this criteria:  3571


In [16]:
#Drop patients with eye obstruction: keep rows where GCSQ_EYEOBSTRUCTION equals 0.
#Rows with a missing GCSQ_EYEOBSTRUCTION do not equal 0 and are dropped as well.

before = len(data)
data = data.loc[data['GCSQ_EYEOBSTRUCTION'].eq(0)]
after = len(data)
excluded = before - after

print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  67698
Number of patients after exclusion:  67108
Number of patients excluded with this criteria:  590


In [17]:
#Drop patients chemically sedated or paralyzed: keep rows where GCSQ_SEDATEDPARALYZED equals 0.
#Rows with a missing GCSQ_SEDATEDPARALYZED do not equal 0 and are dropped as well.

before = len(data)
data = data.loc[data['GCSQ_SEDATEDPARALYZED'].eq(0)]
after = len(data)
excluded = before - after

print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  67108
Number of patients after exclusion:  59337
Number of patients excluded with this criteria:  7771


In [18]:
#Drop intubated patients: keep rows where GCSQ_INTUBATED equals 0.
#Rows with a missing GCSQ_INTUBATED do not equal 0 and are dropped as well.

before = data.shape[0]
data = data[data['GCSQ_INTUBATED'] == 0]
after = data.shape[0]

excluded = before - after
print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  59337
Number of patients after exclusion:  56128
Number of patients excluded with this criteria:  3209


In [19]:
#Drop patients flagged in GCSQ_UK: keep rows where GCSQ_UK equals 0.
#NOTE(review): GCSQ_UK is presumably the "GCS qualifier unknown" indicator - confirm against the NTDB data dictionary.
#Rows with a missing GCSQ_UK do not equal 0 and are dropped as well.

before = len(data)
data = data.loc[data['GCSQ_UK'].eq(0)]
after = len(data)
excluded = before - after

print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  56128
Number of patients after exclusion:  55156
Number of patients excluded with this criteria:  972


In [20]:
#Drop patients whose TOTALGCS is missing.
#The earlier "TOTALGCS <= 14" inclusion filter already rejects missing values, so this acts as a safeguard.

before = len(data)
data = data.dropna(subset=['TOTALGCS'])
after = len(data)
excluded = before - after

print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  55156
Number of patients after exclusion:  55156
Number of patients excluded with this criteria:  0


In [21]:
#Assign GCS_VERBAL to 0 for intubated patients.
#NOTE(review): an earlier cell keeps only rows with GCSQ_INTUBATED == 0, so when the notebook
#is run top to bottom the mask below selects no rows and GCSVERBAL is left unchanged.
#Decide whether this recoding should happen before intubated patients are dropped, or be removed.

data.loc[data['GCSQ_INTUBATED'] == 1, 'GCSVERBAL'] = 0

#Display the raw counts of each GCSVERBAL value, with missing values counted as their own row.
data['GCSVERBAL'].value_counts(normalize=False, dropna=False)

4.0    36551
1.0     7407
5.0     4208
2.0     3841
3.0     2939
NaN      210
Name: GCSVERBAL, dtype: int64

In [22]:
#Drop patients with unknown GCS-Motor: remove rows whose GCSMOTOR is missing.

before = len(data)
data = data.dropna(subset=['GCSMOTOR'])
after = len(data)
excluded = before - after

print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  55156
Number of patients after exclusion:  54948
Number of patients excluded with this criteria:  208


In [23]:
#Drop patients with unknown GCS-Verbal: remove rows whose GCSVERBAL is missing.

before = len(data)
data = data.dropna(subset=['GCSVERBAL'])
after = len(data)
excluded = before - after

print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  54948
Number of patients after exclusion:  54944
Number of patients excluded with this criteria:  4


In [24]:
#Drop patients with unknown GCS-Eye: remove rows whose GCSEYE is missing.

before = len(data)
data = data.dropna(subset=['GCSEYE'])
after = len(data)
excluded = before - after

print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  54944
Number of patients after exclusion:  54943
Number of patients excluded with this criteria:  1


In [25]:
#Drop patients with unknown pupillary response: remove rows whose TBIPUPILLARYRESPONSE is missing.

before = len(data)
data = data.dropna(subset=['TBIPUPILLARYRESPONSE'])
after = len(data)
excluded = before - after

print('Number of patients before exclusion: ', before)
print('Number of patients after exclusion: ', after)
print('Number of patients excluded with this criteria: ', excluded)

Number of patients before exclusion:  54943
Number of patients after exclusion:  33981
Number of patients excluded with this criteria:  20962


In [26]:
#Save data.
#Writes the filtered cohort to Google Drive as CSV; the DataFrame index is written as the
#first column because to_csv is called with its default index setting.

data.to_csv('/content/drive/MyDrive/TQP-MOST/2020_crash.csv')