In [1]:
import numpy as np
import pandas as pd
import pyreadstat
import matplotlib.pyplot as plt
from sas7bdat import SAS7BDAT
from tableone import TableOne

In [2]:
inclusioncriteria = pd.read_excel('/Volumes/Research/GoldenHourData/InclusionCriteria/InclusionCriteriaData.xlsx')
inclusioncriteria

Unnamed: 0,PcrKey,eDisposition_16,eDisposition_21,ePatient_15,transport_method,desired_team,eDisposition_24
0,268435605,4216005,4221003,23,ground,1,4224003
1,234881233,4216005,4221003,81,ground,1,4224003
2,268435832,4216003,4221003,58,air,1,4224003
3,251658622,4216005,4221003,77,ground,1,4224017
4,234881417,4216005,4221003,81,ground,1,4224003
...,...,...,...,...,...,...,...
281860,268435068,4216005,4221003,72,ground,1,4224017
281861,234880937,4216005,4221003,66,ground,1,4224003
281862,268435397,4216005,4221003,32,ground,1,4224017
281863,234880971,4216005,4221003,53,ground,1,4224003


In [3]:
race_df, _ = pyreadstat.read_sas7bdat('/Volumes/Research/GoldenHourData/NEMSISRawFiles/pcrpatientracegroup.sas7bdat')
race_df

Unnamed: 0,PcrPatientRaceGroupKey,PcrKey,ePatient_14
0,237780154.0,181572130.0,2514001
1,241441778.0,183973531.0,2514011
2,245920207.0,184118335.0,2514005
3,243238626.0,185890862.0,2514011
4,259154006.0,198833989.0,2514011
...,...,...,...
54765541,385749113.0,289284619.0,7701003
54765542,385749132.0,289284637.0,7701003
54765543,385749193.0,289284698.0,7701003
54765544,385749205.0,289284710.0,7701003


In [4]:
#extracting only gender, age, and dispositions
gender_age_mortality_df, _ = pyreadstat.read_sas7bdat(
    '/Volumes/Research/GoldenHourData/NEMSISRawFiles/pub_pcrevents.sas7bdat', 
    usecols=["PcrKey", "ePatient_13", "ePatient_15", "eOutcome_01", "eOutcome_02"])
gender_age_mortality_df

Unnamed: 0,PcrKey,eOutcome_01,eOutcome_02,ePatient_13,ePatient_15
0,80475190.0,7701003,7701003,7701001,7701001.0
1,95460809.0,7701003,7701003,7701003,7701003.0
2,99195706.0,7701003,7701003,7701003,7701003.0
3,167978751.0,7701003,7701003,7701003,7701003.0
4,171760717.0,7701003,7701003,9906003,43.0
...,...,...,...,...,...
54190574,289285731.0,7701003,7701003,9906001,7701003.0
54190575,289285905.0,7701003,7701003,9906003,25.0
54190576,289286250.0,7701003,7701003,9906003,54.0
54190577,289286356.0,7701003,7701003,9906001,67.0


In [5]:
#adding race onto dataframe
gender_age_mortality_race_df = pd.merge(gender_age_mortality_df, race_df[['PcrKey', 'ePatient_14']], on='PcrKey', how='left')
gender_age_mortality_race_df

Unnamed: 0,PcrKey,eOutcome_01,eOutcome_02,ePatient_13,ePatient_15,ePatient_14
0,80475190.0,7701003,7701003,7701001,7701001.0,7701003
1,95460809.0,7701003,7701003,7701003,7701003.0,7701003
2,99195706.0,7701003,7701003,7701003,7701003.0,7701003
3,167978751.0,7701003,7701003,7701003,7701003.0,7701003
4,171760717.0,7701003,7701003,9906003,43.0,7701003
...,...,...,...,...,...,...
54765541,289285731.0,7701003,7701003,9906001,7701003.0,2514011
54765542,289285905.0,7701003,7701003,9906003,25.0,2514011
54765543,289286250.0,7701003,7701003,9906003,54.0,2514005
54765544,289286356.0,7701003,7701003,9906001,67.0,2514011


In [6]:
#reading in NISS scores from R
niss_df = pd.read_csv('/Volumes/Research/GoldenHourData/InclusionCriteria/NISS.csv')
niss_df

Unnamed: 0,pcrkey,niss
0,225614208,0
1,225616504,0
2,225616872,0
3,225617087,0
4,225618934,0
...,...,...
281860,289215928,0
281861,289278280,0
281862,289280833,0
281863,289280835,0


In [7]:
#adding NISS scores onto dataframe
gender_age_mortality_race_NISS_df = pd.merge(
    gender_age_mortality_race_df, 
    niss_df[['pcrkey', 'niss']], 
    left_on='PcrKey',
    right_on='pcrkey',
    how='left'
)
gender_age_mortality_race_NISS_df

Unnamed: 0,PcrKey,eOutcome_01,eOutcome_02,ePatient_13,ePatient_15,ePatient_14,pcrkey,niss
0,80475190.0,7701003,7701003,7701001,7701001.0,7701003,,
1,95460809.0,7701003,7701003,7701003,7701003.0,7701003,,
2,99195706.0,7701003,7701003,7701003,7701003.0,7701003,,
3,167978751.0,7701003,7701003,7701003,7701003.0,7701003,,
4,171760717.0,7701003,7701003,9906003,43.0,7701003,,
...,...,...,...,...,...,...,...,...
54765541,289285731.0,7701003,7701003,9906001,7701003.0,2514011,,
54765542,289285905.0,7701003,7701003,9906003,25.0,2514011,,
54765543,289286250.0,7701003,7701003,9906003,54.0,2514005,,
54765544,289286356.0,7701003,7701003,9906001,67.0,2514011,,


In [8]:
#I miscapitalized 'pcrkey' in the R file for NISS so after mapping it, dropping the additional column
gender_age_mortality_race_NISS_df.drop(columns=['pcrkey'], inplace=True)
gender_age_mortality_race_NISS_df

Unnamed: 0,PcrKey,eOutcome_01,eOutcome_02,ePatient_13,ePatient_15,ePatient_14,niss
0,80475190.0,7701003,7701003,7701001,7701001.0,7701003,
1,95460809.0,7701003,7701003,7701003,7701003.0,7701003,
2,99195706.0,7701003,7701003,7701003,7701003.0,7701003,
3,167978751.0,7701003,7701003,7701003,7701003.0,7701003,
4,171760717.0,7701003,7701003,9906003,43.0,7701003,
...,...,...,...,...,...,...,...
54765541,289285731.0,7701003,7701003,9906001,7701003.0,2514011,
54765542,289285905.0,7701003,7701003,9906003,25.0,2514011,
54765543,289286250.0,7701003,7701003,9906003,54.0,2514005,
54765544,289286356.0,7701003,7701003,9906001,67.0,2514011,


In [9]:
#rename columns
gender_age_mortality_race_NISS_df.columns = gender_age_mortality_race_NISS_df.columns.str.strip()
gender_age_mortality_race_NISS_df.rename(columns={'ePatient_13': 'Gender',
                           'ePatient_15' : 'Age',
                           'ePatient_14' : 'Race',
                           'eOutcome_01' : 'ED Disposition',
                            'eOutcome_02' : 'Hospital Disposition'}, inplace=True)
gender_age_mortality_race_NISS_df


Unnamed: 0,PcrKey,ED Disposition,Hospital Disposition,Gender,Age,Race,niss
0,80475190.0,7701003,7701003,7701001,7701001.0,7701003,
1,95460809.0,7701003,7701003,7701003,7701003.0,7701003,
2,99195706.0,7701003,7701003,7701003,7701003.0,7701003,
3,167978751.0,7701003,7701003,7701003,7701003.0,7701003,
4,171760717.0,7701003,7701003,9906003,43.0,7701003,
...,...,...,...,...,...,...,...
54765541,289285731.0,7701003,7701003,9906001,7701003.0,2514011,
54765542,289285905.0,7701003,7701003,9906003,25.0,2514011,
54765543,289286250.0,7701003,7701003,9906003,54.0,2514005,
54765544,289286356.0,7701003,7701003,9906001,67.0,2514011,


In [10]:
#renaming each gender entry with actual values based on data dictionary

gender_mapping = {
    '9906001': 'Female',
    '9906003': 'Male',
    '9906005': 'Unknown'
}
gender_mapped_df = gender_age_mortality_race_NISS_df.copy()
gender_mapped_df['Gender'] = gender_mapped_df['Gender'].replace(gender_mapping)
gender_mapped_df

Unnamed: 0,PcrKey,ED Disposition,Hospital Disposition,Gender,Age,Race,niss
0,80475190.0,7701003,7701003,7701001,7701001.0,7701003,
1,95460809.0,7701003,7701003,7701003,7701003.0,7701003,
2,99195706.0,7701003,7701003,7701003,7701003.0,7701003,
3,167978751.0,7701003,7701003,7701003,7701003.0,7701003,
4,171760717.0,7701003,7701003,Male,43.0,7701003,
...,...,...,...,...,...,...,...
54765541,289285731.0,7701003,7701003,Female,7701003.0,2514011,
54765542,289285905.0,7701003,7701003,Male,25.0,2514011,
54765543,289286250.0,7701003,7701003,Male,54.0,2514005,
54765544,289286356.0,7701003,7701003,Female,67.0,2514011,


In [11]:
#renaming each race entry with actual values based on data dictionary
race_mapping = {
    '2514001': 'American Indian or Alaska Native',
    '2514003': 'Asian',
    '2514005': 'Black or African American',
    '2514007': 'Hispanic or Latino',
    '2514009': 'Native Hawaiian or Other Pacific Islander',
    '2514011': 'White'
}

race_gender_mapped_df = gender_mapped_df.copy()
race_gender_mapped_df['Race'] = race_gender_mapped_df['Race'].replace(race_mapping)
race_gender_mapped_df


Unnamed: 0,PcrKey,ED Disposition,Hospital Disposition,Gender,Age,Race,niss
0,80475190.0,7701003,7701003,7701001,7701001.0,7701003,
1,95460809.0,7701003,7701003,7701003,7701003.0,7701003,
2,99195706.0,7701003,7701003,7701003,7701003.0,7701003,
3,167978751.0,7701003,7701003,7701003,7701003.0,7701003,
4,171760717.0,7701003,7701003,Male,43.0,7701003,
...,...,...,...,...,...,...,...
54765541,289285731.0,7701003,7701003,Female,7701003.0,White,
54765542,289285905.0,7701003,7701003,Male,25.0,White,
54765543,289286250.0,7701003,7701003,Male,54.0,Black or African American,
54765544,289286356.0,7701003,7701003,Female,67.0,White,


In [12]:
#adding on severity score category
race_gender_mapped_df['Severity'] = np.where(race_gender_mapped_df['niss'].isna(),
                                             'Unknown',
                                             np.where(race_gender_mapped_df['niss']< 16,
                                                      'Mild/Moderate', 'Severe/Profound'))
race_gender_mapped_df

Unnamed: 0,PcrKey,ED Disposition,Hospital Disposition,Gender,Age,Race,niss,Severity
0,80475190.0,7701003,7701003,7701001,7701001.0,7701003,,Unknown
1,95460809.0,7701003,7701003,7701003,7701003.0,7701003,,Unknown
2,99195706.0,7701003,7701003,7701003,7701003.0,7701003,,Unknown
3,167978751.0,7701003,7701003,7701003,7701003.0,7701003,,Unknown
4,171760717.0,7701003,7701003,Male,43.0,7701003,,Unknown
...,...,...,...,...,...,...,...,...
54765541,289285731.0,7701003,7701003,Female,7701003.0,White,,Unknown
54765542,289285905.0,7701003,7701003,Male,25.0,White,,Unknown
54765543,289286250.0,7701003,7701003,Male,54.0,Black or African American,,Unknown
54765544,289286356.0,7701003,7701003,Female,67.0,White,,Unknown


In [13]:
#mapping mortality data
mortality_mapping = {
    '77010011': 'Not Applicable',
    '7701001': 'Not Applicable',
    '7701003': 'Not Recorded',
    '20' : 'Died'
}

morality_race_gender_mapped_df = race_gender_mapped_df.copy()
morality_race_gender_mapped_df['ED Disposition'] = morality_race_gender_mapped_df['ED Disposition'].replace(mortality_mapping)
morality_race_gender_mapped_df['Hospital Disposition'] = morality_race_gender_mapped_df['Hospital Disposition'].replace(mortality_mapping)

morality_race_gender_mapped_df

Unnamed: 0,PcrKey,ED Disposition,Hospital Disposition,Gender,Age,Race,niss,Severity
0,80475190.0,Not Recorded,Not Recorded,7701001,7701001.0,7701003,,Unknown
1,95460809.0,Not Recorded,Not Recorded,7701003,7701003.0,7701003,,Unknown
2,99195706.0,Not Recorded,Not Recorded,7701003,7701003.0,7701003,,Unknown
3,167978751.0,Not Recorded,Not Recorded,7701003,7701003.0,7701003,,Unknown
4,171760717.0,Not Recorded,Not Recorded,Male,43.0,7701003,,Unknown
...,...,...,...,...,...,...,...,...
54765541,289285731.0,Not Recorded,Not Recorded,Female,7701003.0,White,,Unknown
54765542,289285905.0,Not Recorded,Not Recorded,Male,25.0,White,,Unknown
54765543,289286250.0,Not Recorded,Not Recorded,Male,54.0,Black or African American,,Unknown
54765544,289286356.0,Not Recorded,Not Recorded,Female,67.0,White,,Unknown


In [14]:
#remove duplicate entries of PcrKey
#Keeping the first occurrence of each duplicate

duplicate_cleaned_df = morality_race_gender_mapped_df.drop_duplicates(subset='PcrKey', keep='first')
duplicate_cleaned_df

Unnamed: 0,PcrKey,ED Disposition,Hospital Disposition,Gender,Age,Race,niss,Severity
0,80475190.0,Not Recorded,Not Recorded,7701001,7701001.0,7701003,,Unknown
1,95460809.0,Not Recorded,Not Recorded,7701003,7701003.0,7701003,,Unknown
2,99195706.0,Not Recorded,Not Recorded,7701003,7701003.0,7701003,,Unknown
3,167978751.0,Not Recorded,Not Recorded,7701003,7701003.0,7701003,,Unknown
4,171760717.0,Not Recorded,Not Recorded,Male,43.0,7701003,,Unknown
...,...,...,...,...,...,...,...,...
54765541,289285731.0,Not Recorded,Not Recorded,Female,7701003.0,White,,Unknown
54765542,289285905.0,Not Recorded,Not Recorded,Male,25.0,White,,Unknown
54765543,289286250.0,Not Recorded,Not Recorded,Male,54.0,Black or African American,,Unknown
54765544,289286356.0,Not Recorded,Not Recorded,Female,67.0,White,,Unknown


In [15]:
#sanity check
unique_severity = duplicate_cleaned_df['Severity'].unique()
severity_counts = duplicate_cleaned_df['Severity'].value_counts()

print("Unique Severity Values:", unique_severity)

Unique Severity Values: ['Unknown' 'Mild/Moderate' 'Severe/Profound']


In [16]:
#sanity check why are 'severe/profound' dropping out after the next function
def select_severe_entries(df):
    """
    Selects all entries from the DataFrame where the Severity column is 'Severe/Profound'.
    
    Parameters:
    df (pd.DataFrame): The input DataFrame to filter.
    
    Returns:
    pd.DataFrame: A DataFrame containing only the entries with 'Severe/Profound' in the Severity column.
    """
    # Filter the DataFrame for entries with 'Severe/Profound' in the Severity column
    severe_entries = df[df['Severity'] == 'Severe/Profound']
    
    return severe_entries

# Example usage
# Assuming duplicate_cleaned_df is already defined
severe_entries_df = select_severe_entries(duplicate_cleaned_df)
severe_entries_df

Unnamed: 0,PcrKey,ED Disposition,Hospital Disposition,Gender,Age,Race,niss,Severity
1108908,227979975.0,Not Recorded,Not Recorded,Female,25.0,White,18.0,Severe/Profound
1446980,228557525.0,Not Recorded,Not Recorded,Male,36.0,Black or African American,25.0,Severe/Profound
3665032,231355250.0,Not Recorded,Not Recorded,Male,40.0,White,18.0,Severe/Profound
3865317,231611167.0,Not Recorded,Not Recorded,Male,50.0,White,25.0,Severe/Profound
3865330,231611180.0,Not Recorded,Not Recorded,Male,46.0,White,25.0,Severe/Profound
...,...,...,...,...,...,...,...,...
50345856,279663704.0,Not Recorded,Not Recorded,Female,32.0,Black or African American,25.0,Severe/Profound
50947019,280302351.0,Not Recorded,Not Recorded,Male,60.0,White,25.0,Severe/Profound
52445519,281788343.0,Not Recorded,Not Recorded,Male,20.0,Black or African American,25.0,Severe/Profound
53679122,283972167.0,Not Recorded,Not Recorded,Male,58.0,Black or African American,25.0,Severe/Profound


In [17]:
#Mortality Data not included

not_recorded_categories = ['Not Recorded', 'Not Applicable']

# Step 1: Count how many entries are 'Not Recorded' or 'Not Applicable' in both columns
ed_not_recorded_count = duplicate_cleaned_df['ED Disposition'].isin(not_recorded_categories).sum()
hospital_not_recorded_count = duplicate_cleaned_df['Hospital Disposition'].isin(not_recorded_categories).sum()

print(f"Number of 'Not Recorded' or 'Not Applicable' entries in 'ED Disposition': {ed_not_recorded_count}")
print(f"Number of 'Not Recorded' or 'Not Applicable' entries in 'Hospital Disposition': {hospital_not_recorded_count}")

# Step 2: Create a new DataFrame excluding those entries in both columns
nomortality_filtered_df = duplicate_cleaned_df[
    ~((duplicate_cleaned_df['ED Disposition'].isin(not_recorded_categories)) | 
    (duplicate_cleaned_df['Hospital Disposition'].isin(not_recorded_categories)))
]
nomortality_filtered_df


Number of 'Not Recorded' or 'Not Applicable' entries in 'ED Disposition': 53079890
Number of 'Not Recorded' or 'Not Applicable' entries in 'Hospital Disposition': 53384592


Unnamed: 0,PcrKey,ED Disposition,Hospital Disposition,Gender,Age,Race,niss,Severity
355,225614872.0,09,01,Female,57.0,White,,Unknown
563,225615463.0,09,01,Male,57.0,Black or African American,,Unknown
579,225615523.0,01,01,Female,40.0,White,,Unknown
747,225616013.0,Died,Died,Female,79.0,Black or African American,,Unknown
883,225616415.0,09,01,Female,39.0,Black or African American,,Unknown
...,...,...,...,...,...,...,...,...
54759080,289270752.0,09,07,Male,32.0,White,,Unknown
54759095,289270768.0,09,62,Female,76.0,White,,Unknown
54759096,289270769.0,09,01,Female,64.0,White,,Unknown
54759102,289270775.0,09,05,Male,78.0,White,,Unknown


In [18]:
#sanity check, what happens to the severe cases??
unique_severity = nomortality_filtered_df['Severity'].unique()
severity_counts = nomortality_filtered_df['Severity'].value_counts()

print("Unique Severity Values:", unique_severity)

Unique Severity Values: ['Unknown' 'Mild/Moderate']


In [19]:
#Classify all others as lived
# Step 1: Rename entries in 'ED Disposition' that are not 'Died' to 'Lived'
nomortality_filtered_df['ED Disposition'] = nomortality_filtered_df['ED Disposition'].apply(
   lambda x: 'Lived' if x != 'Died' else x
)

# Step 2: Rename entries in 'Hospital Disposition' that are not 'Died' to 'Lived'
nomortality_filtered_df['Hospital Disposition'] = nomortality_filtered_df['Hospital Disposition'].apply(
    lambda x: 'Lived' if x != 'Died' else x
)

nomortality_filtered_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nomortality_filtered_df['ED Disposition'] = nomortality_filtered_df['ED Disposition'].apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nomortality_filtered_df['Hospital Disposition'] = nomortality_filtered_df['Hospital Disposition'].apply(


Unnamed: 0,PcrKey,ED Disposition,Hospital Disposition,Gender,Age,Race,niss,Severity
355,225614872.0,Lived,Lived,Female,57.0,White,,Unknown
563,225615463.0,Lived,Lived,Male,57.0,Black or African American,,Unknown
579,225615523.0,Lived,Lived,Female,40.0,White,,Unknown
747,225616013.0,Died,Died,Female,79.0,Black or African American,,Unknown
883,225616415.0,Lived,Lived,Female,39.0,Black or African American,,Unknown
...,...,...,...,...,...,...,...,...
54759080,289270752.0,Lived,Lived,Male,32.0,White,,Unknown
54759095,289270768.0,Lived,Lived,Female,76.0,White,,Unknown
54759096,289270769.0,Lived,Lived,Female,64.0,White,,Unknown
54759102,289270775.0,Lived,Lived,Male,78.0,White,,Unknown


In [20]:
# Step 1: Create the 'Final Mortality' column
nomortality_filtered_df['Final Mortality'] = nomortality_filtered_df.apply(
    lambda row: 'Died' if row['ED Disposition'] == 'Died' or row['Hospital Disposition'] == 'Died' else 'Lived',
    axis=1
)

# Display the updated DataFrame to confirm the changes
print(nomortality_filtered_df[['ED Disposition', 'Hospital Disposition', 'Final Mortality']].head())

    ED Disposition Hospital Disposition Final Mortality
355          Lived                Lived           Lived
563          Lived                Lived           Lived
579          Lived                Lived           Lived
747           Died                 Died            Died
883          Lived                Lived           Lived


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nomortality_filtered_df['Final Mortality'] = nomortality_filtered_df.apply(


In [21]:
#Calculating how many entries dropped out at different stages
#filtering out the patients where no NISS score was calculated bc we did not use based on inclusion criteria and the file was too big (ran inclusion criteria before NISS score calculation)
#these are from the population selection file which was filtered from ICD codes, age, destination, transportation, method, injury burden

nan_count = nomortality_filtered_df['niss'].isna().sum()
print(f"Number of NaN entries in the 'niss' column: {nan_count}")

# Step 2: Create a new DataFrame excluding entries with NaN in the 'niss' column
included_df = nomortality_filtered_df[nomortality_filtered_df['niss'].notna()]
included_df

Number of NaN entries in the 'niss' column: 611263


Unnamed: 0,PcrKey,ED Disposition,Hospital Disposition,Gender,Age,Race,niss,Severity,Final Mortality
25323,225786099.0,Lived,Lived,Male,66.0,Black or African American,0.0,Mild/Moderate,Lived
30270,225800981.0,Lived,Lived,Male,30.0,Black or African American,0.0,Mild/Moderate,Lived
55798,225968094.0,Lived,Lived,Male,70.0,White,0.0,Mild/Moderate,Lived
79403,226135057.0,Lived,Lived,Male,20.0,7701003,0.0,Mild/Moderate,Lived
79414,226135240.0,Lived,Lived,Male,41.0,White,0.0,Mild/Moderate,Lived
...,...,...,...,...,...,...,...,...,...
54736162,289247665.0,Lived,Died,Female,64.0,White,0.0,Mild/Moderate,Died
54740098,289251627.0,Lived,Lived,Male,62.0,Hispanic or Latino,0.0,Mild/Moderate,Lived
54741311,289252841.0,Lived,Lived,Female,37.0,Black or African American,0.0,Mild/Moderate,Lived
54748564,289260113.0,Lived,Lived,Male,42.0,Hispanic or Latino,0.0,Mild/Moderate,Lived


In [22]:
#final mortality outcome Step 1: Create the 'Final Mortality' column

morality_race_gender_mapped_df['Final Mortality'] = morality_race_gender_mapped_df.apply(
    lambda row: 'Died' if row['ED Disposition'] == 'Died' or row['Hospital Disposition'] == 'Died' else None,
    axis=1
)

# Display the updated DataFrame to confirm the changes
print(morality_race_gender_mapped_df[['ED Disposition', 'Hospital Disposition', 'Final Mortality']].head())



  ED Disposition Hospital Disposition Final Mortality
0   Not Recorded         Not Recorded            None
1   Not Recorded         Not Recorded            None
2   Not Recorded         Not Recorded            None
3   Not Recorded         Not Recorded            None
4   Not Recorded         Not Recorded            None


In [29]:
included_df.to_csv("/Volumes/Research/GoldenHourData/InclusionCriteria/demographics_with_NISS.csv", index=False)

In [None]:
#checking why tableone stratification throws an error– no more severe cases left due to missing information
unique_severity = included_df['Severity'].unique()
severity_counts = included_df['Severity'].value_counts()
print("Unique Severity Values:", unique_severity)

In [None]:
#table one

categorical_vars = ['Gender', 'Race', 'Severity', 'Final Mortality']  # Replace with your actual categorical variables
continuous_vars = ['Age', 'niss']               # Replace with your actual continuous variables

# Create the Table 1
table_one = TableOne(included_df, 
                     columns=continuous_vars + categorical_vars, 
                     categorical=categorical_vars, 
                     nonnormal=continuous_vars,  # Specify if any continuous variables are non-normal
                     pval=True,
                    groupby= 'Severity')                  # Include p-values

# Display the Table 1
print(table_one)

In [None]:
tableone_age_gender_race_NISS_df.to_csv('/Volumes/Research/GoldenHourData/InclusionCriteria/TableOneData', index=False)