## Project: Estimation of accuracy of MOI for MVCs 
Created by: Thomas Hartka, MD, MSDS  
Date created: 12/14/21  
  
This notebook combines the data from NASS and CISS. 

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
import itertools

## Read in NASS and CISS data

In [2]:
# Load each source file and tag every row with the dataset it came from
nass = pd.read_csv("../Data/NASS/NASS-2000_2015-unfiltered.csv").assign(dataset="NASS")
ciss = pd.read_csv("../Data/CISS/CISS-2017_2019-unfiltered.csv").assign(dataset="CISS")

## Combine datasets

In [3]:
# Stack the NASS and CISS rows into a single frame with a fresh 0..n-1 index.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and
# removed in pandas 2.0. Columns present in only one of the two frames are
# kept and filled with NaN for the rows of the other frame.
nass_ciss = pd.concat([nass, ciss], ignore_index=True)

In [4]:
# Split the combined data once at the ISS 16 threshold so each subset is
# filtered a single time and reused for both the raw and weighted counts
iss_ge16 = nass_ciss[nass_ciss.iss >= 16]
iss_lt16 = nass_ciss[nass_ciss.iss < 16]

# all cases: raw row count and sum of case weights
print("Total cases: ", len(nass_ciss))
print("Total cases (weighted): ", nass_ciss.casewgt.sum())

# cases with ISS of 16 or more
print("ISS>=16: ", len(iss_ge16))
print("ISS>=16 (weighted): ", iss_ge16.casewgt.sum())

# cases with ISS below 16
print("ISS<16: ", len(iss_lt16))
print("ISS<16 (weighted): ", iss_lt16.casewgt.sum())

Total cases:  150683
Total cases (weighted):  73423188.70586148
ISS>=16:  15335
ISS>=16 (weighted):  1878458.3051364317
ISS<16:  135348
ISS<16 (weighted):  71544730.40072504


## Make sex binary (male=0, female=1)

In [5]:
# Recode sex to binary: raw codes >= 2 become 1, anything lower becomes
# code - 1 (so a raw 1 becomes 0); missing values stay missing.
# Vectorized with np.where instead of a row-wise apply(axis=1), which calls a
# Python lambda once per row.
# NOTE: not idempotent - re-running this cell on already recoded values shifts
# them again (1 -> 0, 0 -> -1), so run it once per freshly loaded frame.
# NOTE(review): every raw code >= 2 is mapped to 1 - confirm that no
# "unknown" sentinel codes exist in the input data.
nass_ciss['sex'] = np.where(nass_ciss['sex'] >= 2, 1, nass_ciss['sex'] - 1)

## Make variable for front row (versus all other rows)

In [6]:
# front_row is 0 when seat_row >= 2 and 1 otherwise. A missing seat_row fails
# the comparison and is therefore counted as front row (1).
# Computed column-wise instead of with a row-wise apply(axis=1).
nass_ciss['front_row'] = (~(nass_ciss['seat_row'] >= 2)).astype('int64')

## Add outcome flags 

In [7]:
# Binary outcome flags: 1 when the threshold is met, 0 when it is not met or
# the underlying value is missing. Computed column-wise rather than with one
# row-wise apply(axis=1) per flag.
body_regions = ['mais_head', 'mais_thorax', 'mais_abd']

# AIS 2+ and AIS 3+: one flag per body region (mais_head2, mais_thorax2,
# mais_abd2, then the *3 equivalents) plus a combined flag (mais2 / mais3)
# that is 1 when ANY of these three regions meets the threshold. Only head,
# thorax and abdomen feed the combined flags.
for threshold in (2, 3):
    meets = nass_ciss[body_regions] >= threshold
    for region in body_regions:
        nass_ciss[f'{region}{threshold}'] = meets[region].astype('int64')
    nass_ciss[f'mais{threshold}'] = meets.any(axis=1).astype('int64')

# ISS 24+
nass_ciss['iss24'] = (nass_ciss['iss'] >= 24).astype('int64')

## Select columns needed for analysis

In [8]:
# Columns retained for the downstream analysis, in output order.
# NOTE(review): 'iss16' is not created anywhere in this notebook, so it is
# presumably already present in the input CSVs - confirm. Several flags built
# above (e.g. 'iss24', 'mais2') are not in this list and are dropped.
variables = [
    'age',
    'prop_restraint',
    'any_restraint',
    'abdeply',
    'dvtotal',
    'splimit',
    'multicoll',
    'pdof_nearside',
    'rolled',
    'roll_turns',
    'int18',
    'int12occ',
    'ejection',
    'other_death',
    'entrapment',
    'casewgt',
    'dataset',
    'died',
    'mais',
    'mais3',
    'year',
    'iss16',
]

In [9]:
# Keep only the analysis columns, in the order given by `variables`
# (raises KeyError if any listed column is absent from the frame)
nass_ciss = nass_ciss.loc[:, variables]

## Store data

In [10]:
# Write the combined table to disk without the row index
output_path = "../Data/NASS_CISS-2000_2019-unfiltered.csv"
nass_ciss.to_csv(output_path, index=False)