# Task for Today  

***

## Automobile Accident Severity Prediction  

Given *data about accidents in the US*, let's try to predict the **severity** of a given accident.  
  
We will use a TensorFlow artificial neural network (ANN) to make our predictions.

# Getting Started

In [None]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import tensorflow as tf

In [None]:
# Read only the first 400,000 rows of the accidents CSV.
data = pd.read_csv(
    '../input/us-accidents/US_Accidents_June20.csv',
    nrows=400000,
)

In [None]:
# Show the freshly loaded DataFrame as the cell output.
data

In [None]:
# Print a summary of the columns: names, dtypes, and non-null counts.
data.info()

# Missing Values

In [None]:
# Fraction of missing values in each column.
data.isna().mean()

In [None]:
# Columns removed outright instead of being filtered row by row
# (presumably because they are mostly missing — verify against the
# per-column missing fractions).
null_columns = [
    'End_Lat',
    'End_Lng',
    'Number',
    'Wind_Chill(F)',
    'Precipitation(in)',
]

data = data.drop(columns=null_columns)

In [None]:
# Number of missing values still present in each column.
data.isna().sum()

In [None]:
# Discard every row that still has a missing value, then renumber the
# index from 0 so the removed rows leave no gaps.
data = data.dropna(axis=0)
data = data.reset_index(drop=True)

In [None]:
# Sanity check: fold the per-column missing counts into one grand total.
total_missing = data.isna().sum().sum()
print("Total missing values:", total_missing)

In [None]:
# Show the DataFrame after rows with missing values were removed.
data

# Unnecessary Columns

In [None]:
# Number of distinct values in every object-typed column.
{
    name: len(data[name].unique())
    for name in data.columns
    if data[name].dtype == 'object'
}

In [None]:
# Columns excluded from the feature set.
unneeded_columns = [
    'ID',
    'Description',
    'Street',
    'City',
    'Zipcode',
    'Country',
]

data = data.drop(columns=unneeded_columns)

In [None]:
# Show the DataFrame after the unneeded columns were dropped.
data

In [None]:
def get_years(df, column):
    """Return the year part (first four characters) of each value in a column.

    Args:
        df: DataFrame holding the column.
        column: Name of a column of timestamp strings; the slice assumes the
            year occupies characters 0-3 (e.g. ``'2019-06-01 ...'``).

    Returns:
        A Series of year strings aligned with ``df``'s index. Missing values
        are passed through as NaN instead of raising.
    """
    # The vectorized ``.str`` accessor slices every element without a
    # Python-level lambda and skips missing values rather than failing.
    return df[column].str[0:4]

def get_months(df, column):
    """Return the month part (characters 5-6) of each value in a column.

    Args:
        df: DataFrame holding the column.
        column: Name of a column of timestamp strings; the slice assumes the
            month occupies characters 5-6 (e.g. ``'2019-06-01 ...'``).

    Returns:
        A Series of two-character month strings aligned with ``df``'s index.
        Missing values are passed through as NaN instead of raising.
    """
    # The vectorized ``.str`` accessor slices every element without a
    # Python-level lambda and skips missing values rather than failing.
    return df[column].str[5:7]

In [None]:
# For each timestamp column, derive '<column>_Month' and '<column>_Year'
# columns from its text, then drop the raw timestamp columns.
timestamp_columns = ['Start_Time', 'End_Time', 'Weather_Timestamp']

for timestamp_column in timestamp_columns:
    data[timestamp_column + '_Month'] = get_months(data, timestamp_column)
    data[timestamp_column + '_Year'] = get_years(data, timestamp_column)

data = data.drop(columns=timestamp_columns)

In [None]:
# Show the DataFrame with the derived month/year columns.
data

# Encoding

In [None]:
def onehot_encode(df, columns, prefixes):
    """Replace categorical columns with one-hot indicator columns.

    Args:
        df: Source DataFrame; it is not modified.
        columns: Names of the columns to encode.
        prefixes: Prefix for each encoded column's indicator names, paired
            positionally with ``columns``.

    Returns:
        A new DataFrame holding the untouched columns in their original
        order, followed by the indicator columns of each encoded column in
        the order given.
    """
    pairs = list(zip(columns, prefixes))
    indicator_frames = [
        pd.get_dummies(df[name], prefix=prefix) for name, prefix in pairs
    ]
    untouched = df.drop([name for name, _ in pairs], axis=1)
    return pd.concat([untouched, *indicator_frames], axis=1)

In [None]:
# Number of distinct values in every remaining object-typed column.
{
    name: len(data[name].unique())
    for name in data.columns
    if data[name].dtype == 'object'
}

In [None]:
# Each categorical column to one-hot encode, mapped to the short prefix
# used for its generated indicator columns.
onehot_prefixes = {
    'Side': 'SI',
    'County': 'CO',
    'State': 'ST',
    'Timezone': 'TZ',
    'Airport_Code': 'AC',
    'Wind_Direction': 'WD',
    'Weather_Condition': 'WC',
}

data = onehot_encode(
    data,
    columns=list(onehot_prefixes.keys()),
    prefixes=list(onehot_prefixes.values())
)

In [None]:
# Show the DataFrame after one-hot encoding.
data

In [None]:
def get_binary_column(df, column, positive_value=None):
    """Encode a two-state column as 0/1 integers.

    Args:
        df: DataFrame holding the column.
        column: Name of the column to encode.
        positive_value: Value mapped to 1; every other value maps to 0.
            When omitted, ``'MapQuest'`` is used for the ``'Source'`` column
            and ``'Day'`` for any other column, which preserves the earlier
            hard-coded behavior.

    Returns:
        An int64 Series of ones and zeros aligned with ``df``'s index.
    """
    if positive_value is None:
        positive_value = 'MapQuest' if column == 'Source' else 'Day'
    # One vectorized comparison replaces the per-element lambda.
    return (df[column] == positive_value).astype('int64')

In [None]:
# Overwrite each two-state column with its 0/1 encoding.
binary_columns = [
    'Source',
    'Sunrise_Sunset',
    'Civil_Twilight',
    'Nautical_Twilight',
    'Astronomical_Twilight',
]

for binary_column in binary_columns:
    data[binary_column] = get_binary_column(data, binary_column)

In [None]:
# Show the DataFrame after the binary columns were encoded.
data

# Splitting/Scaling

In [None]:
# Separate the feature columns (X) from the 'Severity' target (y).
X = data.drop(columns='Severity').copy()
y = data['Severity'].copy()

In [None]:
# List the distinct target values.
y.unique()

In [None]:
# Shift every label down by one. The model's 4-unit softmax with sparse
# categorical cross-entropy expects class indices that start at 0
# (assumes the smallest severity is 1 — confirm with y.unique()).
y = y.sub(1)

In [None]:
# Cast every feature column to float. The builtin ``float`` is used because
# the ``np.float`` alias (which was just the builtin) was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
X = X.astype(float)

In [None]:
scaler = StandardScaler()

X = scaler.fit_transform(X)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=100)

# Training

In [None]:
# Show (rows, columns) of the feature matrix; the column count is used as
# the model's input width.
X.shape

In [None]:
# Feed-forward classifier: one input per feature column, two 64-unit ReLU
# hidden layers, and a 4-way softmax output (one unit per class).
inputs = tf.keras.Input(shape=(X.shape[1],))
x = tf.keras.layers.Dense(64, activation='relu')(inputs)
x = tf.keras.layers.Dense(64, activation='relu')(x)
outputs = tf.keras.layers.Dense(4, activation='softmax')(x)

model = tf.keras.Model(inputs, outputs)

# The sparse variant of categorical cross-entropy takes integer class labels
# directly, so y does not need to be one-hot encoded.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

batch_size = 32
epochs = 20

history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,  # hold out 20% of the training rows for validation
    batch_size=batch_size,
    epochs=epochs,
    callbacks=[
        # With its default patience of 10 this callback would practically
        # never fire, because EarlyStopping below halts training after 3
        # stale epochs. A patience of 1 lets the learning rate drop before
        # training is stopped.
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            patience=1
        ),
        # Stop once val_loss has not improved for 3 epochs and roll back to
        # the best weights seen.
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=3,
            restore_best_weights=True
        )
    ]
)

# Results

In [None]:
# evaluate() returns the loss followed by the compiled metrics; index 1 is
# the accuracy.
test_results = model.evaluate(X_test, y_test, verbose=0)
print("Test Accuracy:", test_results[1])

# Data Every Day  

This notebook is featured on Data Every Day, a YouTube series where I train models on a new dataset each day.  

***

Check it out!  
https://youtu.be/hB6Wx7HX0c4