# Predicting Location of Accidents

## Imports

In [1]:
import pandas as pd
import numpy as np
import sklearn 
from scipy import stats
import datetime as dt
#visualization packages
import matplotlib
import matplotlib.dates as md
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.figure_factory as ff
import plotly
from sklearn import preprocessing 
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split

## Load Data

In [None]:
# Read the feature-engineered dataset from disk into a DataFrame.
csv_path = 'data/df_with_feature_engineering.csv'
df = pd.read_csv(csv_path)

## Classification Model

In [None]:
# Keep only the rows that have a fips value, since fips is the prediction target.
has_fips = df['fips'].notna()
df2 = df[has_fips]

# Columns used as model inputs.
feature_columns = [
    'Severity', 'Distance(mi)', 'Temperature(F)', 'Wind_Chill(F)',
    'Humidity(%)', 'Pressure(in)', 'Visibility(mi)', 'Wind_Direction',
    'Wind_Speed(mph)', 'Precipitation(in)', 'Weather_Condition',
    'Amenity', 'Bump', 'Crossing', 'Give_Way', 'Junction', 'No_Exit',
    'Railway', 'Roundabout', 'Station', 'Stop', 'Traffic_Calming',
    'Traffic_Signal', 'Turning_Loop',
    'Sunrise_Sunset', 'Civil_Twilight', 'Nautical_Twilight',
    'Astronomical_Twilight', 'Hour', 'Duration_mins',
]
X = df2[feature_columns]

# Target: fips cast to int.
# NOTE(review): the original comment says the column loads as object dtype -- confirm.
y = df2['fips'].astype(int)

In [None]:
# Label-encode every object-typed column so the classifier receives integers.
# Values are cast to str first; presumably this turns missing entries into a
# literal string that gets its own code -- TODO confirm for the pandas version in use.
object_columns = X.select_dtypes('object').astype(str)
cat_vars = object_columns.apply(
    lambda column: preprocessing.LabelEncoder().fit_transform(column)
)

In [None]:
# Keep just the non-object columns; the encoded object columns live in
# cat_vars and the two pieces are concatenated afterwards.
X2 = X.select_dtypes(exclude='object')

In [None]:
# Put the non-object columns and the label-encoded columns side by side.
feature_parts = (X2, cat_vars)
X = pd.concat(feature_parts, axis='columns')
# Preview the first rows of the combined feature table.
X.head()

In [None]:
# Replace each missing value with the mean of its column.
# NOTE(review): the means are taken over all rows, before the train/test
# split below, so held-out rows contribute to the imputed values.
column_means = X.mean()
X = X.fillna(value=column_means)

In [None]:
# Since we have a few million rows of data, we can train on a bigger proportion
# of it (90% train / 10% test). random_state pins the shuffle so the split, and
# therefore the reported accuracy, is repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42
)

In [None]:
# Create the classifier. random_state makes the fitted forest reproducible
# across runs; n_jobs=-1 builds the trees on all available cores, which
# matters with a few million training rows.
clf = RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=42)

# Train the model using the training set
clf.fit(X_train, y_train)

# Predict the target for the held-out test set
y_pred = clf.predict(X_test)

In [None]:
# Fraction of test rows whose predicted value matches the true one.
accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)