In [11]:
import pandas as pd
import numpy as np
import random
import math
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

#https://towardsdatascience.com/logistic-regression-a-simplified-approach-using-python-c4bc81a87c31
#https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html#examples-using-sklearn-linear-model-logisticregression

# Load the dataset from a CSV in the current working directory.
# NOTE(review): the file name suggests the data is already cleaned and
# one-hot encoded by an earlier step — confirm against the preprocessing notebook.
data = pd.read_csv('processedOneHotData.csv')
# Show (rows, columns) as the cell output to sanity-check the load.
data.shape

(9374, 55)

In [19]:
# Column the model is trained to predict.
targetCol = 'Number_of_Casualties'

# Columns treated as not important to this analysis. The target is listed
# last so that it is dropped from the feature matrix together with them.
excluded = [
    'Accident_Index',
    'Location_Northing_OSGR',
    'Location_Easting_OSGR',
    'Local_Authority_(Highway)',
    'Local_Authority_(District)',
    'LSOA_of_Accident_Location',
    '1st_Road_Number',
    '2nd_Road_Number',
    targetCol,
]

In [20]:
# Partition the data into test and training sets.
# Features are every column except the excluded ones; the label is targetCol.
x = data.drop(columns=excluded)
y = data[targetCol]

# Fixed seed: the shuffle in train_test_split and the saga solver are both
# randomized, so without it the accuracy below changes on every run.
SEED = 42
x_trn, x_tst, y_trn, y_tst = train_test_split(
    x, y, test_size=0.25, random_state=SEED
)

# Create the regression algorithm.
# saga for fast multinomial/multiclass data.
# NOTE(review): scikit-learn documents that saga's fast convergence is only
# guaranteed when features have approximately the same scale; consider
# standardizing the features if the fit hits max_iter — TODO confirm.
model = LogisticRegression(max_iter=9000, solver='saga', random_state=SEED)
model.fit(x_trn, y_trn)

# Test accuracy: fraction of held-out rows whose predicted class equals the label.
predic = model.predict(x_tst)
accur = accuracy_score(y_tst, predic)
accur

0.882679180887372

In [14]:
# Depending on the number of possible classes/labels, the logistic
# regression fits one formula (one set of coefficients) per possible answer.
# When making a prediction it evaluates all of the formulas, and the class
# whose predicted probability is highest (closest to 1) is chosen as the label.

In [42]:
# Class labels the fitted classifier knows about (the distinct target values
# seen during fit).
model.classes_

array([1, 2, 3, 6])

In [43]:
# Learned feature weights of the fitted model; for this multiclass fit the
# array has one row per class and one column per feature.
model.coef_

array([[-1.91535635e-03,  2.69761086e-03,  2.18940890e-03,
         1.55554317e-02, -2.20151604e-03, -1.12264732e-02,
        -5.62952690e-03,  1.43514344e-02,  1.46032300e-04,
        -1.69547680e-02, -1.04312439e-01,  3.40699461e-04,
        -3.98945536e-02, -1.59433270e-03, -3.19669600e-03,
         3.81213806e-04,  1.09390325e-03,  4.26783750e-03,
        -4.66866122e-05, -1.19324958e-04],
       [ 1.49232857e-03, -5.75915062e-03, -1.80149240e-04,
        -3.64341389e-03, -8.31431860e-04,  4.93297976e-03,
        -1.88123911e-02, -1.34239683e-02, -1.95916349e-04,
        -9.31127959e-03,  2.45230883e-02, -4.51327178e-05,
         1.49355343e-02,  5.33782390e-04,  6.84674097e-04,
         1.51636627e-03, -5.20574028e-04, -2.60885127e-04,
        -2.24715487e-04,  3.33185657e-05],
       [ 1.16176854e-03, -9.62450182e-02, -7.23562921e-03,
         7.64972808e-03,  2.06508992e-03, -1.43941695e-02,
        -6.80797572e-03, -6.83936210e-02,  4.34143503e-05,
         1.62273541e-01,  1.9

In [44]:
# Learned intercept (bias) terms of the fitted model, one per class.
model.intercept_

array([ 4.73903189e-05, -1.09692254e-04, -1.88118120e-03,  1.94348314e-03])