In [2]:
import joblib

# Data Processing
import pandas as pd
import numpy as np

# Modelling
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, ConfusionMatrixDisplay
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from scipy.stats import randint
import matplotlib.pyplot as plt

# Tree Visualisation
from sklearn.tree import export_graphviz
from IPython.display import Image
import graphviz
from sklearn.metrics import f1_score
from sklearn import tree

In [3]:
# Load the previously saved estimator from disk; it is used further down via `rcf.predict`.
# NOTE(review): presumably a fitted RandomForestClassifier (see imports) — confirm.
# SECURITY: joblib.load unpickles the file, which can execute arbitrary code.
# Only load 'randomfc.pkl' if it comes from a trusted source.
rcf=joblib.load('randomfc.pkl')

In [4]:
# Read the accident-zone table (one row per location) into a DataFrame.
df=pd.read_csv('mumbai_accidental_zones.csv')

In [5]:
# Show the freshly loaded table as the cell output.
df

Unnamed: 0,Latitude,Longitude,Location Name,Number of Accidents,Number of Casualties
0,19.070492,72.89568,Colaba,64,9.0
1,19.139944,72.821071,Dadar,33,1.0
2,19.074142,72.834193,Andheri,59,65.09
3,19.134322,72.825798,Bandra,12,17.49
4,19.120213,72.890733,Chembur,87,103.14
5,19.131142,72.844253,Goregaon,37,47.93
6,19.060648,72.84899,Kurla,13,19.31
7,19.058694,72.834087,Malad,16,18.48
8,18.998331,72.851004,Mulund,91,114.13
9,19.038346,72.862792,Santacruz,51,69.08


In [6]:
# Rename the casualties column to 'No. of Fatalities'.
# The original passed inplace="True" (a string), which only worked because a
# non-empty string is truthy. Reassigning the result of rename() avoids the
# mistyped flag and makes the cell safe to re-run (a missing key is ignored).
# NOTE(review): presumably the new name matches a feature name the loaded model expects — confirm.
df = df.rename(columns={'Number of Casualties': 'No. of Fatalities'})

In [7]:
# Rename the accident-count column to 'No. of Accident'.
# The original passed inplace="True" (a string), which only worked because a
# non-empty string is truthy. Reassigning the result of rename() avoids the
# mistyped flag and makes the cell safe to re-run (a missing key is ignored).
# NOTE(review): presumably the new name matches a feature name the loaded model expects — confirm.
df = df.rename(columns={'Number of Accidents': 'No. of Accident'})

In [8]:
# Show the table again to check the renamed column headers.
df

Unnamed: 0,Latitude,Longitude,Location Name,No. of Accident,No. of Fatalities
0,19.070492,72.89568,Colaba,64,9.0
1,19.139944,72.821071,Dadar,33,1.0
2,19.074142,72.834193,Andheri,59,65.09
3,19.134322,72.825798,Bandra,12,17.49
4,19.120213,72.890733,Chembur,87,103.14
5,19.131142,72.844253,Goregaon,37,47.93
6,19.060648,72.84899,Kurla,13,19.31
7,19.058694,72.834087,Malad,16,18.48
8,18.998331,72.851004,Mulund,91,114.13
9,19.038346,72.862792,Santacruz,51,69.08


In [9]:
# Feature matrix for the classifier: the two count columns, in this order.
feature_columns = ["No. of Accident", "No. of Fatalities"]
X = df[feature_columns]

In [10]:
# Run the loaded model on the two-column feature frame; one prediction per row of X.
y_pred=rcf.predict(X)

In [11]:
# Show the raw predictions as the cell output.
y_pred

array([2., 1., 3., 1., 3., 3., 2., 2., 3., 3., 3., 2., 2., 1., 3., 2., 3.,
       1., 3., 1., 3., 2., 3., 3., 3., 3., 3., 2., 3., 3., 2., 2., 2., 2.,
       3., 1., 3., 2., 2., 1., 1., 2., 2., 2., 3., 1., 1., 2., 3., 1.])

In [12]:
# Attach the model's predictions to the table as a new 'Severity' column.
df['Severity']=y_pred

In [13]:
# Persist the full scored table; the DataFrame index is not written.
df.to_csv(
    path_or_buf='mumbai_severity.csv',
    index=False,
)

In [14]:
# Split the scored rows into three severity bands using half-open intervals,
# so every row lands in exactly one group:
#   group1: Severity < 2
#   group2: 2 <= Severity < 3
#   group3: Severity >= 3
# The original group1 mask was `(Severity <= 1) & (Severity < 2)`: the second
# clause was redundant, and any value strictly between 1 and 2 would have fallen
# into no group. For the 1/2/3 labels shown in the prediction output the
# selected rows are unchanged.
severity = df['Severity']
group1 = df[severity < 2]
group2 = df[(severity >= 2) & (severity < 3)]
group3 = df[severity >= 3]

In [15]:
# Show the rows selected into the first severity group.
group1

Unnamed: 0,Latitude,Longitude,Location Name,No. of Accident,No. of Fatalities,Severity
1,19.139944,72.821071,Dadar,33,1.0,1.0
3,19.134322,72.825798,Bandra,12,17.49,1.0
13,19.056077,72.834701,Thane,11,13.0,1.0
17,19.06378,72.833042,Juhu,36,4.0,1.0
19,19.051755,72.921762,Parel,6,6.87,1.0
35,19.144412,72.928342,Byculla,9,6.0,1.0
39,19.133102,72.856777,Kanjurmarg,17,6.0,1.0
40,19.12389,72.897601,Kokilaben Hospital,72,1.0,1.0
45,19.040686,72.841116,Charkop,99,4.0,1.0
46,18.988788,72.833651,Malad West,30,2.0,1.0


In [16]:
# Show the rows selected into the second severity group.
group2

Unnamed: 0,Latitude,Longitude,Location Name,No. of Accident,No. of Fatalities,Severity
0,19.070492,72.89568,Colaba,64,9.0,2.0
6,19.060648,72.84899,Kurla,13,19.31,2.0
7,19.058694,72.834087,Malad,16,18.48,2.0
11,19.08573,72.848059,Versova,72,14.0,2.0
12,19.006215,72.83089,Worli,86,8.0,2.0
15,19.067791,72.888724,Borivali,70,14.0,2.0
21,19.069659,72.839627,Bhayandar,13,18.01,2.0
27,19.056425,72.938067,Kharghar,93,9.0,2.0
30,19.165554,72.855615,Khar,46,11.0,2.0
31,19.079245,72.867174,Marine Lines,78,12.0,2.0


In [17]:
# Show the rows selected into the third severity group.
group3

Unnamed: 0,Latitude,Longitude,Location Name,No. of Accident,No. of Fatalities,Severity
2,19.074142,72.834193,Andheri,59,65.09,3.0
4,19.120213,72.890733,Chembur,87,103.14,3.0
5,19.131142,72.844253,Goregaon,37,47.93,3.0
8,18.998331,72.851004,Mulund,91,114.13,3.0
9,19.038346,72.862792,Santacruz,51,69.08,3.0
10,19.048558,72.863947,Vashi,21,31.47,3.0
14,19.059664,72.828893,Powai,92,127.77,3.0
16,19.039617,72.845415,Navi Mumbai,92,117.46,3.0
18,19.053597,72.923719,Lower Parel,87,118.19,3.0
20,19.074319,72.870999,Vile Parle,50,61.5,3.0


In [18]:
# Write the first severity group to its own CSV; the DataFrame index is not written.
group1.to_csv(
    path_or_buf='mumbai_severity1.csv',
    index=False,
)

In [19]:
# Write the second severity group to its own CSV; the DataFrame index is not written.
group2.to_csv(
    path_or_buf='mumbai_severity2.csv',
    index=False,
)

In [20]:
# Write the third severity group to its own CSV; the DataFrame index is not written.
group3.to_csv(
    path_or_buf='mumbai_severity3.csv',
    index=False,
)

In [21]:
# Show the full table once more, now including the 'Severity' column.
df

Unnamed: 0,Latitude,Longitude,Location Name,No. of Accident,No. of Fatalities,Severity
0,19.070492,72.89568,Colaba,64,9.0,2.0
1,19.139944,72.821071,Dadar,33,1.0,1.0
2,19.074142,72.834193,Andheri,59,65.09,3.0
3,19.134322,72.825798,Bandra,12,17.49,1.0
4,19.120213,72.890733,Chembur,87,103.14,3.0
5,19.131142,72.844253,Goregaon,37,47.93,3.0
6,19.060648,72.84899,Kurla,13,19.31,2.0
7,19.058694,72.834087,Malad,16,18.48,2.0
8,18.998331,72.851004,Mulund,91,114.13,3.0
9,19.038346,72.862792,Santacruz,51,69.08,3.0
