In [151]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression, LogisticRegression
import matplotlib.pyplot as plt
import pickle
import joblib
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import seaborn as sn
from sklearn import datasets
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
import cv2
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from scipy.stats import randint

# Read the training data from the working directory and preview the first rows.
df = pd.read_csv(filepath_or_buffer="train.csv")
df.head()

Unnamed: 0,Severity,Safety_Score,Days_Since_Inspection,Total_Safety_Complaints,Control_Metric,Turbulence_In_gforces,Cabin_Temperature,Accident_Type_Code,Max_Elevation,Violations,Adverse_Weather_Metric,Accident_ID
0,Minor_Damage_And_Injuries,49.223744,14,22,71.285324,0.272118,78.04,2,31335.476824,3,0.424352,7570
1,Minor_Damage_And_Injuries,62.465753,10,27,72.288058,0.423939,84.54,2,26024.711057,2,0.35235,12128
2,Significant_Damage_And_Fatalities,63.059361,13,16,66.362808,0.322604,78.86,7,39269.053927,3,0.003364,2181
3,Significant_Damage_And_Serious_Injuries,48.082192,11,9,74.703737,0.337029,81.79,3,42771.4992,1,0.211728,5946
4,Significant_Damage_And_Fatalities,26.484018,13,25,47.948952,0.54114,77.16,3,35509.228515,2,0.176883,9054


In [152]:
# Remove the Accident_ID column from the training frame.
# errors="ignore" keeps this cell re-runnable: `df` is rebound to its own
# output here, so a second execution would otherwise raise KeyError because
# the column has already been removed.
df = df.drop(columns=["Accident_ID"], errors="ignore")
df.head()

Unnamed: 0,Severity,Safety_Score,Days_Since_Inspection,Total_Safety_Complaints,Control_Metric,Turbulence_In_gforces,Cabin_Temperature,Accident_Type_Code,Max_Elevation,Violations,Adverse_Weather_Metric
0,Minor_Damage_And_Injuries,49.223744,14,22,71.285324,0.272118,78.04,2,31335.476824,3,0.424352
1,Minor_Damage_And_Injuries,62.465753,10,27,72.288058,0.423939,84.54,2,26024.711057,2,0.35235
2,Significant_Damage_And_Fatalities,63.059361,13,16,66.362808,0.322604,78.86,7,39269.053927,3,0.003364
3,Significant_Damage_And_Serious_Injuries,48.082192,11,9,74.703737,0.337029,81.79,3,42771.4992,1,0.211728
4,Significant_Damage_And_Fatalities,26.484018,13,25,47.948952,0.54114,77.16,3,35509.228515,2,0.176883


In [153]:
# Severity labels in a fixed order. A label's position in this list is its
# integer class code, so the list doubles as the code -> label lookup.
severity = [
    'Minor_Damage_And_Injuries',
    'Significant_Damage_And_Fatalities',
    'Significant_Damage_And_Serious_Injuries',
    'Highly_Fatal_And_Damaging',
]

# Label -> integer code lookup derived from the list order. Using enumerate
# avoids a hand-maintained counter and leaves no scratch variables behind in
# the notebook namespace.
severity_index = {label: code for code, label in enumerate(severity)}

In [154]:
# Target vector: each Severity label is replaced by its integer code from
# severity_index (an unknown label raises KeyError).
y = df["Severity"].map(lambda label: severity_index[label])
y.head()

0    0
1    0
2    1
3    2
4    1
Name: Severity, dtype: int64

In [155]:
# Feature matrix: every column of df except the Severity target.
X = df.drop(columns=["Severity"])
X.head()

Unnamed: 0,Safety_Score,Days_Since_Inspection,Total_Safety_Complaints,Control_Metric,Turbulence_In_gforces,Cabin_Temperature,Accident_Type_Code,Max_Elevation,Violations,Adverse_Weather_Metric
0,49.223744,14,22,71.285324,0.272118,78.04,2,31335.476824,3,0.424352
1,62.465753,10,27,72.288058,0.423939,84.54,2,26024.711057,2,0.35235
2,63.059361,13,16,66.362808,0.322604,78.86,7,39269.053927,3,0.003364
3,48.082192,11,9,74.703737,0.337029,81.79,3,42771.4992,1,0.211728
4,26.484018,13,25,47.948952,0.54114,77.16,3,35509.228515,2,0.176883


In [156]:
# Hold out 20% of the rows for evaluation. A fixed random_state makes the
# shuffle, and therefore the split, identical on every run.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [157]:
# Random forest with fixed hyperparameters. random_state pins the estimator's
# internal randomness so that, for a given training split, repeated runs build
# the same forest.
model = RandomForestClassifier(
    criterion='entropy',
    max_depth=15,
    max_features=7,
    min_samples_leaf=3,
    min_samples_split=2,
    n_estimators=22,
    random_state=42,
)
model.fit(x_train, y_train)
# Score the fitted model on the held-out split.
model.score(x_test, y_test)

0.9435

In [158]:
# `result` starts as an empty frame; the test set is read from the working
# directory and previewed.
result = pd.DataFrame()
test = pd.read_csv(filepath_or_buffer="test.csv")
test.head()

Unnamed: 0,Safety_Score,Days_Since_Inspection,Total_Safety_Complaints,Control_Metric,Turbulence_In_gforces,Cabin_Temperature,Accident_Type_Code,Max_Elevation,Violations,Adverse_Weather_Metric,Accident_ID
0,19.497717,16,6,72.151322,0.388959,78.32,4,37949.724386,2,0.069692,1
1,58.173516,15,3,64.585232,0.250841,78.6,7,30194.805567,2,0.002777,10
2,33.287671,15,3,64.721969,0.336669,86.96,6,17572.925484,1,0.004316,14
3,3.287671,21,5,66.362808,0.421775,80.86,3,40209.186341,2,0.19999,17
4,10.86758,18,2,56.107566,0.313228,79.22,2,35495.525408,2,0.483696,21


In [159]:
# Copy the identifiers into `result`, then remove them from the test frame.
result["Accident_ID"] = test["Accident_ID"]
test = test.drop(columns=["Accident_ID"])
# Count missing values per column of `result`.
result.isnull().sum()

Accident_ID    0
dtype: int64

In [162]:
# Display the label -> integer code mapping.
severity_index

{'Minor_Damage_And_Injuries': 0,
 'Significant_Damage_And_Fatalities': 1,
 'Significant_Damage_And_Serious_Injuries': 2,
 'Highly_Fatal_And_Damaging': 3}

In [164]:
# Predict a severity code for every test row, then translate each code back to
# its label by indexing into the `severity` list.
result["Severity"] = model.predict(test)
result["Severity"] = result["Severity"].map(lambda code: severity[code])
result.head()

Unnamed: 0,Accident_ID,Severity
0,1,Highly_Fatal_And_Damaging
1,10,Significant_Damage_And_Fatalities
2,14,Significant_Damage_And_Serious_Injuries
3,17,Highly_Fatal_And_Damaging
4,21,Significant_Damage_And_Fatalities


In [165]:
# Write the result frame to submission.csv without the row index.
result.to_csv(path_or_buf="submission.csv", index=False)