## Importing all the required libraries

In [24]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier,ExtraTreeClassifier
from sklearn.multiclass import OneVsRestClassifier,OneVsOneClassifier 
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier,BaggingClassifier,GradientBoostingClassifier,VotingClassifier
from xgboost import XGBClassifier

### Reading the training dataset to Pandas DataFrame

In [2]:
data = pd.read_csv('train.csv')
data.head()

Unnamed: 0,Severity,Safety_Score,Days_Since_Inspection,Total_Safety_Complaints,Control_Metric,Turbulence_In_gforces,Cabin_Temperature,Accident_Type_Code,Max_Elevation,Violations,Adverse_Weather_Metric,Accident_ID
0,Minor_Damage_And_Injuries,49.223744,14,22,71.285324,0.272118,78.04,2,31335.476824,3,0.424352,7570
1,Minor_Damage_And_Injuries,62.465753,10,27,72.288058,0.423939,84.54,2,26024.711057,2,0.35235,12128
2,Significant_Damage_And_Fatalities,63.059361,13,16,66.362808,0.322604,78.86,7,39269.053927,3,0.003364,2181
3,Significant_Damage_And_Serious_Injuries,48.082192,11,9,74.703737,0.337029,81.79,3,42771.4992,1,0.211728,5946
4,Significant_Damage_And_Fatalities,26.484018,13,25,47.948952,0.54114,77.16,3,35509.228515,2,0.176883,9054


### Getting the target variables to Y variable

In [3]:
Y = data['Severity']
Y.shape

(10000,)

### Dropoing the irrelevent columns from training data

In [4]:
data = data.drop(columns=['Severity','Accident_ID','Accident_Type_Code','Adverse_Weather_Metric'],axis=1)
data.head()

Unnamed: 0,Safety_Score,Days_Since_Inspection,Total_Safety_Complaints,Control_Metric,Turbulence_In_gforces,Cabin_Temperature,Max_Elevation,Violations
0,49.223744,14,22,71.285324,0.272118,78.04,31335.476824,3
1,62.465753,10,27,72.288058,0.423939,84.54,26024.711057,2
2,63.059361,13,16,66.362808,0.322604,78.86,39269.053927,3
3,48.082192,11,9,74.703737,0.337029,81.79,42771.4992,1
4,26.484018,13,25,47.948952,0.54114,77.16,35509.228515,2


### creating the Label Encoder object which will encode the target severities to numerical form

In [5]:
label_encode = LabelEncoder()
y = label_encode.fit_transform(Y)

In [6]:
y.shape

(10000,)

### split the dataset for training and testing purpose

In [7]:
x_train,x_test,y_train,y_test = train_test_split(data,y,test_size = 0.3)

In [8]:
dtc = DecisionTreeClassifier()
one = OneVsOneClassifier(dtc)
one.fit(x_train,y_train)
predictions = one.predict(x_test)
acc = accuracy_score(y_test,predictions)
print('OneVsOneClassifier (DecisionTreeClassifier) -- ',acc)

OneVsOneClassifier (DecisionTreeClassifier) --  0.93


In [9]:
dtc = DecisionTreeClassifier()
onevsrest = OneVsRestClassifier(dtc)
onevsrest.fit(x_train,y_train)
predictions = onevsrest.predict(x_test)
acc = accuracy_score(y_test,predictions)
print('OneVsRestClassifier (DecisionTreeClassifier) -- ',acc)

OneVsRestClassifier (DecisionTreeClassifier) --  0.9183333333333333


In [10]:
ada = AdaBoostClassifier()
ada.fit(x_train,y_train)
predictions = ada.predict(x_test)
acc = accuracy_score(y_test,predictions)
print('AdaBoostClassifier -- ',acc)

AdaBoostClassifier --  0.451


In [11]:
dtc  = DecisionTreeClassifier()
dtc.fit(x_train,y_train)
predictions = dtc.predict(x_test)
acc = accuracy_score(y_test,predictions)
print('DecisionTreeClassifier -- ',acc)

DecisionTreeClassifier --  0.9346666666666666


In [12]:
etc  = ExtraTreeClassifier()
etc.fit(x_train,y_train)
predictions = etc.predict(x_test)
acc = accuracy_score(y_test,predictions)
print('ExtraTreeClassifier -- ',acc)

ExtraTreeClassifier --  0.601


In [13]:
rfc  = RandomForestClassifier()
rfc.fit(x_train,y_train)
predictions = rfc.predict(x_test)
acc = accuracy_score(y_test,predictions)
print('RandomForestClassifier -- ',acc)

RandomForestClassifier --  0.907




In [44]:
bag = BaggingClassifier()
bag.fit(x_train,y_train)
predictions = bag.predict(x_test)
acc = accuracy_score(y_test,predictions)
print('BaggingClassifier -- ',acc)

BaggingClassifier --  0.95


In [15]:
gbc = GradientBoostingClassifier()
gbc.fit(x_train,y_train)
predictions = gbc.predict(x_test)
acc = accuracy_score(y_test,predictions)
print('GradientBoostingClassifier -- ',acc)

GradientBoostingClassifier --  0.8973333333333333


In [33]:
bag = BaggingClassifier()
one = OneVsOneClassifier(bag)
one.fit(x_train,y_train)
predictions = one.predict(x_test)
acc = accuracy_score(y_test,predictions)
print('OneVsOneClassifier (BaggingClassifier) -- ',acc)

OneVsOneClassifier (BaggingClassifier) --  0.9453333333333334


In [43]:
xgb = XGBClassifier()
xgb.fit(x_train,y_train)
predictions = xgb.predict(x_test)
acc = accuracy_score(y_test,prediction)
print('XGBClassifier -- ',acc)

XGBClassifier --  0.8613333333333333


ValueError: Classification metrics can't handle a mix of continuous-multioutput and multiclass targets

In [None]:
test_data = pd.read_csv('test.csv')

In [None]:
activity_id = test_data['Accident_ID']
test_data = test_data.drop(columns=['Accident_ID','Accident_Type_Code'])

In [None]:
new_pred = dtc.predict(test_data)

In [None]:
test_categories = label_encode.inverse_transform(new_pred)

In [None]:
result_df = pd.DataFrame({'Accident_ID':activity_id,'Severity':test_categories})

In [None]:
result_df.to_csv('Prediction.csv',index=False)

### Accuracy - 83.65