## Importing all the required libraries

In [28]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MinMaxScaler

### Reading the training dataset to Pandas DataFrame

In [29]:
data = pd.read_csv('train.csv')
data.head()

Unnamed: 0,Severity,Safety_Score,Days_Since_Inspection,Total_Safety_Complaints,Control_Metric,Turbulence_In_gforces,Cabin_Temperature,Accident_Type_Code,Max_Elevation,Violations,Adverse_Weather_Metric,Accident_ID
0,Minor_Damage_And_Injuries,49.223744,14,22,71.285324,0.272118,78.04,2,31335.476824,3,0.424352,7570
1,Minor_Damage_And_Injuries,62.465753,10,27,72.288058,0.423939,84.54,2,26024.711057,2,0.35235,12128
2,Significant_Damage_And_Fatalities,63.059361,13,16,66.362808,0.322604,78.86,7,39269.053927,3,0.003364,2181
3,Significant_Damage_And_Serious_Injuries,48.082192,11,9,74.703737,0.337029,81.79,3,42771.4992,1,0.211728,5946
4,Significant_Damage_And_Fatalities,26.484018,13,25,47.948952,0.54114,77.16,3,35509.228515,2,0.176883,9054


### Getting the target variables to Y variable

In [30]:
Y = data['Severity']
Y.shape

(10000,)

### Dropoing the irrelevent columns from training data

In [31]:
data = data.drop(columns=['Severity','Accident_ID','Accident_Type_Code','Adverse_Weather_Metric'],axis=1)
data.head()

Unnamed: 0,Safety_Score,Days_Since_Inspection,Total_Safety_Complaints,Control_Metric,Turbulence_In_gforces,Cabin_Temperature,Max_Elevation,Violations
0,49.223744,14,22,71.285324,0.272118,78.04,31335.476824,3
1,62.465753,10,27,72.288058,0.423939,84.54,26024.711057,2
2,63.059361,13,16,66.362808,0.322604,78.86,39269.053927,3
3,48.082192,11,9,74.703737,0.337029,81.79,42771.4992,1
4,26.484018,13,25,47.948952,0.54114,77.16,35509.228515,2


In [32]:
minmax = MinMaxScaler()

In [33]:
safety_score = data['Safety_Score']
type(safety_score)

pandas.core.series.Series

In [34]:
print(safety_score.shape)
safety_score = safety_score.values.reshape(-1,1)
print(safety_score.shape)

(10000,)
(10000, 1)


In [35]:
safety_score = minmax.fit_transform(safety_score)

In [36]:
data['Safety_Score'] = safety_score
data.head()

Unnamed: 0,Safety_Score,Days_Since_Inspection,Total_Safety_Complaints,Control_Metric,Turbulence_In_gforces,Cabin_Temperature,Max_Elevation,Violations
0,0.492237,14,22,71.285324,0.272118,78.04,31335.476824,3
1,0.624658,10,27,72.288058,0.423939,84.54,26024.711057,2
2,0.630594,13,16,66.362808,0.322604,78.86,39269.053927,3
3,0.480822,11,9,74.703737,0.337029,81.79,42771.4992,1
4,0.26484,13,25,47.948952,0.54114,77.16,35509.228515,2


### creating the Label Encoder object which will encode the target severities to numerical form

In [37]:
label_encode = LabelEncoder()
y = label_encode.fit_transform(Y)

In [38]:
x_train,x_test,y_train,y_test = train_test_split(data,y,test_size=0.3)

In [39]:
rfc = RandomForestClassifier()

In [40]:
rfc.fit(data,y)



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [41]:
predictions = rfc.predict(x_test)

In [42]:
accuracy_score(y_test,predictions)

0.9993333333333333

In [43]:
test_data = pd.read_csv('test.csv')
accident_id = test_data['Accident_ID']
test_data.columns

Index(['Safety_Score', 'Days_Since_Inspection', 'Total_Safety_Complaints',
       'Control_Metric', 'Turbulence_In_gforces', 'Cabin_Temperature',
       'Accident_Type_Code', 'Max_Elevation', 'Violations',
       'Adverse_Weather_Metric', 'Accident_ID'],
      dtype='object')

In [44]:
print(test_data.shape)
test_data = test_data.drop(columns=['Accident_ID','Accident_Type_Code','Adverse_Weather_Metric'],axis=1)

(2500, 11)


In [45]:
test_data.shape

(2500, 8)

In [46]:
predictions = rfc.predict(test_data)

In [47]:
predictions = label_encode.inverse_transform(predictions)

In [48]:
result_df = pd.DataFrame({'Accident_ID':accident_id,'Severity':predictions})
result_df.head()

Unnamed: 0,Accident_ID,Severity
0,1,Significant_Damage_And_Fatalities
1,10,Significant_Damage_And_Fatalities
2,14,Significant_Damage_And_Fatalities
3,17,Significant_Damage_And_Fatalities
4,21,Significant_Damage_And_Fatalities


In [49]:
result_df.to_csv('Prediction.csv',index=False)