## Importing all the required libraries

In [51]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder

### Reading the training dataset to Pandas DataFrame

In [52]:
data = pd.read_csv('train.csv')
data.head()

Unnamed: 0,Severity,Safety_Score,Days_Since_Inspection,Total_Safety_Complaints,Control_Metric,Turbulence_In_gforces,Cabin_Temperature,Accident_Type_Code,Max_Elevation,Violations,Adverse_Weather_Metric,Accident_ID
0,Minor_Damage_And_Injuries,49.223744,14,22,71.285324,0.272118,78.04,2,31335.476824,3,0.424352,7570
1,Minor_Damage_And_Injuries,62.465753,10,27,72.288058,0.423939,84.54,2,26024.711057,2,0.35235,12128
2,Significant_Damage_And_Fatalities,63.059361,13,16,66.362808,0.322604,78.86,7,39269.053927,3,0.003364,2181
3,Significant_Damage_And_Serious_Injuries,48.082192,11,9,74.703737,0.337029,81.79,3,42771.4992,1,0.211728,5946
4,Significant_Damage_And_Fatalities,26.484018,13,25,47.948952,0.54114,77.16,3,35509.228515,2,0.176883,9054


### Getting the target variables to Y variable

In [53]:
Y = data['Severity']
Y.shape

(10000,)

### Dropoing the irrelevent columns from training data

In [54]:
data = data.drop(columns=['Severity','Accident_ID'],axis=1)
data.head()

Unnamed: 0,Safety_Score,Days_Since_Inspection,Total_Safety_Complaints,Control_Metric,Turbulence_In_gforces,Cabin_Temperature,Accident_Type_Code,Max_Elevation,Violations,Adverse_Weather_Metric
0,49.223744,14,22,71.285324,0.272118,78.04,2,31335.476824,3,0.424352
1,62.465753,10,27,72.288058,0.423939,84.54,2,26024.711057,2,0.35235
2,63.059361,13,16,66.362808,0.322604,78.86,7,39269.053927,3,0.003364
3,48.082192,11,9,74.703737,0.337029,81.79,3,42771.4992,1,0.211728
4,26.484018,13,25,47.948952,0.54114,77.16,3,35509.228515,2,0.176883


### creating the Label Encoder object which will encode the target severities to numerical form

In [55]:
label_encode = LabelEncoder()
y = label_encode.fit_transform(Y)

In [56]:
y.shape

(10000,)

### split the dataset for training and testing purpose

In [57]:
x_train,x_test,y_train,y_test = train_test_split(data,y,test_size = 0.3)

In [58]:
x_train

Unnamed: 0,Safety_Score,Days_Since_Inspection,Total_Safety_Complaints,Control_Metric,Turbulence_In_gforces,Cabin_Temperature,Accident_Type_Code,Max_Elevation,Violations,Adverse_Weather_Metric
9985,42.968037,9,0,34.867821,0.619395,80.12,4,32628.399491,2,0.060404
6357,36.073059,11,6,74.293528,0.283657,81.27,4,29518.331528,2,0.052887
2950,46.484018,11,1,72.333637,0.416726,78.75,3,28145.040920,1,0.139945
4953,33.789954,11,0,70.145852,0.291230,79.14,4,27619.706420,1,0.050652
7901,41.689498,16,4,87.101185,0.346405,77.13,2,40744.703022,3,0.549198
...,...,...,...,...,...,...,...,...,...,...
3763,25.890411,14,2,49.179581,0.402662,77.39,6,26744.866799,2,0.006528
731,51.735160,13,15,41.613491,0.485605,86.59,3,18083.049539,5,0.090817
2586,48.675799,7,2,67.639015,0.344963,82.11,4,32342.313384,1,0.058872
6720,72.009132,11,7,75.797630,0.272118,78.16,7,36485.430053,3,0.003265


### Using OneVsRestClassifier which will take linear regression as argument

In [59]:
clf = LinearRegression()
onevsrest = OneVsRestClassifier(clf)

In [60]:
results = onevsrest.fit(data,y)

In [61]:
predict = onevsrest.predict(x_test)

In [62]:
predict.shape

(3000,)

In [63]:
y_test.shape

(3000,)

In [64]:
df = pd.DataFrame({'Actual':y_test.flatten(),'Predicted':predict.flatten()})

In [65]:
accuracy_score(y_test,predict)

0.537

## Reading test dataset

In [66]:
test_data = pd.read_csv('test.csv')

In [67]:
activity_id = test_data['Accident_ID']

### Dropping the id column which is not required in testing process

In [68]:
test_data = test_data.drop(columns=['Accident_ID'],axis=1)
test_data.head()

Unnamed: 0,Safety_Score,Days_Since_Inspection,Total_Safety_Complaints,Control_Metric,Turbulence_In_gforces,Cabin_Temperature,Accident_Type_Code,Max_Elevation,Violations,Adverse_Weather_Metric
0,19.497717,16,6,72.151322,0.388959,78.32,4,37949.724386,2,0.069692
1,58.173516,15,3,64.585232,0.250841,78.6,7,30194.805567,2,0.002777
2,33.287671,15,3,64.721969,0.336669,86.96,6,17572.925484,1,0.004316
3,3.287671,21,5,66.362808,0.421775,80.86,3,40209.186341,2,0.19999
4,10.86758,18,2,56.107566,0.313228,79.22,2,35495.525408,2,0.483696


In [69]:
test_results = onevsrest.predict(test_data)

In [70]:
test_categories = label_encode.inverse_transform(test_results)

In [71]:
test_categories

array(['Highly_Fatal_And_Damaging', 'Significant_Damage_And_Fatalities',
       'Highly_Fatal_And_Damaging', ...,
       'Significant_Damage_And_Serious_Injuries',
       'Highly_Fatal_And_Damaging', 'Highly_Fatal_And_Damaging'],
      dtype=object)

In [72]:
result_df = pd.DataFrame({'Accident_ID':activity_id,'Severity':test_categories})
result_df

Unnamed: 0,Accident_ID,Severity
0,1,Highly_Fatal_And_Damaging
1,10,Significant_Damage_And_Fatalities
2,14,Highly_Fatal_And_Damaging
3,17,Highly_Fatal_And_Damaging
4,21,Highly_Fatal_And_Damaging
...,...,...
2495,12484,Minor_Damage_And_Injuries
2496,12487,Significant_Damage_And_Serious_Injuries
2497,12488,Significant_Damage_And_Serious_Injuries
2498,12491,Highly_Fatal_And_Damaging


In [73]:
result_df.to_csv('Prediction.csv',index=False)