In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


**Link to dataset:**

In [None]:
import pandas as pd
import sklearn as sk
import numpy as np

In [None]:
path="/content/drive/MyDrive/Accidents/Traffic_Crashes_-_Crashes.csv"
data = pd.read_csv(path)

In [None]:
df= data[['LATITUDE','LONGITUDE','WEATHER_CONDITION','LIGHTING_CONDITION','ROADWAY_SURFACE_COND','ROAD_DEFECT','TRAFFIC_CONTROL_DEVICE','POSTED_SPEED_LIMIT','MOST_SEVERE_INJURY']]

In [None]:
df = df.dropna()

Selecting the environmental attributes from the dataset

For each attribute considered, 
we logically fuse similar values and remove the rows whose values are ambiguous 




In [None]:
# ROAD_DEFECT 
df.ROAD_DEFECT.unique()

array(['NO DEFECTS', 'UNKNOWN', 'RUT, HOLES', 'SHOULDER DEFECT', 'OTHER',
       'WORN SURFACE', 'DEBRIS ON ROADWAY'], dtype=object)

In [None]:
# A rut is a depression or groove worn into a road or path by the travel of wheels
# We fuse 'rut,holes' and 'worn' to reduce number of attributes

df = df[df.ROADWAY_SURFACE_COND !='UNKNOWN']
df = df[df.ROADWAY_SURFACE_COND !='OTHER']
df.loc[df.ROADWAY_SURFACE_COND == 'RUT, HOLES','ROADWAY_SURFACE_COND'] = 'WORN SURFACE'

In [None]:
df.ROADWAY_SURFACE_COND.unique()

array(['DRY', 'WET', 'SNOW OR SLUSH', 'ICE', 'SAND, MUD, DIRT'],
      dtype=object)

In [None]:
# ROADWAY_SURFACE_CONDITION
df = df[df.ROADWAY_SURFACE_COND !='UNKNOWN']
df = df[df.ROADWAY_SURFACE_COND !='OTHER']

In [None]:
# WEATHER_CONDITION
df.WEATHER_CONDITION.unique()

array(['CLEAR', 'RAIN', 'CLOUDY/OVERCAST', 'UNKNOWN', 'SNOW',
       'SLEET/HAIL', 'FOG/SMOKE/HAZE', 'FREEZING RAIN/DRIZZLE', 'OTHER',
       'BLOWING SNOW', 'BLOWING SAND, SOIL, DIRT',
       'SEVERE CROSS WIND GATE'], dtype=object)

In [None]:
# Fuse all 'freezing conditions/cold' attributes
# Fuse all 'obstructing wind' attributes
df.loc[df.WEATHER_CONDITION.isin(['SLEET/HAIL','FREEZING RAIN/DRIZZLE']),'WEATHER_CONDITION'] = 'SNOW'
df.loc[df.WEATHER_CONDITION == 'BLOWING SNOW','WEATHER_CONDITION'] = 'BLOWING SAND, SOIL, DIRT'
df.loc[df.WEATHER_CONDITION == 'BLOWING SAND, SOIL, DIRT','WEATHER_CONDITION'] = 'BLOWING SAND, SOIL, DIRT'
df = df[df.WEATHER_CONDITION!='UNKNOWN']
df = df[df.WEATHER_CONDITION !='OTHER']

In [None]:
# LIGHTING_CONDITION
df.LIGHTING_CONDITION.unique()

array(['DAYLIGHT', 'DARKNESS, LIGHTED ROAD', 'DAWN', 'DARKNESS', 'DUSK',
       'UNKNOWN'], dtype=object)

In [None]:
df = df[df.LIGHTING_CONDITION !='UNKNOWN']
df = df[df.LIGHTING_CONDITION !='OTHER']

In [None]:
# TRAFFIC_CONTROL_DEVICE
df.TRAFFIC_CONTROL_DEVICE.unique()
# Fuse all sign boards
# Fuse all traffic lights and flashing signals into Traffic Light Signal

array(['STOP SIGN/FLASHER', 'TRAFFIC SIGNAL', 'NO CONTROLS',
       'PEDESTRIAN CROSSING SIGN', 'OTHER', 'UNKNOWN', 'YIELD',
       'OTHER REG. SIGN', 'LANE USE MARKING', 'POLICE/FLAGMAN',
       'RAILROAD CROSSING GATE', 'SCHOOL ZONE', 'OTHER RAILROAD CROSSING',
       'NO PASSING', 'RR CROSSING SIGN', 'BICYCLE CROSSING SIGN'],
      dtype=object)

In [None]:
df.loc[df.TRAFFIC_CONTROL_DEVICE.isin(['STOP SIGN/FLASHER','TRAFFIC SIGNAL','FLASHING CONTROL SIGNAL']),'TRAFFIC_CONTROL_DEVICE'] = 'TRAFFIC LIGHT SIGNAL'
df.loc[df.TRAFFIC_CONTROL_DEVICE.isin(['YIELD','OTHER WARNING SIGN','OTHER REG. SIGN','DELINEATORS','RAILROAD CROSSING GATE','PEDESTRIAN CROSSING SIGN','SCHOOL ZONE','BICYCLE CROSSING SIGN','NO PASSING','OTHER RAILROAD CROSSING' ,'RR CROSSING SIGN']),'TRAFFIC_CONTROL_DEVICE'] = 'VISUAL SIGN BOARD'
df = df[df.TRAFFIC_CONTROL_DEVICE!='UNKNOWN']
df = df[df.TRAFFIC_CONTROL_DEVICE!='OTHER']

In [None]:
df.isna().sum()

LATITUDE                  0
LONGITUDE                 0
WEATHER_CONDITION         0
LIGHTING_CONDITION        0
ROADWAY_SURFACE_COND      0
ROAD_DEFECT               0
TRAFFIC_CONTROL_DEVICE    0
POSTED_SPEED_LIMIT        0
MOST_SEVERE_INJURY        0
dtype: int64

In [None]:
df = pd.get_dummies(df, columns=["WEATHER_CONDITION",'LIGHTING_CONDITION','ROADWAY_SURFACE_COND','TRAFFIC_CONTROL_DEVICE','ROAD_DEFECT']) 

In [None]:
df = df.replace({'MOST_SEVERE_INJURY':{'NO INDICATION OF INJURY':1,'NONINCAPACITATING INJURY':2,'INCAPACITATING INJURY':3,'REPORTED, NOT EVIDENT':0,'FATAL':4}})

In [None]:
df.head()

Unnamed: 0,LATITUDE,LONGITUDE,POSTED_SPEED_LIMIT,MOST_SEVERE_INJURY,"WEATHER_CONDITION_BLOWING SAND, SOIL, DIRT",WEATHER_CONDITION_CLEAR,WEATHER_CONDITION_CLOUDY/OVERCAST,WEATHER_CONDITION_FOG/SMOKE/HAZE,WEATHER_CONDITION_RAIN,WEATHER_CONDITION_SEVERE CROSS WIND GATE,WEATHER_CONDITION_SNOW,LIGHTING_CONDITION_DARKNESS,"LIGHTING_CONDITION_DARKNESS, LIGHTED ROAD",LIGHTING_CONDITION_DAWN,LIGHTING_CONDITION_DAYLIGHT,LIGHTING_CONDITION_DUSK,ROADWAY_SURFACE_COND_DRY,ROADWAY_SURFACE_COND_ICE,"ROADWAY_SURFACE_COND_SAND, MUD, DIRT",ROADWAY_SURFACE_COND_SNOW OR SLUSH,ROADWAY_SURFACE_COND_WET,TRAFFIC_CONTROL_DEVICE_LANE USE MARKING,TRAFFIC_CONTROL_DEVICE_NO CONTROLS,TRAFFIC_CONTROL_DEVICE_POLICE/FLAGMAN,TRAFFIC_CONTROL_DEVICE_TRAFFIC LIGHT SIGNAL,TRAFFIC_CONTROL_DEVICE_VISUAL SIGN BOARD,ROAD_DEFECT_DEBRIS ON ROADWAY,ROAD_DEFECT_NO DEFECTS,ROAD_DEFECT_OTHER,"ROAD_DEFECT_RUT, HOLES",ROAD_DEFECT_SHOULDER DEFECT,ROAD_DEFECT_UNKNOWN,ROAD_DEFECT_WORN SURFACE
2,41.741804,-87.740954,35,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0
3,41.741804,-87.740954,30,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0
4,41.953647,-87.732082,35,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0
5,41.958987,-87.933994,35,1,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0
6,41.903825,-87.643286,30,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0


In [None]:
# df.to_csv(r'/content/drive/MyDrive/Accidents/Model_Data.csv',index=False)

In [None]:
df.dropna(inplace=True)

In [None]:
data = df.sample(frac=1).reset_index(drop=True)

In [None]:
data=data.iloc[:,data.columns != "LATITUDE"]
data=data.iloc[:,data.columns != "LONGITUDE"]

In [None]:
n = len(data.columns)
X = data.iloc[:, data.columns != 'MOST_SEVERE_INJURY']

#Last Column 'MOST_SEVERE_INJURY' is output
# Y = data.iloc[:,-1:]
Y = data.iloc[:, data.columns == 'MOST_SEVERE_INJURY']


**Splitting train,test data at a 80:20 ratio**

In [None]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X,Y, test_size=0.2, random_state=0)

**Feature Scaling**

In [None]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(x_train)

x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [None]:
from sklearn.metrics import confusion_matrix
import sklearn.metrics as metrics

**Classification Models** 

**KNeighborsClassifier**

**Decision Tree**

In [104]:
from sklearn import tree

DT = tree.DecisionTreeClassifier()
DT.fit(x_train,y_train.values.ravel())
predictions = DT.predict(x_test)

tree_confMat=metrics.confusion_matrix(y_test, predictions)
print(tree_confMat)

MAE_DT = metrics.mean_absolute_error(y_test, predictions)
acc_DT = metrics.accuracy_score(y_test, predictions)
MSE_DT = metrics.mean_squared_error(y_test, predictions)

print(MSE_DT)
print(acc_DT)


[[    3  3330     2     2     0]
 [   41 68883    28     6     0]
 [    4  5833     2     2     0]
 [    1  1297     1     0     0]
 [    0    76     0     0     0]]
0.19094213379280855
0.8663958446001182


In [106]:
print(MAE_DT)

0.152054432720001


**Multilayer Perceptron Neural Network**

In [107]:
print(MAE_NN)
print(MSE_NN)
print(acc_NN)

0.15097282137062795
0.18938260114952649
0.8672762259310033


In [105]:
from sklearn.neural_network import MLPClassifier


NN = MLPClassifier(hidden_layer_sizes=(10,),solver='sgd', alpha=0.0001)
NN.fit(x_train, y_train.values.ravel())
predictions = NN.predict(x_test)
round(NN.score(x_train, y_train.values.ravel()), 4)

nn_confMat=metrics.confusion_matrix(y_test, predictions)
print(nn_confMat)

MAE_NN = metrics.mean_absolute_error(y_test, predictions)
MSE_NN = metrics.mean_squared_error(y_test, predictions)
acc_NN = metrics.accuracy_score(y_test, predictions)

[[    0  3337     0     0     0]
 [    0 68958     0     0     0]
 [    0  5841     0     0     0]
 [    0  1299     0     0     0]
 [    0    76     0     0     0]]


**Random Forest**

In [None]:
# from sklearn.ensemble import RandomForestClassifier

# RF = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0)
# RF.fit(x_train, y_train.values.ravel())
# predictions = RF.predict(x_test)

# # round(RF.score(x_train, y_train), 4)

# acc_RF = metrics.accuracy_score(y_test, predictions)
# MAE_RF = metrics.mean_absolute_error(y_test, predictions)
# MSE_RF = metrics.mean_squared_error(y_test, predictions)

**Regression Models**

In [None]:
# divide data into features matrix and target vector
features = data.iloc[:, data.columns != 'MOST_SEVERE_INJURY']
target = data.iloc[:, data.columns == 'MOST_SEVERE_INJURY']

MAE = make_scorer(mean_absolute_error)
MSE = make_scorer(mean_squared_error)
folds = 5


In [None]:
from sklearn.metrics import accuracy_score

def acc_fun(target_true, target_fit):
    target_fit = np.round(target_fit)
    target_fit.astype('int')
    return accuracy_score(target_true, target_fit)

acc = make_scorer(acc_fun)
folds = 5


**Linear Regression**

In [98]:
from sklearn.linear_model import LinearRegression

model_linear = LinearRegression()

MAE_linear = cross_val_score(model_linear,
    features,
    target,
    cv=folds,
    scoring=MAE)
print('MAE: ', np.mean(MAE_linear))


acc_linear = cross_val_score(model_linear, features,  target,  cv=folds,  scoring=acc)
print('ACCURACY Linear regression: ',np.mean(acc_linear))

MSE_linear = cross_val_score(model_linear,    features,    target,    cv=folds,    scoring=MSE)
print('MSE Linear regression: ', np.mean(MSE_linear))



MAE:  0.2081061181600675
ACCURACY Linear regression:  0.8662373759605589
MSE Linear regression:  0.18734265793063504


**Ordinal Regression**

In [93]:
from sklearn.linear_model import LinearRegression, LogisticRegression
from mord import LogisticAT

In [99]:
model_ordinal = LogisticAT() 


In [103]:
model_ordinal.fit(x_train,y_train.values.ravel())
predictions = model_ordinal.predict(x_test)
acc_RF = metrics.accuracy_score(y_test, predictions)
MAE_RF = metrics.mean_absolute_error(y_test, predictions)
MSE_RF = metrics.mean_squared_error(y_test, predictions)



0.8672762259310033


In [108]:
print(acc_RF)
print(MAE_RF)
print(MSE_RF)

0.8672762259310033
0.15097282137062795
0.18938260114952649


**Logistic Regression (One vs Rest)**

In [None]:
# MAE_ordinal = cross_val_score(model_ordinal,
#     features,
#     target,
#     cv=folds,
#     scoring=MAE)
# print('Ordered logistic regression: ', np.mean(MAE_ordinal))

In [None]:
# from sklearn.linear_model import LogisticRegression
# model_1vR = LogisticRegression(multi_class='ovr', class_weight='balanced',max_iter = 500)

# MAE_1vR = cross_val_score(model_1vR,
#     features,
#     target,
#     cv=folds,
#     scoring=MAE)
# print('Logistic regression (one versus rest): ', np.mean(MAE_1vR))

# print('Accuracy')
# acc_1vR = cross_val_score(model_1vR,
#     features,
#     target,
#     cv=folds,
#     scoring=acc)
# print('Logistic regression (one versus rest): ', np.mean(acc_1vR))

# print('Mean Squared Error')
# MSE_1vR = cross_val_score(model_1vR,
#     features,
#     target,
#     cv=folds,
#     scoring=MSE)
# print('Logistic regression (one versus rest): ', np.mean(MSE_1vR))



In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# OvR = LogisticRegression(solver='lbfgs', multi_class='ovr')
# OvR.fit(x_train, y_train.values.ravel())
# OvR_predictions = OvR.predict(x_test)
# MAE_OvR = metrics.mean_absolute_error(y_test, OvR_predictions)
# MSE_OvR = metrics.mean_squared_error(y_test, OvR_predictions)
# acc_OvR = metrics.accuracy_score(y_test, OvR_predictions)

In [None]:
# print(MAE_OvR)
# print(MSE_OvR)
# print(acc_OvR)

In [None]:
# One-vs-rest (OvR for short, also referred to as One-vs-All or OvA) is a heuristic method 
# for using binary classification algorithms for multi-class classification.

# It involves splitting the multi-class dataset into multiple binary classification problems.
# A binary classifier is then trained on each binary classification 
# problem and predictions are made using the model that is the most confident.

**Multinomial Logistic Regression**

In [None]:
# model_multi = LogisticRegression()

# MAE_multi = cross_val_score(model_multi,
#     features,
#     target.values.ravel(),
#     cv=folds,
#     scoring=MAE)
# print('Logistic regression (multinomial): ', np.mean(MAE_multi))

# acc_multi = cross_val_score(model_multi,
#     features,
#     target.values.ravel(),
#     cv=folds,
#     scoring=acc)
# print('Logistic regression (multinomial): ', np.mean(acc_multi))

# print('Mean Squared Error')
# MSE_multi = cross_val_score(model_multi,
#     features,
#     target.values.ravel(),
#     cv=folds,
#     scoring=MSE)
# print('Logistic regression (one versus rest): ', np.mean(MSE_multi))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

Logistic regression (multinomial):  0.15288702192149511


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

Logistic regression (multinomial):  0.866214737583479
Mean Squared Error


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

Logistic regression (one versus rest):  0.19306762586308812


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [None]:
# model_multi = LogisticRegression(solver='lbfgs', multi_class='multinomial',max_iter = 1000)
# model_multi.fit(x_train, y_train.values.ravel())
# model_multi_predictions = model_multi.predict(x_test)
# MAE_model_multi = metrics.mean_absolute_error(y_test, model_multi_predictions)
# MSE_model_multi = metrics.mean_squared_error(y_test, model_multi_predictions)
# acc_model_multi = metrics.accuracy_score(y_test, model_multi_predictions)

In [None]:
# print(MAE_model_multi)
# print(MSE_model_multi)
# print(acc_model_multi)

In [None]:
# !pip install mord



In [None]:
# from sklearn.linear_model import LinearRegression, LogisticRegression
# from mord import LogisticAT

# model_ordinal = LogisticAT(alpha=0)  # alpha parameter set to zero to perform no regularisation

# model_ordinal = LogisticRegression(solver='lbfgs', multi_class='multinomial',max_iter = 1000)
# model_ordinal.fit(x_train, y_train.values.ravel())
# model_ordinal_predictions = model_ordinal.predict(x_test)
# MAE_model_ordinal = metrics.mean_absolute_error(y_test, model_ordinal_predictions)
# MSE_model_ordinal = metrics.mean_squared_error(y_test, model_ordinal_predictions)
# acc_model_ordinal = metrics.accuracy_score(y_test, model_ordinal_predictions)

In [None]:
# print(MAE_model_ordinal)
# print(MSE_model_ordinal)
# print(acc_model_ordinal)

In [None]:
# from mord import LogisticAT
# model_ordinal = LogisticAT()  # alpha parameter set to zero to perform no regularisation

# MAE_ordinal = cross_val_score(model_ordinal,
#     features,
#     target,
#     cv=folds,
#     scoring=MAE)
# print('Ordered logistic regression: ', np.mean(MAE_ordinal))

# acc_ordinal = cross_val_score(model_ordinal,
#     features,
#     target,
#     cv=folds,
#     scoring=acc)
# print('Ordered logistic regression: ', np.mean(acc_ordinal))

# print('Mean Squared Error')
# MSE_ordinal = cross_val_score(model_ordinal,
#     features,
#     target,
#     cv=folds,
#     scoring=MSE)
# print('Logistic regression (one versus rest): ', np.mean(MSE_ordinal))

**Results**

In [None]:
import plotly.graph_objects as go

fig = go.Figure(data=[go.Table(header=dict(values=['Model', 'Accuracy', 'MAE','MSE']),
                 cells=dict(values=[['KNN','Decision Tree','Random Forest','NN','Linear Regression'
                 ,'Logistic Regression(OvR)','Logistic Regression(Multinomial)','Ordinal Regression'],
                  [MAE_KNN,MAE_DT,MAE_RF,MAE_NN,MAE_linear,MAE_1vR, MAE_multi,MAE_ordinal],
                  [acc_KNN,acc_DT,acc_RF,acc_NN,acc_linear,acc_1vR, acc_multi, acc_ordinal]
                  [MSE_KNN,MSE_DT,MSE_RF,MSE_NN,MSE_linear,MSE_1vR, MSE_multi,MSE_ordinal]))
                     ])
fig.show()

SyntaxError: ignored

In [None]:
# from sklearn import svm
# import pandas as pd

# #OVR : ONE vs REST


# SVM = svm.SVC(decision_function_shape="ovr").fit(x_train, y_train.values.ravel())
# prediction = SVM.predict(x_test)
# round(SVM.score(x_test, y_test.values.ravel()), 4)
# # print(CM(predictions,y_test))
# # predictions
# svm_confMat=metrics.confusion_matrix(y_test, predictions)


# from sklearn.ensemble import RandomForestClassifier

# RF = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0)
# RF.fit(x_train, y_train.values.ravel())
# predictions = RF.predict(x_test)
# # predictions
# # round(RF.score(x_train, y_train.values.ravel()), 4)
# # print(CM(predictions,y_test))
# Random_confMat=metrics.confusion_matrix(y_test, predictions)
# print(Random_confMat)
# print("Accuracy=",metrics.accuracy_score(y_test, predictions))

In [None]:
# from sklearn.neighbors import KNeighborsClassifier
# from sklearn import metrics

# scores = {}
# scores_list = []
# for k in (1,15):
#    KNN = KNeighborsClassifier(n_neighbors=k, algorithm = 'auto')
#    KNN.fit(x_train, y_train.values.ravel())
#    y_pred = KNN.predict(x_test)
#    scores[k] = metrics.accuracy_score(y_test,y_pred)
#    scores_list.append(metrics.accuracy_score(y_test,y_pred))

# MAE_KNN = metrics.mean_absolute_error(y_test, predictions)
# acc_KNN = metrics.accuracy_score(y_test, predictions)
# MSE_KNN = metrics.mean_squared_error(y_test, predictions)

# result = metrics.confusion_matrix(y_test, y_pred)
# print("Confusion Matrix:")
# print(result)
# result1 = metrics.classification_report(y_test, y_pred)
# print("Classification Report:",)
# print (result1)
# print()

In [None]:
# from sklearn.linear_model import LogisticRegression
# model_multi = LogisticRegression(multi_class='multinomial', solver='lbfgs',  class_weight='balanced')
# model_multi.fit(x_train, y_train.values.ravel())
# predictions = model_multi.predict(x_test)
# # for i in range(len(predictions)):
# #     predictions[i]=np.round(predictions[i])
# # predictions
# MultiLogisticReg_confMat=metrics.confusion_matrix(y_test, predictions)
# print(MultiLogisticReg_confMat)
# print(metrics.accuracy_score(y_test, predictions))

In [None]:
# from sklearn.linear_model import LinearRegression
# model_linear = LinearRegression()
# model_linear.fit(x_train, y_train.values.ravel())
# predictions = model_linear.predict(x_test)
# for i in range(len(predictions)):
#     predictions[i]=np.round(predictions[i])
# # predictions
# linearRef_confMat=metrics.confusion_matrix(y_test, predictions)
# print(linearRef_confMat)
# print(metrics.accuracy_score(y_test, predictions))

In [None]:
# from sklearn.linear_model import LogisticRegression
# model_1vR = LogisticRegression(multi_class='ovr', class_weight='balanced')
# model_1vR.fit(x_train, y_train.values.ravel())
# predictions = model_1vR.predict(x_test)
# # predictions
# logisticReg_confMat=metrics.confusion_matrix(y_test, predictions)