# Performance Measurements of Support Vector Machines

In [1]:
import pandas as p                #importing the necessary modules and libraries
import matplotlib.pyplot as plt
import seaborn as s
import numpy as n

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
df=p.read_excel("Data1.xlsx")   #classified data according to cities and average, max and min values of NO2 in them


In [4]:
df.head()

Unnamed: 0,City,Last Update,Avg1,Max1,Min1
0,Ahmedabad,0,42.08,39.0,7.8
1,Aizwal,0,4.1,5.2,3.1
2,Amaravati,0,10.36,16.7,6.53
3,Amritsar,0,51.27,28.02,7.79
4,Bengaluru,0,31.04,19.65,9.72


In [5]:
# Adding a new column according to the data.
AQI=['Satisfactory','Good' , 'Good','Satisfactory','Good', 'Good', 'Good','Satisfactory','Satisfactory', 'Good', 'Moderate', 'Good', 'Good','Good','Satisfactory','Good', 'Good', 'Moderate','Good','Satisfactory','Good','Good','Satisfactory','Good','Moderate']

In [6]:
df['AQI']=AQI #added to data frame

In [7]:
# Adding class to the dataset which is derieved on the values according to AQI
df['class'] = df.AQI.map({'Good':2, 'Satisfactory':1,'Moderate':0}) #class defined 

In [8]:
df.head(5)

Unnamed: 0,City,Last Update,Avg1,Max1,Min1,AQI,class
0,Ahmedabad,0,42.08,39.0,7.8,Satisfactory,1
1,Aizwal,0,4.1,5.2,3.1,Good,2
2,Amaravati,0,10.36,16.7,6.53,Good,2
3,Amritsar,0,51.27,28.02,7.79,Satisfactory,1
4,Bengaluru,0,31.04,19.65,9.72,Good,2


In [9]:
del df['AQI']

In [10]:
from sklearn.preprocessing import LabelEncoder
var_mod = ['City','Last Update','Avg1','Max1','Min1','class']
le = LabelEncoder()
for i in var_mod:
    df[i] = le.fit_transform(df[i].astype(str))

In [11]:
X= df.drop(labels='class',axis=1)
y=df.loc[:,'class']

In [12]:
from sklearn.metrics import confusion_matrix, classification_report,accuracy_score

In [26]:
from sklearn.model_selection import train_test_split #spliting our data to training sets and test sets
X_train,X_test,y_train,y_test= train_test_split(X,y, test_size=0.2, random_state=1,stratify=y)
#  test_size :Ratio by which we are splitting
#70% model used for training and rest 30% used for testing for predicting actual accuracy.
#  Random state used for keeking values constant means we dont want to change them.

In [22]:
from sklearn.model_selection import cross_val_score  

In [23]:
from sklearn.svm import SVC
s = SVC()

s.fit(X_train,y_train)
predicts = s.predict(X_test)
print(" ")
print("Classification report of Support Vector Machines Report:")
print("")

print(classification_report(y_test,predicts)) # shows classification metrics precision, recall and f1 score

# Accuracy is the correctly classified data instances over the total number of data instances.
# Cross validation is to evaluate our models
accuracy = cross_val_score(s ,X, y, cv=7) #splitting the data according to cv dividing it into training and testing
print("Cross validation test results of accuracy:")
print(accuracy)
print("")
# get mean of every field
print("Accuracy result of Support Vector Machines is:",accuracy.mean()*100)
print("")

cm1=confusion_matrix(y_test,predicts) #evaluating the performance of a classification model
print("Confusion Matrix result of Support Vector Machines is:\n",cm1)
print("")
sensitivity1= cm1[0,0]/(cm1[0,0]+cm1[0,1])
print("Senstivity :",sensitivity1) #  Cases actual positive cases that got predicted as true
print("")
specificity1 = cm1[1,1]/(cm1[1,0]+cm1[1,1])
print("specificity :",specificity1)  #  Cases actual positive cases that got predicted as false
print("")

 
Classification report of Support Vector Machines Report:

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.00      0.00      0.00         2
           2       0.62      1.00      0.77         5

    accuracy                           0.62         8
   macro avg       0.21      0.33      0.26         8
weighted avg       0.39      0.62      0.48         8

Cross validation test results of accuracy:
[1.         0.5        0.5        0.5        0.66666667 0.66666667
 0.66666667]

Accuracy result of Support Vector Machines is: 64.28571428571429

Confusion Matrix result of Support Vector Machines is:
 [[0 0 1]
 [0 0 2]
 [0 0 5]]

Senstivity : nan

specificity : nan



In [24]:
TN = cm1[0][0]
FN = cm1[1][0]
TP = cm1[1][1]
FP = cm1[0][1]
print("True positive :",TP)
print("True negative :",TN)
print("False positive :",FP)
print("False negative :",FN)
print("")
TPR = TP/(TP+FN)
TNR = TN/(TN+FP)
FPR = FP/(FP+TN)
FNR = FN/(TP+FN)
print("")
print("True positive Rate :",TPR)
print("True negative Rate :",TNR)
print("False positive Rate:",FPR)
print("False negative Rate :",FNR)

True positive : 0
True negative : 0
False positive : 0
False negative : 0


True positive Rate : nan
True negative Rate : nan
False positive Rate: nan
False negative Rate : nan


# Performance Measurements of Random Forest 

In [27]:
from sklearn import model_selection
# random forest model creation

from sklearn.ensemble import RandomForestClassifier

rfc = RandomForestClassifier()
rfc.fit(X_train,y_train)
predicts = rfc.predict(X_test)
print(" ")
print("Classification report of Random Forest:")
print("")

print(classification_report(y_test,predicts)) # shows classification metrics precision, recall and f1 score

# Accuracy is the correctly classified data instances over the total number of data instances.
# Cross validation is to evaluate our models
accuracy = cross_val_score(rfc ,X, y, cv=7) #splitting the data according to cv dividing it into training and testing
print("Cross validation test results of accuracy:")
print(accuracy)
print("")
# get mean of every field
print("Accuracy result of Random Forest is:",accuracy.mean()*100)
print("")

cm1=confusion_matrix(y_test,predicts) #evaluating the performance of a classification model
print("Confusion Matrix result of Random Forests is:\n",cm1)
print("")
sensitivity1= cm1[0,0]/(cm1[0,0]+cm1[0,1])
print("Senstivity :",sensitivity1) #  Cases actual positive cases that got predicted as true
print("")
specificity1 = cm1[1,1]/(cm1[1,0]+cm1[1,1])
print("specificity :",specificity1)  #  Cases actual positive cases that got predicted as false
print("")

 
Classification report of Random Forest:

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.00      0.00      0.00         1
           2       0.67      0.67      0.67         3

    accuracy                           0.40         5
   macro avg       0.22      0.22      0.22         5
weighted avg       0.40      0.40      0.40         5

Cross validation test results of accuracy:
[0.5        0.5        0.5        0.75       0.66666667 0.66666667
 1.        ]

Accuracy result of Random Forest is: 65.47619047619048

Confusion Matrix result of Random Forests is:
 [[0 1 0]
 [0 0 1]
 [0 1 2]]

Senstivity : 0.0

specificity : nan



In [33]:
TN = cm1[0][0]
FN = cm1[1][0]
TP = cm1[1][1]
FP = cm1[0][1]
print("True positive :",TP)
print("True negative :",TN)
print("False positive :",FP)
print("False negative :",FN)
print("")
TPR = TP/(TP+FN)
TNR = TN/(TN+FP)
FPR = FP/(FP+TN)
FNR = FN/(TP+FN)
print("")
print("True positive Rate :",TPR)
print("True negative Rate :",TNR)
print("False positive Rate:",FPR)
print("False negative Rate :",FNR)

True positive : 0
True negative : 0
False positive : 1
False negative : 0


True positive Rate : nan
True negative Rate : 0.0
False positive Rate: 1.0
False negative Rate : nan
