In [1]:
import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn import svm
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from bince_encoding import bince_encoding
from classification_using_ca import classification
from frequency_encoding import frequency_encoding
from sklearn.neighbors import KNeighborsClassifier
from calculate_time import calculate_time
import time

In [2]:
df=pd.read_csv('iris.csv')

In [3]:
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [4]:
y=df['species']
df.drop(['species'],axis=1,inplace=True)
X=df

In [5]:
dict_x=X.to_dict()

In [6]:
data=frequency_encoding(dict_x)

In [7]:
X=pd.DataFrame(data)
y=LabelEncoder().fit_transform(y)

In [8]:
X['encoded_string']=X.apply(lambda row: ''.join(map(str, row)), axis=1)

In [9]:
X.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,encoded_string
0,1,111,0,0,1111000000
1,0,11,0,0,11000000
2,0,11,0,0,11000000
3,0,11,0,0,11000000
4,0,111,0,0,111000000


In [10]:
X.drop(['sepal_length','sepal_width','petal_length','petal_width'],axis=1,inplace=True)

In [11]:
split_columns = X['encoded_string'].apply(lambda x: pd.Series(list(x)))

In [12]:
X=split_columns

In [13]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=12)

In [14]:
start_time = time.time()
knn=KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train,y_train)
knn_pred=knn.predict(X_test)
print("Accuracy of K neighbors classifier:",accuracy_score(y_test,knn_pred));
print("Precision of K neighbors classifier:",precision_score(y_test,knn_pred,average="weighted"))
print("Recall of K neighbors classifier:",recall_score(y_test,knn_pred,average="weighted"))
print("F-1 score of K neighbors classifier:", f1_score(y_test,knn_pred,average="weighted"));
end_time = calculate_time()
execution_time =  end_time - start_time
print("Execution time:", execution_time, "seconds")

Accuracy of K neighbors classifier: 0.9555555555555556
Precision of K neighbors classifier: 0.9611111111111111
Recall of K neighbors classifier: 0.9555555555555556
F-1 score of K neighbors classifier: 0.9562230362230362
Execution time: 0.08648681640625 seconds


In [15]:
start_time = time.time()
svm_clf=svm.SVC(kernel='linear')
svm_clf.fit(X_train,y_train)
svm_clf_pred=svm_clf.predict(X_test)
print("Accuracy of Support Vector Machine: ",accuracy_score(y_test,svm_clf_pred));
print("Precision of Support Vector Machine:",precision_score(y_test,svm_clf_pred,average="weighted"))
print("Recall of Support Vector Machine:",recall_score(y_test,svm_clf_pred,average="weighted"))
print("F1-score of Support Vector Machine:",f1_score(y_test,svm_clf_pred,average="weighted"))
end_time = calculate_time()
execution_time =  end_time - start_time
print("Execution time:", execution_time, "seconds")

Accuracy of Support Vector Machine:  0.9777777777777777
Precision of Support Vector Machine: 0.9792592592592592
Recall of Support Vector Machine: 0.9777777777777777
F1-score of Support Vector Machine: 0.9778760192553295
Execution time: 0.08600997924804688 seconds


In [16]:
start_time = time.time()
gnb=GaussianNB()
gnb.fit(X_train,y_train)
gnb_pred=gnb.predict(X_test)
print("Accuracy of Gaussian Naive Bayes:",accuracy_score(y_test,gnb_pred));
print("Precision of Gaussian Naive Bayes:",precision_score(y_test,gnb_pred,average="weighted"))
print("Recall of Gaussian Naive Bayes:",recall_score(y_test,gnb_pred,average="weighted"))
print("F-1 score of Gaussian Naive Bayes:", f1_score(y_test,gnb_pred,average="weighted"));
end_time = calculate_time()
execution_time =  end_time - start_time
print("Execution time:", execution_time, "seconds")

Accuracy of Gaussian Naive Bayes: 0.8666666666666667
Precision of Gaussian Naive Bayes: 0.8925000000000001
Recall of Gaussian Naive Bayes: 0.8666666666666667
F-1 score of Gaussian Naive Bayes: 0.8655587211142767
Execution time: 0.0856473445892334 seconds


In [17]:
start_time = time.time()
dt=DecisionTreeClassifier()
dt.fit(X_train,y_train)
dt_pred=dt.predict(X_test)
print("Accuracy score of decision tree classifier:",accuracy_score(y_test,dt_pred))
print("Precision of Decision Tree classifier",precision_score(y_test,dt_pred,average="weighted"))
print("Recall of Decision Tree classifier",recall_score(y_test,dt_pred,average="weighted"))
print("F1-score of Decision Tree classifier",f1_score(y_test,dt_pred,average="weighted"))
end_time = calculate_time()
execution_time =  end_time - start_time
print("Execution time:", execution_time, "seconds")


Accuracy score of decision tree classifier: 1.0
Precision of Decision Tree classifier 1.0
Recall of Decision Tree classifier 1.0
F1-score of Decision Tree classifier 1.0
Execution time: 0.09921407699584961 seconds
