<a href="https://colab.research.google.com/github/nan-973/assignment_/blob/main/classification_models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Libraries

In [1]:
import pandas as pd # importing pandas
from google.colab import drive # to mount google drive
from sklearn.model_selection import train_test_split # to split data as train and test
from sklearn.preprocessing import LabelEncoder # for label encoding
from sklearn.linear_model import LogisticRegression # to create logistic regression model
from sklearn.neighbors import KNeighborsClassifier # to create k-NN model
from sklearn.naive_bayes import GaussianNB # to create Naive-Bayes model
from sklearn.svm import SVC # to create SVM model
from sklearn.tree import DecisionTreeClassifier # to create decision tree
from sklearn.ensemble import RandomForestClassifier # to create Random Forest
from sklearn.metrics import accuracy_score, precision_score, recall_score,confusion_matrix # to calculate accuracy, precision , recall and confusion matrix

# Loading dataset from google drive

In [2]:
# mounting the drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
df_iris=pd.read_csv('/content/drive/MyDrive/DSA_B6/data/iris.csv')
df_iris.head()
# iris dataset is an already preprocessed dataset

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [4]:
# label encoder is used only on target columns unless the features has two or binary values
le=LabelEncoder()
df_iris['species']=le.fit_transform(df_iris['species'])
df_iris

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


# Test and Train split

In [5]:
X = df_iris.drop(columns=['species'],axis=1) # x represents features
y = df_iris['species'] # y represents the target

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

# Logistic Regression

In [6]:
# building logistic regression model
log_reg_model = LogisticRegression()
log_reg_model.fit(X_train,y_train) # training the model

y_pred_log = log_reg_model.predict(X_test) # predicting the model

# evaluate the model
accuracy_log = accuracy_score(y_test,y_pred_log)
precision_log = precision_score(y_test,y_pred_log,average='weighted')
recall_log = recall_score(y_test,y_pred_log,average='weighted')

print("Accuracy: ",accuracy_log)
print("Precision: ",precision_log)
print("Recall: ",recall_log)

Accuracy:  1.0
Precision:  1.0
Recall:  1.0


In [7]:
# prediction using threshold
y_pred_threshold = log_reg_model.predict_proba(X_test)[:,1]
# for class 1
y_pred_threshold = (y_pred_threshold > 0.33).astype(int)

# evaluate the model
accuracy_thresh = accuracy_score(y_test,y_pred_threshold)
precision_thresh = precision_score(y_test,y_pred_threshold,average='weighted')
recall_thresh= recall_score(y_test,y_pred_threshold,average='weighted')

print("Threshold = 0.33")
print("Accuracy: ",accuracy_thresh)
print("Precision: ",precision_thresh)
print("Recall: ",recall_thresh)

Threshold = 0.33
Accuracy:  0.6333333333333333
Precision:  0.43666666666666665
Recall:  0.6333333333333333


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# k-NN Model

In [8]:
# building k-NN model
knn_model = KNeighborsClassifier(n_neighbors=3)
knn_model.fit(X_train,y_train) # training of the model

y_pred_knn=knn_model.predict(X_test) # predicting the model

# evaluate the model
accuracy_knn = accuracy_score(y_test,y_pred_knn)
precision_knn = precision_score(y_test,y_pred_knn,average='weighted')
recall_knn = recall_score(y_test,y_pred_knn,average='weighted')

print("Accuracy: ",accuracy_knn)
print("Precision: ",precision_knn)
print("Recall: ",recall_knn)

Accuracy:  1.0
Precision:  1.0
Recall:  1.0


#  Naive-Bayes classification

In [9]:
# Bulding Naive-Bayes model
nb_model = GaussianNB()
nb_model.fit(X_train,y_train) # training of the model

y_pred_nb=nb_model.predict(X_test) # prediction

# evaulation of model
accuracy_nb = accuracy_score(y_test,y_pred_nb)
precision_nb = precision_score(y_test,y_pred_nb,average='weighted')
recall_nb = recall_score(y_test,y_pred_nb,average='weighted')

print("Accuracy: ",accuracy_nb)
print("Precision: ",precision_nb)
print("Recall: ",recall_nb)

Accuracy:  1.0
Precision:  1.0
Recall:  1.0


# SVM classifier

In [10]:
# building SVM Model
svm_model = SVC(kernel='linear')
svm_model.fit(X_train,y_train) # training of the model

y_pred_svm=svm_model.predict(X_test) # prediction of model

# evaluation of model
accuracy_svm = accuracy_score(y_test,y_pred_svm)
precision_svm = precision_score(y_test,y_pred_svm,average='weighted')
recall_svm = recall_score(y_test,y_pred_svm,average='weighted')
confusion_matrix_svm=confusion_matrix(y_test,y_pred_svm)

print("Accuracy: ",accuracy_svm)
print("Precision: ",precision_svm)
print("Recall: ",recall_svm)
print("Confusion Matrix: \n",confusion_matrix_svm)

Accuracy:  1.0
Precision:  1.0
Recall:  1.0
Confusion Matrix: 
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


# Decision Tree

In [11]:
# building decision tree model
dt_model = DecisionTreeClassifier(criterion='gini')
dt_model.fit(X_train,y_train) # training of the model

y_pred_dt=dt_model.predict(X_test) # prediction of model

# evaluating the model
accuracy_dt = accuracy_score(y_test,y_pred_dt)
precision_dt = precision_score(y_test,y_pred_dt,average='weighted')
recall_dt = recall_score(y_test,y_pred_dt,average='weighted')

print("Accuracy: ",accuracy_dt)
print("Precision: ",precision_dt)
print("Recall: ",recall_dt)

Accuracy:  1.0
Precision:  1.0
Recall:  1.0


# Random Forest

In [12]:
# building random forest model
rf_model = RandomForestClassifier()
rf_model.fit(X_train,y_train) # training of the model

y_pred_rf=rf_model.predict(X_test) # prediction of model

# evaluating the model
accuracy_rf = accuracy_score(y_test,y_pred_rf)
precision_rf = precision_score(y_test,y_pred_rf,average='weighted')
recall_rf = recall_score(y_test,y_pred_rf,average='weighted')

print("Accuracy: ",accuracy_rf)
print("Precision: ",precision_rf)
print("Recall: ",recall_rf)

Accuracy:  1.0
Precision:  1.0
Recall:  1.0


# Comparison Between Models

In [13]:
data = {
    'Model': ['Logistic Regression', 'Logistic Regression with Threshold',
              'k-Nearest Neighbour Classification', 'Naive-Bayes Classification','SVM Classification','Decision Trees','Random Forest'],
    'Accuracy': [accuracy_log,accuracy_thresh,accuracy_knn,accuracy_nb,accuracy_svm,accuracy_dt,accuracy_rf],
    'Recall': [recall_log,recall_thresh,recall_knn,recall_nb,recall_svm,recall_dt,recall_rf],
    'Precision': [precision_log,precision_thresh,precision_knn,precision_nb,precision_svm,precision_dt,precision_rf]
}

df = pd.DataFrame(data)

In [14]:
print("\t \t \t \t Model Comparison \n")
print(df)

	 	 	 	 Model Comparison 

                                Model  Accuracy    Recall  Precision
0                 Logistic Regression  1.000000  1.000000   1.000000
1  Logistic Regression with Threshold  0.633333  0.633333   0.436667
2  k-Nearest Neighbour Classification  1.000000  1.000000   1.000000
3          Naive-Bayes Classification  1.000000  1.000000   1.000000
4                  SVM Classification  1.000000  1.000000   1.000000
5                      Decision Trees  1.000000  1.000000   1.000000
6                       Random Forest  1.000000  1.000000   1.000000
