In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the iris measurements. Column names are supplied here and the file is
# treated as having no header row.
dataset = pd.read_csv(
    "./iris.csv",
    header=None,
    names=["sw", "sl", "pw", "pl", "target"],
)

In [3]:
# Print the frame summary (row count, columns, non-null counts, dtypes)
# as a quick sanity check of the load.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   sw      150 non-null    float64
 1   sl      150 non-null    float64
 2   pw      150 non-null    float64
 3   pl      150 non-null    float64
 4   target  150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [7]:
# Restrict the data to the first 100 rows: every column but the last forms
# the feature matrix, the last column forms the label vector.
# NOTE(review): presumably the file is ordered by target so that this keeps
# only two classes -- confirm against iris.csv.
features = dataset.iloc[:100, :-1].values
labels = dataset.iloc[:100, -1].values

In [8]:
from sklearn.feature_selection import SelectKBest, chi2

# Keep the two feature columns ranked best by the chi2 score function.
# NOTE(review): the selector is fitted on all rows, before the train/test
# split further down, so held-out rows influence which columns are kept.
selector = SelectKBest(score_func=chi2, k=2)
features = selector.fit_transform(features, labels)
features.shape

(100, 2)

In [9]:
from sklearn.preprocessing import LabelEncoder

# Replace the object-typed target values with encoded class codes.
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

In [10]:
from sklearn.model_selection import train_test_split

In [11]:
# Hold out a test set using the default split proportions. random_state is
# fixed so that Restart & Run All reproduces the same partition, and therefore
# the same reports in every cell below.
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, random_state=42
)

In [12]:
# X_train

In [13]:
#Metrics
from sklearn.metrics import classification_report, confusion_matrix

class metrics_:
    """Print evaluation reports for one set of predictions.

    Parameters
    ----------
    y_test : array-like
        Ground-truth labels of the evaluated samples.
    y_pred : array-like
        Labels predicted for those same samples, in the same order.
    """

    def __init__(self, y_test, y_pred):
        self.y_test = y_test
        self.y_pred = y_pred

    def cr(self):
        """Print the per-class classification report."""
        # scikit-learn's signature is (y_true, y_pred). Passing the arguments
        # the other way round swaps precision with recall and derives the
        # support column from the predictions instead of the ground truth.
        print(classification_report(self.y_test, self.y_pred))

    def cm(self):
        """Print the confusion matrix (rows: true class, columns: predicted)."""
        # Same (y_true, y_pred) order; the reverse transposes the matrix.
        print(confusion_matrix(self.y_test, self.y_pred))

ALGORITHMS

In [14]:
# Logistic Regression
from sklearn.linear_model import LogisticRegression

# Build the model with default settings, then fit it on the training split.
# The trailing ';' keeps the cell from echoing the fitted estimator.
lr = LogisticRegression()
lr.fit(X_train, y_train);

In [15]:
# Predict labels for the held-out samples with the fitted logistic regression.
y_predlr = lr.predict(X_test)

In [16]:
# Map every prediction to 1 when it is at least 0.5, otherwise to 0.
# NOTE(review): lr.predict() presumably already yields the 0/1 class codes,
# which would make this step a no-op apart from producing a list -- confirm.
y_predlr_ = [int(val >= 0.5) for val in y_predlr]

In [17]:
# Print the classification report and confusion matrix for the
# logistic-regression predictions.
reports = metrics_(y_test=y_test, y_pred=y_predlr_)
reports.cr()
reports.cm()

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       1.00      1.00      1.00        11

    accuracy                           1.00        25
   macro avg       1.00      1.00      1.00        25
weighted avg       1.00      1.00      1.00        25

[[14  0]
 [ 0 11]]


In [18]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree fitted on the training split. random_state pins the
# estimator's internal randomness so a rerun produces the same tree.
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

In [19]:
# Predict labels for the held-out samples with the fitted decision tree.
ypredictdr = dt.predict(X_test)

In [20]:
# Print the classification report and confusion matrix for the
# decision-tree predictions.
reports = metrics_(y_test=y_test, y_pred=ypredictdr)
reports.cr()
reports.cm()

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       1.00      1.00      1.00        11

    accuracy                           1.00        25
   macro avg       1.00      1.00      1.00        25
weighted avg       1.00      1.00      1.00        25

[[14  0]
 [ 0 11]]


In [21]:
from sklearn.ensemble import RandomForestClassifier

# Random forest: fit on the training split, predict the held-out samples,
# then print the classification report and confusion matrix.
# random_state fixes the forest's random sampling so reruns are reproducible.
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
ypredrf = rf.predict(X_test)
reports = metrics_(y_test, ypredrf)
reports.cr()
reports.cm()

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       1.00      1.00      1.00        11

    accuracy                           1.00        25
   macro avg       1.00      1.00      1.00        25
weighted avg       1.00      1.00      1.00        25

[[14  0]
 [ 0 11]]


In [22]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier with default settings: fit on the training
# split, then predict the held-out samples.
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
ypredknn = knn.predict(X_test)

# Print the classification report and confusion matrix for these predictions.
reports = metrics_(y_test=y_test, y_pred=ypredknn)
reports.cr()
reports.cm()

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       1.00      1.00      1.00        11

    accuracy                           1.00        25
   macro avg       1.00      1.00      1.00        25
weighted avg       1.00      1.00      1.00        25

[[14  0]
 [ 0 11]]


In [23]:
from sklearn.svm import SVC

# Support-vector classifier with default settings: fit on the training split,
# then predict the held-out samples.
svm = SVC()
svm.fit(X_train, y_train)
ypredsvm = svm.predict(X_test)

# Print the classification report and confusion matrix for these predictions.
reports = metrics_(y_test=y_test, y_pred=ypredsvm)
reports.cr()
reports.cm()

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       1.00      1.00      1.00        11

    accuracy                           1.00        25
   macro avg       1.00      1.00      1.00        25
weighted avg       1.00      1.00      1.00        25

[[14  0]
 [ 0 11]]


In [24]:
from sklearn.ensemble import VotingClassifier, BaggingClassifier

# (name, estimator) pairs handed to the voting ensemble. The names are kept
# exactly as they were (including 'smv') because they become the ensemble's
# estimator keys. The two stochastic members get a fixed random_state so that
# refitting the ensemble is reproducible.
algos = [
    ('lg', LogisticRegression()),
    ('smv', SVC(probability=True, random_state=42)),
    ('knn', KNeighborsClassifier()),
    ('rf', RandomForestClassifier(random_state=42)),
]

In [25]:
# Combine the estimators listed in `algos` into one ensemble using soft voting.
vc = VotingClassifier(estimators=algos, voting='soft')

In [26]:
# Fit the voting ensemble on the training split; the cell echoes the fitted estimator.
vc.fit(X_train, y_train)

VotingClassifier(estimators=[('lg', LogisticRegression()),
                             ('smv', SVC(probability=True)),
                             ('knn', KNeighborsClassifier()),
                             ('rf', RandomForestClassifier())],
                 voting='soft')

In [27]:
# Predict labels for the held-out samples with the fitted voting ensemble.
vcpred = vc.predict(X_test)

In [28]:
# Print the classification report and confusion matrix for the
# voting-ensemble predictions.
reports = metrics_(y_test=y_test, y_pred=vcpred)
reports.cr()
reports.cm()

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       1.00      1.00      1.00        11

    accuracy                           1.00        25
   macro avg       1.00      1.00      1.00        25
weighted avg       1.00      1.00      1.00        25

[[14  0]
 [ 0 11]]


In [33]:
!pip install xgboost



In [34]:
from xgboost import XGBClassifier
from sklearn.ensemble import BaggingClassifier

In [36]:
# Bagging ensemble whose members are random forests.
# The member estimator is passed positionally because its keyword was renamed
# from `base_estimator` to `estimator` in newer scikit-learn releases; the
# positional form works with both. random_state makes the run reproducible.
bg = BaggingClassifier(RandomForestClassifier(), random_state=42).fit(X_train, y_train)
# Store these predictions under their own name rather than overwriting the
# SVM predictions held in `ypredsvm`.
ypredbg = bg.predict(X_test)
reports = metrics_(y_test, ypredbg)
reports.cr()
reports.cm()

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       1.00      1.00      1.00        11

    accuracy                           1.00        25
   macro avg       1.00      1.00      1.00        25
weighted avg       1.00      1.00      1.00        25

[[14  0]
 [ 0 11]]


In [37]:
# XGBoost classifier with default settings: fit on the training split, then
# predict the held-out samples.
xgb = XGBClassifier()
xgb.fit(X_train, y_train)
ypredxgb = xgb.predict(X_test)

# Print the classification report and confusion matrix for these predictions.
reports = metrics_(y_test=y_test, y_pred=ypredxgb)
reports.cr()
reports.cm()

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       1.00      1.00      1.00        11

    accuracy                           1.00        25
   macro avg       1.00      1.00      1.00        25
weighted avg       1.00      1.00      1.00        25

[[14  0]
 [ 0 11]]
