# ***Iris Project***

In [21]:
# import libraries 
import pandas as pd
from sklearn.metrics import precision_score, accuracy_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB as NaiveBayes
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
import seaborn as sns
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import Pipeline

from sklearn.model_selection import GridSearchCV

In [4]:
df = sns.load_dataset('iris')
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [7]:
df['species'] = LabelEncoder().fit_transform(df['species'])
df['species'].value_counts()

species
0    50
1    50
2    50
Name: count, dtype: int64

In [8]:
X = df.drop('species', axis=1).values
y = df['species'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [9]:
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print("Logistic Regression")
print("Accuracy:", accuracy_score(y_test, y_pred))

Logistic Regression
Accuracy: 0.9866666666666667


In [10]:
model = KNeighborsClassifier(n_neighbors=3)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print("K-Nearest Neighbors")
print("Accuracy:", accuracy_score(y_test, y_pred))

K-Nearest Neighbors
Accuracy: 0.96


In [11]:
model = NaiveBayes()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print("Naive Bayes")
print("Accuracy:", accuracy_score(y_test, y_pred))

Naive Bayes
Accuracy: 0.9866666666666667


In [25]:

pipe = Pipeline([
    ("knn", KNeighborsClassifier()),
])
param_grid = {"knn__n_neighbors": [3, 5, 7, 9]}
models = GridSearchCV( estimator=pipe, param_grid=param_grid, cv=5, scoring='accuracy', refit=True)
models.fit(X_train, y_train)

y_pred = models.predict(X_test)

print("recall score: ", recall_score(y_test, y_pred, average='macro'))
print("accuracy score: ", accuracy_score(y_test, y_pred,))
print("precision score: ", precision_score(y_test, y_pred, average='macro'))

print("Best parameters found: ", models.best_params_)
print(models.cv_results_["mean_test_score"])

recall score:  0.9565217391304347
accuracy score:  0.96
precision score:  0.9615384615384616
Best parameters found:  {'knn__n_neighbors': 3}
[0.90666667 0.84       0.89333333 0.86666667]


In [19]:
models = {
    "LogisticRegression": {
        "model": LogisticRegression(max_iter=1000),
        "params": {
            "C": [0.01, 0.1, 1, 10]
        }
    },
    "KNN": {
        "model": KNeighborsClassifier(),
        "params": {
            "n_neighbors": [3, 5, 7, 9]
        }
    },
    "NaiveBayes": {
        "model": GaussianNB(),
        "params": {}   # No major hyperparameters
    }
}

best_models = {}

# 2Ô∏è‚É£ GridSearch for each model
for model_name, mp in models.items():
    grid = GridSearchCV(
        estimator=mp["model"],
        param_grid=mp["params"],
        cv=5,
        scoring="accuracy",
        refit=True
    )
    
    grid.fit(X_train, y_train)
    best_models[model_name] = grid

# 3Ô∏è‚É£ Evaluate best model from each GridSearch
for model_name, grid in best_models.items():
    y_pred = grid.predict(X_test)
    
    print(f"\nüìå {model_name}")
    print("Best Params:", grid.best_params_)
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Precision:", precision_score(y_test, y_pred, average="macro"))
    print("Recall:", recall_score(y_test, y_pred, average="macro"))
    print("best parameters:", grid.best_params_)
print("\n")
# print("All Best Models:", best_models)


üìå LogisticRegression
Best Params: {'C': 1}
Accuracy: 0.9866666666666667
Precision: 0.9861111111111112
Recall: 0.9855072463768115
best parameters: {'C': 1}

üìå KNN
Best Params: {'n_neighbors': 3}
Accuracy: 0.96
Precision: 0.9615384615384616
Recall: 0.9565217391304347
best parameters: {'n_neighbors': 3}

üìå NaiveBayes
Best Params: {}
Accuracy: 0.9866666666666667
Precision: 0.9861111111111112
Recall: 0.9855072463768115
best parameters: {}


