In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
import numpy as np

# --- Configuration: every value a reader might want to tune ---
DATA_PATH = "iris.csv"
TARGET_COL = "variety"
Z_THRESHOLD = 3      # rows with any |z-score| >= this are treated as outliers
TEST_SIZE = 0.2
SEED = 42

# Load dataset
raw_df = pd.read_csv(DATA_PATH)

# Basic cleaning: drop rows with missing values, then rows holding a negative
# value in any numeric column.
df = raw_df.dropna()
df = df[(df.select_dtypes(include='number') >= 0).all(axis=1)]

# Outlier removal (z-score method): keep only rows whose numeric columns are
# all within Z_THRESHOLD sample standard deviations of the column mean.
numeric = df.select_dtypes(include='number')
z_scores = np.abs((numeric - numeric.mean()) / numeric.std())
# A zero-variance column yields 0/0 = NaN z-scores; `NaN < threshold` is False,
# which would silently discard every row. Treat an undefined z-score as
# "not an outlier" instead.
within_range = (z_scores < Z_THRESHOLD) | z_scores.isna()
df = df[within_range.all(axis=1)]

# Features and target
X = df.drop(TARGET_COL, axis=1)
y = df[TARGET_COL]

# Train/test split -- done BEFORE scaling so that the held-out rows cannot
# influence the scaler's statistics (fitting the scaler on all rows leaks
# test-set information into training).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

# Scale features: learn mean/variance from the training rows only, then apply
# that same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Full scaled matrix (using the train-fitted scaler); not used for evaluation
# below, kept so any later code referring to X_scaled keeps working.
X_scaled = scaler.transform(X)

# Logistic Regression
lr = LogisticRegression()
lr.fit(X_train, y_train)
lr_acc = accuracy_score(y_test, lr.predict(X_test))

# Naive Bayes
nb = GaussianNB()
nb.fit(X_train, y_train)
nb_acc = accuracy_score(y_test, nb.predict(X_test))

# Output results
print(f"Logistic Regression Accuracy: {lr_acc * 100:.2f}%")
print(f"Naïve Bayes Accuracy: {nb_acc * 100:.2f}%")


Logistic Regression Accuracy: 100.00%
Naïve Bayes Accuracy: 100.00%
