In [1]:
'''Classify iris plants into three species using the following dataset: https://www.kaggle.com/datasets/uciml/iris
(Give a comparative analysis of any three classification techniques based on accuracy.)'''
# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

# 1. Load dataset
# The CSV is fetched over the network from the URL below on every run.
url = "https://raw.githubusercontent.com/uiuc-cse/data-fa14/gh-pages/data/iris.csv"
iris = pd.read_csv(url)

# Quick sanity check: show the column names and the first few records.
print("\nFirst 5 rows of dataset:\n")
print(iris.head(5))

# 2. Preprocessing
# Separate the target column from the remaining columns, which serve as features.
y = iris['species']                 # Labels
X = iris.drop(columns='species')    # Features

# The species labels are left as strings and passed to the classifiers as-is;
# no label-encoding step is applied here.

# Train-test split: 30% of the rows are held out for testing, and the seed is
# fixed so the same partition is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# 3. Classifiers
# Display name -> unfitted estimator. The three techniques are compared on the
# same train/test split further down.
models = {
    # max_iter is set to 200 explicitly rather than left at the library default.
    "Logistic Regression": LogisticRegression(max_iter=200),
    # Tree construction involves randomness (e.g. tie-breaking between equally
    # good splits), so the seed is pinned to make the comparison repeatable.
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "K-Nearest Neighbors (KNN)": KNeighborsClassifier()
}

# 4. Train and Evaluate
# Maps each model's display name to its accuracy on the held-out test set.
accuracies = {}

for name, model in models.items():
    # Fit on the training split, then predict the held-out rows in one chain.
    y_pred = model.fit(X_train, y_train).predict(X_test)
    # Keep the score both in the summary dict and in `acc` for the report line.
    accuracies[name] = acc = accuracy_score(y_test, y_pred)
    print(f"\nAccuracy using {name}: {acc:.4f}")

# 5. Comparative Analysis
# Report every model's accuracy as a percentage, in the order the models were evaluated.
print("\n--- Comparative Analysis ---")
for name in accuracies:
    acc = accuracies[name]
    print(f"{name}: {acc*100:.2f}%")



First 5 rows of dataset:

   sepal_length  sepal_width  petal_length  petal_width species
0           5.1          3.5           1.4          0.2  setosa
1           4.9          3.0           1.4          0.2  setosa
2           4.7          3.2           1.3          0.2  setosa
3           4.6          3.1           1.5          0.2  setosa
4           5.0          3.6           1.4          0.2  setosa

Accuracy using Logistic Regression: 1.0000

Accuracy using Decision Tree: 1.0000

Accuracy using K-Nearest Neighbors (KNN): 1.0000

--- Comparative Analysis ---
Logistic Regression: 100.00%
Decision Tree: 100.00%
K-Nearest Neighbors (KNN): 100.00%
