# Supervised Learning
## Logistic Regression
## Decision Tree
## SVM

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

## Load Dataset

In [None]:
# Load the Iris dataset (CSV hosted in the selva86/datasets GitHub repo).
url = "https://raw.githubusercontent.com/selva86/datasets/master/Iris.csv"
df = pd.read_csv(url)

# Preview a random handful of rows. The fixed seed keeps the displayed
# sample identical on every Restart & Run All, so the saved output never
# drifts away from what the code actually produces.
df.sample(5, random_state=42)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [None]:
# Structural overview: overall dimensions first, then the per-column
# dtype / non-null summary that DataFrame.info() writes to stdout.
dataset_shape = df.shape
print("Shape:", dataset_shape)
df.info()

Shape: (150, 6)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             150 non-null    int64  
 1   SepalLengthCm  150 non-null    float64
 2   SepalWidthCm   150 non-null    float64
 3   PetalLengthCm  150 non-null    float64
 4   PetalWidthCm   150 non-null    float64
 5   Species        150 non-null    object 
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB


## Preprocessing

In [None]:
# Remove the row-identifier column, which is not a measurement of the flower.
# errors="ignore" turns the drop into a no-op when "Id" is already absent, so
# the cell is safe to re-run; rebinding `df` (instead of inplace=True) avoids
# mutating the originally loaded frame object behind the reader's back.
df = df.drop(columns=["Id"], errors="ignore")

# Features: every remaining column except the label. Target: the label column.
X = df.drop(columns=["Species"])
y = df["Species"]

# Quick look at the first rows of both pieces.
X.head(), y.head()

(   SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm
 0            5.1           3.5            1.4           0.2
 1            4.9           3.0            1.4           0.2
 2            4.7           3.2            1.3           0.2
 3            4.6           3.1            1.5           0.2
 4            5.0           3.6            1.4           0.2,
 0    Iris-setosa
 1    Iris-setosa
 2    Iris-setosa
 3    Iris-setosa
 4    Iris-setosa
 Name: Species, dtype: object)

# Train/Test Split

In [None]:
# Hold out 30% of the rows for evaluation. Stratifying on the label keeps
# the class proportions the same in both partitions, and the fixed seed
# makes the partition repeatable.
split_options = {"test_size": 0.3, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Report the dimensions of each feature partition.
for label, part in (("Train size:", X_train), ("Test size:", X_test)):
    print(label, part.shape)

Train size: (105, 4)
Test size: (45, 4)


# Feature Scaling (IMPORTANT for LR & SVM)

In [None]:
# Learn the scaling statistics from the training partition only, then apply
# that same fitted transform to both partitions so no test-set information
# leaks into the scaling step.
scaler = StandardScaler().fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


# Logistic Regression

In [None]:
# Train logistic regression on the standardized training features
# (fit() returns the estimator itself, so it can be chained).
log_reg = LogisticRegression(max_iter=200).fit(X_train_scaled, y_train)

# Predict on the standardized held-out set and express accuracy as a percentage.
y_pred_lr = log_reg.predict(X_test_scaled)
lr_acc = 100 * accuracy_score(y_test, y_pred_lr)

print("Logistic Regression Accuracy:", lr_acc)


Logistic Regression Accuracy: 91.11111111111111


# Decision Tree

In [None]:
# Train the decision tree on the raw (unscaled) features; this model is fed
# X_train / X_test directly rather than the standardized arrays.
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predict on the held-out set and express accuracy as a percentage.
y_pred_dt = dt.predict(X_test)
dt_acc = 100 * accuracy_score(y_test, y_pred_dt)

print("Decision Tree Accuracy:", dt_acc)

Decision Tree Accuracy: 93.33333333333333


# SVM (Support Vector Machine)

In [None]:
# Train an RBF-kernel support vector classifier on the standardized
# training features (fit() returns the estimator itself).
svm = SVC(kernel="rbf", random_state=42).fit(X_train_scaled, y_train)

# Predict on the standardized held-out set and express accuracy as a percentage.
y_pred_svm = svm.predict(X_test_scaled)
svm_acc = 100 * accuracy_score(y_test, y_pred_svm)

print("SVM Accuracy:", svm_acc)

SVM Accuracy: 93.33333333333333


# Comparison

In [None]:
# Gather each model's test accuracy (in percent) into a single table,
# one row per model, for side-by-side comparison.
model_names = ["Logistic Regression", "Decision Tree", "SVM"]
accuracies = [lr_acc, dt_acc, svm_acc]

results = pd.DataFrame(
    list(zip(model_names, accuracies)),
    columns=["Model", "Accuracy"],
)

results

Unnamed: 0,Model,Accuracy
0,Logistic Regression,91.111111
1,Decision Tree,93.333333
2,SVM,93.333333


# Detailed Classification Report

In [None]:
# Per-class precision / recall / F1 for the SVM predictions on the test set.
print("Classification Report for SVM:\n")
svm_report = classification_report(y_test, y_pred_svm)
print(svm_report)

Classification Report for SVM:

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        15
Iris-versicolor       0.88      0.93      0.90        15
 Iris-virginica       0.93      0.87      0.90        15

       accuracy                           0.93        45
      macro avg       0.93      0.93      0.93        45
   weighted avg       0.93      0.93      0.93        45

