<a href="https://colab.research.google.com/github/mdjabedmollah/ml-learning/blob/main/Fb_algorithm_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:

import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score


url = "https://raw.githubusercontent.com/mdjabedmollah/ml-learning/refs/heads/main/Iris.csv"
df = pd.read_csv(url)

print("First 5 rows of data:")
print(df.head())
print("\nColumns:", df.columns.tolist())

First 5 rows of data:
   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species
0   1            5.1           3.5            1.4           0.2  Iris-setosa
1   2            4.9           3.0            1.4           0.2  Iris-setosa
2   3            4.7           3.2            1.3           0.2  Iris-setosa
3   4            4.6           3.1            1.5           0.2  Iris-setosa
4   5            5.0           3.6            1.4           0.2  Iris-setosa

Columns: ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'Species']


In [8]:
X = df.iloc[:, :-1]
y = df.iloc[:, -1]


X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print("\nTrain size:", X_train.shape, "Test size:", X_test.shape)



Train size: (120, 5) Test size: (30, 5)


In [9]:
dt_clf = DecisionTreeClassifier(random_state=42)
dt_clf.fit(X_train, y_train)
y_pred_dt = dt_clf.predict(X_test)
base_dt_acc = accuracy_score(y_test, y_pred_dt)
print(f"\nBase Decision Tree Accuracy: {base_dt_acc:.4f}")


svm_pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("svm", SVC(random_state=42))
])

svm_pipeline.fit(X_train, y_train)
y_pred_svm = svm_pipeline.predict(X_test)
base_svm_acc = accuracy_score(y_test, y_pred_svm)
print(f"Base SVM Accuracy: {base_svm_acc:.4f}")



Base Decision Tree Accuracy: 1.0000
Base SVM Accuracy: 1.0000


In [10]:
dt_param_grid = {
    "max_depth": [None, 2, 3, 4, 5],
    "min_samples_split": [2, 3, 4, 5],
    "criterion": ["gini", "entropy"]
}

dt_grid = GridSearchCV(
    estimator=DecisionTreeClassifier(random_state=42),
    param_grid=dt_param_grid,
    scoring="accuracy",
    cv=5,
    n_jobs=-1
)

dt_grid.fit(X_train, y_train)

best_dt = dt_grid.best_estimator_
y_pred_dt_best = best_dt.predict(X_test)
tuned_dt_acc = accuracy_score(y_test, y_pred_dt_best)

print("\n===== Decision Tree Tuning Results =====")
print("Best parameters:", dt_grid.best_params_)
print(f"Best CV Accuracy: {dt_grid.best_score_:.4f}")
print(f"Test Accuracy (tuned DT): {tuned_dt_acc:.4f}")




===== Decision Tree Tuning Results =====
Best parameters: {'criterion': 'gini', 'max_depth': None, 'min_samples_split': 2}
Best CV Accuracy: 0.9917
Test Accuracy (tuned DT): 1.0000


In [11]:
svm_param_grid = {
    "svm__C": [0.1, 1, 10, 100],
    "svm__gamma": [0.01, 0.1, 1],
    "svm__kernel": ["rbf", "poly", "linear"]
}

svm_grid = GridSearchCV(
    estimator=Pipeline([
        ("scaler", StandardScaler()),
        ("svm", SVC(random_state=42))
    ]),
    param_grid=svm_param_grid,
    scoring="accuracy",
    cv=5,
    n_jobs=-1
)

svm_grid.fit(X_train, y_train)

best_svm = svm_grid.best_estimator_
y_pred_svm_best = best_svm.predict(X_test)
tuned_svm_acc = accuracy_score(y_test, y_pred_svm_best)

print("\n===== SVM Tuning Results =====")
print("Best parameters:", svm_grid.best_params_)
print(f"Best CV Accuracy: {svm_grid.best_score_:.4f}")
print(f"Test Accuracy (tuned SVM): {tuned_svm_acc:.4f}")


print("\n===== Final Accuracy Comparison =====")
print(f"Decision Tree  - Base:  {base_dt_acc:.4f}  | Tuned: {tuned_dt_acc:.4f}")
print(f"SVM            - Base:  {base_svm_acc:.4f}  | Tuned: {tuned_svm_acc:.4f}")


===== SVM Tuning Results =====
Best parameters: {'svm__C': 1, 'svm__gamma': 0.01, 'svm__kernel': 'linear'}
Best CV Accuracy: 1.0000
Test Accuracy (tuned SVM): 1.0000

===== Final Accuracy Comparison =====
Decision Tree  - Base:  1.0000  | Tuned: 1.0000
SVM            - Base:  1.0000  | Tuned: 1.0000
