<a href="https://colab.research.google.com/github/klaxman23/August_pratice/blob/main/Module_14_Case_Study_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score

In [None]:
# Load the letter dataset from the working directory. If the file cannot be
# read or parsed, fall back to a seeded synthetic stand-in so the remaining
# cells still execute.
# NOTE(review): any accuracy computed on the fallback describes random noise,
# not the real data - check which branch ran before interpreting results.
try:
    df = pd.read_csv("letterCG.data", header=None)
    print("Dataset loaded")

except (OSError, ValueError) as load_error:
    # OSError covers a missing or unreadable file. ValueError covers pandas'
    # EmptyDataError / ParserError as well as text-decoding failures. Any
    # other exception is an unrelated programming error and is allowed to
    # surface instead of being hidden behind dummy data.
    print("File issue detected → using dummy dataset")
    print(f"Reason: {type(load_error).__name__}: {load_error}")

    np.random.seed(42)  # fixed seed so the fallback is reproducible
    n_samples = 1300
    n_features = 16

    # Uniform [0, 1) features and a uniformly drawn capital letter per row.
    X_dummy = np.random.rand(n_samples, n_features)
    y_dummy = np.random.choice(list("ABCDEFGHIJKLMNOPQRSTUVWXYZ"), n_samples)

    # Feature columns are labelled 0..15; the label goes last as "target".
    df = pd.DataFrame(X_dummy)
    df["target"] = y_dummy

In [None]:
# A frame with fewer than two columns cannot be split into features and a
# label, so it is discarded and replaced by the seeded synthetic dataset.
# NOTE(review): a single-column frame may simply mean the file is not
# comma-separated - confirm the delimiter before relying on this fallback.
if len(df.columns) < 2:
    print("Invalid dataset format → rebuilding dataset")

    np.random.seed(42)
    alphabet = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
    dummy_rows, dummy_cols = 1300, 16

    X_dummy = np.random.rand(dummy_rows, dummy_cols)
    y_dummy = np.random.choice(alphabet, dummy_rows)

    # Features first, label appended as the final "target" column.
    df = pd.DataFrame(X_dummy).assign(target=y_dummy)

In [None]:
# Column-wise split: everything except the last column is treated as the
# feature matrix (cast to float), and the last column as the label vector.
feature_frame, label_series = df.iloc[:, :-1], df.iloc[:, -1]
X = feature_frame.to_numpy(dtype=float)
y = label_series.to_numpy()

In [None]:
# Labels stored as Python objects (e.g. strings) are mapped to integer codes;
# labels with any other dtype are left untouched.
labels_are_objects = y.dtype.kind == "O"
if labels_are_objects:
    y = LabelEncoder().fit_transform(y)

In [None]:
# Hold out a quarter of the rows for evaluation; the fixed seed keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.25)

In [None]:
# Test-set accuracy of AdaBoost built on depth-1 decision trees, one entry
# per ensemble size from 1 to 16 weak learners (a fresh model is fitted for
# every size).
# NOTE(review): newer scikit-learn releases deprecate the `algorithm`
# argument - confirm against the installed version.
depth1_acc = []

for n in range(1, 17):
    model = AdaBoostClassifier(
        estimator=DecisionTreeClassifier(max_depth=1, random_state=42),
        n_estimators=n,
        algorithm="SAMME",
        random_state=42,
    )

    # fit() returns the fitted estimator, so training and scoring can chain.
    preds = model.fit(X_train, y_train).predict(X_test)
    depth1_acc.append(accuracy_score(y_test, preds))

In [None]:
# Line chart of held-out accuracy against ensemble size for the depth-1 runs.
fig, ax = plt.subplots()
ax.plot(range(1, 17), depth1_acc, marker='o')
ax.set_xlabel("Number of Weak Learners")
ax.set_ylabel("Test Accuracy")
ax.set_title("AdaBoost Accuracy (Decision Tree depth = 1)")
ax.grid()
plt.show()

In [None]:
# Test-set accuracy of AdaBoost built on depth-2 decision trees, one entry
# per ensemble size from 1 to 16 weak learners (a fresh model is fitted for
# every size).
# NOTE(review): newer scikit-learn releases deprecate the `algorithm`
# argument - confirm against the installed version.
depth2_acc = []

for n in range(1, 17):
    model = AdaBoostClassifier(
        estimator=DecisionTreeClassifier(max_depth=2, random_state=42),
        n_estimators=n,
        algorithm="SAMME",
        random_state=42,
    )

    # fit() returns the fitted estimator, so training and scoring can chain.
    preds = model.fit(X_train, y_train).predict(X_test)
    depth2_acc.append(accuracy_score(y_test, preds))

In [None]:
# Line chart of held-out accuracy against ensemble size for the depth-2 runs.
fig, ax = plt.subplots()
ax.plot(range(1, 17), depth2_acc, marker='o')
ax.set_xlabel("Number of Weak Learners")
ax.set_ylabel("Test Accuracy")
ax.set_title("AdaBoost Accuracy (Decision Tree depth = 2)")
ax.grid()
plt.show()

In [None]:
# Side-by-side table of both accuracy curves, one row per ensemble size.
summary_columns = {
    "Weak Learners": range(1, 17),
    "Depth_1_Accuracy": depth1_acc,
    "Depth_2_Accuracy": depth2_acc,
}
results = pd.DataFrame(summary_columns)

# Bare last expression so the notebook renders the table.
results