# 🧠 Modul 6: Model Building & Hyperparameter Tuning

Notebook ini membahas cara membangun model Machine Learning dan melakukan tuning hyperparameter untuk meningkatkan akurasi.

In [None]:
# 📥 1. Import Library
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import warnings
warnings.filterwarnings('ignore')

In [None]:
# 📊 2. Load data and split
# Load the breast cancer dataset bundled with scikit-learn and wrap it in
# pandas objects so the feature names stay attached to the columns.
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Hold out 20% of the rows for testing. stratify=y keeps the class ratio of
# the full dataset in both splits, so the test metrics are not skewed by an
# unlucky shuffle; random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
# Notebook cell output: shapes of the train and test feature matrices.
X_train.shape, X_test.shape

## 🔍 3. Model Baseline: Logistic Regression

In [None]:
# Baseline model: logistic regression with default regularisation.
# The features are not scaled, so the default max_iter=100 typically stops the
# lbfgs solver before it converges on this dataset; the ConvergenceWarning that
# would report this is silenced by the global warnings filter in the import
# cell. A higher iteration cap lets the baseline actually converge.
logreg = LogisticRegression(max_iter=10000)
logreg.fit(X_train, y_train)

# Evaluate on the held-out test split.
y_pred = logreg.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

## 🌲 4. Model Random Forest + Grid Search

In [None]:
# Candidate hyperparameters: 3 x 3 x 2 = 18 combinations are evaluated.
param_grid = dict(
    n_estimators=[50, 100, 150],
    max_depth=[None, 5, 10],
    min_samples_split=[2, 5],
)

# Exhaustive search with 5-fold cross-validation, scored by accuracy and
# run in parallel on all available cores (n_jobs=-1).
rf = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(
    rf,
    param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

print("Best Params:", grid_search.best_params_)
# With the default refit=True, predict() delegates to the best estimator,
# which was refitted on the whole training split.
y_pred_rf = grid_search.predict(X_test)
rf_accuracy = accuracy_score(y_test, y_pred_rf)
print("Accuracy:", rf_accuracy)

## 🎲 5. Randomized Search (Logistic Regression)

In [None]:
# Search space: 5 values of C x 2 solvers = 10 combinations, of which the
# randomized search samples n_iter=5.
param_dist = {
    'C': [0.01, 0.1, 1.0, 10.0, 100.0],
    'solver': ['liblinear', 'lbfgs']
}

# The base estimator gets a higher max_iter: with the default of 100 the
# solvers can stop before converging on the unscaled features (the warning is
# silenced by the global warnings filter), which would make the comparison
# between candidates unreliable. scoring is spelled out to match the grid
# search cell; accuracy is also the default for classifiers.
rand_search = RandomizedSearchCV(
    LogisticRegression(max_iter=10000),
    param_distributions=param_dist,
    n_iter=5,
    cv=5,
    scoring='accuracy',
    random_state=42,
)
rand_search.fit(X_train, y_train)

print("Best Params:", rand_search.best_params_)
# Evaluate the best candidate (refitted on the full training split) on the
# held-out test split.
y_pred_rs = rand_search.best_estimator_.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred_rs))