<a href="https://colab.research.google.com/github/makhlufiaero338/tugas-machine-learning/blob/main/tugasperbaikan/Tugas_perbaikan_bab5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install necessary libraries (if not already installed)
!pip install pandas numpy scikit-learn



In [2]:
# Import libraries
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification, make_regression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, mean_squared_error, roc_auc_score
from sklearn.model_selection import GridSearchCV, StratifiedKFold, cross_val_score, train_test_split
from sklearn.svm import SVC

In [3]:
# Build two seeded synthetic datasets of 500 samples x 10 features each:
# a two-class classification problem and a regression problem.
X_class, y_class = make_classification(
    n_samples=500,
    n_features=10,
    n_informative=5,
    n_classes=2,
    random_state=42,
)
X_reg, y_reg = make_regression(
    n_samples=500,
    n_features=10,
    noise=0.1,
    random_state=42,
)

In [4]:
# Hold out 20% of each dataset as a test set; the fixed seed makes the
# partition reproducible across runs.
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(
    X_class,
    y_class,
    test_size=0.2,
    random_state=42,
)
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg,
    y_reg,
    test_size=0.2,
    random_state=42,
)

In [5]:
# ----------------------------------------
# Cross-Validation
# ----------------------------------------

# 5-fold cross-validation of a seeded random forest on the training split.
clf = RandomForestClassifier(random_state=42)
cv_scores = cross_val_score(clf, X_train_class, y_train_class, cv=5)
print("Cross-Validation Scores:", cv_scores)
print("Mean CV Score:", cv_scores.mean())

# The same procedure written out by hand with StratifiedKFold: refit the
# classifier on each training fold and score it on the matching held-out fold.
# (In the recorded output these scores match the cross_val_score results above.)
skf = StratifiedKFold(n_splits=5)
stratified_scores = [
    # fit() returns the estimator itself, so the score call can be chained.
    clf.fit(X_train_class[fit_rows], y_train_class[fit_rows]).score(
        X_train_class[eval_rows], y_train_class[eval_rows]
    )
    for fit_rows, eval_rows in skf.split(X_train_class, y_train_class)
]
print("\nStratified K-Fold Scores:", stratified_scores)

Cross-Validation Scores: [0.925  0.925  0.925  0.9    0.8875]
Mean CV Score: 0.9125

Stratified K-Fold Scores: [0.925, 0.925, 0.925, 0.9, 0.8875]


In [6]:
# ----------------------------------------
# Grid Search
# ----------------------------------------

# Random forest: exhaustive search over tree count and maximum depth.
# Note that this search is cross-validated too (3 folds per candidate).
param_grid = {
    'n_estimators': [10, 50, 100],
    'max_depth': [None, 10, 20],
}
grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    cv=3,
).fit(X_train_class, y_train_class)  # fit() returns the fitted search object
print("\nBest Parameters (Grid Search):", grid_search.best_params_)

# SVC: search over the regularisation strength C and the kernel type,
# using 5-fold cross-validation with accuracy requested explicitly as the score.
svc_param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
}
svc_grid_search = GridSearchCV(
    estimator=SVC(),
    param_grid=svc_param_grid,
    scoring='accuracy',
    cv=5,
).fit(X_train_class, y_train_class)
print("\nBest Parameters (SVC Grid Search with Cross-Validation):", svc_grid_search.best_params_)



Best Parameters (Grid Search): {'max_depth': None, 'n_estimators': 100}

Best Parameters (SVC Grid Search with Cross-Validation): {'C': 10, 'kernel': 'rbf'}


In [8]:
# ----------------------------------------
# Evaluation Metrics
# ----------------------------------------

# Binary classification: evaluate the best random forest found by the grid
# search on the held-out test split.
best_clf = grid_search.best_estimator_
y_pred = best_clf.predict(X_test_class)
print("\nClassification Report:")
print(classification_report(y_test_class, y_pred))
# ROC-AUC is computed from scores rather than hard labels: use the second
# predict_proba column (the probability assigned to label 1).
label1_proba = best_clf.predict_proba(X_test_class)[:, 1]
print("ROC-AUC Score:", roc_auc_score(y_test_class, label1_proba))

# Multiclass classification: build a separate seeded 3-class dataset, split it
# 80/20, fit a fresh random forest and report per-class metrics.
X_multi, y_multi = make_classification(
    n_samples=500,
    n_features=10,
    n_classes=3,
    n_informative=5,
    random_state=42,
)
X_train_multi, X_test_multi, y_train_multi, y_test_multi = train_test_split(
    X_multi,
    y_multi,
    test_size=0.2,
    random_state=42,
)
multi_clf = RandomForestClassifier(random_state=42).fit(X_train_multi, y_train_multi)
y_pred_multi = multi_clf.predict(X_test_multi)
print("\nClassification Report (Multiclass):")
print(classification_report(y_test_multi, y_pred_multi))




Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.96      0.92        50
           1       0.96      0.88      0.92        50

    accuracy                           0.92       100
   macro avg       0.92      0.92      0.92       100
weighted avg       0.92      0.92      0.92       100

ROC-AUC Score: 0.9674

Classification Report (Multiclass):
              precision    recall  f1-score   support

           0       0.78      0.93      0.85        30
           1       0.77      0.97      0.86        35
           2       0.85      0.49      0.62        35

    accuracy                           0.79       100
   macro avg       0.80      0.80      0.78       100
weighted avg       0.80      0.79      0.77       100



In [10]:
# ----------------------------------------
# Regression Metrics
# ----------------------------------------
# Fit a random forest regressor on the regression training split and report
# its mean squared error on the held-out test split.
# RandomForestRegressor comes from the notebook's import cell so that all
# dependencies are declared in one place.

# Use a regression model, as appropriate for the continuous target
reg_model = RandomForestRegressor(random_state=42)
reg_model.fit(X_train_reg, y_train_reg)
y_pred_reg = reg_model.predict(X_test_reg)

# Evaluate with mean squared error
print("\nMean Squared Error (Regression):", mean_squared_error(y_test_reg, y_pred_reg))



Mean Squared Error (Regression): 4046.179121828416


In [11]:
# ----------------------------------------
# Summary
# ----------------------------------------

# Recompute the two headline metrics from the fitted models and predictions
# produced by the earlier cells, and print them side by side.
print("\nSummary:")
summary_roc_auc = roc_auc_score(y_test_class, best_clf.predict_proba(X_test_class)[:, 1])
print(f"Binary Classification ROC-AUC Score: {summary_roc_auc}")
summary_mse = mean_squared_error(y_test_reg, y_pred_reg)
print(f"Regression Mean Squared Error: {summary_mse}")


Summary:
Binary Classification ROC-AUC Score: 0.9674
Regression Mean Squared Error: 4046.179121828416
