<a href="https://colab.research.google.com/github/putriayualc/Machine-Learning/blob/main/Jobsheet-6/Tugas_Jobsheet6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Tugas 1
Terdapat dataset mushroom. Berdasarkan dataset tersebut, bandingkan performa antara algoritma Decision Tree dan Random Forest. Gunakan tuning hyperparameter untuk mendapatkan parameter dan akurasi yang terbaik.

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# 1. Load the dataset
data = pd.read_csv('/content/mushrooms.csv')

# 2. Preprocessing
# The 'class' column is the label; every other column is used as a feature.
X = data.drop('class', axis=1)
y = data['class']

# One-hot encode the categorical features
X = pd.get_dummies(X)

# Hold out 20% of the rows as the test set (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 3. Hyperparameter tuning
# Both estimators get a fixed random_state: without it the tie-breaking inside
# the trees (and the bootstrap sampling of the forest) changes between runs,
# so the selected parameters and the reported scores would not be reproducible.

# Grid for the Decision Tree
dt_param_grid = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10]
}

dt_grid_search = GridSearchCV(DecisionTreeClassifier(random_state=42), dt_param_grid, cv=5)
dt_grid_search.fit(X_train, y_train)

# Grid for the Random Forest
rf_param_grid = {
    'n_estimators': [50, 100, 200],
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10]
}

# 72 parameter combinations x 5 folds: run the fits in parallel on all cores.
rf_grid_search = GridSearchCV(RandomForestClassifier(random_state=42), rf_param_grid, cv=5, n_jobs=-1)
rf_grid_search.fit(X_train, y_train)

# 4. Model evaluation on the held-out test set
# Decision Tree
dt_best_model = dt_grid_search.best_estimator_
dt_predictions = dt_best_model.predict(X_test)
dt_accuracy = accuracy_score(y_test, dt_predictions)

# Random Forest
rf_best_model = rf_grid_search.best_estimator_
rf_predictions = rf_best_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_predictions)

# 5. Results
print(f'Best Decision Tree Accuracy: {dt_accuracy:.4f}')
print(f'Best Random Forest Accuracy: {rf_accuracy:.4f}')
# The task also asks for the best parameters, so report them alongside the accuracy.
print(f'Best Decision Tree Parameters: {dt_grid_search.best_params_}')
print(f'Best Random Forest Parameters: {rf_grid_search.best_params_}')


Best Decision Tree Accuracy: 1.0000
Best Random Forest Accuracy: 1.0000


## Tugas 2
Terdapat dataset mushroom. Berdasarkan dataset tersebut, bandingkan performa antara algoritma Decision Tree dan AdaBoost. Gunakan tuning hyperparameter untuk mendapatkan parameter dan akurasi yang terbaik.

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

# 1. Load the dataset
data = pd.read_csv('/content/mushrooms.csv')

# 2. Preprocessing: separate the features from the label
X = data.drop('class', axis=1)  # features
y = data['class']                # label

# 3. Train/test split
# Split the raw frame BEFORE encoding so the encoder below is fitted on the
# training rows only and never sees the test rows (avoids preprocessing leakage).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. Categorical encoding
# One-hot encode every column. handle_unknown='ignore' makes a category that
# appears only in the test rows encode as all zeros instead of raising an error.
encoder = ColumnTransformer(transformers=[
    ('cat', OneHotEncoder(handle_unknown='ignore'), X.columns)
])

X_train = encoder.fit_transform(X_train_raw)
X_test = encoder.transform(X_test_raw)

# 5. Hyperparameter tuning for the Decision Tree
# random_state is fixed so the selected parameters and scores are reproducible.
dt_param_grid = {
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 5, 10]
}
dt_grid_search = GridSearchCV(DecisionTreeClassifier(random_state=42), dt_param_grid, cv=5, scoring='accuracy')
dt_grid_search.fit(X_train, y_train)
dt_best_model = dt_grid_search.best_estimator_

# 6. Hyperparameter tuning for AdaBoost
# NOTE(review): recent scikit-learn releases deprecate the `algorithm` argument;
# drop it if the installed version warns about or rejects it.
ab_param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 1]
}
ab_grid_search = GridSearchCV(AdaBoostClassifier(algorithm='SAMME', random_state=42), ab_param_grid, cv=5, scoring='accuracy')
ab_grid_search.fit(X_train, y_train)
ab_best_model = ab_grid_search.best_estimator_

# 7. Model evaluation on the held-out test set
# Decision Tree
dt_y_pred = dt_best_model.predict(X_test)
dt_accuracy = accuracy_score(y_test, dt_y_pred)

# AdaBoost
ab_y_pred = ab_best_model.predict(X_test)
ab_accuracy = accuracy_score(y_test, ab_y_pred)

# 8. Results
print(f"Decision Tree Best Parameters: {dt_grid_search.best_params_}, Accuracy: {dt_accuracy:.4f}")
print(f"AdaBoost Best Parameters: {ab_grid_search.best_params_}, Accuracy: {ab_accuracy:.4f}")


Decision Tree Best Parameters: {'max_depth': None, 'min_samples_split': 2}, Accuracy: 1.0000
AdaBoost Best Parameters: {'learning_rate': 1, 'n_estimators': 100}, Accuracy: 1.0000


## Tugas 3

Dengan menggunakan dataset diabetes, buatlah ensemble voting dengan algoritma

1. Logistic Regression

2. SVM kernel polynomial

3. Decision Tree

Anda boleh melakukan eksplorasi dengan melakukan tuning hyperparameter

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# 1. Load the dataset
data = pd.read_csv('/content/diabetes.csv')

# 2. Preprocessing: separate the features from the label
X = data.drop('Outcome', axis=1)  # features
y = data['Outcome']                # label

# 3. Train/test split (20% held out, fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. Feature standardisation
# The scaler is fitted on the training rows only and then applied to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 5. Hyperparameter tuning for Logistic Regression
lr_param_grid = {
    'C': [0.001, 0.01, 0.1, 1, 10],
    'max_iter': [100, 200, 300]
}
lr_grid_search = GridSearchCV(LogisticRegression(), lr_param_grid, cv=5, scoring='accuracy')
lr_grid_search.fit(X_train_scaled, y_train)
lr_best_model = lr_grid_search.best_estimator_

# 6. Hyperparameter tuning for the SVM
svm_param_grid = {
    'C': [0.001, 0.01, 0.1, 1, 10],
    'degree': [2, 3, 4],  # degree of the polynomial kernel
    'gamma': ['scale', 'auto']
}
svm_grid_search = GridSearchCV(SVC(kernel='poly'), svm_param_grid, cv=5, scoring='accuracy')
svm_grid_search.fit(X_train_scaled, y_train)
svm_best_model = svm_grid_search.best_estimator_

# 7. Hyperparameter tuning for the Decision Tree
# Tuned on the same scaled matrix the voting ensemble is trained on below, so
# every ensemble member is selected and fitted on identical inputs.
# random_state is fixed so the chosen parameters are reproducible between runs.
dt_param_grid = {
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 5, 10]
}
dt_grid_search = GridSearchCV(DecisionTreeClassifier(random_state=42), dt_param_grid, cv=5, scoring='accuracy')
dt_grid_search.fit(X_train_scaled, y_train)
dt_best_model = dt_grid_search.best_estimator_

# 8. Voting ensemble built from the three tuned estimators (majority vote)
voting_model = VotingClassifier(estimators=[
    ('lr', lr_best_model),
    ('svm', svm_best_model),
    ('dt', dt_best_model)
], voting='hard')

# 9. Train the voting classifier
voting_model.fit(X_train_scaled, y_train)

# 10. Evaluate on the held-out test set
y_pred = voting_model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)

# 11. Results
print(f"Logistic Regression Best Parameters: {lr_grid_search.best_params_}")
print(f"SVM Best Parameters: {svm_grid_search.best_params_}")
print(f"Decision Tree Best Parameters: {dt_grid_search.best_params_}")
print(f"Voting Classifier Accuracy: {accuracy:.4f}")


Logistic Regression Best Parameters: {'C': 10, 'max_iter': 100}
SVM Best Parameters: {'C': 10, 'degree': 3, 'gamma': 'scale'}
Decision Tree Best Parameters: {'max_depth': 5, 'min_samples_split': 10}
Voting Classifier Accuracy: 0.7727
