In [None]:
import time
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.datasets import load_iris

# Example hyper-parameter search spaces, keyed by a short model label.
# Each inner mapping has the shape handed to GridSearchCV as `param_grid`:
# parameter name -> list of candidate values.
param_grids = {
    'KNN': dict(
        n_neighbors=[3, 5, 7],
        weights=['uniform', 'distance'],
        algorithm=['auto', 'ball_tree', 'kd_tree', 'brute'],
    ),
    'SVM': dict(
        C=[0.1, 1, 10],
        kernel=['linear', 'rbf'],
        gamma=[0.001, 0.01, 0.1],
    ),
    'DecisionTree': dict(
        max_depth=[5, 10, 15],
        min_samples_split=[2, 5],
        criterion=['gini', 'entropy'],
    ),
    'RandomForest': dict(
        n_estimators=[50, 100, 150],
        max_depth=[5, 10, 15],
        min_samples_split=[2, 5],
    ),
    'Boosting': dict(
        n_estimators=[100, 150],      # number of trees
        learning_rate=[0.05, 0.1],    # learning rate
        max_depth=[5, 7],             # maximum depth of each tree
        subsample=[0.8],              # fraction of the data used to train each tree
        min_samples_split=[2, 5],     # minimum samples required to split a node
        min_samples_leaf=[1, 2],      # minimum samples at a leaf node
        max_features=[None, 'sqrt'],  # number of features considered at each split
    ),
}

# Count the hyper-parameter combinations described by a parameter grid
def calculate_estimations(param_grid):
    """Return the number of parameter combinations in ``param_grid``.

    Parameters
    ----------
    param_grid : mapping or sequence of mappings
        Either a single grid (parameter name -> sequence of candidate
        values) or a list of such grids.

    Returns
    -------
    int
        For a single grid, the product of the number of candidates of every
        parameter (1 for an empty grid). For a list of grids, the sum of the
        per-grid counts, because each grid in the list is enumerated on its
        own rather than crossed with the others.
    """
    # Local import so the function does not rely on a notebook-level import.
    from collections.abc import Mapping

    # Normalise the single-grid form to a one-element list of grids.
    grids = [param_grid] if isinstance(param_grid, Mapping) else param_grid

    total_estimations = 0
    for grid in grids:
        combinations = 1
        for values in grid.values():
            combinations *= len(values)  # one factor per parameter
        total_estimations += combinations
    return total_estimations

# Measure the average time spent per parameter combination
def estimate_time_for_model(model, param_grid, X_train, y_train):
    """Time one grid search and return the seconds spent per combination.

    A ``GridSearchCV`` with ``cv=2`` and ``n_jobs=1`` is fitted on the given
    data; the elapsed time of that ``fit`` call is divided by the number of
    parameter combinations the search reports in ``cv_results_['params']``.

    Parameters
    ----------
    model : estimator
        Passed to ``GridSearchCV`` as ``estimator``.
    param_grid : mapping or sequence of mappings
        Passed to ``GridSearchCV`` as ``param_grid``.
    X_train, y_train
        Data handed to ``GridSearchCV.fit``.

    Returns
    -------
    float
        Elapsed seconds of the whole ``fit`` call divided by the number of
        parameter combinations. The measurement covers everything ``fit``
        does, not only the individual cross-validation fits.
    """
    grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=2, n_jobs=1, verbose=0)

    # perf_counter() is the clock intended for measuring intervals: unlike
    # time.time() it is not affected by system clock adjustments.
    start_time = time.perf_counter()
    grid_search.fit(X_train, y_train)
    elapsed = time.perf_counter() - start_time

    n_candidates = len(grid_search.cv_results_['params'])
    time_per_estimation = elapsed / n_candidates
    return time_per_estimation

# Load a small example dataset
X, y = load_iris(return_X_y=True)

# Estimator class for every label used as a key of `param_grids`.
# A lookup table replaces the former if/elif chain, which left `model` as
# None for an unrecognised label and only failed later inside the search.
model_factories = {
    'KNN': KNeighborsClassifier,
    'SVM': SVC,
    'DecisionTree': DecisionTreeClassifier,
    'RandomForest': RandomForestClassifier,
    'Boosting': GradientBoostingClassifier,
}

# Number of parameter combinations and measured search time for each model
estimations = {}
time_estimations = {}
for model_name, param_grid in param_grids.items():
    if model_name not in model_factories:
        raise KeyError(f"No estimator registered for parameter grid '{model_name}'")
    model = model_factories[model_name]()

    estimations[model_name] = calculate_estimations(param_grid)

    # Average time per parameter combination, measured by running the search once
    time_per_estimation = estimate_time_for_model(model, param_grid, X, y)

    # Total time = number of combinations * time per combination
    total_time_estimate = estimations[model_name] * time_per_estimation
    time_estimations[model_name] = total_time_estimate

# Report the results
for model_name in estimations:
    print(f"\nModel {model_name} membutuhkan {estimations[model_name]} estimasi.")
    print(f"Estimasi waktu total untuk {model_name}: {time_estimations[model_name]:.2f} detik.")


In [None]:
# Standardise the selected columns and fit a PCA with all components kept,
# so the explained variance of every component can be inspected.
dataclean = dm.loc[:, dm.columns.isin(num_feat)]
datan_scaled = StandardScaler().fit_transform(dataclean)
pca = PCA()
pca.fit(datan_scaled)
print(len(pca.explained_variance_ratio_))   # length of the explained-variance-ratio vector
print(len(pca.explained_variance_))         # length of the explained-variance vector

# Smallest number of components whose cumulative explained variance reaches 95 %.
# argmax returns the 0-based index of the first True, hence the + 1.
cumsum = np.cumsum(pca.explained_variance_ratio_)
num_features = np.argmax(cumsum >= 0.95) + 1

# Plot the cumulative explained variance (in percent) against the component count.
# The x values are 1-based and sized from the fitted PCA, so they match the
# "Number of components" axis label and work for any number of input columns.
component_counts = np.arange(1, len(cumsum) + 1)
plt.figure(figsize=(15, 5))
plt.subplot(1, 2, 1)
plt.plot(component_counts, cumsum * 100, marker='o')
# Horizontal line at the 95 % threshold, vertical line at the component count
# that first reaches it (same value as the printed `num_features`).
plt.axhline(y=95, color='r', linestyle='--', label='95% Explained Variance')
plt.axvline(x=num_features, color='g', linestyle='--', label='Number of Components for 95%')
plt.xlabel('Number of components')
plt.ylabel('Explained Variance Ratio - Cummulative')
plt.legend()
plt.grid(True)
plt.tight_layout()

print(f'Jumlah PC : {num_features}')

# Cumulative explained variance reached with `num_features` components.
# NOTE(review): this used a fixed index (4) before; confirm that the value at
# the selected component count is what should be reported here.
print(f'Threshold : {cumsum[num_features - 1]}')
cumsum

In [None]:
# Per-model scores and predictions on the training split
results_accuracy_train, results_recall_train, results_f1score_train = {}, {}, {}
predictions_train = {}

# Per-model scores and predictions on the test split
results_accuracy, results_recall, results_f1score = {}, {}, {}
predictions = {}

# Fit every pipeline, then predict and score it on both splits
for model_name, pipeline in pipelines.items():
    pipeline.fit(X_train, y_train)

    y_pred_train = pipeline.predict(X_train)
    y_pred_test = pipeline.predict(X_test)

    # Accuracy, recall and F1 on the training split, then on the test split
    accuracy_train, recall_train, f1_train = (
        metric(y_train, y_pred_train)
        for metric in (accuracy_score, recall_score, f1_score)
    )
    accuracy, recall, f1 = (
        metric(y_test, y_pred_test)
        for metric in (accuracy_score, recall_score, f1_score)
    )

    # Record the training-split results under the model's name
    results_accuracy_train[model_name] = accuracy_train
    results_recall_train[model_name] = recall_train
    results_f1score_train[model_name] = f1_train

    # Record the test-split results under the model's name
    results_accuracy[model_name] = accuracy
    results_recall[model_name] = recall
    results_f1score[model_name] = f1

    # Keep the raw predictions of both splits
    predictions_train[model_name] = y_pred_train
    predictions[model_name] = y_pred_test

# Report the training-split results
print('=== Train ===')
print(f'Accuracy :{results_accuracy_train}')
print(f'Recall :{results_recall_train}')
print(f'F1Score :{results_f1score_train}')
print(predictions_train)
print()
# Report the test-split results
print('=== Test ===')
print(f'Accuracy :{results_accuracy}')
print(f'Recall :{results_recall}')
print(f'F1Score :{results_f1score}')
print(predictions)