In [1]:
import numpy as np

# Scores recorded for each model (five values per model).
# NOTE(review): presumably one score per fold/run — confirm what the values measure.
model_RF = np.array([0.8206, 0.7823, 0.7982, 0.7840, 0.8640])
model_ET = np.array([0.7628, 0.7352, 0.7982, 0.7664, 0.8520])
model_ADA = np.array([0.8120, 0.7599, 0.7703, 0.8120, 0.8624])
model_GB = np.array([0.8080, 0.7094, 0.7599, 0.7664, 0.8607])
model_HGB = np.array([0.7674, 0.7094, 0.7933, 0.7664, 0.8799])
model_XGB = np.array([0.7887, 0.7094, 0.7950, 0.8470, 0.8712])
model_CB = np.array([0.7953, 0.7582, 0.7670, 0.7599, 0.8607])
model_STA = np.array([0.8399, 0.7823, 0.7895, 0.8152, 0.8832])

# Short label -> score array, listed in the order the report is printed
models = dict(
    RF=model_RF,
    ET=model_ET,
    ADA=model_ADA,
    GB=model_GB,
    HGB=model_HGB,
    XGB=model_XGB,
    CB=model_CB,
    STA=model_STA,
)

# Print the arithmetic mean of each model's scores, rounded to four decimals
for label, scores in models.items():
    print(f"Mean (rata-rata) of model {label}: {scores.mean():.4f}")


Mean (rata-rata) of model RF: 0.8098
Mean (rata-rata) of model ET: 0.7829
Mean (rata-rata) of model ADA: 0.8033
Mean (rata-rata) of model GB: 0.7809
Mean (rata-rata) of model HGB: 0.7833
Mean (rata-rata) of model XGB: 0.8023
Mean (rata-rata) of model CB: 0.7882
Mean (rata-rata) of model STA: 0.8220


In [1]:
# Core Libraries
import numpy as np
import pandas as pd
import os
import time
import tracemalloc
import joblib

# Machine Learning Models
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, HistGradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from catboost import CatBoostClassifier

# Metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score



In [12]:
import time
import tracemalloc
import joblib
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Evaluate the saved model on time.csv using the "bugs" column as the label:
# predict, save the predictions next to the input file, then print
# classification metrics, elapsed time and peak traced memory.

# Start timing and memory tracking.
# perf_counter() is a monotonic clock intended for measuring intervals.
start_time = time.perf_counter()
tracemalloc.start()

# Load model
# NOTE(review): joblib.load is pickle-based and can execute arbitrary code
# while loading; only point model_path at a trusted file.
model_path = "D:/AAAAKK SKRIPSWEETT/Bismillah Code Skripsi V2/model_stack3.pkl"
model = joblib.load(model_path)

# Load data (parsed here as semicolon-separated)
csv_path = "D:/AAAAKK SKRIPSWEETT/Bismillah Code Skripsi V2/Dataset/time.csv"
df = pd.read_csv(csv_path, delimiter=";")

# Required columns: the label ("bugs") plus the feature columns
selected_columns = [
    "bugs", "cbo", "dit", "fanin", "fanout", "lcom", "noc", "loc", "rfc", "wmc",
    "totalMethodsQty", "protectedMethodsQty", "publicMethodsQty", "privateMethodsQty",
    "finalFieldsQty", "protectedFieldsQty", "publicFieldsQty", "privateFieldsQty",
]

# Preprocessing: drop rows with a missing value in any required column, then
# split into features (every required column except "bugs") and true labels
df = df.dropna(subset=selected_columns)
X = df[[col for col in selected_columns if col != "bugs"]]
y_true = df["bugs"]

# Predict and keep the predictions alongside the input rows
y_pred = model.predict(X)
df["bug_predicted"] = y_pred

# Probabilities (if supported); column 1 is the score later passed to ROC AUC
# (presumably the positive class — confirm against model.classes_)
try:
    y_proba = model.predict_proba(X)[:, 1]
except AttributeError:
    y_proba = None

# Save prediction result as time_with_prediction.csv (same path otherwise)
output_path = csv_path.replace("time.csv", "time_with_prediction.csv")
df.to_csv(output_path, index=False)

# Metrics
print("📊 Evaluation Metrics:")
print(f"Accuracy:  {accuracy_score(y_true, y_pred):.4f}")
print(f"Precision: {precision_score(y_true, y_pred, zero_division=0):.4f}")
print(f"Recall:    {recall_score(y_true, y_pred, zero_division=0):.4f}")
print(f"F1 Score:  {f1_score(y_true, y_pred, zero_division=0):.4f}")
if y_proba is not None:
    print(f"ROC AUC:   {roc_auc_score(y_true, y_proba):.4f}")
else:
    print("ROC AUC:   Model does not support predict_proba.")

# Execution time: covers everything since start_time, i.e. loading,
# prediction, the CSV write and the metric computation above
execution_time = time.perf_counter() - start_time
print(f"Execution Time: {execution_time:.4f} seconds")

# Peak memory: report what tracemalloc recorded, then stop tracing so the
# allocation-tracking overhead does not carry over into later cells
peak_bytes = tracemalloc.get_traced_memory()[1]
tracemalloc.stop()
print(f"Peak Memory:    {peak_bytes / 1024 ** 2:.2f} MiB")


📊 Evaluation Metrics:
Accuracy:  0.6219
Precision: 0.1942
Recall:    0.7460
F1 Score:  0.3082
ROC AUC:   0.7106
Execution Time: 7.2941 seconds


In [None]:
import time
import tracemalloc
import joblib
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Evaluate the saved model on time.csv using the "class" column as the label:
# predict, save the predictions next to the input file, then print
# classification metrics, elapsed time and peak traced memory.

# Start timing and memory tracking.
# perf_counter() is a monotonic clock intended for measuring intervals.
start_time = time.perf_counter()
tracemalloc.start()

# Load model
# NOTE(review): joblib.load is pickle-based and can execute arbitrary code
# while loading; only point model_path at a trusted file.
model_path = "D:/AAAAKK SKRIPSWEETT/Bismillah Code Skripsi V2/model_stack3.pkl"
model = joblib.load(model_path)

# Load data (parsed here as comma-separated)
# NOTE(review): this same path is read with delimiter=";" elsewhere in the
# notebook — confirm which separator time.csv actually uses.
csv_path = "D:/AAAAKK SKRIPSWEETT/Bismillah Code Skripsi V2/Dataset/time.csv"
df = pd.read_csv(csv_path, delimiter=",")

# Required columns: the label ("class") plus the feature columns
selected_columns = [
    "class", "cbo", "dit", "fanin", "fanout", "lcom", "noc", "loc", "rfc", "wmc",
    "totalMethodsQty", "protectedMethodsQty", "publicMethodsQty", "privateMethodsQty",
    "finalFieldsQty", "protectedFieldsQty", "publicFieldsQty", "privateFieldsQty"
]

# Preprocessing: drop rows with a missing value in any required column, then
# split into features (every required column except "class") and true labels
df = df.dropna(subset=selected_columns)
X = df[[col for col in selected_columns if col != "class"]]
y_true = df["class"]

# Predict and keep the predictions alongside the input rows
y_pred = model.predict(X)
df["bug_predicted"] = y_pred

# Make the labels uniform (avoid a mix of strings and numbers) before scoring.
# The "bug_predicted" column saved below keeps the model's uncast output.
y_true = y_true.astype(int)
y_pred = y_pred.astype(int)


# Probabilities (if supported); column 1 is the score later passed to ROC AUC
# (presumably the positive class — confirm against model.classes_)
try:
    y_proba = model.predict_proba(X)[:, 1]
except AttributeError:
    y_proba = None

# Save prediction result as time_with_prediction.csv (same path otherwise)
output_path = csv_path.replace("time.csv", "time_with_prediction.csv")
df.to_csv(output_path, index=False)

# Metrics
print("📊 Evaluation Metrics:")
print(f"Accuracy:  {accuracy_score(y_true, y_pred):.4f}")
print(f"Precision: {precision_score(y_true, y_pred, zero_division=0):.4f}")
print(f"Recall:    {recall_score(y_true, y_pred, zero_division=0):.4f}")
print(f"F1 Score:  {f1_score(y_true, y_pred, zero_division=0):.4f}")
if y_proba is not None:
    print(f"ROC AUC:   {roc_auc_score(y_true, y_proba):.4f}")
else:
    print("ROC AUC:   Model does not support predict_proba.")

# Execution time: covers everything since start_time, i.e. loading,
# prediction, the CSV write and the metric computation above
execution_time = time.perf_counter() - start_time
print(f"Execution Time: {execution_time:.4f} seconds")

# Peak memory: report what tracemalloc recorded, then stop tracing so the
# allocation-tracking overhead does not carry over into later cells
peak_bytes = tracemalloc.get_traced_memory()[1]
tracemalloc.stop()
print(f"Peak Memory:    {peak_bytes / 1024 ** 2:.2f} MiB")
