# In [None]:
import pandas as pd
import logging
import os
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import (
    RandomForestClassifier,
    GradientBoostingClassifier,
    AdaBoostClassifier,
    VotingClassifier,
    BaggingClassifier,
    StackingClassifier
)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    confusion_matrix,
    classification_report
)

# File that receives every log record emitted through the root logger below.
LOG_PATH = r"C:\Users\Rasulbek907\Desktop\Hotel Booking Cancellation Prediction\Log\data_loader.log"

# Make sure the log directory exists before logging is pointed at the file.
_log_dir = os.path.dirname(LOG_PATH)
os.makedirs(_log_dir, exist_ok=True)

# Append INFO-and-above records to LOG_PATH as "timestamp - level - message".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    filemode='a',
    filename=LOG_PATH,
)

# Mark the start of the run in the log.
logging.info("Jarayon Boshlandi")

# Locations of the preprocessed train and test CSV files.
train_path = r"C:\Users\Rasulbek907\Desktop\Hotel Booking Cancellation Prediction\Data\Preprosessed\Train.csv"
test_path  = r"C:\Users\Rasulbek907\Desktop\Hotel Booking Cancellation Prediction\Data\Preprosessed\Test.csv"

# Load both splits (train first, then test).
train_df, test_df = (
    pd.read_csv(csv_path) for csv_path in (train_path, test_path)
)

# Record the size of each split for later inspection of the log.
logging.info(f"Train shape: {train_df.shape}")
logging.info(f"Test shape: {test_df.shape}")

# Separate the 'is_canceled' target column from the remaining feature columns.
y_train = train_df['is_canceled']
X_train = train_df.drop('is_canceled', axis=1)

y_test = test_df['is_canceled']
X_test = test_df.drop('is_canceled', axis=1)

logging.info("Target ajratildi")


# Individual learners; they are reused below as members of the ensembles.
logreg = LogisticRegression(max_iter=1000)
dtree = DecisionTreeClassifier(random_state=42)
rf = RandomForestClassifier(random_state=42, n_estimators=200)
knn = KNeighborsClassifier(n_neighbors=5)
# probability=True enables predict_proba on the SVM.
svc = SVC(probability=True, kernel='rbf')

# (display name, estimator) pairs in the form expected by `estimators=`.
base_models = list(zip(
    ("Logistic Regression", "Decision Tree", "Random Forest", "KNN", "SVM"),
    (logreg, dtree, rf, knn, svc),
))


# Soft voting: combines the base models through their predicted probabilities.
voting_clf = VotingClassifier(
    estimators=base_models,
    voting='soft'
)

# Bagging (base estimator = Decision Tree)
# The base estimator is passed positionally on purpose: scikit-learn 1.2
# renamed the keyword `base_estimator` to `estimator` and 1.4 removed the old
# name, so the keyword form raises TypeError on current releases. The first
# positional parameter is the base estimator on both sides of the rename.
bagging_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=50,
    random_state=42
)

# Boosting
# AdaBoost over depth-1 trees (decision stumps); the estimator is positional
# for the same scikit-learn compatibility reason as the bagging model above.
adaboost_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1),
    n_estimators=50,
    random_state=42
)

gradboost_clf = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.1,
    random_state=42
)

# Stacking Classifier
# Three of the base learners feed a logistic-regression meta-learner.
stacking_clf = StackingClassifier(
    estimators=[
        ('lr', logreg),
        ('dt', dtree),
        ('rf', rf)
    ],
    final_estimator=LogisticRegression()
)

# (display name, estimator) pairs consumed by the training/evaluation loop.
ensemble_models = [
    ("Voting Classifier", voting_clf),
    ("Bagging Classifier", bagging_clf),
    ("AdaBoost Classifier", adaboost_clf),
    ("GradientBoosting Classifier", gradboost_clf),
    ("Stacking Classifier", stacking_clf)
]


# One metrics dict per evaluated ensemble, in evaluation order.
results = []

for name, model in ensemble_models:
    logging.info(f"{name} train qilinmoqda")
    print(f"\n===== {name} =====")

    # Fit on the training split, then predict labels for the test split.
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Second predict_proba column is used as the score for ROC-AUC
    # (presumably the positive class, is_canceled == 1 -- verify the encoding).
    y_proba = None
    if hasattr(model, "predict_proba"):
        y_proba = model.predict_proba(X_test)[:, 1]

    # Label-based metrics first; ROC-AUC only when scores are available.
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred)
    rec = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    roc_auc = None
    if y_proba is not None:
        roc_auc = roc_auc_score(y_test, y_proba)

    results.append(dict(zip(
        ("Model", "Accuracy", "Precision", "Recall", "F1-score", "ROC-AUC"),
        (name, acc, prec, rec, f1, roc_auc),
    )))

    logging.info(f"{name} metrics - Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1: {f1:.4f}, ROC-AUC: {roc_auc}")

    print(classification_report(y_test, y_pred))

    # Draw the confusion matrix as an annotated heatmap.
    cm = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(5, 4))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
    plt.title(f"{name} - Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.show()

# Collect the per-model metrics into a table, best accuracy first.
results_df = pd.DataFrame(results)
results_df = results_df.sort_values(by="Accuracy", ascending=False)

# The banner's leading emoji had been stored as mojibake (UTF-8 bytes read as
# cp1252); it is restored to the intended character here.
print("\n📊 ENSEMBLE MODEL METRICS NATIJALARI")
# Print explicitly: a bare `results_df` expression is only rendered when it is
# the last statement of a notebook cell, and never when run as a script.
print(results_df)

# Mark the end of the run in the log.
logging.info("Jarayon Tugadi")