{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Ensemble Methods with Bank Marketing Dataset\n",
    "\n",
    "ဒီ Project မှာ Bank Marketing Dataset ကို အသုံးပြုပြီး ပိုမိုတိကျတဲ့ ခန့်မှန်းချက်တွေရဖို့ Ensemble Learning နည်းလမ်းတွေကို အသုံးပြုသွားမှာ ဖြစ်ပါတယ်။"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1. Import libraries\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score\n",
    "from sklearn.preprocessing import LabelEncoder, StandardScaler\n",
    "from sklearn.metrics import accuracy_score, classification_report, confusion_matrix\n",
    "\n",
    "# Ensemble models\n",
    "from sklearn.ensemble import RandomForestClassifier, VotingClassifier\n",
    "from xgboost import XGBClassifier\n",
    "\n",
    "# NOTE(review): this silences every warning category for the whole session,\n",
    "# including deprecation notices from the libraries above.\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "# 2. Load the data\n",
    "DATA_PATH = r'bank.csv'  # change this to match the location of your file\n",
    "# The file is parsed as semicolon-delimited text.\n",
    "df = pd.read_csv(DATA_PATH, sep=';')\n",
    "\n",
    "print('Data Loaded Successfully!')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3. Feature engineering (data preparation)\n",
    "# Work on a copy so the raw `df` from the load cell stays untouched and this\n",
    "# cell gives the same result every time it is re-run.\n",
    "df_features = df.copy()\n",
    "\n",
    "# Convert categorical (object-dtype) columns to integer codes.\n",
    "# The target column 'y' is excluded here and encoded separately below.\n",
    "categorical_cols = [\n",
    "    col for col in df_features.select_dtypes(include=['object']).columns\n",
    "    if col != 'y'\n",
    "]\n",
    "\n",
    "label_encoders = {}\n",
    "for col in categorical_cols:\n",
    "    le = LabelEncoder()\n",
    "    df_features[col] = le.fit_transform(df_features[col])\n",
    "    label_encoders[col] = le  # fitted encoder kept per column\n",
    "\n",
    "# Binary target: 1 where y == 'yes', 0 otherwise\n",
    "df_features['y'] = (df_features['y'] == 'yes').astype(int)\n",
    "\n",
    "# Create new features\n",
    "# pdays + 1 is 0 whenever pdays == -1, which would turn campaign_ratio into inf.\n",
    "# (-1 appears to be this dataset's marker for 'not previously contacted' -\n",
    "# confirm for your file.) Clipping at 0 keeps the denominator >= 1.\n",
    "df_features['campaign_ratio'] = df_features['campaign'] / (df_features['pdays'].clip(lower=0) + 1)\n",
    "df_features['previous_contact'] = (df_features['previous'] > 0).astype(int)\n",
    "df_features['balance_per_age'] = df_features['balance'] / (df_features['age'] + 1)\n",
    "\n",
    "X = df_features.drop('y', axis=1)\n",
    "y = df_features['y']\n",
    "feature_names = X.columns.tolist()\n",
    "\n",
    "# Train/test split: 20% held out, stratified on the target, fixed seed\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)\n",
    "\n",
    "print('Feature engineering completed.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 4. Ensemble Method - 1: Random Forest (Bagging)\n",
    "print('\\n' + '='*50)\n",
    "print('RANDOM FOREST (BAGGING)')\n",
    "print('='*50)\n",
    "\n",
    "# Search space: 3 * 3 * 2 * 2 * 2 = 72 combinations, each fitted on 5 folds\n",
    "rf_params = {\n",
    "    'n_estimators': [100, 200, 300],\n",
    "    'max_depth': [10, 15, 20],\n",
    "    'min_samples_split': [5, 10],\n",
    "    'min_samples_leaf': [2, 5],\n",
    "    'max_features': ['sqrt', 'log2']\n",
    "}\n",
    "\n",
    "rf_base = RandomForestClassifier(random_state=42, n_jobs=-1)\n",
    "rf_grid = GridSearchCV(rf_base, rf_params, cv=5, scoring='accuracy', n_jobs=-1)\n",
    "rf_grid.fit(X_train, y_train)\n",
    "\n",
    "# Keep the best model found by the search; later cells reuse it\n",
    "best_rf = rf_grid.best_estimator_\n",
    "print('Best Parameters:', rf_grid.best_params_)\n",
    "\n",
    "# Evaluate on the held-out test split\n",
    "y_pred_rf = best_rf.predict(X_test)\n",
    "rf_acc = accuracy_score(y_test, y_pred_rf)\n",
    "print(f'Test Accuracy: {rf_acc:.4f}')\n",
    "print('\\nClassification Report:')\n",
    "print(classification_report(y_test, y_pred_rf))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 5. Ensemble Method - 2: XGBoost (Boosting)\n",
    "print('\\n' + '='*50)\n",
    "print('XGBOOST (BOOSTING)')\n",
    "print('='*50)\n",
    "\n",
    "# Search space: 3 * 3 * 3 * 2 * 2 = 108 combinations, each fitted on 5 folds\n",
    "xgb_params = {\n",
    "    'n_estimators': [100, 200, 300],\n",
    "    'learning_rate': [0.01, 0.05, 0.1],\n",
    "    'max_depth': [3, 5, 7],\n",
    "    'subsample': [0.8, 1.0],\n",
    "    'colsample_bytree': [0.8, 1.0]\n",
    "}\n",
    "\n",
    "# use_label_encoder is deprecated in recent XGBoost releases and is not needed\n",
    "# here because the target is already encoded as 0/1, so it is no longer passed.\n",
    "xgb_base = XGBClassifier(random_state=42, eval_metric='logloss')\n",
    "xgb_grid = GridSearchCV(xgb_base, xgb_params, cv=5, scoring='accuracy', n_jobs=-1)\n",
    "xgb_grid.fit(X_train, y_train)\n",
    "\n",
    "# Keep the best model found by the search; later cells reuse it\n",
    "best_xgb = xgb_grid.best_estimator_\n",
    "print('Best Parameters:', xgb_grid.best_params_)\n",
    "\n",
    "# Evaluate on the held-out test split\n",
    "y_pred_xgb = best_xgb.predict(X_test)\n",
    "xgb_acc = accuracy_score(y_test, y_pred_xgb)\n",
    "print(f'Test Accuracy: {xgb_acc:.4f}')\n",
    "print('\\nClassification Report:')\n",
    "print(classification_report(y_test, y_pred_xgb))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 6. Ensemble Method - 3: Voting Classifier (Hybrid)\n",
    "print('\\n' + '='*50)\n",
    "print('VOTING CLASSIFIER (HYBRID)')\n",
    "print('='*50)\n",
    "\n",
    "# Combine the tuned Random Forest and XGBoost models from the previous cells.\n",
    "# voting='soft' is passed so the vote uses predicted class probabilities.\n",
    "voting_model = VotingClassifier(\n",
    "    estimators=[('rf', best_rf), ('xgb', best_xgb)],\n",
    "    voting='soft'\n",
    ")\n",
    "voting_model.fit(X_train, y_train)\n",
    "\n",
    "# Evaluate on the held-out test split\n",
    "y_pred_vote = voting_model.predict(X_test)\n",
    "vote_acc = accuracy_score(y_test, y_pred_vote)\n",
    "\n",
    "print(f'Test Accuracy: {vote_acc:.4f}')\n",
    "print('\\nClassification Report:')\n",
    "print(classification_report(y_test, y_pred_vote))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 7. Results comparison\n",
    "results = pd.DataFrame({\n",
    "    'Model': ['Random Forest (Bagging)', 'XGBoost (Boosting)', 'Voting (Hybrid)'],\n",
    "    'Accuracy': [rf_acc, xgb_acc, vote_acc]\n",
    "}).sort_values('Accuracy', ascending=False)\n",
    "\n",
    "print('\\n' + '='*50)\n",
    "print('MODEL COMPARISON')\n",
    "print('='*50)\n",
    "print(results)\n",
    "\n",
    "# Derive the x-axis window from the observed scores instead of a fixed range,\n",
    "# so no bar is clipped when an accuracy falls outside a hard-coded interval.\n",
    "x_pad = 0.02\n",
    "x_low = max(0.0, results['Accuracy'].min() - x_pad)\n",
    "x_high = min(1.0, results['Accuracy'].max() + x_pad)\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(10, 6))\n",
    "sns.barplot(data=results, x='Accuracy', y='Model', palette='viridis', ax=ax)\n",
    "ax.set_title('Model Accuracy Comparison')\n",
    "ax.set_xlim(x_low, x_high)\n",
    "ax.set_xlabel('Accuracy')\n",
    "fig.tight_layout()\n",
    "fig.savefig('model_comparison.png', dpi=300)\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}