In [13]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Decision Tree & Random Forest with Bank Marketing Dataset\\n",
    "\\n",
    "Bank Marketing Dataset ကို အသုံးပြုပြီး ဖောက်သည်တစ်ယောက်က ဘဏ်ရဲ့ Term Deposit ကို ဝယ်မလား၊ မဝယ်ဘူးလားဆိုတာကို ကြိုတင်ခန့်မှန်းခြင်း။\\n",
    "\\n",
    "**Dataset**: https://www.kaggle.com/datasets/janiobachmann/bank-marketing-dataset\\n",
    "\\n",
    "**Goal**: ဖောက်သည်၏ အချက်အလက်များကိုအခြေခံပြီး Term Deposit စာရင်းသွင်းမှု (Yes/No) ကို ခန့်မှန်းခြင်း။"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": None,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1. Libraries ခေါ်ယူခြင်း\\n",
    "import numpy as np\\n",
    "import pandas as pd\\n",
    "import matplotlib.pyplot as plt\\n",
    "import seaborn as sns\\n",
    "from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score\\n",
    "from sklearn.preprocessing import LabelEncoder, StandardScaler\\n",
    "from sklearn.tree import DecisionTreeClassifier, plot_tree\\n",
    "from sklearn.ensemble import RandomForestClassifier\\n",
    "from sklearn.metrics import accuracy_score, classification_report, confusion_matrix\\n",
    "import warnings\\n",
    "warnings.filterwarnings('ignore')\\n",
    "\\n",
    "# 2. Data Load ပြုလုပ်ခြင်း\\n",
    "DATA_PATH = r'bank.csv'  # သင့်ဖိုင်လမ်းကြောင်းအတိုင်းပြောင်းပါ\\n",
    "df = pd.read_csv(DATA_PATH, sep=';')\\n",
    "\\n",
    "print('Data Loaded Successfully!')\\n",
    "print('Dataset shape:', df.shape)\\n",
    "display(df.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": None,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3. Data Overview & Preprocessing\\n",
    "print('\\n=== Data Info ===')\\n",
    "print(df.info())\\n",
    "\\n",
    "print('\\n=== Target Variable Distribution (y) ===')\\n",
    "print(df['y'].value_counts())\\n",
    "\\n",
    "# 4. Categorical Columns များကို Numerical ပြောင်းလဲခြင်း (Label Encoding)\\n",
    "categorical_cols = df.select_dtypes(include=['object']).columns.tolist()\\n",
    "categorical_cols.remove('y')  # Target ကိုဖယ်ထားပါ\\n",
    "\\n",
    "label_encoders = {}\\n",
    "for col in categorical_cols:\\n",
    "    le = LabelEncoder()\\n",
    "    df[col] = le.fit_transform(df[col])\\n",
    "    label_encoders[col] = le\\n",
    "\\n",
    "# Target variable ကိုလည်း 0/1 အဖြစ်ပြောင်းခြင်း\\n",
    "df['y'] = (df['y'] == 'yes').astype(int)\\n",
    "\\n",
    "print('\\nCategorical encoding completed.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": None,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 5. Feature Engineering (အချက်အလက်သစ်များ ဖန်တီးခြင်း)\\n",
    "df['campaign_ratio'] = df['campaign'] / (df['pdays'] + 1)\\n",
    "df['previous_contact'] = (df['previous'] > 0).astype(int)\\n",
    "df['balance_per_age'] = df['balance'] / (df['age'] + 1)\\n",
    "\\n",
    "# 6. Feature နဲ့ Target ခွဲခြားခြင်း\\n",
    "X = df.drop('y', axis=1)\\n",
    "y = df['y']\\n",
    "\\n",
    "feature_names = X.columns.tolist()\\n",
    "print('Features shape:', X.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": None,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 7. Train/Test Split (80% Train, 20% Test)\\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)\\n",
    "\\n",
    "print(f'Training set size: {X_train.shape}')\\n",
    "print(f'Test set size: {X_test.shape}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": None,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 8. Decision Tree Model တည်ဆောက်ခြင်း (Hyperparameter Tuning with GridSearchCV)\\n",
    "dt_params = {\\n",
    "    'criterion': ['gini', 'entropy'],\\n",
    "    'max_depth': [5, 10, 15, 20, None],\\n",
    "    'min_samples_split': [5, 10, 20],\\n",
    "    'min_samples_leaf': [2, 5, 10]\\n",
    "}\\n",
    "\\n",
    "dt_base = DecisionTreeClassifier(random_state=42)\\n",
    "dt_grid = GridSearchCV(dt_base, dt_params, cv=5, scoring='accuracy', n_jobs=-1)\\n",
    "dt_grid.fit(X_train, y_train)\\n",
    "\\n",
    "best_dt = dt_grid.best_estimator_\\n",
    "print('Best Decision Tree Parameters:', dt_grid.best_params_)\\n",
    "\\n",
    "# 9. ခန့်မှန်းချက်ထုတ်ခြင်း & Evaluation\\n",
    "y_pred_dt = best_dt.predict(X_test)\\n",
    "dt_acc = accuracy_score(y_test, y_pred_dt)\\n",
    "\\n",
    "print(f'\\nDecision Tree Test Accuracy: {dt_acc:.4f}')\\n",
    "print('\\nClassification Report:')\\n",
    "print(classification_report(y_test, y_pred_dt))\\n",
    "\\n",
    "# 10. Decision Tree Visualization\\n",
    "plt.figure(figsize=(20, 10))\\n",
    "plot_tree(best_dt, feature_names=feature_names, class_names=['No', 'Yes'], \\n",
    "          filled=True, rounded=True, fontsize=8, max_depth=3)\\n",
    "plt.title('Decision Tree (First 3 Levels)')\\n",
    "plt.tight_layout()\\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": None,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 11. Random Forest Model တည်ဆောက်ခြင်း (Hyperparameter Tuning)\\n",
    "rf_params = {\\n",
    "    'n_estimators': [100, 200, 300],\\n",
    "    'max_depth': [10, 15, 20],\\n",
    "    'min_samples_split': [5, 10],\\n",
    "    'min_samples_leaf': [2, 5],\\n",
    "    'max_features': ['sqrt', 'log2']\\n",
    "}\\n",
    "\\n",
    "rf_base = RandomForestClassifier(random_state=42, n_jobs=-1)\\n",
    "rf_grid = GridSearchCV(rf_base, rf_params, cv=5, scoring='accuracy', n_jobs=-1)\\n",
    "rf_grid.fit(X_train, y_train)\\n",
    "\\n",
    "best_rf = rf_grid.best_estimator_\\n",
    "print('Best Random Forest Parameters:', rf_grid.best_params_)\\n",
    "\\n",
    "# 12. ခန့်မှန်းချက်ထုတ်ခြင်း\\n",
    "y_pred_rf = best_rf.predict(X_test)\\n",
    "rf_acc = accuracy_score(y_test, y_pred_rf)\\n",
    "\\n",
    "print(f'\\nRandom Forest Test Accuracy: {rf_acc:.4f}')\\n",
    "print('\\nClassification Report:')\\n",
    "print(classification_report(y_test, y_pred_rf))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": None,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 13. Feature Importance (Random Forest မှ)\\n",
    "feature_importance = pd.DataFrame({\\n",
    "    'feature': feature_names,\\n",
    "    'importance': best_rf.feature_importances_\\n",
    "}).sort_values('importance', ascending=False)\\n",
    "\\n",
    "plt.figure(figsize=(10, 6))\\n",
    "plt.barh(feature_importance['feature'][:10], feature_importance['importance'][:10])\\n",
    "plt.xlabel('Importance')\\n",
    "plt.ylabel('Feature')\\n",
    "plt.title('Top 10 Feature Importance (Random Forest)')\\n",
    "plt.gca().invert_yaxis()\\n",
    "plt.tight_layout()\\n",
    "plt.show()\\n",
    "\\n",
    "print('\\nTop 10 Features:')\\n",
    "display(feature_importance.head(10))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}

{'cells': [{'cell_type': 'markdown',
   'metadata': {},
   'source': ['# Decision Tree & Random Forest with Bank Marketing Dataset\\n',
    '\\n',
    'Bank Marketing Dataset ကို အသုံးပြုပြီး ဖောက်သည်တစ်ယောက်က ဘဏ်ရဲ့ Term Deposit ကို ဝယ်မလား၊ မဝယ်ဘူးလားဆိုတာကို ကြိုတင်ခန့်မှန်းခြင်း။\\n',
    '\\n',
    '**Dataset**: https://www.kaggle.com/datasets/janiobachmann/bank-marketing-dataset\\n',
    '\\n',
    '**Goal**: ဖောက်သည်၏ အချက်အလက်များကိုအခြေခံပြီး Term Deposit စာရင်းသွင်းမှု (Yes/No) ကို ခန့်မှန်းခြင်း။']},
  {'cell_type': 'code',
   'execution_count': None,
   'metadata': {},
   'outputs': [],
   'source': ['# 1. Libraries ခေါ်ယူခြင်း\\n',
    'import numpy as np\\n',
    'import pandas as pd\\n',
    'import matplotlib.pyplot as plt\\n',
    'import seaborn as sns\\n',
    'from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score\\n',
    'from sklearn.preprocessing import LabelEncoder, StandardScaler\\n',
    'from sklearn.tree import DecisionTreeClass