### Decision Tree Example

In [1]:
%%capture
!pip install -r requirements.txt

In [3]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np

# Read the headerless CSV into a NumPy array; columns 0-1 serve as the
# features and column 2 as the label.
data = np.asarray(pd.read_csv('data.csv', header=None))
X, y = data[:, :2], data[:, 2]

# Tree limited to depth 5 with at least 10 samples per leaf (the earlier
# variant used DecisionTreeClassifier() with default settings).
model = DecisionTreeClassifier(min_samples_leaf=10, max_depth=5).fit(X, y)

# Predictions are made on the same rows used for fitting, so the printed
# value is the training accuracy.
y_pred = model.predict(X)
acc = accuracy_score(y_true=y, y_pred=y_pred)
print(acc)

0.8333333333333334


### Random Forest Example

In [4]:
# Random Forest Classifier Example
# Random Forest ဆိုတာ Decision Tree အများကြီးကို ပေါင်းစပ်ထားတဲ့ Ensemble Method တစ်ခုပါ
# Tree တစ်ခုချင်းစီက vote ပေးပြီး majority voting နဲ့ final prediction ထုတ်ပါတယ်

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Load data: read the headerless CSV file into a NumPy array.
data = np.asarray(pd.read_csv('data.csv', header=None))
X, y = data[:, :2], data[:, 2]  # features = first two columns, labels = third column

# Train/test split: 80% of the rows for training, 20% held out for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Build and train the random forest:
#   n_estimators     - number of trees (100)
#   max_depth        - maximum depth of each individual tree (5 levels)
#   min_samples_leaf - every leaf node must hold at least 5 samples
#   random_state     - fixed seed for reproducibility
rf_model = RandomForestClassifier(
    n_estimators=100, max_depth=5, min_samples_leaf=5, random_state=42
).fit(X_train, y_train)

# Predict on the held-out rows and report the accuracy.
y_pred = rf_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Random Forest Accuracy: {accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Feature importances: shows which feature matters more to the forest.
print("Feature Importances:", rf_model.feature_importances_)

Random Forest Accuracy: 0.8500

Classification Report:
              precision    recall  f1-score   support

         0.0       1.00      0.67      0.80         9
         1.0       0.79      1.00      0.88        11

    accuracy                           0.85        20
   macro avg       0.89      0.83      0.84        20
weighted avg       0.88      0.85      0.84        20

Feature Importances: [0.46163134 0.53836866]


### Ensemble Methods Example

In [None]:
# Ensemble Methods Example
# Ensemble Method ဆိုတာ model အများကြီးကို ပေါင်းစပ်ပြီး ပိုကောင်းတဲ့ prediction ရအောင် လုပ်တာပါ
# အဓိက နည်းလမ်း ၃ မျိုးရှိပါတယ်: Bagging, Boosting, Voting

import numpy as np
import pandas as pd
from sklearn.ensemble import (
    RandomForestClassifier,      # Bagging method
    GradientBoostingClassifier,  # Boosting method
    AdaBoostClassifier,          # Boosting method
    VotingClassifier             # Voting method
)
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score

# Load the headerless CSV; the first two columns are used as features and the
# third column as the label.
data = np.asarray(pd.read_csv('data.csv', header=None))
X, y = data[:, :2], data[:, 2]

# Keep 20% of the rows aside as a test set (seeded so the split is repeatable).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# ------------------------------------------------------------------
# 1. BAGGING (Bootstrap Aggregating)
# ------------------------------------------------------------------
# Models are trained on random samples of the data.
# Random Forest is a well-known implementation of bagging.

print("=" * 50, "1. BAGGING - Random Forest", "=" * 50, sep="\n")

# Fit a seeded 50-tree forest on the training split, then score the test split.
bagging_model = RandomForestClassifier(random_state=42, n_estimators=50).fit(X_train, y_train)
bagging_pred = bagging_model.predict(X_test)
print(f"Random Forest (Bagging) Accuracy: {accuracy_score(y_test, bagging_pred):.4f}")

# ------------------------------------------------------------------
# 2. BOOSTING
# ------------------------------------------------------------------
# Models are trained sequentially, one after another;
# each new model corrects the errors of the previous one.

print("\n" + "=" * 50, "2. BOOSTING Methods", "=" * 50, sep="\n")

# AdaBoost (Adaptive Boosting): gives misclassified samples more weight so
# later learners focus on them.
ada_model = AdaBoostClassifier(random_state=42, n_estimators=50).fit(X_train, y_train)
ada_pred = ada_model.predict(X_test)
print(f"AdaBoost Accuracy: {accuracy_score(y_test, ada_pred):.4f}")

# Gradient Boosting: minimises the error using gradient descent.
gb_model = GradientBoostingClassifier(
    n_estimators=50,
    learning_rate=0.1,
    random_state=42,
).fit(X_train, y_train)
gb_pred = gb_model.predict(X_test)
print(f"Gradient Boosting Accuracy: {accuracy_score(y_test, gb_pred):.4f}")

# ==========================================
# 3. VOTING Example
# ==========================================
# Several different kinds of model each cast a vote on the prediction.

print("\n" + "=" * 50)
print("3. VOTING Classifier")
print("=" * 50)

# Hard voting: the class chosen by the majority of the estimators wins.
# The tree-based estimators are seeded with random_state=42, like the other
# models in this cell, so the reported accuracy is the same on every run.
voting_model = VotingClassifier(
    estimators=[
        ('dt', DecisionTreeClassifier(max_depth=5, random_state=42)),       # Decision Tree
        ('rf', RandomForestClassifier(n_estimators=30, random_state=42)),  # Random Forest
        ('lr', LogisticRegression()),                                       # Logistic Regression
    ],
    voting='hard'  # 'hard' = majority vote, 'soft' = probability average
)

# Fit the ensemble on the training split and score it on the held-out split.
voting_model.fit(X_train, y_train)
voting_pred = voting_model.predict(X_test)
print(f"Voting Classifier Accuracy: {accuracy_score(y_test, voting_pred):.4f}")

# ==========================================
# Model comparison with cross-validation
# ==========================================
print("\n" + "=" * 50)
print("Cross-Validation Comparison (5-fold)")
print("=" * 50)

# Every estimator is seeded with random_state=42 so the comparison is
# reproducible from one run to the next.
models = {
    'Decision Tree': DecisionTreeClassifier(max_depth=5, random_state=42),
    'Random Forest': RandomForestClassifier(n_estimators=50, random_state=42),
    'AdaBoost': AdaBoostClassifier(n_estimators=50, random_state=42),
    'Gradient Boosting': GradientBoostingClassifier(n_estimators=50, random_state=42),
}

# 5-fold cross-validation over the full dataset (X, y); prints the mean score
# and twice the standard deviation across folds for each estimator.
# The loop variable is named `estimator` so it does not rebind the `model`
# fitted in the Decision Tree cell.
for name, estimator in models.items():
    scores = cross_val_score(estimator, X, y, cv=5)
    print(f"{name}: Mean Accuracy = {scores.mean():.4f} (+/- {scores.std()*2:.4f})")