**Decision Trees with Sample/Feature Subsetting**
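- max_samples: subset of samples per tree (an ensemble-level setting, e.g. on `RandomForestClassifier`; a single `DecisionTreeClassifier` has no such parameter)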
- max_features: subset of features per split
- min_samples_split: minimum samples for split
- min_samples_leaf: minimum samples in leaf

In [None]:
from sklearn.tree import DecisionTreeClassifier
import numpy as np

# Single decision tree that considers only sqrt(n_features) candidate
# features at each split and refuses to create very small nodes.
_tree_kwargs = {
    "max_features": "sqrt",
    "min_samples_split": 5,
    "min_samples_leaf": 2,
}
tree = DecisionTreeClassifier(**_tree_kwargs)

**Random Forest**
- Ensemble of decision trees
- Bootstrap sampling (bagging)
- Random feature selection
- Parallel training
- Reduces overfitting

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

class RandomForestExample:
    """Small wrapper that fits a bootstrapped random forest and reports scores.

    The forest is stored on ``self.rf`` and is configured with
    ``max_features="sqrt"``, ``bootstrap=True`` and ``oob_score=True`` so that
    an out-of-bag score is available after fitting.

    Args:
        n_estimators: Number of trees in the forest.
    """

    def __init__(self, n_estimators=100):
        forest_options = dict(
            n_estimators=n_estimators,
            max_features="sqrt",
            bootstrap=True,
            oob_score=True,
        )
        self.rf = RandomForestClassifier(**forest_options)

    def train_and_evaluate(self, X, y):
        """Fit the forest on an 80/20 split and print OOB and test scores.

        Args:
            X: Feature matrix.
            y: Target labels aligned with ``X``.

        Returns:
            The fitted forest's ``feature_importances_``.
        """
        # Hold out 20% of the rows for the final test score.
        fit_features, holdout_features, fit_labels, holdout_labels = (
            train_test_split(X, y, test_size=0.2)
        )

        forest = self.rf
        forest.fit(fit_features, fit_labels)

        # Out-of-bag estimate first, then the held-out score.
        oob_accuracy = forest.oob_score_
        print(f"OOB score: {oob_accuracy:.3f}")
        holdout_accuracy = forest.score(holdout_features, holdout_labels)
        print(f"Test score: {holdout_accuracy:.3f}")

        return forest.feature_importances_

**Gradient Boosting**
- Sequential training
- Each tree corrects the errors of the previous trees
- Learning rate controls contribution
- More prone to overfitting than RF

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
import xgboost as xgb
import lightgbm as lgb

class GradientBoostingExample:
    """Compare three gradient-boosting classifiers on the same data split.

    The constructor builds one scikit-learn, one XGBoost and one LightGBM
    classifier with matching settings (100 trees, learning rate 0.1,
    depth 3, 80% row subsampling) and stores them on ``self.gbm``,
    ``self.xgb`` and ``self.lgb``.
    """

    def __init__(self):
        # scikit-learn
        self.gbm = GradientBoostingClassifier(
            n_estimators=100,
            learning_rate=0.1,
            max_depth=3,
            subsample=0.8,
        )

        # XGBoost
        self.xgb = xgb.XGBClassifier(
            n_estimators=100,
            learning_rate=0.1,
            max_depth=3,
            subsample=0.8,
            colsample_bytree=0.8,
        )

        # LightGBM
        self.lgb = lgb.LGBMClassifier(
            n_estimators=100,
            learning_rate=0.1,
            max_depth=3,
            subsample=0.8,
            # LightGBM ignores `subsample` unless `subsample_freq` > 0, so
            # enable row subsampling on every iteration to match the others.
            subsample_freq=1,
        )

    def compare_boosting(self, X, y):
        """Fit every model on one 80/20 split and return their test scores.

        Args:
            X: Feature matrix.
            y: Target labels aligned with ``X``.

        Returns:
            dict mapping ``"GBM"``, ``"XGBoost"`` and ``"LightGBM"`` to the
            value of ``model.score`` on the held-out 20%.
        """
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2
        )

        # The original initialised `result` but wrote to `results`, which
        # raised NameError on the first iteration.
        results = {}
        for name, model in (
            ("GBM", self.gbm),
            ("XGBoost", self.xgb),
            ("LightGBM", self.lgb),
        ):
            model.fit(X_train, y_train)
            results[name] = model.score(X_test, y_test)

        return results

**Feature Importance Measures**
- Gini/Entropy importance (default)
- Permutation importance
- SHAP values

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score

class TreeFeatureImportance:
    """Compute and plot feature-importance measures for a fitted tree model.

    Three measures are supported:

    * ``"gini"`` -- the model's own ``feature_importances_`` attribute.
    * ``"permutation"`` -- ``sklearn.inspection.permutation_importance``
      evaluated on evaluation data.
    * ``"shap"`` -- mean absolute SHAP value per feature from
      ``shap.TreeExplainer``.

    Args:
        model: Fitted estimator exposing ``feature_importances_``.
        feature_names: Feature labels, paired positionally with the
            importance values when plotting.
        X_test: Optional default evaluation features used by the
            ``"permutation"`` and ``"shap"`` measures.
        y_test: Optional default evaluation labels used by the
            ``"permutation"`` measure.
    """

    IMPORTANCE_TYPES = ("gini", "permutation", "shap")

    def __init__(self, model, feature_names, X_test=None, y_test=None):
        self.model = model
        self.feature_names = feature_names
        self.X_test = X_test
        self.y_test = y_test

    @staticmethod
    def _mean_abs_shap(shap_values):
        """Reduce SHAP output to one mean-absolute value per feature.

        Accepts a 2-D ``(samples, features)`` array, a list of such arrays
        (one per class), or a 3-D ``(samples, features, classes)`` array.
        """
        if isinstance(shap_values, list):
            magnitudes = np.stack([np.abs(v) for v in shap_values], axis=-1)
        else:
            magnitudes = np.abs(np.asarray(shap_values))
        if magnitudes.ndim == 3:
            # Average over the class axis before averaging over samples.
            magnitudes = magnitudes.mean(axis=2)
        return magnitudes.mean(axis=0)

    def get_importance(self, importance_type="gini", X_test=None, y_test=None):
        """Return a single importance measure.

        Args:
            importance_type: One of ``IMPORTANCE_TYPES``.
            X_test: Evaluation features; falls back to the constructor value.
            y_test: Evaluation labels; falls back to the constructor value.

        Returns:
            Array of importance values, one per feature.

        Raises:
            KeyError: If ``importance_type`` is not a known measure.
            ValueError: If the measure needs evaluation data that was not
                supplied here or to the constructor.
        """
        if importance_type not in self.IMPORTANCE_TYPES:
            raise KeyError(importance_type)

        if importance_type == "gini":
            return self.model.feature_importances_

        X_test = self.X_test if X_test is None else X_test
        if X_test is None:
            raise ValueError(
                f"X_test is required for {importance_type!r} importance"
            )

        if importance_type == "shap":
            # Imported lazily so the other measures work without shap.
            import shap

            explainer = shap.TreeExplainer(self.model)
            return self._mean_abs_shap(explainer.shap_values(X_test))

        y_test = self.y_test if y_test is None else y_test
        if y_test is None:
            raise ValueError("y_test is required for 'permutation' importance")

        from sklearn.inspection import permutation_importance

        return permutation_importance(
            self.model, X_test, y_test
        ).importances_mean

    def get_importance_types(self, X_test=None, y_test=None):
        """Return every supported importance measure.

        Args:
            X_test: Evaluation features; falls back to the constructor value.
            y_test: Evaluation labels; falls back to the constructor value.

        Returns:
            dict with keys ``"gini"``, ``"permutation"`` and ``"shap"``.

        Raises:
            ValueError: If evaluation data is missing.
        """
        return {
            name: self.get_importance(name, X_test, y_test)
            for name in self.IMPORTANCE_TYPES
        }

    def plot_importance(self, importance_type="gini", X_test=None, y_test=None):
        """Draw a bar chart of one importance measure, largest first.

        Only the requested measure is computed, so plotting ``"gini"``
        needs neither evaluation data nor the optional dependencies.

        Args:
            importance_type: One of ``IMPORTANCE_TYPES``.
            X_test: Evaluation features; falls back to the constructor value.
            y_test: Evaluation labels; falls back to the constructor value.

        Raises:
            KeyError: If ``importance_type`` is not a known measure.
            ValueError: If the measure needs evaluation data that is missing.
        """
        importance = self.get_importance(importance_type, X_test, y_test)
        imp_df = pd.DataFrame({
            "feature": self.feature_names,
            "importance": importance,
        }).sort_values("importance", ascending=False)
        plt.figure(figsize=(10, 6))
        plt.bar(imp_df["feature"], imp_df["importance"])
        plt.xticks(rotation=45)
        plt.title(f"Feature Importance ({importance_type})")
        plt.tight_layout()
        # The original referenced `plt.show` without calling it.
        plt.show()

from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
import xgboost as xgb
import lightgbm as lgb
import numpy as np
import matplotlib.pyplot as plt

def compare_tree_models(X, y, cv=5):
    """Cross-validate four tree ensembles and box-plot their fold scores.

    Random Forest, scikit-learn Gradient Boosting, XGBoost and LightGBM are
    each scored with ``cross_val_score``. A model that raises is reported
    on stdout and left out of the results instead of aborting the run.

    Args:
        X: Feature matrix.
        y: Target labels aligned with ``X``.
        cv: Cross-validation setting forwarded to ``cross_val_score``.

    Returns:
        dict mapping each successful model name to a dict with keys
        ``"mean_score"``, ``"std_score"`` and ``"scores"`` (the per-fold
        score array). Empty if every model failed.
    """
    # Fixed seeds keep the comparison repeatable between runs.
    models = {
        'Random Forest': RandomForestClassifier(random_state=42),
        'GradientBoosting': GradientBoostingClassifier(random_state=42),
        'XGBoost': xgb.XGBClassifier(
            # NOTE(review): `use_label_encoder` appears to be ignored by
            # recent XGBoost releases -- confirm the installed version
            # before removing it.
            use_label_encoder=False,
            eval_metric='logloss',
            random_state=42
        ),
        'LightGBM': lgb.LGBMClassifier(random_state=42)
    }

    results = {}
    for name, model in models.items():
        # Keep the try body to the one call that is expected to fail.
        try:
            scores = cross_val_score(model, X, y, cv=cv)
        except Exception as e:
            print(f"Error with {name}: {str(e)}")
            continue

        results[name] = {
            'mean_score': scores.mean(),
            'std_score': scores.std(),
            'scores': scores
        }
        print(f"{name}:")
        print(f"Mean CV Score: {scores.mean():.3f} (+/- {scores.std() * 2:.3f})")

    # Nothing to draw when every model failed.
    if not results:
        return results

    # One box per successful model, in insertion order.
    labels = list(results)
    plt.figure(figsize=(10, 6))
    plt.boxplot([results[label]['scores'] for label in labels])
    plt.xticks(range(1, len(labels) + 1), labels, rotation=45)
    plt.title('Model Comparison')
    plt.ylabel('Score')
    plt.tight_layout()
    plt.show()

    return results

In [None]:
# Example: run the comparison on synthetic data. The labels are drawn
# independently of the features, so scores near chance (0.5) are expected.
# A seeded generator keeps the demo reproducible, matching the
# random_state=42 used for the models.
rng = np.random.default_rng(42)
X = rng.standard_normal((1000, 20))
y = rng.integers(0, 2, 1000)
results = compare_tree_models(X, y)