In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.utils.validation import check_array
import numpy as np
from sklearn.model_selection import RandomizedSearchCV
# Example usage:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

class CustomRandomForestClassifier(RandomForestClassifier):
    """RandomForestClassifier with one extra constructor parameter, ``curt_v``.

    This class only defines ``__init__``; it does not override ``fit`` or
    ``predict``. ``curt_v`` is therefore stored on the instance (and is
    visible to ``get_params``/``set_params``) but nothing in this class reads
    it.

    scikit-learn discovers an estimator's parameters by inspecting the
    ``__init__`` signature and skips ``**kwargs``. Anything passed only
    through ``**kwargs`` is invisible to ``get_params()`` and is dropped when
    the estimator is cloned (e.g. inside ``RandomizedSearchCV``). For that
    reason the standard forest parameters are listed explicitly below.

    Parameters
    ----------
    n_estimators, criterion, max_depth, min_samples_split, min_samples_leaf,
    max_features, max_leaf_nodes, bootstrap, oob_score, class_weight :
        Forwarded unchanged to ``RandomForestClassifier``.
    curt_v : any, default=0
        Extra parameter stored as ``self.curt_v``.
    min_weight_fraction_leaf, min_impurity_decrease, n_jobs, random_state,
    verbose, warm_start, ccp_alpha, max_samples :
        Keyword-only; forwarded unchanged to ``RandomForestClassifier``.
        NOTE(review): assumes a scikit-learn version whose
        ``RandomForestClassifier`` accepts all of these -- confirm.
    **kwargs :
        Any further keyword accepted by the installed
        ``RandomForestClassifier``. Kept for backward compatibility; these
        values are still NOT preserved by ``clone()``.
    """

    def __init__(
        self,
        n_estimators=100,
        criterion="gini",
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        max_features="sqrt",
        max_leaf_nodes=None,
        bootstrap=True,
        oob_score=False,
        class_weight=None,
        curt_v=0,
        *,
        min_weight_fraction_leaf=0.0,
        min_impurity_decrease=0.0,
        n_jobs=None,
        random_state=None,
        verbose=0,
        warm_start=False,
        ccp_alpha=0.0,
        max_samples=None,
        **kwargs,
    ):
        # Let the parent constructor store every forest parameter so each one
        # ends up as an attribute named exactly like its __init__ argument.
        super().__init__(
            n_estimators=n_estimators,
            criterion=criterion,
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            min_weight_fraction_leaf=min_weight_fraction_leaf,
            max_features=max_features,
            max_leaf_nodes=max_leaf_nodes,
            min_impurity_decrease=min_impurity_decrease,
            bootstrap=bootstrap,
            oob_score=oob_score,
            n_jobs=n_jobs,
            random_state=random_state,
            verbose=verbose,
            warm_start=warm_start,
            class_weight=class_weight,
            ccp_alpha=ccp_alpha,
            max_samples=max_samples,
            **kwargs,
        )
        # The only parameter the parent class does not know about.
        self.curt_v = curt_v

# Generate a synthetic classification dataset (1000 samples, 20 features).
X, y = make_classification(n_samples=1000, n_features=20, random_state=42)

# Hold out 20% of the samples as a test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Candidate values sampled by the randomized search.
search_space = {
    "n_estimators": [10],
    "curt_v": np.arange(2, 400).tolist(),
    # Further candidates, currently disabled:
    # "max_depth":  np.arange(2, 100).tolist(), # [None, 5, 10, 15, 20, 30, 50]
    # "criterion": ["gini", "entropy"],
    # "max_features": ['sqrt', 'log2', None],
    # "min_samples_split": np.arange(2, 11).tolist(),
    # "min_samples_leaf":  np.arange(1, 200).tolist(),
    # 'class_weight': [None, 'balanced', 'balanced_subsample'],  # Weight for each class
    # "oob_score": [False]
}


# Create an instance of the custom RandomForestClassifier
clf = CustomRandomForestClassifier()
# Baseline alternative:
# clf = RandomForestClassifier(random_state=42)

# The forest above has no random_state of its own, so it draws from NumPy's
# global RNG. Seed that RNG right before the search so that a fresh
# "Restart & Run All" reproduces the same CV scores, selected parameters and
# test accuracy. Candidates still see different draws from one another.
# NOTE: this relies on the search running in a single process (n_jobs unset).
np.random.seed(42)

# Sample 50 candidates, score each with 5-fold CV on the negative Brier score,
# then refit the best candidate on the whole training set.
RS = RandomizedSearchCV(clf, search_space, scoring=["neg_brier_score"], refit="neg_brier_score", cv=5, n_iter=50, random_state=0)
RS.fit(X_train, y_train)
RF = RS.best_estimator_


# Predict on the test set with the refitted best estimator
y_pred = RF.predict(X_test)

# Evaluate the accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")


Accuracy: 0.8700


In [2]:
# Show the parameter combination selected by the randomized search (RS).
# NOTE(review): in the code visible here nothing reads `curt_v` after it is
# stored, so the selected `curt_v` presumably reflects run-to-run noise only.
RS.best_params_

{'n_estimators': 10, 'curt_v': 196}