In [None]:
from sklearn.model_selection import train_test_split, RandomizedSearchCV, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import make_scorer, cohen_kappa_score

# Two-stage hyperparameter tuning for a random forest:
#   1. RandomizedSearchCV samples 100 candidates from a broad space.
#   2. GridSearchCV exhaustively searches a narrow window around the winner.
# Both stages use 5-fold cross-validation scored with Cohen's kappa.
# Names expected from earlier cells: np, X_train_balanced, y_train_balanced,
# X_test_transformed_df, y_test.

# Stage 1 search space (sampled by RandomizedSearchCV, not enumerated).
param_grid = {
    'n_estimators': np.arange(20, 200, 10),
    'criterion': ['gini', 'entropy'],
    'max_depth': np.arange(5, 15),
    'min_samples_split': np.arange(2, 11),
    'min_samples_leaf': np.arange(1, 11),
    'max_features': ['sqrt', 'log2', None],
    'bootstrap': [True, False],
}

# Base estimator; the fixed seed keeps the forest itself reproducible.
rf = RandomForestClassifier(random_state=42)

# Define Kappa score as the scoring metric
kappa_scorer = make_scorer(cohen_kappa_score)

# Create RandomizedSearchCV object with Kappa score as the scoring metric
random_search = RandomizedSearchCV(estimator=rf, param_distributions=param_grid, n_iter=100, cv=5,
                                   scoring=kappa_scorer, random_state=42, n_jobs=-1)

random_search.fit(X_train_balanced, y_train_balanced)

best_params_from_random_search = random_search.best_params_

# Stage 2: refine around the randomized-search winner.
# n_estimators must be at least 1, but stage 1 can select 20, which would make
# the raw window start at 0. Filter instead of shifting the start so the grid
# stays aligned on the best value and still contains it.
best_n_estimators = best_params_from_random_search['n_estimators']
n_estimators_window = np.arange(best_n_estimators - 20, best_n_estimators + 20, 5)
n_estimators_window = n_estimators_window[n_estimators_window >= 1]

param_grid_for_grid_search = {
    'n_estimators': n_estimators_window,
    'criterion': [best_params_from_random_search['criterion']],
    # Stage 1 never goes below max_depth=5, so "- 2" cannot reach 0.
    'max_depth': np.arange(best_params_from_random_search['max_depth'] - 2,
                           best_params_from_random_search['max_depth'] + 5),
    # The two min_samples_* windows only extend upward from the best value,
    # which keeps them inside the valid ranges used in stage 1 (>= 2 and >= 1).
    'min_samples_split': np.arange(best_params_from_random_search['min_samples_split'],
                                   best_params_from_random_search['min_samples_split'] + 5),
    'min_samples_leaf': np.arange(best_params_from_random_search['min_samples_leaf'],
                                  best_params_from_random_search['min_samples_leaf'] + 5),
    # Categorical / boolean choices are frozen at the stage 1 winner.
    'max_features': [best_params_from_random_search['max_features']],
    'bootstrap': [best_params_from_random_search['bootstrap']],
}

# Create GridSearchCV object with Kappa score as the scoring metric
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid_for_grid_search, cv=5,
                           scoring=kappa_scorer, n_jobs=-1)

# Fit the GridSearchCV on the training data
grid_search.fit(X_train_balanced, y_train_balanced)

# Get the best model and its parameters
best_rf_model = grid_search.best_estimator_
best_params = grid_search.best_params_

# Evaluate the model on the test data
# NOTE(review): this cell predicts on X_test_transformed_df while the AdaBoost
# cell predicts on X_test_scaled_df - confirm which one matches the
# preprocessing applied to X_train_balanced.
y_pred = best_rf_model.predict(X_test_transformed_df)
kappa_score_test = cohen_kappa_score(y_test, y_pred)

print("Best Model Parameters:")
print(best_params)
print("Best Model Kappa Score on Test Data:", kappa_score_test)

In [None]:
from sklearn.model_selection import train_test_split, RandomizedSearchCV, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import make_scorer, cohen_kappa_score
from sklearn.tree import DecisionTreeClassifier

# Two-stage hyperparameter tuning for AdaBoost over depth-1 decision trees:
# a randomized search over a broad space, then a grid search in a narrow
# window around the winner. Both stages use 5-fold cross-validation scored
# with Cohen's kappa.
# Names expected from earlier cells: np, X_train_balanced, y_train_balanced,
# X_test_scaled_df, y_test.

# Parameter grid for AdaBoostClassifier
# NOTE(review): 'SAMME.R' is deprecated in scikit-learn 1.4 and removed in 1.6;
# drop it from this list if the environment runs a newer release.
adaboost_param_grid = {
    'n_estimators': np.arange(50, 200, 10),
    'learning_rate': np.linspace(0.1, 2, 20),
    'algorithm': ['SAMME', 'SAMME.R'],
}

# Define the class weights
# NOTE(review): these weights favour class 1 although the comment below calls
# class 2 the minority, and the training data is named "balanced" - confirm
# the intended direction of the weighting.
class_weights = {1: 5, 2: 1}  # Class "2" is the minority class

# Create an AdaBoost classifier with a decision tree as the base estimator.
# random_state is fixed so repeated runs give the same ensemble, matching the
# seeded RandomForestClassifier used in the random forest cell.
adaboost = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1, class_weight=class_weights),
                              n_estimators=100, random_state=42)

# Define Kappa score as the scoring metric
kappa_scorer = make_scorer(cohen_kappa_score)

# Create RandomizedSearchCV object for AdaBoosting
random_search_adaboost = RandomizedSearchCV(estimator=adaboost, param_distributions=adaboost_param_grid,
                                            n_iter=100, cv=5, scoring=kappa_scorer, random_state=42, n_jobs=-1)

# Fit the RandomizedSearchCV on the training data
random_search_adaboost.fit(X_train_balanced, y_train_balanced)

best_params_from_random_search_adaboost = random_search_adaboost.best_params_

# Refine the learning rate on both sides of the randomized-search winner
# (steps of 0.05 within +/- 0.25). Candidates below 0.01 are dropped because
# the learning rate must stay positive; the smallest stage 1 value is 0.1, so
# the winner itself always survives the filter.
best_learning_rate = best_params_from_random_search_adaboost['learning_rate']
learning_rate_window = best_learning_rate + np.linspace(-0.25, 0.25, 11)
learning_rate_window = learning_rate_window[learning_rate_window >= 0.01]

# Define a smaller range around the best parameters for GridSearchCV
param_grid_for_grid_search_adaboost = {
    # Stage 1 never goes below n_estimators=50, so "- 20" stays positive.
    'n_estimators': np.arange(best_params_from_random_search_adaboost['n_estimators'] - 20,
                              best_params_from_random_search_adaboost['n_estimators'] + 20, 5),
    'learning_rate': learning_rate_window,
    'algorithm': [best_params_from_random_search_adaboost['algorithm']],
}

# Create GridSearchCV object for AdaBoosting
grid_search_adaboost = GridSearchCV(estimator=adaboost, param_grid=param_grid_for_grid_search_adaboost,
                                    cv=5, scoring=kappa_scorer, n_jobs=-1)

# Fit the GridSearchCV on the training data
grid_search_adaboost.fit(X_train_balanced, y_train_balanced)

# Get the best model and its parameters
best_adaboost_model = grid_search_adaboost.best_estimator_
best_params_adaboost = grid_search_adaboost.best_params_

# Evaluate the model on the test data
# NOTE(review): the random forest cell predicts on X_test_transformed_df while
# this cell uses X_test_scaled_df - confirm which one matches the
# preprocessing applied to X_train_balanced.
y_pred_adaboost = best_adaboost_model.predict(X_test_scaled_df)
kappa_score_test_adaboost = cohen_kappa_score(y_test, y_pred_adaboost)

print("Best AdaBoost Model Parameters:")
print(best_params_adaboost)
print("Best AdaBoost Model Kappa Score on Test Data:", kappa_score_test_adaboost)