In [6]:
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.metrics import roc_curve, auc


In [7]:
class ModelEvaluator:
    def __init__(self, algorithm, data, train_cols, test_cols):
        self.algorithm = algorithm
        self.data = data
        self.train_cols = train_cols
        self.test_cols = test_cols

    def evaluate(self):
        X = self.data[self.train_cols]
        y = self.data[self.test_cols]

        # Handling imbalanced dataset using SMOTE
        smote = SMOTE(random_state=42)
        X_resampled, y_resampled = smote.fit_resample(X, y)

        # Train test split
        X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

        if self.algorithm == 'RandomForest':
            model = RandomForestClassifier()
            param_grid = {'n_estimators': [50, 100, 150],
                          'max_depth': [None, 10, 20],
                          'min_samples_split': [2, 5, 10]}
        elif self.algorithm == 'XGBoost':
            model = XGBClassifier()
            param_grid = {'n_estimators': [50, 100, 150],
                          'max_depth': [3, 5, 7],
                          'learning_rate': [0.1, 0.01, 0.001]}
        elif self.algorithm == 'DecisionTree':
            model = DecisionTreeClassifier()
            param_grid = {'max_depth': [None, 5, 10, 20],
                          'min_samples_split': [2, 5, 10]}

        # Parameter tuning using GridSearchCV
        grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='roc_auc')
        grid_search.fit(X_train, y_train)

        # Best parameters and best score
        best_params = grid_search.best_params_
        best_score = grid_search.best_score_

        # Cross-validation
        cv_scores = cross_val_score(grid_search.best_estimator_, X_train, y_train, cv=5, scoring='roc_auc')

        # Final evaluation on test set
        y_pred = grid_search.predict_proba(X_test)[:, 1]
        fpr, tpr, thresholds = roc_curve(y_test, y_pred)
        auc_score = auc(fpr, tpr)

        return best_params, best_score, cv_scores, auc_score



# Example usage
data = pd.read_csv('80%_null_drop_rest_impute_rf.csv')
train_cols = list(data.columns)
# train_cols.remove('Unique_ID')
train_cols.remove('Dependent_Variable')
test_cols = ['Dependent_Variable']  # Specify your target variable

# Evaluating Random Forest
rf_evaluator = ModelEvaluator('RandomForest', data, train_cols, test_cols)
rf_best_params, rf_best_score, rf_cv_scores, rf_auc_score = rf_evaluator.evaluate()

print("Random Forest:")
print("Best Parameters:", rf_best_params)
print("Best AUC Score:", rf_best_score)
print("Cross Validation Scores:", rf_cv_scores)
print("Test AUC Score:", rf_auc_score)

# Evaluating XGBoost
xgb_evaluator = ModelEvaluator('XGBoost', data, train_cols, test_cols)
xgb_best_params, xgb_best_score, xgb_cv_scores, xgb_auc_score = xgb_evaluator.evaluate()

print("\nXGBoost:")
print("Best Parameters:", xgb_best_params)
print("Best AUC Score:", xgb_best_score)
print("Cross Validation Scores:", xgb_cv_scores)
print("Test AUC Score:", xgb_auc_score)

# Evaluating Decision Tree
dt_evaluator = ModelEvaluator('DecisionTree', data, train_cols, test_cols)
dt_best_params, dt_best_score, dt_cv_scores, dt_auc_score = dt_evaluator.evaluate()

print("\nDecision Tree:")
print("Best Parameters:", dt_best_params)
print("Best AUC Score:", dt_best_score)
print("Cross Validation Scores:", dt_cv_scores)
print("Test AUC Score:", dt_auc_score)


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **

Random Forest:
Best Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 50}
Best AUC Score: 1.0
Cross Validation Scores: [1. 1. 1. 1. 1.]
Test AUC Score: 1.0


  if is_sparse(dtype):
  elif is_categorical_dtype(dtype) and enable_categorical:
  if is_categorical_dtype(dtype)
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_categorical_dtype(dtype)
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(dtype):
  elif is_categorical_dtype(dtype) and enable_categorical:
  if is_categorical_dtype(dtype)
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(dtype):
  elif is_categorical_dtype(dtype) and enable_categorical:
  if is_categorical_dtype(dtype)
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_categorical_dtype(dtype)
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(dtype):
  elif is_categorical_dtype(dtype) and enable_categorical:
  if is_categorical_dtype(dtype)
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(dtype):
  elif is_categorical_dtype(dty

In [4]:
data.head()

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7,C8,N1,N2,...,N20,N21,N22,N23,N24,N33,N34,N35,Dependent_Variable,Dependent_Variable.1
0,0,0,3,25,0,0,0,1,23.75,81.34271,...,21.764142,0.857199,0.906422,27.816,1750.0,58.0,113.39,12.0,1,1
1,0,15,12,63,2,0,1,1,11.05,22.0,...,17.0,0.88,1.0,40.0,10833.33333,160.0,262.1,17.0,0,0
2,0,0,11,12,0,0,0,1,29.0,81.34271,...,21.764142,0.857199,0.906422,20.0,6250.0,24.0,50.29,18.0,1,1
3,0,1,8,42,1,0,4,1,17.99,1.0,...,6.0,1.0,0.0,26.0,2413.666667,70.0,126.52,27.0,0,0
4,0,1,5,1,1,1,6,1,27.5,206.0,...,31.0,0.96,0.0,44.0,7666.666667,100.0,205.47,21.0,0,0
