In [None]:
# notebooks/model_experimentation.ipynb

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV

In [None]:
# Read the preprocessed dataset from its CSV file into a DataFrame.
# NOTE(review): the path looks like a placeholder — confirm it points at the real file.
data = pd.read_csv(filepath_or_buffer='path/to/preprocessed_data.csv')

In [None]:
# Separate the label from the predictors:
#   y - the 'target' column, used as the label
#   X - every remaining column, used as the feature matrix
y = data['target']
X = data.drop(columns='target')

In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# shuffle reproducible across runs.
# stratify=y keeps the class proportions of the target the same in the train
# and test splits, so the metrics computed on the test set are not skewed by
# an unlucky draw when the classes are imbalanced.
# Note: stratification requires every class to have at least two samples.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)


In [None]:
# Baseline random forest: default hyperparameters apart from the fixed seed,
# fitted on the training split and evaluated on the held-out test split.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)
rf_predictions = rf_model.predict(X_test)

# Report each metric in turn, computing it right before it is printed.
# NOTE(review): precision/recall/F1 are called with their default arguments,
# which in scikit-learn means binary averaging — confirm the target is binary.
print("Random Forest Classifier:")
for rf_metric_label, rf_metric_fn in (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
):
    print(rf_metric_label, rf_metric_fn(y_test, rf_predictions))

In [None]:
# Baseline XGBoost model: default hyperparameters apart from the fixed seed,
# fitted on the training split and evaluated on the held-out test split.
xgb_model = XGBClassifier(random_state=42)
xgb_model.fit(X_train, y_train)
xgb_predictions = xgb_model.predict(X_test)

# Labels and scoring functions are paired up so each score is computed and
# printed in the same order: accuracy, precision, recall, F1.
# NOTE(review): precision/recall/F1 are called with their default arguments,
# which in scikit-learn means binary averaging — confirm the target is binary.
xgb_metric_labels = ("Accuracy:", "Precision:", "Recall:", "F1 Score:")
xgb_metric_fns = (accuracy_score, precision_score, recall_score, f1_score)
print("XGBoost Classifier:")
for xgb_metric_label, xgb_metric_fn in zip(xgb_metric_labels, xgb_metric_fns):
    print(xgb_metric_label, xgb_metric_fn(y_test, xgb_predictions))

In [None]:
# Exhaustive grid search over random-forest hyperparameters.
# The grid has 3 x 3 x 3 = 27 combinations; each one is scored by accuracy
# using 5-fold cross-validation on the training split only.
rf_params = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5, 10],
}
rf_grid = GridSearchCV(
    RandomForestClassifier(random_state=42),
    rf_params,
    scoring='accuracy',
    cv=5,
)
rf_grid.fit(X_train, y_train)

# best_params_ is the winning combination; best_score_ is its mean
# cross-validated accuracy (not a score on the held-out test set).
print("Best parameters for Random Forest Classifier:", rf_grid.best_params_)
print("Best score for Random Forest Classifier:", rf_grid.best_score_)


In [None]:
# Exhaustive grid search over XGBoost hyperparameters.
# Three candidate values for each of tree count, tree depth and learning rate
# give 27 combinations; each one is scored by accuracy using 5-fold
# cross-validation on the training split only.
xgb_params = {
    'n_estimators': [50, 100, 200],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1, 0.3],
}
xgb_grid = GridSearchCV(
    param_grid=xgb_params,
    estimator=XGBClassifier(random_state=42),
    scoring='accuracy',
    cv=5,
)
xgb_grid.fit(X_train, y_train)

# best_params_ is the winning combination; best_score_ is its mean
# cross-validated accuracy (not a score on the held-out test set).
print("Best parameters for XGBoost Classifier:", xgb_grid.best_params_)
print("Best score for XGBoost Classifier:", xgb_grid.best_score_)