In [5]:
import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [None]:
# Read the churn table from disk, then separate the 'Exited' column (the
# label the classifiers are fitted against) from the remaining columns,
# which serve as the feature matrix.
churn = pd.read_csv("churn_dataset.csv")
y = churn['Exited']
X = churn.drop('Exited', axis=1)

In [None]:
# Blocked grid experiment: for every train/test split ("block"), fit one random
# forest per (n_estimators, max_depth) combination and record its accuracy on
# the held-out rows. The results are written to CSV and printed.

# Global NumPy seed. Note that the split and every forest below also pass an
# explicit random_state=42, so their randomness does not rely on this seed.
np.random.seed(1)

n_estimators_levels = [50, 100, 200]
max_depth_levels = [None, 1, 10]  # None leaves tree depth unrestricted in scikit-learn
training_splits = [0.7, 0.8, 0.9]

result_columns = ["Block", "n_estimators", "max_depth", "Accuracy"]

# Collect one plain dict per fitted model and build the DataFrame once at the
# end. Concatenating onto a growing frame inside the loop copies the whole
# frame every iteration and, on the first pass, goes through pandas'
# deprecated "concat with an empty frame" path.
accuracy_records = []

for block, train_split in enumerate(training_splits, start=1):
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=train_split, random_state=42
    )

    # round() instead of int(): a product such as 0.57 * 100 evaluates to
    # 56.99999999999999 and would be truncated to the wrong percentage.
    # The label only depends on the split, so it is built once per block.
    train_pct = round(train_split * 100)
    block_label = f"Split {block} ({train_pct}/{100 - train_pct})"

    for n_estimators in n_estimators_levels:
        for max_depth in max_depth_levels:
            model = RandomForestClassifier(
                n_estimators=n_estimators, max_depth=max_depth, random_state=42
            )

            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            accuracy = accuracy_score(y_test, y_pred)

            accuracy_records.append({
                "Block": block_label,
                "n_estimators": n_estimators,
                "max_depth": max_depth,
                "Accuracy": accuracy,
            })

# Build with dtype=object first so the None entries in max_depth stay None
# (default inference would turn the column into floats: NaN, 1.0, 10.0), then
# pin the purely numeric columns to concrete dtypes.
accuracy_df = pd.DataFrame(
    accuracy_records, columns=result_columns, dtype=object
).astype({"n_estimators": "int64", "Accuracy": "float64"})

accuracy_df.to_csv("random_forest_accuracies.csv", index=False)

print(accuracy_df)