In [1]:
import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [2]:
# Load the churn table, then separate the 'Exited' column (used as the
# prediction target further down) from the remaining columns.
churn = pd.read_csv("churn_dataset.csv")
y = churn['Exited']
X = churn.drop('Exited', axis=1)

In [4]:
# Global NumPy seed, kept as in the original cell. The split and the forests
# below are seeded through their own explicit random_state arguments.
np.random.seed(1)

# Factor levels of the experiment: every (n_estimators, max_depth) pair is
# fitted and scored once inside each train/test split ("block").
n_estimators_levels = [1, 100, 500]
max_depth_levels = [None, 1, 10]
training_splits = [0.7, 0.8, 0.9]

# Per-column accumulators. The result frame is built once after the loops
# instead of being grown with pd.concat on every iteration, which copies the
# whole frame each time and starts from an empty frame (deprecated dtype
# handling in recent pandas).
block_labels = []
n_estimators_used = []
max_depth_used = []
accuracies = []

for block, train_split in enumerate(training_splits, start=1):
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=train_split, random_state=42)

    # round() rather than int(): a product such as 0.29 * 100 evaluates to
    # 28.999999999999996, which int() would truncate to 28.
    train_pct = round(train_split * 100)
    block_label = f"Split {block} ({train_pct}/{100 - train_pct})"

    for n_estimators in n_estimators_levels:
        for max_depth in max_depth_levels:
            model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=42)

            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            accuracy = accuracy_score(y_test, y_pred)

            block_labels.append(block_label)
            n_estimators_used.append(n_estimators)
            max_depth_used.append(max_depth)
            accuracies.append(accuracy)

accuracy_df = pd.DataFrame({
    "Block": block_labels,
    "n_estimators": n_estimators_used,
    # object dtype keeps max_depth=None as None (and 1/10 as ints) instead of
    # letting pandas coerce the column to float with NaN.
    "max_depth": pd.Series(max_depth_used, dtype=object),
    "Accuracy": accuracies,
})

# Persist the full grid of results, then show it in the cell output.
accuracy_df.to_csv("random_forest_accuracies.csv", index=False)

print(accuracy_df)

              Block n_estimators max_depth  Accuracy
0   Split 1 (70/30)            1      None  0.727304
1   Split 1 (70/30)            1         1  0.789331
2   Split 1 (70/30)            1        10  0.803043
3   Split 1 (70/30)          100      None  0.800758
4   Split 1 (70/30)          100         1  0.789331
5   Split 1 (70/30)          100        10  0.814279
6   Split 1 (70/30)          500      None  0.802434
7   Split 1 (70/30)          500         1  0.789331
8   Split 1 (70/30)          500        10  0.814622
9   Split 2 (80/20)            1      None  0.734645
10  Split 2 (80/20)            1         1  0.788608
11  Split 2 (80/20)            1        10  0.805805
12  Split 2 (80/20)          100      None  0.802005
13  Split 2 (80/20)          100         1  0.788608
14  Split 2 (80/20)          100        10  0.814717
15  Split 2 (80/20)          500      None  0.803605
16  Split 2 (80/20)          500         1  0.788608
17  Split 2 (80/20)          500        10  0.