In [64]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils import resample
from sklearn.metrics import accuracy_score

In [70]:
class MyRandomForest:
    """Minimal random-forest classifier built from bagged decision trees.

    Every tree is a ``DecisionTreeClassifier`` fitted on a bootstrap resample
    of the training data; predictions are combined by majority vote.

    Parameters
    ----------
    n_estimators : int, default=20
        Number of trees to fit.
    max_features : default='sqrt'
        Forwarded unchanged to each ``DecisionTreeClassifier``.
    max_depth : default=5
        Forwarded unchanged to each ``DecisionTreeClassifier``.
    random_state : int, numpy.random.RandomState or None, default=None
        Seed (or generator) driving both the bootstrap draws and the trees.
        ``None`` keeps the previous behaviour of using NumPy's global RNG.
    """

    def __init__(self, n_estimators=20, max_features='sqrt', max_depth=5,
                 random_state=None):
        self.n_estimators = n_estimators
        self.max_features = max_features
        self.trees = []
        self.max_depth = max_depth
        self.random_state = random_state

    def _make_rng(self):
        """Return the generator shared by one ``fit`` call (``None`` = global RNG)."""
        seed = self.random_state
        if seed is None or isinstance(seed, np.random.RandomState):
            return seed
        return np.random.RandomState(seed)

    def fit(self, X, y):
        """Fit ``n_estimators`` trees, each on its own bootstrap sample of (X, y).

        Any previously fitted trees are discarded first.
        """
        self.trees = []
        # Kept for backward compatibility; nothing in this class appends to it.
        self.bootstrap_samples = []

        # One generator per fit call: a fixed seed therefore reproduces the
        # whole ensemble while each tree still sees a different resample.
        rng = self._make_rng()

        for _ in range(self.n_estimators):
            # Bootstrap: draw rows with replacement (resample's default).
            X_sample, y_sample = resample(X, y, random_state=rng)

            tree = DecisionTreeClassifier(
                max_features=self.max_features,
                max_depth=self.max_depth,
                splitter="best",
                random_state=rng,
            )

            tree.fit(X_sample, y_sample)
            self.trees.append(tree)

    def predict(self, X):
        """Return the majority-vote label of the fitted trees for each row of X.

        Works for any label type ``np.unique`` can sort (ints, negative ints,
        strings, ...). Ties go to the smallest label, which matches the former
        ``np.bincount(...).argmax()`` behaviour on non-negative integers.

        Raises
        ------
        RuntimeError
            If no trees have been fitted yet.
        """
        if not self.trees:
            raise RuntimeError(
                "MyRandomForest has no fitted trees; call fit() before predict()."
            )

        # Shape (n_trees, n_samples): one row of predictions per tree.
        tree_preds = np.array([tree.predict(X) for tree in self.trees])

        # Encode labels as 0..n_classes-1 so votes can be counted regardless of
        # the label dtype. The reshape makes the code independent of whether
        # this NumPy version returns the inverse flattened or input-shaped.
        classes, encoded = np.unique(tree_preds, return_inverse=True)
        encoded = encoded.reshape(tree_preds.shape)

        # votes[k, j] = number of trees that predicted classes[k] for sample j.
        votes = np.stack(
            [(encoded == k).sum(axis=0) for k in range(len(classes))]
        )
        return classes[votes.argmax(axis=0)]

    def score(self, X, y):
        """Return the accuracy of ``predict(X)`` against the true labels ``y``."""
        y_pred = self.predict(X)
        return accuracy_score(y, y_pred)

In [66]:
from sklearn.model_selection import train_test_split

# Load the prepared dataset and separate the target column from the features.
df = pd.read_csv("titanic_prepared.csv")
df_x = df.drop(columns=['label'])
df_y = df['label']

# Hold out 20% of the rows for evaluation. The seed is spelled as the integer 1:
# the former `random_state=True` relied on True being the integer 1 in Python,
# which reads like an on/off flag rather than a seed.
X_train, X_test, y_train, y_test = train_test_split(df_x, df_y, test_size=0.2, random_state=1)

In [89]:
from sklearn.tree import DecisionTreeClassifier

# Baseline: one depth-limited decision tree. The seed makes the run repeatable;
# scikit-learn permutes the candidate features at every split, so an unseeded
# tree can differ from one execution to the next.
single_tree_model = DecisionTreeClassifier(max_depth=10, random_state=1)
single_tree_model.fit(X_train, y_train)
tree_y_pred = single_tree_model.predict(X_test)
# Score the predictions already computed instead of predicting a second time
# through .score(); for a classifier both are plain accuracy.
tree_acc = accuracy_score(y_test, tree_y_pred)
print(f"Decision tree accuracy: {tree_acc}")

Decicion tree accuracy: 0.8997739261492087


In [88]:
# The forest is built here without a seed of its own, so its bootstrap draws and
# its trees use NumPy's global RNG; seeding that RNG makes this cell repeatable.
np.random.seed(1)
random_forest_model = MyRandomForest(n_estimators=70, max_depth=10)
random_forest_model.fit(X_train, y_train)
forest_preds = random_forest_model.predict(X_test)
# Reuse the predictions above rather than running all 70 trees a second time
# through .score(), which computes the same accuracy.
forest_acc = accuracy_score(y_test, forest_preds)
print(f"Random Forest accuracy: {forest_acc}")

Random Forest accuracy: 0.9073097211755841
