### Initialize notebook

#### Imports

In [1]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from src.decision_tree_classifier import DecisionTreeClassifier
from src.random_forest_classifier import RandomForestClassifier

#### Generate a synthetic dataset and split it into training and validation sets

In [2]:
# Fixed seed so that Restart Kernel -> Run All regenerates the exact same
# dataset (and therefore comparable accuracy numbers) on every run.
SEED = 42

# Create a synthetic binary-classification dataset.
X, y = make_classification(n_samples=5000, random_state=SEED)

# Hold out the last 20% of the rows for validation. make_classification
# shuffles the samples by default, so a positional split is not ordered by class.
train_size = int(len(X) * 0.8)
X_train, y_train = X[:train_size], y[:train_size]
X_val, y_val = X[train_size:], y[train_size:]

#### Custom __DecisionTreeClassifier__.

- Initialize the classifier.
- Fit it on the training data.
- Predict on the training and validation sets.
- Compute the accuracy score for both sets.

In [3]:
# Configure the custom decision tree with the hyperparameters used in this demo.
decision_tree = DecisionTreeClassifier(
    max_depth=np.inf, min_samples_split=2, min_samples_leaf=1, min_impurity_decrease=0.0
)

# Train on the training split only.
decision_tree.fit(X_train, y_train)

# Predict on the training split first, then on the validation split.
y_train_hat, y_val_hat = (decision_tree.predict(split) for split in (X_train, X_val))

# Score each set of predictions against its ground-truth labels.
accuracy_train, accuracy_val = (
    accuracy_score(truth, guess)
    for truth, guess in ((y_train, y_train_hat), (y_val, y_val_hat))
)

# Report both scores.
print(
    f"Accuracy score for training data is {accuracy_train:.2} \n Accuracy score for validation data is {accuracy_val:.2}"
)

Accuracy score for training data is 1.0 
 Accuracy score for validation data is 0.82


#### Custom __RandomForestClassifier__.

- Initialize the classifier.
- Fit it on the training data.
- Predict on the training and validation sets.
- Compute the accuracy score for both sets.

In [4]:
# Configure the custom random forest with the hyperparameters used in this demo.
random_forest = RandomForestClassifier(
    n_estimators=100,
    max_depth=np.inf,
    min_samples_split=8,
    min_samples_leaf=1,
    min_impurity_decrease=0.0,
    bootstrap=True,
    n_cores=None,
)

# Train on the training split only.
random_forest.fit(X_train, y_train)

# Predict on the training split first, then on the validation split.
y_train_hat, y_val_hat = (random_forest.predict(split) for split in (X_train, X_val))

# Score each set of predictions against its ground-truth labels.
accuracy_train, accuracy_val = (
    accuracy_score(truth, guess)
    for truth, guess in ((y_train, y_train_hat), (y_val, y_val_hat))
)

# Report both scores.
print(
    f"Accuracy score for training data is {accuracy_train:.2} \n Accuracy score for validation data is {accuracy_val:.2}"
)

Accuracy score for training data is 0.99 
 Accuracy score for validation data is 0.87
