# Boston Dataset

In [7]:
import sys  
sys.path.insert(0, '../')
import deep_forest
import preprocess
import torch as th
from torch import nn as nn
import matplotlib.pyplot as plt
%matplotlib inline
from math import pi
import seaborn as sns
from tqdm import tqdm
sns.set_theme("notebook")
sns.set_style('whitegrid')

## Generate Data

In [2]:
x, y, _, _ = preprocess.get_data(100 )

## Deep Forest

In [64]:
model = deep_forest.DeepForest(100, 2, 13, 0.25, 15)
device = th.device("cuda" if th.cuda.is_available() else "cpu")
model = model.to(device)
x = x.to(device)
y = y.to(device)

In [65]:
optimizer = th.optim.Adam(model.parameters())
pbar = tqdm(range(2500))
for i in pbar:
    model.populate_best(x[:, :], y[:])
    optimizer.zero_grad()

    loss = model.loss(x[:, :], y[:], device)
    loss.backward()
    optimizer.step()

    pbar.set_description("EPOCH %d || Acc: %s || Loss: %s" % (i, str(th.mean((model.forward(x[:, :], device) == y[:]).float())), str(loss)))

print("\n\n==============\nFINAL ACC: %s" % str(th.mean((model.forward(x[:, :], device) == y[:]).float())))

EPOCH 2499 || Acc: tensor(0.9944) || Loss: tensor([626.6826], grad_fn=<AddBackward0>): 100%|██████████| 2500/2500 [26:06<00:00,  1.60it/s]

FINAL ACC: tensor(0.9944)



## MLP Baseline

In [73]:
mlp = nn.Sequential(
    nn.Linear(13, 30),
    nn.LeakyReLU(),
    nn.Linear(30, 15),
    nn.LeakyReLU(),
    nn.Linear(15, 3),
    nn.Softmax()
)

optimizer = th.optim.Adam(mlp.parameters())
pbar = tqdm(range(1000))
for i in pbar:
    optimizer.zero_grad()

    preds = mlp(x[:, :])
    loss = nn.functional.cross_entropy(preds, (y[:].type(th.LongTensor)).to(device))
    loss.backward()
    optimizer.step()

    pbar.set_description("EPOCH %d || Acc: %s || Loss: %s" % (i, str(th.mean((th.argmax(mlp(x[:]), 1) == y[:]).float())), str(loss)))

print("\n==============\nFINAL ACC: %s" % str(th.mean((th.argmax(mlp(x[:]), 1) == y[:]).float())))

EPOCH 999 || Acc: tensor(0.9831) || Loss: tensor(0.5779, grad_fn=<NllLossBackward>): 100%|██████████| 1000/1000 [00:03<00:00, 275.37it/s]

FINAL ACC: tensor(0.9831)


## Random Forest

In [54]:
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier(max_depth=2)
clf.fit(x[:, :].numpy(), y[:].numpy())
print(clf.score(x[:, :].numpy(), y[:].numpy()))

0.9831460674157303
