# Wine Dataset

In [1]:
import sys
from math import pi

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import torch as th
from torch import nn as nn
from tqdm import tqdm

# Project modules live one directory up; extend the path before importing them.
sys.path.insert(0, '../')
import deep_forest
from preprocess import get_data

%matplotlib inline
sns.set_theme("notebook")
sns.set_style('whitegrid')

## Get Data

In [2]:
# Load the dataset through the project's preprocess helper. Judging by the names,
# (x, y) are the training features/labels and (test_data, test_labels) a held-out split.
# NOTE(review): the meaning of the argument 100 is not visible here -- confirm in preprocess.get_data.
x, y, test_data, test_labels = get_data(100)

## Deep Forest

In [3]:
# Build the deep forest model with positional hyperparameters.
# NOTE(review): the argument meanings are not visible in this notebook; presumably
# 13 is the number of input features -- confirm against deep_forest.DeepForest.
model = deep_forest.DeepForest(25, 2, 13, 0.25, 10)

In [4]:
# Train the deep forest with Adam on the full training set.
#
# NOTE(review): `device` was never defined anywhere in this notebook, so this cell
# raised NameError on a fresh kernel. The tensors are used on the CPU elsewhere
# (`x.numpy()` in the random-forest cell), so CPU is assumed here -- confirm that
# deep_forest accepts a torch.device for this argument.
device = th.device("cpu")


def forest_accuracy():
    """Return the fraction of samples in `x` whose forest prediction equals `y`."""
    # Pure evaluation: skip building an autograd graph for the extra forward pass.
    with th.no_grad():
        return th.mean((model.forward(x, device) == y).float())


optimizer = th.optim.Adam(model.parameters())
pbar = tqdm(range(2500))
for i in pbar:
    # Called before every gradient step, exactly as in the original ordering.
    model.populate_best(x, y)
    optimizer.zero_grad()

    loss = model.loss(x, y, device)
    loss.backward()
    optimizer.step()

    # Accuracy is measured after the step, so it reflects the updated parameters.
    pbar.set_description("EPOCH %d || Acc: %s || Loss: %s" % (i, str(forest_accuracy()), str(loss)))

print("\n\n==============\nFINAL ACC: %s" % str(forest_accuracy()))

====EPOCH 0====
Acc: tensor(0.3989)
Loss: tensor([167.4655], grad_fn=<AddBackward0>)
====EPOCH 200====
Acc: tensor(0.6798)
Loss: tensor([163.0257], grad_fn=<AddBackward0>)
====EPOCH 400====
Acc: tensor(0.9101)
Loss: tensor([160.8562], grad_fn=<AddBackward0>)
====EPOCH 600====
Acc: tensor(0.9326)
Loss: tensor([159.4335], grad_fn=<AddBackward0>)
====EPOCH 800====
Acc: tensor(0.9775)
Loss: tensor([159.3911], grad_fn=<AddBackward0>)
====EPOCH 1000====
Acc: tensor(0.9719)
Loss: tensor([158.9085], grad_fn=<AddBackward0>)
====EPOCH 1200====
Acc: tensor(0.9831)
Loss: tensor([159.2259], grad_fn=<AddBackward0>)
====EPOCH 1400====
Acc: tensor(0.9831)
Loss: tensor([159.0055], grad_fn=<AddBackward0>)

FINAL TRAIN ACC: tensor(0.9944)

FINAL TEST ACC: tensor(0.9944)


In [None]:
# Ask the trained forest for its importance scores on the training features.
imp = model.compute_importance(x)

# Emit a blank line first and then the scores (same text as two separate prints).
print("", imp, sep="\n")

In [None]:
import pandas as pd

# `imp` is used as a mapping: its keys label the bars, its values set their heights.
feature_ids = list(imp.keys())
scores = list(imp.values())
data = pd.DataFrame({"feat": feature_ids, "imp": scores})

# Bar chart with one bar per key of `imp`.
sns.barplot(data=data, x="feat", y="imp")

## MLP Baseline

In [None]:
# Baseline: a small fully connected classifier trained on the same data.
#
# Layer sizes are derived from the data rather than hard-coded, so the network
# always matches the feature matrix and the label set it is trained on.
n_features = x.shape[1]
# cross_entropy needs integer class indices in [0, C), so C = largest label + 1.
n_classes = int(y.max().item()) + 1

# The network outputs raw logits. nn.functional.cross_entropy applies log-softmax
# internally, so a trailing Softmax layer would normalise twice and squash the
# gradients. argmax over logits yields the same predictions as over probabilities.
mlp = nn.Sequential(
    nn.Linear(n_features, 30),
    nn.LeakyReLU(),
    nn.Linear(30, 15),
    nn.LeakyReLU(),
    nn.Linear(15, n_classes),
)


def mlp_accuracy():
    """Return the fraction of samples in `x` whose argmax prediction equals `y`."""
    # Pure evaluation: skip building an autograd graph for the extra forward pass.
    with th.no_grad():
        return th.mean((th.argmax(mlp(x), 1) == y).float())


optimizer = th.optim.Adam(mlp.parameters())
pbar = tqdm(range(1000))
for i in pbar:
    optimizer.zero_grad()

    logits = mlp(x)
    loss = nn.functional.cross_entropy(logits, y)
    loss.backward()
    optimizer.step()

    # Accuracy is measured after the step, so it reflects the updated parameters.
    pbar.set_description("EPOCH %d || Acc: %s || Loss: %s" % (i, str(mlp_accuracy()), str(loss)))

print("\n\n==============\nFINAL ACC: %s" % str(mlp_accuracy()))

## Random Forest

In [5]:
from sklearn.ensemble import RandomForestClassifier

# scikit-learn works on NumPy arrays, so convert the tensors once up front.
features_np, labels_np = x.numpy(), y.numpy()

# Depth-2 random forest fitted on the training data (fit returns the estimator).
clf = RandomForestClassifier(max_depth=2).fit(features_np, labels_np)

# Mean accuracy on the same data the forest was fitted on.
print(clf.score(features_np, labels_np))

Train Accuracy:  0.9831460674157303


In [None]:
# Bar chart of the fitted random forest's feature importances, one bar per feature.
importances = clf.feature_importances_

# Index the bars by the actual number of features. A hard-coded count makes the
# DataFrame constructor raise ValueError (columns of unequal length) whenever it
# differs from the number of columns the forest was fitted on.
data = pd.DataFrame({"feat": list(range(len(importances))), "imp": importances})
sns.barplot(x="feat", y="imp", data=data)