# Iris Dataset Testing

In [1]:
import sys  
sys.path.insert(0, '../')
import deep_forest
import preprocess
import torch as th
from torch import nn as nn
import matplotlib.pyplot as plt
%matplotlib inline
from math import pi
import seaborn as sns
from tqdm import tqdm
# Global plot styling: "notebook" scaling context on a white grid background.
sns.set_theme(context="notebook", style="whitegrid")

## Generate Data
The data is the UCI Iris dataset. Here, 100% of the data is used for training.

In [2]:
# Load the Iris data through the project-local `preprocess` module, which
# returns four values: training inputs/labels and test inputs/labels.
# NOTE(review): the argument 100 is presumably the percentage of samples
# assigned to the training split — confirm against preprocess.get_data.
train_data, train_labels, test_data, test_labels = preprocess.get_data(100)

## Deep Forest
Here, the model is instantiated (using cuda or the CPU) and trained.

In [4]:
# Prefer the GPU when PyTorch can see one; otherwise run on the CPU.
device = th.device("cuda" if th.cuda.is_available() else "cpu")

# Build the forest directly on the chosen device.
# NOTE(review): the positional constructor arguments are not documented here —
# see deep_forest.DeepForest for what each value controls.
model = deep_forest.DeepForest(100, 2, 4, 0.5, 10).to(device)

# The training tensors must live on the same device as the model.
train_data, train_labels = train_data.to(device), train_labels.to(device)

# NOTE(review): 2500 is presumably the number of training epochs — confirm.
model.train(2500, train_data, train_labels)

KeyboardInterrupt: 

In [None]:
# Feature importances from the trained forest; the plotting cell below reads
# this through .keys() / .values(), so it is used as a mapping.
imp = model.compute_importance(train_data)

# Emit a blank line first, then the importances on their own line.
print("", imp, sep="\n")

In [None]:
import pandas as pd

# Long-form table for seaborn: one row per entry of `imp`, holding the key
# ("feat") and its associated value ("imp").
data = pd.DataFrame(
    {
        "feat": list(imp.keys()),
        "imp": list(imp.values()),
    }
)
sns.barplot(data=data, x="feat", y="imp").set_title("Iris Deep Forest Importance")

# MLP Baseline
Here, a baseline feedforward network (MLP) is trained on the Iris dataset for comparison.

In [None]:
def _accuracy(net, inputs, targets):
    """Return the fraction of rows in `inputs` whose argmax prediction equals `targets`.

    The forward pass runs under `th.no_grad()` so that evaluating accuracy does
    not build an autograd graph. The result is a plain Python float.
    """
    with th.no_grad():
        return (th.argmax(net(inputs), 1) == targets).float().mean().item()


# The network ends in a plain linear layer and therefore emits raw logits.
# nn.functional.cross_entropy applies log-softmax internally, so a trailing
# nn.Softmax layer would normalise the outputs twice and distort the loss.
# argmax over logits equals argmax over softmax, so predictions are unaffected.
mlp = nn.Sequential(
    nn.Linear(4, 15),
    nn.LeakyReLU(),
    nn.Linear(15, 15),
    nn.LeakyReLU(),
    nn.Linear(15, 3),
).to(device)  # the parameters must sit on the same device as train_data

optimizer = th.optim.Adam(mlp.parameters())

# cross_entropy needs integer class indices; .long() converts in place on the
# tensor's current device (no round-trip through a CPU LongTensor).
targets = train_labels.long()

pbar = tqdm(range(1000))
for i in pbar:
    optimizer.zero_grad()

    preds = mlp(train_data)
    loss = nn.functional.cross_entropy(preds, targets)
    loss.backward()
    optimizer.step()

    # Accuracy is measured after the parameter update; the loss shown is the
    # one that produced this update.
    pbar.set_description("EPOCH %d || Acc: %s || Loss: %s" % (i, str(_accuracy(mlp, train_data, train_labels)), str(loss.item())))

print("\n\n==============\nFINAL ACC: %s" % str(_accuracy(mlp, train_data, train_labels)))


# Standard Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# scikit-learn works on NumPy arrays, and Tensor.numpy() is only valid for
# tensors in host memory. train_data / train_labels were moved to `device` in
# the Deep Forest section, so bring them back to the CPU before converting;
# .cpu() is a no-op when the tensors are already there.
features_np = train_data.detach().cpu().numpy()
labels_np = train_labels.detach().cpu().numpy()

clf = RandomForestClassifier(max_depth=2)
clf.fit(features_np, labels_np)

# Scored on the same samples the forest was fitted on, i.e. training accuracy.
print("Accuracy: " + str(clf.score(features_np, labels_np)))

In [None]:
# Impurity-based importances of the fitted forest, one value per input feature.
importances = clf.feature_importances_

# Derive the feature indices from the importances themselves instead of
# hard-coding 4, so the table stays consistent if the feature count changes.
data = pd.DataFrame(
    {
        "feat": list(range(len(importances))),
        "imp": importances,
    }
)
sns.barplot(data=data, x="feat", y="imp").set_title("Iris Random Forest Importance")