Import required packages

In [1]:
import sys
import numpy as np
import pandas as pd

from proglearn.progressive_learner import ProgressiveLearner
from proglearn.voters import TreeClassificationVoter
from proglearn.transformers import TreeClassificationTransformer
from proglearn.transformers import ObliqueTreeClassificationTransformer
from proglearn.deciders import SimpleArgmaxAverage

from sklearn.model_selection import train_test_split, cross_val_score

Set parameters

In [2]:
max_depth = 10
feature_combinations = 2
density = 0.01

reps = 5
n_trees = 10

kwargs = {"kwargs" : {"max_depth" : max_depth, "feature_combinations" : feature_combinations, "density" : density}}

Train and test on data (Hill Valley without noise)

In [8]:
default_voter_class = TreeClassificationVoter
default_voter_kwargs = {}

default_decider_class = SimpleArgmaxAverage
    
kappa = np.zeros(reps)
for i in range(reps):

    df = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/hill-valley/Hill_Valley_without_noise_Training.data")

    X = df[df.columns[:-1]].to_numpy()
    y = df[df.columns[-1]].to_numpy()
    
    n_classes = len(np.unique(y))
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, shuffle=True, stratify=y)

    default_decider_kwargs = {"classes" : np.arange(n_classes)}

    pl = ProgressiveLearner(
        default_transformer_class = ObliqueTreeClassificationTransformer,
        default_transformer_kwargs = kwargs,
        default_voter_class = default_voter_class,
        default_voter_kwargs = default_voter_kwargs,
        default_decider_class = default_decider_class,
        default_decider_kwargs = default_decider_kwargs)
      
    pl.add_task(X_train, y_train, num_transformers=n_trees)

    y_hat = pl.predict(X_test, task_id=0)
    
    acc = np.sum(y_test == y_hat) / len(y_test)
    print("Accuracy after iteration ", i, ": ", acc)

    chance_pred = 1 / n_classes
    kappa[i] = (acc - chance_pred) / (1 - chance_pred)

kappa = np.mean(kappa) * 100
err = (np.std(kappa) * 100) / np.sqrt(reps)

print("kappa: ", kappa, ", error:", err)

Accuracy after iteration  0 :  1.0
Accuracy after iteration  1 :  1.0
Accuracy after iteration  2 :  1.0
Accuracy after iteration  3 :  0.9672131147540983
Accuracy after iteration  4 :  1.0
kappa:  98.68852459016392 , error: 0.0
