In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import *
from src.data_utils import *
from sklearn.metrics import accuracy_score
from src.ShapeCARTClassifier import ShapeCARTClassifier
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Name of the benchmark dataset handed to the project's data factory.
dataset = 'electricity'
data_factory = DataFactory_clf(cache=False, dataset=dataset)

# get_data(0) is unpacked into train / validation / test features and labels.
# NOTE(review): the argument 0 presumably selects a seed or fold -- confirm
# against DataFactory_clf.
(
    X_train, y_train,
    X_val, y_val,
    X_test, y_test,
) = data_factory.get_data(0)

# Read after get_data() (as in the original ordering) and passed to
# ShapeCARTClassifier.fit further down.
feature_dict = data_factory.feature_dict

# Number of distinct labels present in the training split.
n_classes = np.unique(y_train).size
print(f"n_classes: {n_classes}")

n_classes: 2


In [3]:
# Show the feature-name -> column-index mapping taken from the data factory.
# A name may map to several columns (in the recorded output, 'day' covers
# indices 7-12; presumably a one-hot encoding -- confirm in src.data_utils).
feature_dict

{'date': [0],
 'period': [1],
 'nswprice': [2],
 'nswdemand': [3],
 'vicprice': [4],
 'vicdemand': [5],
 'transfer': [6],
 'day': [7, 8, 9, 10, 11, 12]}

In [4]:
# Baseline: scikit-learn decision tree capped at depth 5, fit on the training
# split and scored on both the training and test splits.
cart = DecisionTreeClassifier(random_state=0, max_depth=5)
cart.fit(X_train, y_train)
y_pred_train, y_pred = cart.predict(X_train), cart.predict(X_test)
print(
    f"Cart Train Accuracy: {accuracy_score(y_train, y_pred_train)}",
    f"Cart Test Accuracy: {accuracy_score(y_test, y_pred)}",
    sep="\n",
)

Cart Train Accuracy: 0.7815751308405322
Cart Test Accuracy: 0.7755710029791459


In [5]:
# ShapeCART with no optional settings beyond the depth limit and the seed.
clf = ShapeCARTClassifier(random_state=0, max_depth=5)

# fit() accepts an optional feature_dict argument; supplying it enables
# superset branching.
clf.fit(X_train, y_train, feature_dict=feature_dict)
y_pred_train, y_pred = clf.predict(X_train), clf.predict(X_test)
print(
    f"ShapeCART Train Accuracy: {accuracy_score(y_train, y_pred_train)}",
    f"ShapeCART Test Accuracy: {accuracy_score(y_test, y_pred)}",
    sep="\n",
)

ShapeCART Train Accuracy: 0.86575446118923
ShapeCART Test Accuracy: 0.8417742469381


In [6]:
# ShapeCART variant with k=3, where k is the branching factor.
clf = ShapeCARTClassifier(k=3, random_state=0, max_depth=5)
clf.fit(X_train, y_train, feature_dict=feature_dict)
y_pred_train, y_pred = clf.predict(X_train), clf.predict(X_test)
print(
    f"ShapeCART_3 Train Accuracy: {accuracy_score(y_train, y_pred_train)}",
    f"ShapeCART_3 Test Accuracy: {accuracy_score(y_test, y_pred)}",
    sep="\n",
)

ShapeCART_3 Train Accuracy: 0.9109338545936062
ShapeCART_3 Test Accuracy: 0.8636213174445548


In [7]:
# Shape2CART: pairwise_candidates=1.0 makes each node consider X.shape[1]
# pairwise combinations of features.
clf = ShapeCARTClassifier(pairwise_candidates=1.0, random_state=0, max_depth=5)
clf.fit(X_train, y_train, feature_dict=feature_dict)
y_pred_train, y_pred = clf.predict(X_train), clf.predict(X_test)
print(
    f"Shape2CART Train Accuracy: {accuracy_score(y_train, y_pred_train)}",
    f"Shape2CART Test Accuracy: {accuracy_score(y_test, y_pred)}",
    sep="\n",
)

Shape2CART Train Accuracy: 0.8972823002711394
Shape2CART Test Accuracy: 0.8660487697230498


In [8]:
# Shape2CART_3: pairwise feature candidates combined with k=3.
clf = ShapeCARTClassifier(
    k=3,
    pairwise_candidates=1.0,
    random_state=0,
    max_depth=5,
)

# Passing the optional feature_dict to fit() enables superset branching.
clf.fit(X_train, y_train, feature_dict=feature_dict)
y_pred_train, y_pred = clf.predict(X_train), clf.predict(X_test)
print(
    f"Shape2CART_3 Train Accuracy: {accuracy_score(y_train, y_pred_train)}",
    f"Shape2CART_3 Test Accuracy: {accuracy_score(y_test, y_pred)}",
    sep="\n",
)

Shape2CART_3 Train Accuracy: 0.9412951636294848
Shape2CART_3 Test Accuracy: 0.8624075913053073
