In [1]:
# Pin the process to GPU id '0' via CUDA_VISIBLE_DEVICES; this cell runs before
# any of the model libraries are imported further down in the notebook.
import os

os.environ.update(CUDA_VISIBLE_DEVICES='0')

In [2]:
from sklearn.model_selection import train_test_split
import openml
import category_encoders as ce
import numpy as np
import sklearn
import pandas as pd

# Load OpenML dataset 537 together with its default target attribute.
# NOTE(review): the recorded `X_train.head()` output lists California-housing
# style columns (median_income, housing_median_age, total_rooms, ...), so the
# earlier "delays_zurich_transport" label looks wrong -- confirm the dataset id.
dataset = openml.datasets.get_dataset(537)
X, y, categorical_indicator, attribute_names = dataset.get_data(
    target=dataset.default_target_attribute
)

# Hold out 20% of the rows for testing, then 20% of the remainder for
# validation; both splits use the same fixed seed.
X_temp, X_test, y_temp, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_temp, y_temp, random_state=42, test_size=0.2
)

for split_name, split_frame in (
    ("Training", X_train),
    ("Validation", X_valid),
    ("Test", X_test),
):
    print(f"{split_name} set size:", len(split_frame))


Training set size: 13209
Validation set size: 3303
Test set size: 4128




In [3]:
# Preview the first five rows of the training features.
X_train.head(n=5)

Unnamed: 0,median_income,housing_median_age,total_rooms,total_bedrooms,population,households,latitude,longitude
16490,4.7069,27,1683.0,288.0,873.0,258.0,38.0,-120.97
6090,3.875,15,6409.0,1363.0,3359.0,1267.0,34.1,-117.87
10973,2.8828,26,2312.0,525.0,1273.0,437.0,33.76,-117.85
17287,5.0,34,1664.0,292.0,705.0,257.0,34.35,-119.74
12615,5.0371,25,4719.0,745.0,1857.0,739.0,38.5,-121.51


In [4]:
# Keep untouched copies of the three splits before any encoding is applied.
X_train_raw, X_valid_raw, X_test_raw = X_train.copy(), X_valid.copy(), X_test.copy()

# Positional indices of the columns flagged as categorical by `categorical_indicator`.
categorical_feature_indices = [
    position
    for position in range(X_train.shape[1])
    if categorical_indicator[position]
]

# Despite the "_indices" suffix, these two lists hold column *names*: a
# categorical column goes to the low-cardinality list when the training split
# has fewer than 10 distinct values in it, otherwise to the high-cardinality one.
low_cardinality_indices = []
high_cardinality_indices = []
for position in categorical_feature_indices:
    distinct_count = len(X_train.iloc[:, position].unique())
    if distinct_count < 10:
        target_list = low_cardinality_indices
    else:
        target_list = high_cardinality_indices
    target_list.append(X_train.columns[position])

In [5]:
# Ordinal-encode the categorical columns. The encoder is fitted on the training
# split only and then applied to all three splits, each cast to float64.
categorical_columns = X_train.columns[categorical_feature_indices]
encoder = ce.OrdinalEncoder(cols=categorical_columns)
encoder.fit(X_train)

X_train, X_valid, X_test = (
    encoder.transform(split).astype(np.float64)
    for split in (X_train, X_valid, X_test)
)

In [6]:
from GradTree import GradTree

# Model hyperparameters handed to GradTree as `params`.
params = dict(
    depth=5,

    # One learning rate per parameter group (names as expected by GradTree).
    learning_rate_index=0.01,
    learning_rate_values=0.01,
    learning_rate_leaf=0.01,

    optimizer='SWA',
    cosine_decay_steps=0,

    initializer='RandomNormal',

    loss='mse',
    focal_loss=False,
    temperature=0.0,

    from_logits=True,
    apply_class_balancing=False,
)

# Training / task configuration handed to GradTree as `args`.
args = dict(
    epochs=1_000,
    early_stopping_epochs=25,
    batch_size=64,

    cat_idx=categorical_feature_indices,
    objective='regression',

    metrics=['R2'],  # F1, Accuracy, R2
    random_seed=42,
    verbose=1,
)

model_gradtree = GradTree(params=params, args=args)

# Train on the training split, passing the validation split alongside it.
model_gradtree.fit(X_train=X_train, y_train=y_train, X_val=X_valid, y_val=y_valid)

# Test-set predictions, evaluated in the metrics cell further down.
preds_gradtree = model_gradtree.predict(X_test)

2023-11-03 13:53:53.031721: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.

TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



Epoch 1/1000


2023-11-03 13:53:57.113606: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 46692 MB memory:  -> device: 0, name: NVIDIA RTX A6000, pci bus id: 0000:a1:00.0, compute capability: 8.6
2023-11-03 13:54:00.588319: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x2206e660 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-11-03 13:54:00.588363: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA RTX A6000, Compute Capability 8.6
2023-11-03 13:54:00.598328: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:255] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-11-03 13:54:01.872341: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8801
2023-11-03 13:54:03.130075: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XL

Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000


In [7]:
def calculate_sample_weights(y_data):
    """Return one 'balanced' weight per entry of ``y_data``.

    Thin wrapper around ``sklearn.utils.class_weight.compute_sample_weight``
    called with ``class_weight='balanced'``.

    Parameters
    ----------
    y_data : array-like
        Target labels; forwarded unchanged to scikit-learn.

    Returns
    -------
    The per-sample weights exactly as returned by ``compute_sample_weight``.
    """
    # The earlier version also computed per-class weights here and discarded
    # them; that unused call has been removed.
    return sklearn.utils.class_weight.compute_sample_weight(class_weight='balanced', y=y_data)

def calculate_class_weights(y_data):
    """Return the 'balanced' class weights for ``y_data`` as a plain list.

    The distinct labels are taken from ``np.unique(y_data)`` and passed to
    ``sklearn.utils.class_weight.compute_class_weight`` with
    ``class_weight='balanced'``; the result is converted to a list, one entry
    per distinct label in the order given by ``np.unique``.
    """
    distinct_labels = np.unique(y_data)
    balanced = sklearn.utils.class_weight.compute_class_weight(
        class_weight='balanced',
        classes=distinct_labels,
        y=y_data,
    )
    return [weight for weight in balanced]

In [8]:
# --- Preprocessing applied before fitting the CART baseline ------------------

# Impute missing values in every split with the per-column medians of the
# training split.
median = X_train.median(axis=0)
X_train = X_train.fillna(median)
# Fixed: the result used to be bound to the misspelled name `X_vali`, which
# left `X_valid` un-imputed going into the encoders below.
X_valid = X_valid.fillna(median)
X_test = X_test.fillna(median)

# Leave-one-out (target) encoding for the high-cardinality categorical columns,
# fitted on the training split and its targets only.
encoder = ce.LeaveOneOutEncoder(cols=high_cardinality_indices)
encoder.fit(X_train, y_train)
X_train = encoder.transform(X_train)
X_valid = encoder.transform(X_valid)
X_test = encoder.transform(X_test)

# One-hot encoding for the low-cardinality categorical columns.
encoder = ce.OneHotEncoder(cols=low_cardinality_indices)
encoder.fit(X_train)
X_train = encoder.transform(X_train)
X_valid = encoder.transform(X_valid)
X_test = encoder.transform(X_test)

# Second imputation pass: fill anything still missing after encoding, again
# using training-split medians.
median = X_train.median(axis=0)
X_train = X_train.fillna(median)
X_valid = X_valid.fillna(median)
X_test = X_test.fillna(median)

# Fit the quantile transformer on a *noisy copy* of the training matrix.
# Per column, the noise std is quantile_noise / max(std, quantile_noise), so it
# shrinks as the column's spread grows and is capped at 1 for (near-)constant
# columns.
quantile_noise = 1e-4
quantile_train = np.copy(X_train.values).astype(np.float64)
np.random.seed(42)
stds = np.std(quantile_train, axis=0, keepdims=True)
noise_std = quantile_noise / np.maximum(stds, quantile_noise)
quantile_train += noise_std * np.random.randn(*quantile_train.shape)

scaler = sklearn.preprocessing.QuantileTransformer(output_distribution='normal')
scaler.fit(quantile_train)

# The fitted scaler is applied to the original (noise-free) values; from here
# on the three splits are NumPy arrays rather than DataFrames.
X_train = scaler.transform(X_train.values.astype(np.float64))
X_valid = scaler.transform(X_valid.values.astype(np.float64))
X_test = scaler.transform(X_test.values.astype(np.float64))

In [10]:
from sklearn.tree import DecisionTreeRegressor

# CART baseline. It has no early stopping, so it is trained on the training and
# validation splits combined. The seed is fixed (42, matching the other seeds
# in this notebook) so that tie-breaking between equally good splits, and hence
# the reported metrics, are reproducible across runs.
model_cart = DecisionTreeRegressor(random_state=42)
model_cart.fit(
    np.concatenate([X_train, X_valid]),
    np.concatenate([y_train, y_valid]),
)

# Test-set predictions of the baseline, compared against GradTree below.
preds_cart = model_cart.predict(X_test)

In [13]:
# Print test-set metrics for both models; the metric set depends on
# args['objective'] ('binary', 'classification', or anything else -> regression).
# NOTE(review): the 'binary' and 'classification' branches index the predictions
# as 2-D (one column per class). In this notebook `preds_cart` comes from
# DecisionTreeRegressor.predict, so those branches presumably expect a
# classifier's probability output instead -- confirm before reusing them.
models_under_test = (('GradTree', preds_gradtree), ('CART', preds_cart))

if args['objective'] == 'binary':
    for model_name, model_preds in models_under_test:
        positive_scores = model_preds[:,1]
        hard_labels = np.round(positive_scores)

        accuracy = sklearn.metrics.accuracy_score(y_test, hard_labels)
        f1_score = sklearn.metrics.f1_score(y_test, hard_labels, average='macro')
        roc_auc = sklearn.metrics.roc_auc_score(y_test, positive_scores, average='macro', multi_class='ovo')

        print(f'Accuracy {model_name}:', accuracy)
        print(f'F1 Score {model_name}:', f1_score)
        print(f'ROC AUC {model_name}:', roc_auc)
        print('\n')

elif args['objective'] == 'classification':
    for model_name, model_preds in models_under_test:
        predicted_classes = np.argmax(model_preds, axis=1)
        # The label set is always derived from the GradTree output width.
        class_labels = list(range(preds_gradtree.shape[1]))

        accuracy = sklearn.metrics.accuracy_score(y_test, predicted_classes)
        f1_score = sklearn.metrics.f1_score(y_test, predicted_classes, average='macro')
        roc_auc = sklearn.metrics.roc_auc_score(y_test, model_preds, average='macro', multi_class='ovo', labels=class_labels)

        print(f'Accuracy {model_name}:', accuracy)
        print(f'F1 Score {model_name}:', f1_score)
        print(f'ROC AUC {model_name}:', roc_auc)
        print('\n')

else:
    for model_name, model_preds in models_under_test:
        mean_absolute_error = sklearn.metrics.mean_absolute_error(y_test, model_preds)
        r2_score = sklearn.metrics.r2_score(y_test, model_preds)

        print(f'MAE {model_name}:', mean_absolute_error)
        print(f'R2 Score {model_name}:', r2_score)
        print('\n')


MAE GradTree: 59983.78264671148
R2 Score GradTree: 0.49586411373442785


MAE CART: 44544.35222868217
R2 Score CART: 0.627737162237934


