In [1]:
from dgpsi import dgp, kernel, emulator, Categorical, nb_seed, combine
import numpy as np
from sklearn.datasets import load_wine
from sklearn.preprocessing import MinMaxScaler
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import Matern
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
import pandas as pd

## Data preparation

In [2]:
# Set the random state
random_state = 99
np.random.seed(random_state)
nb_seed(random_state) #ensure reproducibility of the numba implementation 

# Load the Wine dataset
X_wine, y_wine = load_wine(return_X_y=True)

# Scale the Wine dataset
scaler = MinMaxScaler()
X_wine_scaled = scaler.fit_transform(X_wine)

# Split the scaled Wine data into training and testing sets with the specified random state
X_train_wine, X_test_wine, y_train_wine, y_test_wine = train_test_split(X_wine_scaled, y_wine, test_size=0.2, random_state=random_state)


## GP classifier

In [None]:
ker = 1.0 * Matern([1.0] * 13, nu=2.5, length_scale_bounds=(1e-5, 1e8))
m_gp = GaussianProcessClassifier(kernel=ker, random_state=random_state)
m_gp.fit(X_train_wine, y_train_wine)

## DGP classifier

In [4]:
layer1 = []
for _ in range(X_train_wine.shape[1]):
    layer1.append(kernel(length=np.array([1]),name='matern2.5',nugget=1e-6))
layer2 = [
    kernel(length=np.array([1]), name='sexp', scale_est=True, nugget=1e-4, connect=np.arange(X_train_wine.shape[1])),
    kernel(length=np.array([1]), name='sexp', scale_est=True, nugget=1e-4, connect=np.arange(X_train_wine.shape[1])),
    kernel(length=np.array([1]), name='sexp', scale_est=True, nugget=1e-4, connect=np.arange(X_train_wine.shape[1]))
]
layer3 = [Categorical()]
all_layer = combine(layer1, layer2, layer3)
m_dgp = dgp(X_train_wine, y_train_wine.reshape(-1,1), all_layer)

In [5]:
m_dgp.train(N=500) #training

  0%|          | 0/500 [00:00<?, ?it/s]

Iteration 500: Layer 3: 100%|██████████| 500/500 [03:01<00:00,  2.75it/s]


In [6]:
final_layer_obj = m_dgp.estimate()
emu = emulator(final_layer_obj)

## Evaluation

In [7]:
log_losses = {}

# GP Classifier
probs_gp = m_gp.predict_proba(X_test_wine)
log_losses["GPClassifier"] = log_loss(y_test_wine, probs_gp)

# DGP Classifier
log_losses["DGPClassifier"] = emu.log_loss(X_test_wine, y_test_wine)

# Convert the results into a DataFrame
df_log_losses = pd.DataFrame(list(log_losses.items()), columns=["Classifier", "Log Loss"])

# Display the result
df_log_losses


Unnamed: 0,Classifier,Log Loss
0,GPClassifier,0.456299
1,DGPClassifier,0.007794
