In [45]:
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.optim import Adam
from torch.utils.data import DataLoader, Dataset
import sklearn.metrics
from sklearn.preprocessing import StandardScaler

from datasets import CaliforniaHousingDataset, AdultDataset, TitanicDataset, AutoMpgDataset, WineDataset
from metrics import calculate_global_fidelity, calculate_global_neighborhood_fidelity
from models.base_model import BaseClassifier, BaseRegressor
from models.surrogate_model import SurrogateClassifier, SurrogateRegressor

device = "cuda:0" if torch.cuda.is_available() else "cpu"

In [46]:
housing_train = CaliforniaHousingDataset(
    dataset_path="data/california_housing/cal_housing.data", normalize=True, train=True)
housing_test = CaliforniaHousingDataset(
    dataset_path="data/california_housing/cal_housing.data", normalize=True, train=False)

adult_train = AdultDataset(dataset_path="data/adult/adult.data", normalize=True, train=True)
adult_test = AdultDataset(dataset_path="data/adult/adult.data", normalize=True, train=False)

titanic_train = TitanicDataset(dataset_path="data/titanic/titanic.arff", normalize=True, train=True)
titanic_test = TitanicDataset(dataset_path="data/titanic/titanic.arff", normalize=True, train=False)

wine_train = WineDataset(dataset_path="data/wines/winequality-red.csv", normalize=True, train=True)
wine_test = WineDataset(dataset_path="data/wines/winequality-red.csv", normalize=True, train=False)

autompg_train = AutoMpgDataset(dataset_path="data/autompg/auto-mpg.data", normalize=True, train=True)
autompg_test = AutoMpgDataset(dataset_path="data/autompg/auto-mpg.data", normalize=True, train=False)

In [47]:
# target normalization just to check (the mse scores in the paper are low - what's the transformation / metric used?)
scaler = StandardScaler()

# after seeing the results, it seems the authors just put the target through a StandardScaler
housing_train.target = pd.Series(scaler.fit_transform(np.array(housing_train.target).reshape(-1, 1)).flatten())
housing_test.target = pd.Series(scaler.fit_transform(np.array(housing_test.target).reshape(-1, 1)).flatten())
wine_train.target = pd.Series(scaler.fit_transform(np.array(wine_train.target).reshape(-1, 1)).flatten())
wine_test.target = pd.Series(scaler.fit_transform(np.array(wine_test.target).reshape(-1, 1)).flatten())
autompg_train.target = pd.Series(scaler.fit_transform(np.array(autompg_train.target).reshape(-1, 1)).flatten())
autompg_test.target = pd.Series(scaler.fit_transform(np.array(autompg_test.target).reshape(-1, 1)).flatten())

# TODO move that to datasets

In [48]:
lr = 0.001
batch_size = 128  # not from the paper
binary_classification_criterion = torch.nn.BCELoss()
regression_criterion = ... # "logarithm of the hyperbolic cosine" from the paper (?)
regression_criterion = torch.nn.MSELoss()
# TODO early stopping

In [67]:
def train(
        base_model: nn.Module,
        surrogate_model: nn.Module,
        train_data: Dataset,
        criterion,
        epochs: int,
        alpha: float
):
    params = list(base_model.parameters()) + list(surrogate_model.parameters())
    optimizer = Adam(params, lr=lr)
    loader = DataLoader(train_data, batch_size=batch_size)
    for epoch in range(epochs):
        running_loss = 0
        for data, labels in loader:
            data, labels = data.to(device), labels.to(device)
            labels = labels.reshape(-1, 1)
            optimizer.zero_grad()

            base_model_preds = base_model(data)
            surrogate_model_preds = surrogate_model(data)
            loss = criterion(base_model_preds, labels)
            point_fidelity = calculate_global_fidelity(base_model_preds, surrogate_model_preds)
            mtl_loss = alpha * loss + (1 - alpha) * point_fidelity

            mtl_loss.backward()
            optimizer.step()
            running_loss += mtl_loss
        print(f"epoch: {epoch + 1}, train loss: {running_loss / len(loader):.3f}")


def validate_base_classifier(
        model: nn.Module,
        test_data: Dataset,
):
    loader = DataLoader(test_data, batch_size=len(test_data))
    with torch.no_grad():
        data, labels = next(iter(loader))
        data, labels = data.to(device), labels.to(device)
        labels = labels.reshape(-1, 1)
        preds_proba = model(data)
        preds = torch.where(preds_proba >= 0.5, 1, 0)
        accuracy = sklearn.metrics.accuracy_score(labels.cpu(), preds.cpu())
        f1_score = sklearn.metrics.f1_score(labels.cpu(), preds.cpu())
        print(f"test accuracy: {accuracy:.3f}, f1 score: {f1_score:.3f}")


def validate_base_regressor(
        model: nn.Module,
        test_data: Dataset
):
    loader = DataLoader(test_data, batch_size=len(test_data))
    with torch.no_grad():
        data, labels = next(iter(loader))
        data, labels = data.to(device), labels.to(device)
        labels = labels.reshape(-1, 1)
        preds = model(data)
        mse = sklearn.metrics.mean_squared_error(labels.cpu(), preds.cpu())
        print(f"test mse: {mse:.3f}")


def validate_surrogate_model(
        base_model: nn.Module,
        surrogate_model: nn.Module,
        test_data: Dataset
):
    loader = DataLoader(test_data, batch_size=len(test_data))
    with torch.no_grad():
        data, _ = next(iter(loader))
        data = data.to(device)
        base_model_preds = base_model(data)
        surrogate_model_preds = surrogate_model(data)
        global_fidelity = calculate_global_fidelity(base_model_preds, surrogate_model_preds)
        global_neighborhood_fidelity = calculate_global_neighborhood_fidelity(base_model, surrogate_model, data)
        print(f"global fidelity: {global_fidelity:.3f}, global neighborhood fidelity: {global_neighborhood_fidelity:.3f}")


def validate_regressors(
        base_model: nn.Module,
        surrogate_model: nn.Module,
        test_data: Dataset
):
    validate_base_regressor(base_model, test_data)
    validate_surrogate_model(base_model, surrogate_model, test_data)


def validate_classifiers(
        base_model: nn.Module,
        surrogate_model: nn.Module,
        test_data: Dataset
):
    validate_base_classifier(base_model, test_data)
    validate_surrogate_model(base_model, surrogate_model, test_data)

# TODO local explainability evaluation

In [68]:
epochs = 5
alpha = 0.5

classification_data = {
    "adult": (adult_train, adult_test),
    "titanic": (titanic_train, titanic_test)
}
classifiers = []

for dataset in classification_data.keys():
    train_data, test_data = classification_data[dataset]
    base_model = BaseClassifier(
        input_dim=train_data.features.shape[1], output_dim=1, n_hidden_layers=4, layer_size=128).to(device)
    surrogate_model = SurrogateClassifier(input_dim=train_data.features.shape[1], output_dim=1).to(device)
    print(f"{dataset}:")
    train(base_model, surrogate_model, train_data, binary_classification_criterion, epochs, alpha)
    classifiers.append((base_model, surrogate_model))
    print()

adult:
epoch: 1, train loss: 0.219
epoch: 2, train loss: 0.180
epoch: 3, train loss: 0.170
epoch: 4, train loss: 0.164
epoch: 5, train loss: 0.160

titanic:
epoch: 1, train loss: 0.347
epoch: 2, train loss: 0.314
epoch: 3, train loss: 0.282
epoch: 4, train loss: 0.273
epoch: 5, train loss: 0.265



In [69]:
for i, dataset in enumerate(classification_data.keys()):
    base_model, surrogate_model = classifiers[i]
    test_data = classification_data[dataset][1]
    print(f"{dataset}:")
    validate_classifiers(base_model, surrogate_model, test_data)
    print()

adult:
test accuracy: 0.863, f1 score: 0.694
global fidelity: 0.016, global neighborhood fidelity: 0.016

titanic:
test accuracy: 0.780, f1 score: 0.750
global fidelity: 0.055, global neighborhood fidelity: 0.055



In [71]:
regression_data = {
    "wine": (wine_train, wine_test),
    "housing": (housing_train, housing_test),
    "autompg": (autompg_train, autompg_test)
}
regressors = []

for dataset in regression_data.keys():
    train_data, test_data = regression_data[dataset]
    base_regressor = BaseRegressor(
        input_dim=train_data.features.shape[1], output_dim=1, n_hidden_layers=4, layer_size=128).to(device)
    surrogate_regressor = SurrogateRegressor(input_dim=train_data.features.shape[1], output_dim=1).to(device)
    print(f"{dataset}:")
    train(base_regressor, surrogate_regressor, train_data, regression_criterion, epochs, alpha)
    regressors.append((base_regressor, surrogate_regressor))
    print()

wine:
epoch: 1, train loss: 0.605
epoch: 2, train loss: 0.519
epoch: 3, train loss: 0.487
epoch: 4, train loss: 0.467
epoch: 5, train loss: 0.448

housing:
epoch: 1, train loss: 0.417
epoch: 2, train loss: 0.323
epoch: 3, train loss: 0.274
epoch: 4, train loss: 0.238
epoch: 5, train loss: 0.211

autompg:
epoch: 1, train loss: 0.595
epoch: 2, train loss: 0.540
epoch: 3, train loss: 0.485
epoch: 4, train loss: 0.450
epoch: 5, train loss: 0.438



In [72]:
for i, dataset in enumerate(regression_data.keys()):
    base_model, surrogate_model = regressors[i]
    test_data = regression_data[dataset][1]
    print(f"{dataset}:")
    validate_regressors(base_model, surrogate_model, test_data)
    print()

wine:
test mse: 0.742
global fidelity: 0.153, global neighborhood fidelity: 0.154

housing:
test mse: 0.315
global fidelity: 0.086, global neighborhood fidelity: 0.093

autompg:
test mse: 0.510
global fidelity: 0.420, global neighborhood fidelity: 0.423

