## Imports and dataset

In [1]:
from src.models.conjugate_bayes_lin_reg import NormalInverseGammaPriorLinearRegression
from src.attacks.point_attacks import attack

import numpy as np
import torch
import pandas as pd

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [2]:
import seaborn as sns
import matplotlib.pyplot as plt

# Global figure styling for the notebook: seaborn theme and context first,
# then explicit matplotlib rcParams overrides on top of them.
sns.set_theme(font="serif", palette="muted", style="whitegrid")
sns.set_context("notebook", rc={"lines.linewidth": 2.5}, font_scale=1.5)

rc_overrides = {
    # Text sizes and weights
    'axes.titlesize': 18,
    'axes.titleweight': 'bold',
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'legend.fontsize': 12,
    # Axes frame
    'axes.edgecolor': 'black',
    'axes.linewidth': 1,
    # Grid appearance
    'grid.alpha': 0.5,
    'grid.linestyle': '--',
    # Legend and output resolution
    'legend.frameon': False,
    'figure.dpi': 300,
}
plt.rcParams.update(rc_overrides)

In [None]:
# One shared seed for both random number generators seeded in this notebook,
# so that repeated runs start from the same RNG state.
seed = 42
np.random.seed(seed)     # NumPy global RNG
torch.manual_seed(seed)  # PyTorch default generator

# Comparison of 3 datasets: energy, wine and housing

In [16]:
# Energy dataset (ENB2012), read directly from the UCI repository as an Excel file.
energy_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00242/ENB2012_data.xlsx"
energy_frame = pd.read_excel(energy_url)

# The last two columns are the targets (Heating Load and Cooling Load);
# only the second-to-last one (Heating Load) is used as the response.
# It is divided by 4 to make the target similar in scale to the other datasets.
y_energy = energy_frame.iloc[:, -2].to_numpy() / 4

# Every remaining column is a covariate; normalize them with MinMaxScaler.
X_energy = MinMaxScaler().fit_transform(energy_frame.iloc[:, :-2].to_numpy())

In [17]:
# White-wine file of the UCI Wine Quality dataset, read directly from the
# repository as a semicolon-separated CSV.
# The original note describes it as 11 features and 4898 samples.
# NOTE(review): that note also listed the response values as {3, 4, 5, 6, 7, 8};
# nothing here checks the range -- confirm with np.unique(y_wine).
wine_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv"
wine_frame = pd.read_csv(wine_url, sep=";")

# The last column is the response; every column before it is a covariate.
y_wine = wine_frame.iloc[:, -1].to_numpy()

# Normalize the covariates with MinMaxScaler.
X_wine = MinMaxScaler().fit_transform(wine_frame.iloc[:, :-1].to_numpy())

In [18]:
# California housing dataset, fetched through scikit-learn as pandas objects.
california_housing = fetch_california_housing(as_frame=True)

# Response vector as a plain NumPy array.
y_housing = california_housing.target.to_numpy()

# Covariates as a NumPy array, normalized with MinMaxScaler.
X_housing = MinMaxScaler().fit_transform(california_housing.data.to_numpy())

In [None]:
# For all 3 datasets: fit the model on 10 random train/test splits and, for
# every epsilon in `epsilons`, run the point attack on the first (at most) 100
# test points. Two RMSE values are recorded per (dataset, split, epsilon):
#   * results        -- RMSE between the prediction at the attacked point and
#                       the true test response
#   * results_attack -- RMSE between the same prediction and the attacker's
#                       target y_star
# Both are nested lists indexed as [dataset][split][epsilon].
# epsilon = 0 is meant as the "original data" baseline -- this assumes attack()
# leaves the point unchanged for epsilon = 0; TODO confirm.
datasets = ['energy', 'wine', 'housing']
epsilons = [0, 0.2, 0.5]

# Explicit name -> (X, y) mapping: an unknown dataset name raises KeyError
# instead of silently reusing the arrays left over from the previous dataset.
dataset_arrays = {
    'energy': (X_energy, y_energy),
    'wine': (X_wine, y_wine),
    'housing': (X_housing, y_housing),
}

results = []
results_attack = []

for dataset in datasets:
    print(f'Running dataset {dataset}')
    X, y = dataset_arrays[dataset]

    results_dataset = []
    results_dataset_attack = []
    # The split index is also the random_state, so it must not be reused as
    # the index of any inner loop.
    for split in range(10):
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=split)
        # Attacker's target: twice the mean training response.
        y_star = y_train.mean() * 2

        n_features = X_train.shape[1]
        model = NormalInverseGammaPriorLinearRegression(prior_params={
            'mu': torch.zeros(n_features),
            'lam': torch.eye(n_features),
            'a': torch.tensor([1]),
            'b': torch.tensor([1])},)
        data = {'X': torch.tensor(X_train, dtype=torch.float32), 'y': torch.tensor(y_train, dtype=torch.float32)}
        model.fit(data)

        # Only the first 100 test points are attacked (presumably to bound the
        # runtime of the per-point attack loop).
        X_test = X_test[:100, :]
        y_test = y_test[:100]

        res_it = []
        res_it_attack = []
        for epsilon in epsilons:
            # Squared errors per test point; the root of their mean is taken below.
            sq_err_true = []
            sq_err_target = []
            for point_idx in range(X_test.shape[0]):
                x_adv = torch.tensor(X_test[point_idx, :].copy(), dtype=torch.float32, requires_grad=True)
                x_adv_values, loss_values, func_values = attack(x_adv, model, y_star, epsilon=epsilon, learning_rate=1e-4, num_iterations=1000, early_stopping_patience=20)
                # Prediction at the last entry returned by the attack -- assumes
                # model.mu holds the fitted coefficient vector; TODO confirm.
                y_adv = model.mu @ x_adv_values[-1]
                sq_err_true.append((y_adv - y_test[point_idx]) ** 2)
                sq_err_target.append((y_adv - y_star) ** 2)
            res_it.append(np.sqrt(np.mean(sq_err_true)))
            res_it_attack.append(np.sqrt(np.mean(sq_err_target)))
        results_dataset.append(res_it)
        results_dataset_attack.append(res_it_attack)
    results.append(results_dataset)
    results_attack.append(results_dataset_attack)

In [None]:
# Table of the RMSE against the true test responses, averaged over the
# splits (axis 1 of `results`): one row per dataset, one column per epsilon.
mean_rmse = np.mean(np.array(results), axis=1)
results_df = pd.DataFrame(np.round(mean_rmse, 3), index=datasets, columns=epsilons)
results_df

In [None]:
# Table of twice the standard deviation over the splits (axis 1 of `results`)
# of the RMSE against the true test responses: one row per dataset, one
# column per epsilon.
two_std_rmse = np.std(np.array(results), axis=1) * 2
results_df = pd.DataFrame(np.round(two_std_rmse, 3), index=datasets, columns=epsilons)
results_df

In [None]:
# Table of the RMSE against the attacker's target y_star, averaged over the
# splits (axis 1 of `results_attack`): one row per dataset, one column per epsilon.
mean_rmse_attack = np.mean(np.array(results_attack), axis=1)
results_df = pd.DataFrame(np.round(mean_rmse_attack, 3), index=datasets, columns=epsilons)
results_df

In [None]:
# Table of twice the standard deviation over the splits (axis 1 of
# `results_attack`) of the RMSE against the attacker's target y_star: one row
# per dataset, one column per epsilon.
two_std_rmse_attack = np.std(np.array(results_attack), axis=1) * 2
results_df = pd.DataFrame(np.round(two_std_rmse_attack, 3), index=datasets, columns=epsilons)
results_df