In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules
# (e.g. the local `counterfactuals` package) are re-imported before each
# cell execution and edits to them are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt

from counterfactuals.cf_methods.ppcef import PPCEF
from counterfactuals.generative_models import MaskedAutoregressiveFlow
from counterfactuals.discriminative_models import LinearRegression
from counterfactuals.losses import RegressionLoss

In [None]:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
# from sklearn.linear_model import LinearRegression

In [None]:
# Synthetic regression problem with two features. `X_large` / `y_large` keep the
# raw scale produced by make_regression; `X` / `y` are min-max scaled copies.
X_large, y_large = make_regression(
    n_samples=1000, n_features=2, noise=0.1, random_state=32
)
X = MinMaxScaler().fit_transform(X_large)
# MinMaxScaler expects a 2D array, so the target is reshaped to a column and back.
y = MinMaxScaler().fit_transform(y_large.reshape(-1, 1)).reshape(-1)

# 3D scatter of the full scaled dataset. No model has been fitted yet, so the
# colour gradient encodes the actual (scaled) target value, not a prediction.
fig_prediction = plt.figure(figsize=(10, 8))
ax_prediction = fig_prediction.add_subplot(111, projection="3d")

scatter = ax_prediction.scatter(X[:, 0], X[:, 1], y, c=y, cmap="viridis", marker="o")

# Colour bar showing the scale of the target values used for colouring.
cbar = fig_prediction.colorbar(scatter, ax=ax_prediction, shrink=0.5, aspect=5)
cbar.set_label("Actual Target Value")

ax_prediction.set_xlabel("Feature 1")
ax_prediction.set_ylabel("Feature 2")
ax_prediction.set_zlabel("Actual Target")

ax_prediction.set_title("Regression Plot with Color Gradient Based on Target Value")
plt.show()

In [None]:
# In this notebook the bare name `LinearRegression` refers to the project's
# torch-based model (imported from counterfactuals.discriminative_models and
# later built as `LinearRegression(2, 1)` and fitted on dataloaders). This
# baseline uses the scikit-learn array API (`fit(X, y)` / `predict(X)`), so the
# scikit-learn estimator is imported under an explicit alias to avoid the clash.
from sklearn.linear_model import LinearRegression as SklearnLinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Hold out 20% of the raw (unscaled) data for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X_large, y_large, test_size=0.2, random_state=42
)

# Fit an ordinary least-squares baseline on the training split.
lr_model = SklearnLinearRegression()
lr_model.fit(X_train, y_train)

# Predictions on both splits, reused by the plotting cells below.
y_pred_train = lr_model.predict(X_train)
y_pred_test = lr_model.predict(X_test)

# Mean squared error and R^2 on both splits.
mse_test = mean_squared_error(y_test, y_pred_test)
mse_train = mean_squared_error(y_train, y_pred_train)
r2_test = r2_score(y_test, y_pred_test)
r2_train = r2_score(y_train, y_pred_train)

print(f"Mean Squared Error (Test): {mse_test}")
print(f"Mean Squared Error (Train): {mse_train}")
print(f"R^2 Score (Test): {r2_test}")
print(f"R^2 Score (Train): {r2_train}")

In [None]:
# Training split: fitted regression plane together with the training points.
fig_with_plane, ax_with_plane = plt.subplots(
    figsize=(10, 8), subplot_kw={"projection": "3d"}
)

# 20x20 lattice spanning the observed range of each training feature.
feature_1_axis = np.linspace(X_train[:, 0].min(), X_train[:, 0].max(), 20)
feature_2_axis = np.linspace(X_train[:, 1].min(), X_train[:, 1].max(), 20)
x_grid, y_grid = np.meshgrid(feature_1_axis, feature_2_axis)

# Evaluate the model on every lattice node to get the height of the plane.
grid_points = np.column_stack([x_grid.ravel(), y_grid.ravel()])
z_grid = lr_model.predict(grid_points).reshape(x_grid.shape)

ax_with_plane.plot_surface(x_grid, y_grid, z_grid, alpha=0.5, cmap="coolwarm")

# Points are placed at their actual target; colour encodes the model's prediction.
scatter = ax_with_plane.scatter(
    X_train[:, 0],
    X_train[:, 1],
    y_train,
    c=y_pred_train,
    cmap="viridis",
    marker="o",
)

cbar = fig_with_plane.colorbar(scatter, ax=ax_with_plane, shrink=0.5, aspect=5)
cbar.set_label("Predicted Target Value")

ax_with_plane.set(xlabel="Feature 1", ylabel="Feature 2", zlabel="Actual Target")
ax_with_plane.set_title("3D Regression Plot with Prediction Plane and Color Gradient")
plt.show()

In [None]:
# Test split: fitted regression plane together with the held-out points.
fig_with_plane, ax_with_plane = plt.subplots(
    figsize=(10, 8), subplot_kw={"projection": "3d"}
)

# 20x20 lattice spanning the observed range of each test feature.
feature_1_axis = np.linspace(X_test[:, 0].min(), X_test[:, 0].max(), 20)
feature_2_axis = np.linspace(X_test[:, 1].min(), X_test[:, 1].max(), 20)
x_grid, y_grid = np.meshgrid(feature_1_axis, feature_2_axis)

# Evaluate the model on every lattice node to get the height of the plane.
grid_points = np.column_stack([x_grid.ravel(), y_grid.ravel()])
z_grid = lr_model.predict(grid_points).reshape(x_grid.shape)

ax_with_plane.plot_surface(x_grid, y_grid, z_grid, alpha=0.5, cmap="coolwarm")

# Points are placed at their actual target; colour encodes the model's prediction.
scatter = ax_with_plane.scatter(
    X_test[:, 0],
    X_test[:, 1],
    y_test,
    c=y_pred_test,
    cmap="viridis",
    marker="o",
)

cbar = fig_with_plane.colorbar(scatter, ax=ax_with_plane, shrink=0.5, aspect=5)
cbar.set_label("Predicted Target Value")

ax_with_plane.set(xlabel="Feature 1", ylabel="Feature 2", zlabel="Actual Target")
ax_with_plane.set_title("3D Regression Plot with Prediction Plane and Color Gradient")
plt.show()

In [None]:
import torch.utils
import torch.utils.data


# Re-split using the min-max scaled data (X, y). From here on the names
# X_train / X_test / y_train / y_test refer to the scaled split and replace
# the raw-scale split created for the earlier baseline.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Wrap each split as a float32 tensor dataset.
train_dataset = torch.utils.data.TensorDataset(
    torch.tensor(X_train, dtype=torch.float32),
    torch.tensor(y_train, dtype=torch.float32),
)
test_dataset = torch.utils.data.TensorDataset(
    torch.tensor(X_test, dtype=torch.float32),
    torch.tensor(y_test, dtype=torch.float32),
)

# batch_size=1024 exceeds the 1000 generated samples, so each loader yields one batch.
train_dataloader = torch.utils.data.DataLoader(
    train_dataset, batch_size=1024, shuffle=True
)
test_dataloader = torch.utils.data.DataLoader(
    test_dataset, batch_size=1024, shuffle=False
)

In [None]:
# dataset = WineQualityDataset("../data/regression/winequality.csv")
# train_dataloader = dataset.train_dataloader(batch_size=256, shuffle=True, label_noise_lvl=0.0)
# test_dataloader = dataset.test_dataloader(batch_size=256, shuffle=False)

In [None]:
# Project (torch-based) linear regressor, trained on the scaled dataloaders.
# NOTE(review): the positional arguments (2, 1) presumably are input and output
# sizes (two features, one target) - confirm against the class definition.
disc_model = LinearRegression(2, 1)
disc_model.fit(
    train_dataloader,
    test_dataloader,
    epochs=3000,
    lr=0.003,
)

In [None]:
# Scaled test split: plane learnt by `disc_model` together with the held-out points.
fig_with_plane, ax_with_plane = plt.subplots(
    figsize=(10, 8), subplot_kw={"projection": "3d"}
)

# 20x20 lattice spanning the observed range of each test feature.
feature_1_axis = np.linspace(X_test[:, 0].min(), X_test[:, 0].max(), 20)
feature_2_axis = np.linspace(X_test[:, 1].min(), X_test[:, 1].max(), 20)
x_grid, y_grid = np.meshgrid(feature_1_axis, feature_2_axis)

# Model output on every lattice node, folded back into the lattice shape.
grid_points = np.column_stack([x_grid.ravel(), y_grid.ravel()])
z_grid = disc_model.predict(grid_points).reshape(x_grid.shape)

ax_with_plane.plot_surface(x_grid, y_grid, z_grid, alpha=0.5, cmap="viridis")

# Predictions for the test points; used only to colour the markers.
y_pred_test = disc_model.predict(X_test)

# Markers are placed at the actual target; black edges keep them visible
# against the plane, which uses the same colour map.
scatter = ax_with_plane.scatter(
    X_test[:, 0],
    X_test[:, 1],
    y_test,
    c=y_pred_test,
    cmap="viridis",
    marker="o",
    edgecolors="black",
)

cbar = fig_with_plane.colorbar(scatter, ax=ax_with_plane, shrink=0.5, aspect=5)
cbar.set_label("Predicted Target Value")

ax_with_plane.set(xlabel="Feature 1", ylabel="Feature 2", zlabel="Actual Target")
ax_with_plane.set_title("3D Regression Plot with Prediction Plane")
plt.show()

In [None]:
def collate_fn(batch, feature_noise_std=0.03, label_noise_std=0.01):
    """Collate ``(features, label)`` samples into a batch and add Gaussian noise.

    Args:
        batch: Sequence of ``(x, y)`` tensor pairs, as passed by a ``DataLoader``.
        feature_noise_std: Multiplier applied to standard-normal noise added to
            the stacked features. Defaults to the previously hard-coded 0.03.
        label_noise_std: Multiplier applied to standard-normal noise added to
            the stacked labels. Defaults to the previously hard-coded 0.01.

    Returns:
        Tuple ``(X, y)`` of the stacked, noise-perturbed feature and label tensors.
    """
    features, labels = zip(*batch)
    X = torch.stack(features)
    y = torch.stack(labels)

    # Noise is re-sampled on every call, so each batch is perturbed independently.
    X = X + torch.randn_like(X) * feature_noise_std
    y = y + torch.randn_like(y) * label_noise_std
    return X, y


# Training loader for the generative model: same training split as before, but
# every batch is passed through `collate_fn` before being returned.
gen_train_dataset = torch.utils.data.TensorDataset(
    torch.from_numpy(X_train).float(),
    torch.from_numpy(y_train).float(),
)
gen_train_dataloader = torch.utils.data.DataLoader(
    gen_train_dataset,
    batch_size=1024,
    shuffle=True,
    collate_fn=collate_fn,
)

In [None]:
# train_dataloader = dataset.train_dataloader(batch_size=256, shuffle=True, noise_lvl=0.003)
# test_dataloader = dataset.test_dataloader(batch_size=256, shuffle=False)

# Flow over the two features with a single context feature.
# NOTE(review): the context is presumably the regression target - confirm
# against MaskedAutoregressiveFlow.
flow_architecture = {
    "features": 2,
    "hidden_features": 8,
    "num_layers": 2,
    "num_blocks_per_layer": 2,
    "context_features": 1,
}
gen_model = MaskedAutoregressiveFlow(**flow_architecture)

# Train on the noise-augmented loader; the clean test loader is the second argument.
flow_training = {
    "num_epochs": 3000,
    "patience": 100,
    "learning_rate": 0.001,
}
gen_model.fit(gen_train_dataloader, test_dataloader, **flow_training)

In [None]:
# 200x200 evaluation lattice over [-0.25, 1.25]^2 for the density plot.
xline = torch.linspace(-0.25, 1.25, 200)
yline = torch.linspace(-0.25, 1.25, 200)
# `indexing="ij"` matches the layout torch.meshgrid used when no indexing was
# given (first output varies along dim 0), and avoids the deprecation warning
# for relying on that implicit default.
xgrid, ygrid = torch.meshgrid(xline, yline, indexing="ij")
# Flatten the lattice into an (N, 2) tensor of (feature 1, feature 2) points.
xyinput = torch.cat([xgrid.reshape(-1, 1), ygrid.reshape(-1, 1)], dim=1)

In [None]:
# Median of the generative model's log-probabilities over the test loader;
# used below as the threshold for the shaded region and the search.
test_log_probs = gen_model.predict_log_prob(test_dataloader)
median_log_prob = torch.median(test_log_probs)
median_log_prob

In [None]:
# Context value fed to the generative model for every grid point.
context_value = 0.55

with torch.no_grad():
    # One context entry per grid point, with the same dtype/device as the grid.
    ys = torch.full_like(xyinput[:, 0], context_value)
    # The model output is exponentiated before plotting, i.e. it is treated as a
    # log-density here. Reshape to the lattice shape rather than a hard-coded size.
    zgrid = gen_model(xyinput, ys).exp().reshape(xgrid.shape)
zgrid = zgrid.numpy()

# Thin grey iso-lines of the evaluated density.
plt.contour(
    xgrid.numpy(),
    ygrid.numpy(),
    zgrid,
    levels=10,
    cmap="Greys",
    linewidths=0.4,
    antialiased=True,
)

# Convert the threshold to a plain Python float explicitly instead of handing
# a torch tensor to NumPy / Matplotlib.
prob_threshold_exp = torch.exp(median_log_prob).item()
plt.contourf(
    xgrid.numpy(),
    ygrid.numpy(),
    zgrid,
    levels=[prob_threshold_exp, prob_threshold_exp * 10.00],
    alpha=0.5,
    colors="#DC143C",
)  # 10.00 is an arbitrary huge value to colour the whole distribution.
# plt.scatter(X_train[:, 0], y_train, c="blue", alpha=0.1)

plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.title(f"Density for context value {context_value}; shaded: above median test density")
plt.show()

In [None]:
# Counterfactual explainer combining the flow (plausibility) and the regressor.
cf = PPCEF(
    gen_model=gen_model,
    disc_model=disc_model,
    disc_model_criterion=RegressionLoss(),
    neptune_run=None,
)

# Threshold passed as `delta`: median test log-probability under the flow.
test_log_probs = gen_model.predict_log_prob(test_dataloader)
median_log_prob = torch.median(test_log_probs)

# Search settings; every test point is driven towards the same target of 0.2.
search_settings = {
    "alpha": 100,
    "delta": median_log_prob,
    "target": 0.2,
    "patience": 100,
    "epochs": 2000,
}
X_cf, X_orig, y_orig, y_target, losses = cf.search_batch(
    test_dataloader, **search_settings
)

In [None]:
# One stacked panel per loss term recorded during the counterfactual search.
loss_figure = plt.figure(figsize=(15, 10))
n_panels = len(losses)
for panel_idx, (loss_name, loss_values) in enumerate(losses.items(), start=1):
    panel = loss_figure.add_subplot(n_panels, 1, panel_idx)
    panel.plot(loss_values, label=loss_name)
    panel.legend()

In [None]:
# Mean absolute gap between the regressor's output on the counterfactuals and
# the requested target.
cf_predictions = disc_model.predict(X_cf).numpy()
cf_abs_errors = np.abs(cf_predictions - y_target.reshape(-1))
np.mean(cf_abs_errors)

In [None]:
# Reference value: mean absolute gap between the regressor's output on the
# ORIGINAL points and the requested target. Both operands are flattened (as the
# counterfactual metric above does for the target), so a column-shaped
# `y_target` cannot silently broadcast the difference into an (N, N) matrix.
orig_predictions = disc_model.predict(X_orig).numpy().reshape(-1)
np.mean(np.abs(orig_predictions - y_target.reshape(-1)))

In [None]:
# Fraction of counterfactuals whose model output, given the target as context,
# exceeds the median test log-probability threshold.
cf_log_probs = gen_model(torch.from_numpy(X_cf), torch.from_numpy(y_target))
is_above_threshold = cf_log_probs > median_log_prob
is_above_threshold.float().mean()

In [None]:
# plt.scatter(X_train[:, 1], y_train, c="black", label="train")
# Second feature against the target: test points at their actual target,
# counterfactuals at the target requested from the search.
plt.scatter(X_test[:, 1], y_test, c="blue", label="test")
plt.scatter(X_cf[:, 1], y_target, c="red", label="cf")
plt.xlabel("Feature 2")
plt.ylabel("Target")
# The labels passed to scatter are only rendered once a legend is drawn.
plt.legend()
plt.show()

In [None]:
# Training data (faded, coloured by target) together with the counterfactuals.
fig_with_plane = plt.figure(figsize=(10, 8))
ax_with_plane = fig_with_plane.add_subplot(111, projection="3d")

# Grid and model output for the regression plane. The surface itself is
# currently disabled; the values are kept so it can be re-enabled below.
x_grid, y_grid = np.meshgrid(
    np.linspace(X_train[:, 0].min(), X_train[:, 0].max(), 20),
    np.linspace(X_train[:, 1].min(), X_train[:, 1].max(), 20),
)
z_grid = disc_model.predict(np.c_[x_grid.ravel(), y_grid.ravel()]).reshape(x_grid.shape)
# ax_with_plane.plot_surface(x_grid, y_grid, z_grid, alpha=0.5, cmap='coolwarm')

# Regressor output on the counterfactuals; used as their height in the plot.
y_cf_pred = disc_model.predict(X_cf)

# Training points at their actual target, colour-mapped by that target.
scatter = ax_with_plane.scatter(
    X_train[:, 0],
    X_train[:, 1],
    y_train,
    c=y_train,
    cmap="viridis",
    marker="o",
    alpha=0.2,
    label="train",
)
# Counterfactuals in solid red. They get their own handle so `scatter` keeps
# pointing at the colour-mapped artist, and no `cmap` is passed because a
# literal colour carries no data to map.
cf_scatter = ax_with_plane.scatter(
    X_cf[:, 0], X_cf[:, 1], y_cf_pred, c="red", marker="o", label="counterfactual"
)

# The colour bar must come from the colour-mapped training scatter; the red
# counterfactual scatter has no mapped values to show.
cbar = fig_with_plane.colorbar(scatter, ax=ax_with_plane, shrink=0.5, aspect=5)
cbar.set_label("Actual Target Value (train)")

ax_with_plane.set_xlabel("Feature 1")
ax_with_plane.set_ylabel("Feature 2")
ax_with_plane.set_zlabel("Target (actual for train, predicted for counterfactuals)")
ax_with_plane.legend()

ax_with_plane.set_title("Training Data and Counterfactuals")
plt.show()