In [2]:
import tqdm
import torch
import numpy as np
import pandas as pd
import plotly.express as px

In [3]:
def get_stats(x_0, x, theta, lamb):
    f_x = torch.nn.Sigmoid()(torch.matmul(x, theta[:-1]) + theta[[-1]])
    bce_loss = torch.nn.BCELoss()(f_x, torch.ones(1).float()).detach()
    cost = torch.dist(x, x_0, 1).detach()
    price = (bce_loss + lamb*cost).detach()
    
    return bce_loss, cost, price

In [4]:
def display(x_0: torch.Tensor = None, theta_0: torch.Tensor = None, x: torch.Tensor = None, theta: torch.Tensor = None, alpha: float = None, lamb: float = None, ):
    if theta_0 != None:
        bce_loss, cost, price = get_stats(x_0, x_0, theta_0, lamb)
        
        print(f'x_0           : {x_0.round(decimals=4)}')
        print(f'theta_0       : {theta_0.round(decimals=4)}')
        print(f'alpha         : {alpha}')
        print(f'lambda        : {lamb}')
        print(f'Cost          : {cost:.4f}')
        print(f'BCE Loss      : {bce_loss:.4f}')
        print(f'Total Cost    : {price:.4f}\n')
    
    if x != None and theta != None:
        bce_loss, cost, price = get_stats(x_0, x, theta, lamb)
        
        print(f'x_r           : {x.round(decimals=4)}')
        print(f'theta_r       : {theta.round(decimals=4)}')
        print(f'alpha         : {alpha}')
        print(f'lambda        : {lamb}')
        print(f'Cost          : {cost:.4f}')
        print(f'BCE Loss      : {bce_loss:.4f}')
        print(f'Total Cost    : {price:.4f}')

In [5]:
def calc_theta_r(x: torch.Tensor, theta_0: torch.Tensor, alpha: float):
    theta_r = theta_0.clone()
    x_t = torch.cat([x.clone(), torch.ones(1)], 0)
    i = torch.argmax(torch.abs(x_t))
    
    # if multiple x values tie for max value
    # pick the one where theta has the max value (hurts the most)
    for j in range(len(x_t)):
        if abs(x_t[j]) == abs(x_t[i]):
            if abs(theta_r[j]) >= abs(theta_r[i]):
                i = j
    theta_r[i] -= (alpha * torch.sign(x_t[i]))
    
    return theta_r.detach().float()

def calc_theta_r2(x: torch.Tensor, theta_0: torch.Tensor, alpha: float):
    x_t = torch.cat([x.clone(), torch.ones(1)], 0)
    theta_t = theta_0 - (alpha * torch.sign(x_t))
    
    f_x = torch.nn.Sigmoid()(torch.mul(x_t, theta_t))
    bce_loss = torch.nn.BCELoss(reduction='none')(f_x, torch.ones(f_x.shape)).detach()
    
    i = torch.argmax(bce_loss)
    theta_r = theta_0.clone()
    theta_r[i] -= (alpha * torch.sign(x_t[i]))
    return theta_r

In [8]:
def get_recourse(x_0: torch.Tensor, theta_0: torch.Tensor, alpha: float, lamb: float):
    x_r = x_0.clone().requires_grad_(True)
    theta_r = theta_0.clone()
    optimizer = torch.optim.Adam([x_r])

    abstol = 1e-7
    loss = torch.tensor(1.)
    loss_diff = 1.

    while loss_diff > abstol:
        loss_prev = loss.clone().detach()
        optimizer.zero_grad()
        
        theta_r = calc_theta_r(x_r, theta_0, alpha)
        
        f_x = torch.nn.Sigmoid()(torch.matmul(x_r, theta_r[:-1]) + theta_r[[-1]])
        bce_loss = torch.nn.BCELoss()(f_x, torch.ones(1))
        cost = torch.dist(x_0, x_r, 1)
        loss = bce_loss + lamb*cost
        
        loss.backward()
        optimizer.step()
        
        loss_diff = torch.dist(loss_prev, loss, 1)

    return x_r.detach()

In [9]:
def search_recourse(x_0: torch.Tensor, theta_0: torch.Tensor, alpha: float, lamb: float, n: int = 1001, return_grid: bool = False):
    dx, dy = [-abs(x_0[0]), 2*abs(x_0[0])], [-abs(x_0[1]), 2*abs(x_0[1])]
    delta_x = [
            np.arange(x_0[0].item() + dx[0].item(), x_0[0].item() + dx[1].item(), 0.001), 
            np.arange(x_0[1].item() + dy[0].item(), x_0[1].item() + dy[1].item(), 0.001)]
    X = np.array(np.meshgrid(*delta_x)).T.reshape(-1, 2)
    X = torch.from_numpy(X).float()

    results = {'x_r': [], 'theta_r': [], 'bce_loss': [], 'cost': [], 'total_cost': []}
    for x in tqdm.tqdm(X):
        theta_r = calc_theta_r(x, theta_0, alpha)
        bce_loss, cost, total_cost = get_stats(x_0, x, theta_r, lamb)
        
        results['x_r'].append(x)
        results['theta_r'].append(theta_r)
        results['bce_loss'].append(bce_loss.item())
        results['cost'].append(cost.item())
        results['total_cost'].append(total_cost.item())
        
    results = pd.DataFrame(results)
    i = np.argmin(results['total_cost'])
    results_min = results.iloc[i]
    if return_grid:
        return results_min['x_r'], results
    return results_min['x_r']

### Case 1. $\theta$ cannot switch signs

In [10]:
alpha = 0.1
lamb = 1.
x_0 = torch.tensor([-1.5, -2.5]).float()
theta_0 = torch.tensor([1., 2., .2]).float()

display(x_0, theta_0, alpha=alpha, lamb=lamb)

x_0           : tensor([-1.5000, -2.5000])
theta_0       : tensor([1.0000, 2.0000, 0.2000])
alpha         : 0.1
lambda        : 1.0
Cost          : 0.0000
BCE Loss      : 6.3018
Total Cost    : 6.3018



In [11]:
x_r = get_recourse(x_0, theta_0, alpha, lamb)
theta_r = calc_theta_r(x_r, theta_0, alpha)

print('Gradient Descent Result\n')
display(x_0, theta_0, x_r, theta_r, alpha, lamb)

Gradient Descent Result

x_0           : tensor([-1.5000, -2.5000])
theta_0       : tensor([1.0000, 2.0000, 0.2000])
alpha         : 0.1
lambda        : 1.0
Cost          : 0.0000
BCE Loss      : 6.3018
Total Cost    : 6.3018

x_r           : tensor([-1.4996,  0.7011])
theta_r       : tensor([1.1000, 2.0000, 0.2000])
alpha         : 0.1
lambda        : 1.0
Cost          : 3.2014
BCE Loss      : 0.7172
Total Cost    : 3.9186


In [12]:
x_r = search_recourse(x_0, theta_0, alpha, lamb)
theta_r = calc_theta_r(x_r, theta_0, alpha)

print('Grid Search Result\n')
display(x_0, theta_0, x_r, theta_r, alpha, lamb)

RuntimeError: dot : expected both vectors to have same dtype, but found Double and Float

In [None]:
x_t = x_r.clone()
x_t[0] = -1.5
x_t[1] = 0.7
theta_t = calc_theta_r(x_t, theta_0, alpha)
display(x_0, None, lamb, x_t, theta_t)

### Case 2. $\theta$ can change sign

In [None]:
alpha = 0.5
lamb = 1.
x_0 = torch.tensor([-.15, -2.5]).float()
theta_0 = torch.tensor([1., 2., .2]).float()

display(x_0, theta_0, lamb)

In [None]:
x_r = get_recourse(x_0, theta_0, alpha, lamb)
theta_t = calc_theta_r(x_r, theta_0, alpha)

display(x_0, theta_0, lamb, x_r, theta_t)

In [None]:
x_t = x_r.clone()
x_t[0] = -0.15
x_t[1] = 0.0557
theta_t = calc_theta_r(x_t, theta_0, alpha)
display(x_0, None, lamb, x_t, theta_t)