In [None]:
%load_ext autoreload
%autoreload 2
import warnings
warnings.simplefilter('ignore')

In [None]:
from sklearn.linear_model import LinearRegression
import numpy as np
from mliv.shape import ShapeIV, LipschitzShapeIV
from mliv.shape.curve_fit import project_convex_lip
import matplotlib.pyplot as plt
from sklearn.isotonic import IsotonicRegression

In [None]:
def plot_est_vs_true(est, ind, T_test, T_train, true_fn, fname=None):
    """Draw a fitted estimator against the ground truth along one treatment column.

    Parameters
    ----------
    est : object with a ``predict`` method
        Evaluated on the sorted rows of ``T_test`` and on ``T_train``.
    ind : int
        Column of ``T_test`` used as the x-axis and to order the rows.
    T_test : 2-d array
        Points at which both curves are drawn.
    T_train : 2-d array
        Points on which the RMSE and R2 reported in the title are computed.
    true_fn : callable
        Ground-truth function, called on the same arrays as ``est.predict``.
    fname : str, optional
        If given, the figure is saved to this path before it is shown.
    """
    order = np.argsort(T_test[:, ind])
    grid = T_test[order]
    x_axis = grid[:, ind]
    plt.plot(x_axis, est.predict(grid), label='est')
    plt.plot(x_axis, true_fn(grid), '--', label='true')

    # The title metrics are computed on T_train, not on the plotted grid.
    train_error = est.predict(T_train).flatten() - true_fn(T_train).flatten()
    RMSE = np.sqrt(np.mean(train_error**2))
    R2 = 1 - RMSE**2 / np.var(true_fn(T_train).flatten())
    title_template = "RMSE on Train: {:.3f}, R2 on train: {:.2f}"
    plt.title(title_template.format(RMSE, R2))
    plt.legend()
    if fname is not None:
        plt.savefig(fname)
    plt.show()

# ShapeIV Small TV

In [None]:
# Constants of the data-generating process for this section.
a = 0.4      # weight on the confounder U in the treatment equation; Z gets (1 - a)
theta = 0.2  # not referenced by any code visible in this notebook
n_z = 1      # number of columns of Z (also used as the column count of T_test)

def true_fn(T):
    """Ground-truth function of this section: |t| of the first treatment column.

    The column is selected with a list index, so the result keeps a trailing
    axis of length one, i.e. shape ``(len(T), 1)``.
    """
    first_col = T[:, [0]]
    return np.abs(first_col)

# Seed NumPy's global generator so that the simulated sample, and therefore
# every figure and metric derived from it, is the same on each run of this cell.
np.random.seed(123)

n = 2000
Z = np.random.normal(0, 2, size=(n, n_z))      # enters the treatment equation only
U = np.random.normal(0, 2, size=(n, 1))        # enters both T and Y (source of endogeneity)
delta = np.random.normal(0, .1, size=(n, 1))   # treatment noise
zeta = np.random.normal(0, .1, size=(n, 1))    # outcome noise
T = (1 - a) * Z + a * U + delta
Y = true_fn(T) + U + zeta

# Evaluation grid between the 1% and 99% quantiles of the observed treatment.
x_grid = np.linspace(np.quantile(T[:, 0], .01), np.quantile(T[:, 0], .99), 100)
T_test = np.zeros((100, n_z))
T_test[:, 0] = x_grid

# Left: Y against Z.  Right: Y against T, with the true function drawn on top.
sort_idx = np.argsort(T[:, 0])
plt.figure(figsize=(10,3))
plt.subplot(1, 2, 1)
plt.scatter(Z[:, 0], Y)
plt.xlabel('Z')
plt.ylabel('Y')
plt.subplot(1, 2, 2)
plt.scatter(T[:, 0], Y)
plt.plot(T[sort_idx, 0], true_fn(T[sort_idx]))
plt.xlabel('T')
plt.ylabel('Y')
plt.show()

In [None]:
# Settings passed to the estimators fitted in the following cells.
n_iter = 1000
eta_theta = eta_w = 2 / np.sqrt(n_iter)   # both scale as 1/sqrt(n_iter)
y_min, y_max = 0, 5
lambda_w = 2

In [None]:
# Fit ShapeIV on (Z, T, Y) with the settings above and compare it with the truth.
shape_iv_kwargs = dict(
    n_iter=n_iter,
    eta_theta=eta_theta,
    eta_w=eta_w,
    y_min=y_min,
    y_max=y_max,
    lambda_w=lambda_w,
)
est = ShapeIV(**shape_iv_kwargs).fit(Z, T, Y)

plot_est_vs_true(est, 0, T_test=T_test, T_train=T, true_fn=true_fn)

In [None]:
# Same settings, now through LipschitzShapeIV with L=None and convexity=None.
# NOTE(review): presumably None means the corresponding constraint is not
# imposed -- confirm against the LipschitzShapeIV documentation.
lip_kwargs = dict(
    L=None,
    convexity=None,
    n_iter=n_iter,
    eta_theta=eta_theta,
    eta_w=eta_w,
    y_min=y_min,
    y_max=y_max,
    lambda_w=lambda_w,
    n_projection_subsamples=50,
    max_projection_iters=100,
)
est = LipschitzShapeIV(**lip_kwargs).fit(Z, T, Y)

plot_est_vs_true(est, 0, T_test=T_test, T_train=T, true_fn=true_fn)

### Adding Lipschitz constraint

In [None]:
# Settings for the constrained fits below; compared with the previous settings
# cell only y_min differs (-5 instead of 0).
n_iter = 1000
eta_theta = eta_w = 2 / np.sqrt(n_iter)
y_min, y_max = -5, 5
lambda_w = 2

In [None]:
# LipschitzShapeIV with L=1 and no convexity argument set.
lip_kwargs = dict(
    L=1,
    convexity=None,
    n_iter=n_iter,
    eta_theta=eta_theta,
    eta_w=eta_w,
    y_min=y_min,
    y_max=y_max,
    lambda_w=lambda_w,
    n_projection_subsamples=50,
    max_projection_iters=100,
)
est = LipschitzShapeIV(**lip_kwargs).fit(Z, T, Y)

plot_est_vs_true(est, 0, T_test=T_test, T_train=T, true_fn=true_fn)

### Adding Lipschitz and Convexity Constraints

In [None]:
# LipschitzShapeIV with L=1 and convexity='convex'; note the smaller
# max_projection_iters (20) compared with the previous fits (100).
lip_kwargs = dict(
    L=1,
    convexity='convex',
    n_iter=n_iter,
    eta_theta=eta_theta,
    eta_w=eta_w,
    y_min=y_min,
    y_max=y_max,
    lambda_w=lambda_w,
    n_projection_subsamples=50,
    max_projection_iters=20,
)
est = LipschitzShapeIV(**lip_kwargs).fit(Z, T, Y)

plot_est_vs_true(est, 0, T_test=T_test, T_train=T, true_fn=true_fn)

In [None]:
# Compare, on the training sample sorted by treatment:
#   'y ~ x' : project_convex_lip applied directly to (T, Y), without Z
#   'est'   : the estimator fitted in the previous cell
#   'true'  : the ground-truth function
sort_idx = np.argsort(T[:, 0])
T_sorted = T[sort_idx]
t_axis = T_sorted[:, 0]

direct_fit = project_convex_lip(T, Y, convexity='convex', ymin=y_min, ymax=y_max,
                                monotone=None, L=1, n_subsamples=50)
plt.plot(t_axis, direct_fit[sort_idx], '--', label='y ~ x')
plt.plot(t_axis, est.predict(T_sorted), label='est')
plt.plot(t_axis, true_fn(T_sorted), '--', label='true')

train_rmse = np.sqrt(np.mean((est.predict(T).flatten() - true_fn(T).flatten())**2))
plt.title("RMSE on Train: {:.3f}".format(train_rmse))
plt.legend()
plt.show()

In [None]:
# R2 of the current estimator on the T_test grid: 1 - MSE / Var(truth).
# Left as a bare trailing expression so the notebook displays the value.
test_sq_err = (est.predict(T_test).flatten() - true_fn(T_test).flatten())**2
1 - np.mean(test_sq_err)/np.var(true_fn(T_test))

# ShapeIV Concave

In [None]:
# Constants of the data-generating process for the concave section.
a = 0.5      # weight on the confounder U in the treatment equation; Z gets (1 - a)
theta = 0.2  # not referenced by any code visible in this notebook
n_z = 1      # number of columns of Z (also used as the column count of T_test)

def true_fn(T):
    """Ground-truth function of this section: -|t| of the first treatment column.

    The column is selected with a list index, so the result has shape
    ``(len(T), 1)``.
    """
    first_col = T[:, [0]]
    return -np.abs(first_col)

# Seed NumPy's global generator so that the simulated sample, and therefore
# every figure and metric derived from it, is the same on each run of this cell.
np.random.seed(123)

n = 1000
Z = np.random.normal(0, 2, size=(n, n_z))      # enters the treatment equation only
U = np.random.normal(0, 2, size=(n, 1))        # enters both T and Y (source of endogeneity)
delta = np.random.normal(0, .1, size=(n, 1))   # treatment noise
zeta = np.random.normal(0, .1, size=(n, 1))    # outcome noise
T = (1 - a) * Z + a * U + delta
Y = true_fn(T) + U + zeta

# Evaluation grid between the 1% and 99% quantiles of the observed treatment.
x_grid = np.linspace(np.quantile(T[:, 0], .01), np.quantile(T[:, 0], .99), 100)
T_test = np.zeros((100, n_z))
T_test[:, 0] = x_grid

# Left: Y against Z.  Right: Y against T, with the true function drawn on top.
sort_idx = np.argsort(T[:, 0])
plt.figure(figsize=(10,3))
plt.subplot(1, 2, 1)
plt.scatter(Z[:, 0], Y)
plt.xlabel('Z')
plt.ylabel('Y')
plt.subplot(1, 2, 2)
plt.scatter(T[:, 0], Y)
plt.plot(T[sort_idx, 0], true_fn(T[sort_idx]))
plt.xlabel('T')
plt.ylabel('Y')
plt.show()

In [None]:
# Settings for the concave fit: more iterations than the earlier sections and
# an output range of [-5, 1].
n_iter = 5000
eta_theta = eta_w = 2 / np.sqrt(n_iter)
y_min, y_max = -5, 1
lambda_w = 2

# LipschitzShapeIV with L=1 and convexity='concave', fitted on (Z, T, Y).
lip_kwargs = dict(
    L=1,
    convexity='concave',
    n_iter=n_iter,
    eta_theta=eta_theta,
    eta_w=eta_w,
    y_min=y_min,
    y_max=y_max,
    lambda_w=lambda_w,
    n_projection_subsamples=50,
    max_projection_iters=20,
)
est = LipschitzShapeIV(**lip_kwargs).fit(Z, T, Y)

plot_est_vs_true(est, 0, T_test=T_test, T_train=T, true_fn=true_fn)

In [None]:
# Compare, on the training sample sorted by treatment:
#   'y ~ x' : project_convex_lip applied directly to (T, Y), without Z
#   'est'   : the estimator fitted in the previous cell
#   'true'  : the ground-truth function
# The curves are labelled and a legend/title is drawn so the three lines can
# be told apart, matching the convex comparison plot earlier in the notebook.
sort_idx = np.argsort(T[:, 0])
T_sorted = T[sort_idx]
t_axis = T_sorted[:, 0]

# NOTE(review): the convex comparison passes ymin/ymax to project_convex_lip,
# this call omits them -- confirm that the difference is intended.
direct_fit = project_convex_lip(T, Y, convexity='concave',
                                monotone=None, L=1, n_subsamples=50)
plt.plot(t_axis, direct_fit[sort_idx], '--', label='y ~ x')
plt.plot(t_axis, est.predict(T_sorted), label='est')
plt.plot(t_axis, true_fn(T_sorted), '--', label='true')

train_rmse = np.sqrt(np.mean((est.predict(T).flatten() - true_fn(T).flatten())**2))
plt.title("RMSE on Train: {:.3f}".format(train_rmse))
plt.legend()
plt.show()

# ShapeIV Increasing

In [None]:
# Constants of the data-generating process for the increasing section.
a = 0.4      # weight on the confounder U in the treatment equation; Z gets (1 - a)
theta = 0.2  # not referenced by any code visible in this notebook
n_z = 1      # number of columns of Z (also used as the column count of T_test)

def true_fn(T):
    """Ground-truth function of this section: t**2 / 2 for t > 0, zero otherwise.

    ``t`` is the first treatment column, kept as a column vector, so the
    result has shape ``(len(T), 1)``.
    """
    t = T[:, [0]]
    half_square = t**2/2
    is_positive = t > 0
    # Multiplying by the boolean mask zeroes the non-positive part.
    return half_square * is_positive

# Seed NumPy's global generator so that the simulated sample, and therefore
# every figure and metric derived from it, is the same on each run of this cell.
np.random.seed(123)

n = 1000
Z = np.random.normal(0, 2, size=(n, n_z))      # enters the treatment equation only
U = np.random.normal(0, 2, size=(n, 1))        # enters both T and Y (source of endogeneity)
delta = np.random.normal(0, .1, size=(n, 1))   # treatment noise
zeta = np.random.normal(0, .1, size=(n, 1))    # outcome noise
T = (1 - a) * Z + a * U + delta
Y = true_fn(T) + U + zeta

# Evaluation grid between the 5% and 95% quantiles of the observed treatment.
x_grid = np.linspace(np.quantile(T[:, 0], .05), np.quantile(T[:, 0], .95), 100)
T_test = np.zeros((100, n_z))
T_test[:, 0] = x_grid

# Left: Y against Z.  Right: Y against T, with the true function drawn on top.
sort_idx = np.argsort(T[:, 0])
plt.figure(figsize=(10,3))
plt.subplot(1, 2, 1)
plt.scatter(Z[:, 0], Y)
plt.xlabel('Z')
plt.ylabel('Y')
plt.subplot(1, 2, 2)
plt.scatter(T[:, 0], Y)
plt.plot(T[sort_idx, 0], true_fn(T[sort_idx]))
plt.xlabel('T')
plt.ylabel('Y')
plt.show()

In [None]:
# Baseline that does not use Z: increasing isotonic regression of Y on T,
# with predictions bounded to [-5, 10].
t_train = T.flatten()
t_grid = T_test.flatten()
iso = IsotonicRegression(y_min=-5, y_max=10, increasing=True).fit(t_train, Y.flatten())

plt.plot(t_grid, iso.predict(t_grid), label='est')
plt.plot(t_grid, true_fn(T_test), '--', label='true')

# Metrics in the title are computed on the training sample.
RMSE = np.sqrt(np.mean((iso.predict(t_train) - true_fn(T).flatten())**2))
R2 = 1 - RMSE**2 / np.var(true_fn(T).flatten())
title_template = "RMSE on Train: {:.3f}, R2 on train: {:.2f}"
plt.title(title_template.format(RMSE, R2))
plt.legend()
plt.savefig('iso_direct.png')
plt.show()

In [None]:
# Settings for the monotone-increasing fits below (fixed eta values here,
# rather than the 2/sqrt(n_iter) rule used in the earlier sections).
n_iter = 1000
eta_theta = eta_w = .05
lambda_w = 2

In [None]:
# ShapeIV with monotonic='increasing' and outputs bounded to [-5, 10];
# the resulting figure is also saved to 'iso_iv.png'.
shape_iv_kwargs = dict(
    n_iter=n_iter,
    eta_theta=eta_theta,
    eta_w=eta_w,
    y_min=-5,
    y_max=10,
    lambda_w=lambda_w,
    monotonic='increasing',
)
est = ShapeIV(**shape_iv_kwargs).fit(Z, T, Y)

plot_est_vs_true(est, 0, T_test=T_test, T_train=T, true_fn=true_fn, fname='iso_iv.png')

In [None]:
# LipschitzShapeIV with L=8 and monotonic='increasing'. Note that n_iter is
# given literally as 100 here instead of using the n_iter variable.
lip_kwargs = dict(
    L=8,
    convexity=None,
    n_iter=100,
    eta_theta=eta_theta,
    eta_w=eta_w,
    y_min=-5,
    y_max=10,
    lambda_w=lambda_w,
    monotonic='increasing',
    n_projection_subsamples=50,
    max_projection_iters=20,
)
est = LipschitzShapeIV(**lip_kwargs)
# fit is called as a separate statement, so `est` is the constructed object
# itself rather than the return value of fit.
est.fit(Z, T, Y)

plot_est_vs_true(est, 0, T_test=T_test, T_train=T, true_fn=true_fn, fname='iso_lip_iv.png')

# ShapeIV Decreasing

In [None]:
# Constants of the data-generating process for the decreasing section.
a = 0.5            # coefficient on Z in the treatment equation T = a*Z + U
endog_strenth = 2  # coefficient on -U in the outcome equation
n_z = 1            # number of columns of Z (also used as the column count of T_test)

def true_fn(T):
    """Ground-truth function of this section: |t| for t < 0, zero otherwise.

    ``t`` is the first treatment column, kept as a column vector, so the
    result has shape ``(len(T), 1)``.
    """
    t = T[:, [0]]
    magnitude = np.abs(t)
    is_negative = t < 0
    # Multiplying by the boolean mask zeroes the non-negative part.
    return magnitude * is_negative

# Seed NumPy's global generator so that the simulated sample, and therefore
# every figure and metric derived from it, is the same on each run of this cell.
np.random.seed(123)

n = 100
Z = np.random.normal(0, 2, size=(n, n_z))    # enters the treatment equation only
U = np.random.normal(0, .5, size=(n, 1))     # enters both T and Y (source of endogeneity)
T = a*Z + U
Y = true_fn(T) - endog_strenth*U

# Evaluation grid between the 5% and 95% quantiles of the observed treatment.
x_grid = np.linspace(np.quantile(T[:, 0], .05), np.quantile(T[:, 0], .95), 100)
T_test = np.zeros((100, n_z))
T_test[:, 0] = x_grid

# Left: Y against Z.  Right: Y against T, with the true function drawn on top.
sort_idx = np.argsort(T[:, 0])
plt.figure(figsize=(10,3))
plt.subplot(1, 2, 1)
plt.scatter(Z[:, 0], Y)
plt.xlabel('Z')
plt.ylabel('Y')
plt.subplot(1, 2, 2)
plt.scatter(T[:, 0], Y)
plt.plot(T[sort_idx, 0], true_fn(T[sort_idx]))
plt.xlabel('T')
plt.ylabel('Y')
plt.show()

# Mean of the true function over the sample; displayed as the cell output.
np.mean(true_fn(T))

In [None]:
# Baseline that does not use Z: decreasing isotonic regression of Y on T,
# bounded to [0, 4]. out_of_bounds='clip' lets `iso` be evaluated outside the
# range of T seen during fitting.
t_train = T.flatten()
t_grid = T_test.flatten()
iso = IsotonicRegression(y_min=0, y_max=4, increasing=False, out_of_bounds='clip').fit(t_train, Y.flatten())

plt.plot(t_grid, iso.predict(t_grid), label='est')
plt.plot(t_grid, true_fn(T_test), '--', label='true')
iso_train_rmse = np.sqrt(np.mean((iso.predict(t_train) - true_fn(T).flatten())**2))
plt.title("RMSE on Train: {:.3f}".format(iso_train_rmse))
plt.legend()
plt.show()

# Mean isotonic prediction over the sample; displayed as the cell output.
np.mean(iso.predict(T.flatten()))

In [None]:
# Settings for the monotone-decreasing fits below.
n_iter = 1000
eta_theta = eta_w = .1
lambda_w = 2

In [None]:
# ShapeIV with monotonic='decreasing' and outputs bounded to [0, 4].
shape_iv_kwargs = dict(
    n_iter=n_iter,
    eta_theta=eta_theta,
    eta_w=eta_w,
    y_min=0,
    y_max=4,
    lambda_w=lambda_w,
    monotonic='decreasing',
)
est = ShapeIV(**shape_iv_kwargs).fit(Z, T, Y)

plot_est_vs_true(est, 0, T_test=T_test, T_train=T, true_fn=true_fn)

### Adding Convexity and Lipschitz Constraints

In [None]:
# LipschitzShapeIV with L=4, convexity='convex' and monotonic='decreasing',
# outputs bounded to [0, 4].
lip_kwargs = dict(
    L=4,
    convexity='convex',
    n_iter=n_iter,
    eta_theta=eta_theta,
    eta_w=eta_w,
    y_min=0,
    y_max=4,
    lambda_w=lambda_w,
    monotonic='decreasing',
    n_projection_subsamples=50,
    max_projection_iters=20,
)
est = LipschitzShapeIV(**lip_kwargs).fit(Z, T, Y)

plot_est_vs_true(est, 0, T_test=T_test, T_train=T, true_fn=true_fn)

In [None]:
def proj_true_fn(z, n_draws=100, noise_std=.5):
    """Monte-Carlo estimate of E[true_fn(T) | Z = z] with T = a*z + noise.

    The noise is drawn from N(0, noise_std). The default of .5 matches the
    distribution of U in this section's treatment equation T = a*Z + U, so the
    average is taken over the same conditional law of T given Z that generated
    the data.

    Parameters
    ----------
    z : 2-d array
        Instrument values, one row per point; ``a*z`` must be a valid input
        to ``true_fn`` after adding one noise column.
    n_draws : int, default 100
        Number of noise draws averaged per row.
    noise_std : float, default .5
        Standard deviation of the simulated treatment noise.

    Returns
    -------
    array
        The average of ``true_fn`` over the draws, one entry per row of ``z``.
    """
    # Sized from z itself so the helper does not depend on the global n.
    noise = np.random.normal(0, noise_std, size=(z.shape[0], 1, n_draws))
    return np.mean([true_fn(a*z + noise[:, :, i]) for i in range(n_draws)], axis=0)


def est_proj_true_fn(z, n_draws=100, noise_std=.5):
    """Monte-Carlo estimate of E[est.predict(T) | Z = z] with T = a*z + noise.

    Same simulation as ``proj_true_fn`` (noise ~ N(0, noise_std), default
    matching this section's treatment equation), but averaging the fitted
    estimator ``est`` instead of the ground truth.

    Parameters
    ----------
    z : array
        Instrument values; reshaped to ``(z.shape[0], -1, 1)`` before the
        noise is added.
    n_draws : int, default 100
        Number of noise draws averaged per row.
    noise_std : float, default .5
        Standard deviation of the simulated treatment noise.
    """
    n_rows = z.shape[0]
    noise = np.random.normal(0, noise_std, size=(n_rows, 1, n_draws))
    T_draws = a*z.reshape(n_rows, -1, 1) + noise
    return np.mean([est.predict(T_draws[:, :, i]) for i in range(n_draws)], axis=0)

# All curves are drawn against the instrument values, sorted ascending.
z_order = np.argsort(Z[:, 0])
Z_sorted = Z[z_order]
z_axis = Z_sorted[:, 0]

plt.figure(figsize=(6,6))
# 'Th0' / 'Th': truth and estimator averaged by the two projection helpers.
plt.plot(z_axis, proj_true_fn(Z_sorted), label='Th0')
plt.plot(z_axis, est_proj_true_fn(Z_sorted), label='Th')
# 'h' / 'h0': estimator and truth evaluated directly at the Z values.
plt.plot(z_axis, est.predict(Z_sorted), label='h')
plt.plot(z_axis, true_fn(Z_sorted), label='h0')
# 'lr' / 'iso': linear and isotonic regressions of Y on T, evaluated at the Z values.
linear_fit = LinearRegression().fit(T, Y)
plt.plot(z_axis, linear_fit.predict(Z_sorted), label='lr')
plt.plot(z_axis, iso.predict(z_axis), label='iso')
plt.legend()
plt.show()