# DGP

We generate $n$ samples from the data generating process:

\begin{align}
z &\sim N(\mu=0, \sigma=I_d)\\
v &\sim N(\mu=0, \sigma=I_d)\\
x &= \gamma z + v\\
y &\sim N(\mu=\langle x + 4 v, \theta\rangle, \sigma=1)
\end{align}

where $z$ is a $d$-dimensional instrument, $x$ is a $d$-dimensional treatment, $v$ is an unobserved confounder. Each instrument $z_i$ is an instrument for treatment $x_i$. The coefficient $\theta$ is $s$-sparse. The parameter $\gamma$ controls the strength of the instrument.

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import warnings
warnings.simplefilter("ignore")
import matplotlib.pyplot as plt
from sklearn.linear_model import Lasso, ElasticNetCV, ElasticNet, LassoCV
from mliv.linear import TSLasso, OptimisticHedgeVsOptimisticHedge, StochasticOptimisticHedgeVsOptimisticHedge,\
                        ProxGradientVsHedge, SubGradientVsHedge, L2OptimisticHedgeVsOGD, L2ProxGradient,\
                        L2SubGradient
from mliv.linear.utilities import cross_product

In [None]:
# Simulate the sparse linear IV design. The global NumPy seed is fixed so
# that a fresh Run All reproduces the same draws.
np.random.seed(12)
n = 600      # number of samples
d = 1000     # number of instruments (and of treatments)
s = 2        # number of non-zero entries in the true coefficient vector
gamma = 1    # instrument strength: Gamma = gamma * I
true_coefs = np.zeros(d)
# The first s coefficients are random signs in {-1, +1}; the rest stay zero.
true_coefs[:s] = (2*np.random.binomial(1, .5, size=s)-1)
z = np.random.normal(0, 1, size=(n, d))
v = np.random.normal(0, 1, size=(n, d))
Gamma = gamma * np.eye(d)
x = z @ Gamma + v
# v enters both x and y, which is what makes x endogenous.
# NOTE(review): the markdown above describes a confounding weight of 4 and
# unit-variance outcome noise; this cell uses weight 1 and keeps the noise
# term disabled -- confirm which one is intended.
y = np.dot(x + v, true_coefs)#+ np.random.normal(0, 1, size=(n,))
print(true_coefs[:s])
plt.hist(x[:, s])
plt.hist(x[:, 1])
plt.show()
# Held-out treatments drawn the same way as the training ones. The confounder
# must be (n, d) like `v`; a (n, 1) draw would be broadcast so that every
# test treatment shares one confounder, which is a different distribution.
z_test = np.random.normal(0, 1, size=(n, d))
v_test = np.random.normal(0, 1, size=(n, d))
x_test = z_test @ Gamma + v_test

In [None]:
# Baseline: cross-validated Lasso of y on x, with no use of the instruments z.
direct = LassoCV(cv=3)
direct.fit(x, y)
direct_coefs = direct.coef_
direct_err = direct_coefs - true_coefs
print("Estimated non-zero coefs: ", direct_coefs[:s])
print("ell2 error:", np.linalg.norm(direct_err, ord=2))
# Root mean squared difference between estimated and true linear predictions
# on the held-out treatments.
print("RMSE:", np.sqrt(np.mean(x_test.dot(direct_err)**2)))
fig, ax = plt.subplots()
ax.plot(direct_coefs)
plt.show()

In [None]:
# From here on `gamma` is the smallest eigenvalue of Gamma^T Gamma (it no
# longer holds the scalar used to build Gamma). The displayed value,
# gamma / (8 s), is what the cells below pass as lambda_theta.
gram = Gamma.T @ Gamma
gamma = np.linalg.eigvalsh(gram).min()
gamma/(8*s)

# Two Stage Lasso

In [None]:
# Two-stage Lasso, using a fixed-penalty Lasso as the first-stage model.
first_stage = Lasso(alpha=.01)
tslasso = TSLasso(first_stage=first_stage).fit(z, x, y)
tslasso_coefs = tslasso.coef_
tslasso_err = tslasso_coefs - true_coefs
print("Estimated non-zero coefs: ", tslasso_coefs[:s])
print("ell2 error:", np.linalg.norm(tslasso_err, ord=2))
print("RMSE:", np.sqrt(np.mean(x_test.dot(tslasso_err)**2)))
fig, ax = plt.subplots()
ax.plot(tslasso_coefs)
plt.show()

In [None]:
# Fit OptimisticHedgeVsOptimisticHedge on (z, x, y), print its diagnostics,
# and plot the fitted coefficients next to the fitted w_ vector.
hedge_params = dict(B=3, lambda_theta=gamma/(8*s),
                    eta_theta=.5, eta_w=.5,
                    n_iter=10000, tol=.0001, sparsity=None)
est = OptimisticHedgeVsOptimisticHedge(**hedge_params).fit(z, x, y)
# NOTE(review): `coef` is compared against the truth while `coef_` is plotted;
# they are separate attributes of the estimator -- confirm how they differ.
coefs = est.coef
coef_err = coefs - true_coefs
print("Maximum violation: ", est.max_violation_)
print("Estimated non-zero coefs: ", coefs[:s])
print("ell2 error:", np.linalg.norm(coef_err, ord=2))
print("RMSE:", np.sqrt(np.mean(x_test.dot(coef_err)**2)))
print("Min/Max: ", est.min_response_loss_, est.max_response_loss_)
print("Dualit Gap: ", est.duality_gap_)
print("Iters: ", est.n_iters_)
fig, (ax_coef, ax_w) = plt.subplots(1, 2, figsize=(15, 5))
ax_coef.plot(est.coef_)
ax_w.plot(est.w_)
plt.show()

In [None]:
# Fit StochasticOptimisticHedgeVsOptimisticHedge on the same data and report
# the same diagnostics as for the non-stochastic estimator.
stoch_params = dict(B=3, lambda_theta=gamma/(8*s),
                    eta_theta=.1, eta_w=.1,
                    n_iter=1000, tol=1/n)
stochest = StochasticOptimisticHedgeVsOptimisticHedge(**stoch_params).fit(z, x, y)
stochcoefs = stochest.coef
stoch_err = stochcoefs - true_coefs
print("Maximum violation: ", stochest.max_violation_)
print("Estimated non-zero coefs: ", stochcoefs[:s])
print("ell2 error:", np.linalg.norm(stoch_err, ord=2))
print("RMSE:", np.sqrt(np.mean(x_test.dot(stoch_err)**2)))
print("Min/Max: ", stochest.min_response_loss_, stochest.max_response_loss_)
print("Dualit Gap: ", stochest.duality_gap_)
print("Iters: ", stochest.n_iters_)
fig, (ax_coef, ax_w) = plt.subplots(1, 2, figsize=(15, 5))
ax_coef.plot(stochest.coef_)
ax_w.plot(stochest.w_)
plt.show()

In [None]:
# Scatter each method's estimates against the true coefficients; points on the
# dashed diagonal are recovered exactly. The figure is also written to disk.
fig, ax = plt.subplots()
estimates_by_method = [
    ('Lasso', direct_coefs),
    ('2SLasso', tslasso_coefs),
    ('SparseIV', coefs),
    ('StochasticSparseIV', stochcoefs),
]
for method_label, method_coefs in estimates_by_method:
    ax.scatter(true_coefs, method_coefs, label=method_label)
diagonal = np.linspace(np.min(true_coefs), np.max(true_coefs), 10)
ax.plot(diagonal, diagonal, '--', label='x=y')
ax.set_xlabel('true coefficient')
ax.set_ylabel('estimated coefficient')
ax.legend()
fig.savefig('true_v_est_sparse_linear.png')
plt.show()

In [None]:
# Plot the w_ vector of the stochastic estimator, one value per variable, and
# save the figure to disk.
fig, ax = plt.subplots()
ax.plot(stochest.w_)
ax.set_xlabel('variable')
ax.set_ylabel('dual parameter')
fig.savefig('duals.png')
plt.show()

In [None]:
# Largest absolute coordinate-wise error (ord=np.inf is the max-abs norm for a
# vector), so the label says ell_inf rather than ell2.
print("ell_inf error:", np.linalg.norm(coefs-true_coefs, ord=np.inf))

### Optimistic Proximal Gradient vs Optimistic MWU

In [None]:
# Fit ProxGradientVsHedge on (z, x, y), print its diagnostics, and plot the
# estimated coefficients next to the fitted w_ vector.
prox_params = dict(B=3, lambda_theta=gamma/(8*s),
                   eta_theta=1, eta_w=1,
                   n_iter=10000, tol=1/n)
est = ProxGradientVsHedge(**prox_params).fit(z, x, y)
coefs = est.coef
coef_err = coefs - true_coefs
print("Maximum violation: ", est.max_violation_)
print("Estimated non-zero coefs: ", coefs[:s])
print("ell2 error:", np.linalg.norm(coef_err, ord=2))
print("RMSE:", np.sqrt(np.mean(x_test.dot(coef_err)**2)))
print("Min/Max: ", est.min_response_loss_, est.max_response_loss_)
print("Dualit Gap: ", est.duality_gap_)
print("Iters: ", est.n_iters_)
fig, (ax_coef, ax_w) = plt.subplots(1, 2, figsize=(15, 5))
ax_coef.plot(coefs)
ax_w.plot(est.w_)
plt.show()

### Simultaneous Descent: Subgradient Descent vs MWU


In [None]:
# Fit SubGradientVsHedge with 'auto' step sizes and a tolerance of n^(-2/3),
# then print the diagnostics and plot coefficients and the w_ vector.
subgrad_params = dict(B=3, lambda_theta=gamma/(8*s),
                      eta_theta='auto', eta_w='auto',
                      n_iter=10000, tol=1/n**(2/3))
est = SubGradientVsHedge(**subgrad_params).fit(z, x, y)
coefs = est.coef
coef_err = coefs - true_coefs
print("Maximum violation: ", est.max_violation_)
print("Estimated non-zero coefs: ", coefs[:s])
print("ell2 error:", np.linalg.norm(coef_err, ord=2))
print("RMSE:", np.sqrt(np.mean(x_test.dot(coef_err)**2)))
print("Min/Max: ", est.min_response_loss_, est.max_response_loss_)
print("Dualit Gap: ", est.duality_gap_)
print("Iters: ", est.n_iters_)
fig, (ax_coef, ax_w) = plt.subplots(1, 2, figsize=(15, 5))
ax_coef.plot(coefs)
ax_w.plot(est.w_)
plt.show()

# L2 Adversary

In [None]:
# Second design: d_z instruments feed d_x treatments through a random matrix
# Gamma whose columns are scaled to unit Euclidean norm.
# No re-seeding here: draws continue from the current global NumPy random
# state (the fixed seed np.random.seed(1456) is left disabled).
n, d_x, d_z = 5000, 10, 100
s = 2
gamma = 1
sigma_z = 1
half_z = d_z // 2

# Only coefficient 1 is non-zero; it is +2 or -2 with equal probability.
true_coefs = np.zeros(d_x)
coef_sign = 2*np.random.binomial(1, .5) - 1
true_coefs[1] = 2*coef_sign

z = np.random.normal(0, sigma_z, size=(n, d_z))
# A single confounder per sample, broadcast across all treatments.
v = np.random.normal(0, 1, size=(n, 1))

Gamma = np.random.normal(0, 1, size=(d_z, d_x))
# Treatment 0 loads only on the first half of the instruments and treatment 1
# only on the second half.
Gamma[half_z:, 0] = 0
Gamma[:half_z, 1] = 0
column_norms = np.linalg.norm(Gamma, ord=2, axis=0, keepdims=True)
Gamma /= column_norms

x = z @ Gamma + v
# The confounder also shifts the outcome directly; outcome noise is disabled.
y = np.dot(x, true_coefs) + v[:, 0]
print(true_coefs[:s])

# Held-out sample generated the same way as the training treatments.
z_test = np.random.normal(0, 1, size=(n, d_z))
v_test = np.random.normal(0, 1, size=(n, 1))
x_test = z_test @ Gamma + v_test

In [None]:
# Overwrite `gamma` with the smallest eigenvalue of Gamma^T Gamma scaled by
# the instrument variance sigma_z^2, and display it.
gram = Gamma.T @ Gamma
gamma = np.linalg.eigvalsh(gram).min() * (sigma_z**2)
gamma

In [None]:
# Two-stage Lasso on the second design, with an ElasticNet first stage.
first_stage = ElasticNet(l1_ratio=.05, alpha=0.001)
est = TSLasso(first_stage=first_stage).fit(z, x, y)
coefs = est.coef_
coef_err = coefs - true_coefs
print("Estimated non-zero coefs: ", coefs[:s])
print("ell2 error:", np.linalg.norm(coef_err, ord=2))
print("RMSE:", np.sqrt(np.mean(x_test.dot(coef_err)**2)))
fig, ax = plt.subplots()
ax.plot(coefs)
plt.show()

In [None]:
# Fit L2OptimisticHedgeVsOGD on (z, x, y), print its diagnostics, and plot the
# estimated coefficients next to the fitted w_ vector.
ogd_params = dict(B=3, tol=1/n, lambda_theta=gamma/(8*s),
                  n_iter=10000, eta_theta=.1, eta_w=.1, sparsity=None)
est = L2OptimisticHedgeVsOGD(**ogd_params).fit(z, x, y)
coefs = est.coef
coef_err = coefs - true_coefs
print("Maximum violation: ", est.max_violation_)
print("Estimated non-zero coefs: ", coefs[:s])
print("ell2 error:", np.linalg.norm(coef_err, ord=2))
print("RMSE:", np.sqrt(np.mean(x_test.dot(coef_err)**2)))
print("Min/Max: ", est.min_response_loss_, est.max_response_loss_)
print("Dualit Gap: ", est.duality_gap_)
print("Iterations: ", est.n_iters_)
fig, (ax_coef, ax_w) = plt.subplots(1, 2, figsize=(15, 5))
ax_coef.plot(coefs)
ax_w.plot(est.w_)
plt.show()

In [None]:
# Fit L2ProxGradient with a small fixed lambda_theta (not derived from gamma),
# then print the diagnostics and plot coefficients and the w_ vector.
l2_prox_params = dict(B=2, tol=.0001, lambda_theta=.0001, n_iter=10000)
est = L2ProxGradient(**l2_prox_params).fit(z, x, y)
coefs = est.coef
coef_err = coefs - true_coefs
print("Maximum violation: ", est.max_violation_)
print("Estimated non-zero coefs: ", coefs[:s])
print("ell2 error:", np.linalg.norm(coef_err, ord=2))
print("RMSE:", np.sqrt(np.mean(x_test.dot(coef_err)**2)))
print("Min/Max: ", est.min_response_loss_, est.max_response_loss_)
print("Dualit Gap: ", est.duality_gap_)
fig, (ax_coef, ax_w) = plt.subplots(1, 2, figsize=(15, 5))
ax_coef.plot(coefs)
ax_w.plot(est.w_)
plt.show()

In [None]:
# Fit L2SubGradient on (z, x, y), print its diagnostics, and plot the
# estimated coefficients.
l2_subgrad_params = dict(B=2, lambda_theta=gamma/(8*s), n_iter=1000)
est = L2SubGradient(**l2_subgrad_params).fit(z, x, y)
coefs = est.coef
coef_err = coefs - true_coefs
print("Maximum violation: ", est.max_violation_)
print("Estimated non-zero coefs: ", coefs[:s])
print("ell2 error:", np.linalg.norm(coef_err, ord=2))
print("RMSE:", np.sqrt(np.mean(x_test.dot(coef_err)**2)))
print("Min/Max: ", est.min_response_loss_, est.max_response_loss_)
print("Dualit Gap: ", est.duality_gap_)
fig, ax = plt.subplots()
ax.plot(coefs)
plt.show()