# Comparing Gradients 
In this notebook, we aim to emphasize the practicality of our framework by showing how our gradients can also be used in the untruncated regression setting. We do this by showing that our TruncatedUnknownVarianceMSE and TruncatedMSE gradients are equal to the respective MSE gradient in the untruncated setting. Further, we demonstrate that our gradients work in the untruncated setting by passing an identity oracle into our framework.

In [1]:
import sys 
sys.path.append('../..')
from cox.utils import Parameters
from cox.store import Store
from cox.readers import CollectionReader
from sklearn.linear_model import LinearRegression
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import math
import config
import numpy as np
import torch as ch
from torch import Tensor
from torch.distributions.normal import Normal
from torch.distributions.multivariate_normal import MultivariateNormal
from torch.distributions import Uniform
from torch.utils.data import DataLoader
from delphi.stats.linear_regression import TruncatedLinearRegression
from delphi import oracle
from delphi import grad
import datetime

# Experiment Hyperparameters

In [56]:
# Bundle every tunable value of the experiment into a single Parameters object.
args = Parameters(dict(
    bias=True,              # whether the ground-truth linear model has a bias term
    num_samples=100,
    samples=10000,          # number of rows drawn for the synthetic dataset
    shuffle=True,
    tol=5e-2,
    momentum=.5,
    w_lower=-1,             # lower bound of the uniform range for ground-truth parameters
    w_upper=1,              # upper bound of the uniform range for ground-truth parameters
    x_lower=-5,             # lower bound of the uniform range for the features
    x_upper=5,              # upper bound of the uniform range for the features
    var=1.0,                # variance of the noise added to the responses
    dims=3,                 # number of input features
    k=1,                    # number of model outputs
    eps=1e-5,
))
# Publish the same object through the `config` module as well.
config.args = args
print("args: ", args)

args:  {
  "bias": true,
  "num_samples": 100,
  "samples": 10000,
  "shuffle": true,
  "tol": 0.05,
  "momentum": 0.5,
  "w_lower": -1,
  "w_upper": 1,
  "x_lower": -5,
  "x_upper": 5,
  "var": 1.0,
  "dims": 3,
  "k": 1,
  "eps": 1e-05
}


# Gradients of Interest

In [29]:
# Baseline: PyTorch's ordinary mean-squared-error loss.
mse = ch.nn.MSELoss()
# delphi's truncated losses, bound through `.apply` so they can be called like plain functions.
trunc_mse, trunc_unknown_mse = (
    grad.TruncatedMSE.apply,
    grad.TruncatedUnknownVarianceMSE.apply,
)

# Helpers

In [74]:
def _loss_gradient(loss_fn, pred, target, lambda_):
    """Return d(loss)/d(pred) for `loss_fn`, preferring its three-argument form."""
    try:
        loss = loss_fn(pred, target, lambda_)
    except TypeError:
        # loss_fn does not accept the extra lambda argument; use the two-argument
        # form. Any other exception is a genuine failure and is allowed to surface.
        loss = loss_fn(pred, target)
    grad_wrt_pred, = ch.autograd.grad(loss, [pred])
    return grad_wrt_pred


def gradient_similarity(args, grad_1, grad_2, x_trunc, y_trunc):
    """
    Compare the gradients that two loss functions produce for the same predictions.

    A freshly initialised linear model maps `x_trunc` to predictions; each loss
    is evaluated on those predictions and differentiated with respect to them.
    The cosine similarity of the two gradients is printed.

    args: object exposing `dims` (model input features) and `k` (model outputs)
    grad_1: first loss, callable as f(pred, targ, lambda) or f(pred, targ)
    grad_2: second loss, callable as f(pred, targ, lambda) or f(pred, targ)
    x_trunc: truncated independent random variable
    y_trunc: truncated dependent random variable
    returns: tuple (g1, g2), the gradients of grad_1 and grad_2 with respect
        to the model predictions
    """
    # pass data through model
    model = ch.nn.Linear(in_features=args.dims, out_features=args.k)
    pred = model(x_trunc)

    # Both losses receive the same fixed lambda so that the comparison is
    # deterministic and like-for-like.
    # NOTE(review): lambda is presumably the extra scale term of the
    # unknown-variance loss - confirm against delphi.grad.
    lambda_ = Tensor([1.0])[None, ...]

    g1_g = _loss_gradient(grad_1, pred, y_trunc, lambda_)
    g2_g = _loss_gradient(grad_2, pred, y_trunc, lambda_)

    # Transposing before CosineSimilarity(dim=1) compares the gradients along
    # the sample axis.
    print('Cosine Similarity between grad 1 and grad 2: ',
            ch.nn.CosineSimilarity(dim=1)(g2_g.T, g1_g.T))
    print("----")

    return g1_g, g2_g

# Generate Synthetic Data for Comparing Gradients

In [35]:
# distributions to sample from: U for the ground-truth parameters, U_ for the features
U = Uniform(args.w_lower, args.w_upper)
U_ = Uniform(args.x_lower, args.x_upper)

# generate ground truth
gt = ch.nn.Linear(in_features=args.dims, out_features=1, bias=args.bias)
gt.weight = ch.nn.Parameter(U.sample(ch.Size([1, args.dims])))
# bias term
if args.bias:
    # nn.Linear expects a bias of shape (out_features,), i.e. (1,) here
    gt.bias = ch.nn.Parameter(U.sample(ch.Size([1])))

# ground-truth data: generated without autograd tracking so that X and y are
# plain data tensors rather than nodes attached to the ground-truth parameters
with ch.no_grad():
    X = U_.sample(ch.Size([args.samples, args.dims]))
    y = gt(X) + ch.sqrt(Tensor([args.var])) * ch.randn(X.size(0), 1)

alpha:  1.0


# Identity Oracle
We first compare our methods in the case where we pass in an identity oracle. We show that the gradients calculated by TruncatedMSE and TruncatedUnknownVarianceMSE consistently have a very high cosine similarity to each other.

In [67]:
# Configure the experiment: identity oracle, unit variance.
setattr(config.args, 'phi', oracle.Identity())
setattr(config.args, 'var', Tensor([1.0]))

# Keep only the rows for which the oracle output is non-zero.
filtered = config.args.phi(y)
indices = filtered.nonzero(as_tuple=False).flatten()
x_trunc = X[indices]
y_trunc = y[indices]

# Fraction of the dataset that was kept.
print("alpha: ", x_trunc.size(0) / X.size(0))

# Pairwise comparisons; each call prints the cosine similarity of the two gradients.
print("Truncated MSE and MSE:")
g_trunc, g_mse = gradient_similarity(
    config.args, trunc_mse, mse, x_trunc, y_trunc)

print("Truncated Unknown Variance MSE and MSE:")
g_trunc_unknown, g_mse = gradient_similarity(
    config.args, trunc_unknown_mse, mse, x_trunc, y_trunc)

print("Truncated Unknown Variance MSE and Truncated MSE:")
g_trunc_unknown, g_trunc = gradient_similarity(
    config.args, trunc_unknown_mse, trunc_mse, x_trunc, y_trunc)


alpha:  1.0
Truncated MSE and MSE:
Cosine Similarity between grad 1 and grad 2:  tensor([0.9998])
----
Truncated Unknown Variance MSE and MSE:
Cosine Similarity between grad 1 and grad 2:  tensor([0.9937])
----
Truncated Unknown Variance MSE and Truncated MSE:
Cosine Similarity between grad 1 and grad 2:  tensor([0.9985])
----


# Untruncated Regression
Now we show how our framework can be used to solve the untruncated regression problem.

TODO: change the framework for regression so that we start at random initial points. Instead, calculate empirical estimates in the projection set, and then project the initial estimates to within the projection set in a pre-training hook.

# Left Truncation Oracle

In [87]:
# Configure the experiment: left-truncation oracle at 0.0, unit variance.
# NOTE(review): presumably keeps samples whose response lies above 0 - confirm against delphi.oracle.
setattr(config.args, 'phi', oracle.Left(Tensor([0.0])))
setattr(config.args, 'var', Tensor([1.0]))

# Keep only the rows for which the oracle output is non-zero.
filtered = config.args.phi(y)
indices = filtered.nonzero(as_tuple=False).flatten()
x_trunc = X[indices]
y_trunc = y[indices]

# Fraction of the dataset that survived truncation.
print("alpha: ", x_trunc.size(0) / X.size(0))

# Pairwise comparisons; each call prints the cosine similarity of the two gradients.
print("Truncated MSE and MSE:")
g_trunc, g_mse = gradient_similarity(
    config.args, trunc_mse, mse, x_trunc, y_trunc)

print("Truncated Unknown Variance MSE and MSE:")
g_trunc_unknown, g_mse = gradient_similarity(
    config.args, trunc_unknown_mse, mse, x_trunc, y_trunc)

print("Truncated Unknown Variance MSE and Truncated MSE:")
g_trunc_unknown, g_trunc = gradient_similarity(
    config.args, trunc_unknown_mse, trunc_mse, x_trunc, y_trunc)


alpha:  0.4904
Truncated MSE and MSE:
Cosine Similarity between grad 1 and grad 2:  tensor([0.9870])
----
Truncated Unknown Variance MSE and MSE:
Cosine Similarity between grad 1 and grad 2:  tensor([0.9513])
----
Truncated Unknown Variance MSE and Truncated MSE:
Cosine Similarity between grad 1 and grad 2:  tensor([0.9989])
----


In [84]:
# Mean squared difference between the two gradient tensors.
# NOTE(review): g_trunc and g_mse are assigned by different gradient_similarity
# calls, each of which builds its own ch.nn.Linear model - confirm this pairing is intended.
ch.nn.functional.mse_loss(g_trunc, g_mse)

tensor(6.0874e-05)

In [88]:
# Scratch expression: the layer is not bound to a name, so this cell only displays its repr.
ch.nn.Linear(1, 1, bias=True)

Linear(in_features=1, out_features=1, bias=True)