In [1]:
import sys 
sys.path.append('../..')
from cox.utils import Parameters
from cox.store import Store
from cox.readers import CollectionReader
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import math
import itertools
import os
import IPython
import numpy as np
import torch as ch
from torch import Tensor
from torch.distributions.normal import Normal
from torch.distributions.multivariate_normal import MultivariateNormal
from torch.distributions import Uniform
from torch.utils.data import DataLoader
from delphi.stats.truncated_regression import truncated_regression
from delphi.oracle import Left, Interval
import delphi.utils.constants as consts

# Set HDF5_USE_FILE_LOCKING to 'FALSE' in this process's environment.
# NOTE(review): this runs after the cox/HDF5-backed imports above — confirm the
# HDF5 build in use still honours the variable when it is set at this point.
os.environ.update({'HDF5_USE_FILE_LOCKING': 'FALSE'})

# Helpers

In [11]:
# Directory that the experiment loop writes its cox stores into and that the
# results cell reads back from. The default is the original cluster location;
# set the REGRESSION_STORE_PATH environment variable to run the notebook on
# another machine without editing this cell.
STORE_PATH = os.environ.get(
    'REGRESSION_STORE_PATH',
    '/home/gridsan/stefanou/Regression/GradSteps',
)
# Name of the table that every store created by this notebook logs into.
STORE_TABLE_NAME = 'uniform2'

## Default Experiment Parameters

In [12]:
# regression parameters: truncation oracle built from the threshold 0.0
left = Left(Tensor([0.0]))

# procedure hyperparameters
args = Parameters({ 
    'epochs': 5,
    'workers': 0, 
    'batch_size': 100,
    'bias': True,
    'num_samples': 1000,
    'samples': 10000, 
    'in_features': 2, 
    'clamp': True, 
    'radius': 5.0, 
    'var_lr': 1e-2,
    'lr': 1e-1,
    'shuffle': False, 
    'eps': 1e-5, 
    'custom_lr_multiplier': consts.COSINE,
    'trials': 10,
    'tol': 1e-2,
})

# Choose the compute device once and record it on args. Plain attribute
# assignment goes through the same __setattr__ hook as the explicit dunder call.
device = 'cuda' if ch.cuda.is_available() else 'cpu'
args.device = device
print("using {}...".format(device))

# oracle used by the experiment loop to truncate samples and fit the models
args.phi = left

using cuda...


In [None]:
# Repeat the whole noise-variance sweep `args.trials` times. Each
# (trial, variance) run logs exactly one row into its own store.
for iter_ in range(args.trials):
    for var in range(1, 20):
        # create store and add table
        store = Store(STORE_PATH)
        try:
            store.add_table(STORE_TABLE_NAME, {
                'known_param_mse': float,
                'unknown_param_mse': float,
                'unknown_var_mse': float,
                'ols_param_mse': float,
                'ols_var_mse': float,
                'alpha': float,
                'var': float,
            })

            # generate random uniform weights and intercept
            W = Uniform(-1, 1).sample(ch.Size([args.in_features, 1]))
            W0 = Uniform(-1, 1).sample(ch.Size([1, 1]))

            # generate data: uniform features, linear response plus zero-mean
            # Gaussian noise whose variance is `var`
            X = Uniform(-5, 5).sample(ch.Size([args.samples, args.in_features]))
            noise = Normal(ch.zeros(1), ch.sqrt(Tensor([var]))).sample(ch.Size([args.samples]))
            y = X.mm(W) + W0 + noise

            # truncate: keep only the samples for which the oracle is non-zero
            indices = args.phi(y).nonzero(as_tuple=False).flatten()
            y_trunc, x_trunc = y[indices], X[indices]

            # empirical linear regression on the truncated sample
            OLS = LinearRegression()
            OLS.fit(x_trunc, y_trunc)
            # empirical variance of the OLS residuals
            OLS_VAR = ch.var(ch.from_numpy(OLS.predict(x_trunc)) - y_trunc, dim=0).unsqueeze(0)
            args.ols_var = OLS_VAR
            # survival probability: fraction of generated samples that were kept
            ALPHA = Tensor([y_trunc.size(0)/args.samples])
            args.alpha = ALPHA

            # truncated linear regression with known noise variance
            # (the OLS residual variance is passed in as the variance)
            trunc_reg = truncated_regression(phi=args.phi, alpha=args.alpha, args=args, bias=args.bias, var=args.ols_var)
            results = trunc_reg.fit(x_trunc, y_trunc)
            w_, w0_ = results.weight.detach().cpu(), results.bias.detach().cpu()

            # truncated linear regression with unknown noise variance; the
            # fitted `v` and `bias` are multiplied by the inverse of `lambda_`
            trunc_reg = truncated_regression(phi=args.phi, alpha=args.alpha, args=args, bias=args.bias)
            results = trunc_reg.fit(x_trunc, y_trunc)
            var_ = results.lambda_.inverse().detach()
            w, w0 = (results.v.detach()*var_).cpu(), (results.bias.detach()*var_).cpu()

            # parameter estimates
            real_params = ch.cat([W, W0])
            ols_params = ch.cat([Tensor(OLS.coef_).flatten(), Tensor(OLS.intercept_)])
            unknown_params = ch.cat([w, w0])
            known_params = ch.cat([w_.t(), w0_.unsqueeze(0)])

            # metrics, passed in sklearn's (y_true, y_pred) order. `var_` is
            # moved to the CPU first so sklearn can convert it when the fit ran
            # on another device; `.cpu()` is a no-op for CPU tensors.
            true_var = Tensor([var])
            unknown_param_mse = mean_squared_error(real_params, unknown_params)
            unknown_var_mse = mean_squared_error(true_var, var_.cpu())
            ols_param_mse = mean_squared_error(real_params, ols_params)
            ols_var_mse = mean_squared_error(true_var, args.ols_var)
            known_param_mse = mean_squared_error(real_params, known_params)

            store[STORE_TABLE_NAME].append_row({
                'known_param_mse': known_param_mse,
                'unknown_param_mse': unknown_param_mse,
                'unknown_var_mse': unknown_var_mse,
                'ols_param_mse': ols_param_mse,
                'ols_var_mse': ols_var_mse,
                'alpha': float(args.alpha.flatten()),
                'var': float(var),
            })

            IPython.display.clear_output(wait=False)
        finally:
            # Always close the current store, even when a run fails part-way:
            # an unclosed store can leave a truncated HDF5 file behind that the
            # results reader cannot open.
            store.close()

Logging in: /home/gridsan/stefanou/Regression/GradSteps/bf27654d-3027-4fc9-b44c-7effea373c19
==> Preparing dataset truncated_regression..


HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))

# Results

In [10]:
# Load the rows logged under STORE_TABLE_NAME from the stores in STORE_PATH
# into a single DataFrame.
reader = CollectionReader(STORE_PATH)
try:
    results = reader.df(STORE_TABLE_NAME)
finally:
    # close reader even if building the DataFrame fails
    reader.close()

# last expression of the cell, so the preview is actually displayed
results.head()

 43%|████▎     | 373/860 [00:28<00:37, 12.99it/s]


OSError: HDF5 error back trace

  File "H5F.c", line 509, in H5Fopen
    unable to open file
  File "H5Fint.c", line 1400, in H5F__open
    unable to open file
  File "H5Fint.c", line 1700, in H5F_open
    unable to read superblock
  File "H5Fsuper.c", line 623, in H5F__super_read
    truncated file: eof = 96, sblock->base_addr = 0, stored_eof = 2048

End of HDF5 error back trace

Unable to open/create file '/home/gridsan/stefanou/Regression/Var20_200Trials/327809b2-d5af-48a2-9d19-be31346bbb32/store.h5'

# All Trials

In [None]:
# Each entry describes one figure: its y-axis label and the
# (results column, legend label, line colour) of every curve drawn on it.
figure_specs = [
    # variance vs param mse
    ('Squared Error Theta*', [
        ('unknown_param_mse', 'unknown', 'blue'),
        ('known_param_mse', 'known', 'purple'),
        ('ols_param_mse', 'ols', 'red'),
    ]),
    # var vs var mse
    ('Squared Error Sigma^2*', [
        ('unknown_var_mse', 'unknown', 'blue'),
        ('ols_var_mse', 'ols', 'red'),
    ]),
]

for y_label, curves in figure_specs:
    for column, legend, colour in curves:
        # every call draws on the current axes and returns it
        ax = sns.lineplot(data=results, x='var', y=column, label=legend, color=colour)
    ax.set(xlabel='Noise Variance', ylabel=y_label)
    plt.show()

In [7]:
# number of rows in the loaded results table
results.shape[0]

37