In [1]:
import sys 
sys.path.append('../..')
from cox.utils import Parameters
from cox.store import Store
from cox.readers import CollectionReader
from sklearn.linear_model import LogisticRegression
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import math
import itertools
import numpy as np
import os
import IPython
import torch as ch
from torch import Tensor
from torch.distributions import Uniform
from torch.distributions.transforms import SigmoidTransform
from torch.distributions.transformed_distribution import TransformedDistribution
from torch.utils.data import DataLoader
from delphi.stats.truncated_logistic_regression import truncated_logistic_regression
from delphi.oracle import Left, Interval
import delphi.utils.constants as consts
# Set HDF5_USE_FILE_LOCKING to 'FALSE' in this process's environment.
os.environ.update({'HDF5_USE_FILE_LOCKING': 'FALSE'})

# When CUDA is available, make CUDA float tensors the default tensor type and
# say so; otherwise the defaults are left untouched.
if ch.cuda.is_available():
    ch.set_default_tensor_type(ch.cuda.FloatTensor)
    print("cuda tensors...")

cuda tensors...


# Helpers

In [18]:
# Name of the experiment; it becomes the last component of the store path.
# Alternative names kept from the commented-out assignments:
#   'TruncatedLogisticRegressionWeightDecay', 'TruncatedLogisticRegression1e-1'
EXP_NAME = '10Trials'
# Directory handed to the cox Store / CollectionReader.
STORE_PATH = f'/home/gridsan/stefanou/LogisticRegression/{EXP_NAME}'
# Name of the table that holds one row per (trial, C) run.
STORE_TABLE_NAME = 'results'

## Default Experiment Parameters

In [19]:
# Experiment configuration, wrapped in a cox `Parameters` object.
# Keys read directly by this notebook: 'trials', 'samples', 'in_features',
# 'bias', 'lower' and 'upper'. The whole object is also passed to
# `truncated_logistic_regression`.
# NOTE(review): the remaining keys are presumably consumed by the delphi
# procedure — confirm against that library.
args = Parameters(dict(
    workers=0,
    batch_size=100,
    bias=True,
    num_samples=1000,
    samples=10000,
    in_features=2,
    clamp=True,
    radius=5.0,
    lr=1e-1,
    shuffle=False,
    tol=1e-2,
    eps=1e-5,
    score=True,
    custom_lr_multiplier=consts.CYCLIC,
    var=True,
    momentum=.9,
    weight_decay=5e-4,
    trials=10,
    lower=-1,
    upper=1,
    steps=500,
))

# Record the available device on `args`; a message is printed only for CUDA.
if not ch.cuda.is_available():
    setattr(args, 'device', 'cpu')
else:
    setattr(args, 'device', 'cuda')
    print("using cuda...")

using cuda...


# Logistic Distribution

In [20]:
# Logistic noise distribution: Uniform(0, 1) draws are pushed through the
# inverse of the sigmoid transform (i.e. the logit).
base_distribution = Uniform(low=0, high=1)
transforms_ = [SigmoidTransform().inv]
logistic = TransformedDistribution(
    base_distribution=base_distribution,
    transforms=transforms_,
)

In [None]:
# Open the cox store that collects one row per (trial, C) run.
store = Store(STORE_PATH)

try:
    # Every column is declared as a plain float, so the rows appended below
    # carry Python floats rather than tensors.
    store.add_table(STORE_TABLE_NAME, {
        'delphi_cos': float,
        'mle_cos': float,
        'alpha': float,
        'C': float,
    })

    # Repeat the whole sweep over truncation thresholds `args.trials` times.
    for iter_ in range(args.trials):
        for C in [-2, -1.75, -1.5, -1.25, -1, -.75, -.5, -.25]:
            # Membership oracle built from the threshold C.
            # NOTE(review): presumably `Left` keeps latent values above C — confirm in delphi.oracle.
            args.__setattr__('phi', Left(Tensor([C])))

            # Ground-truth linear model with weights (and bias, if present)
            # drawn uniformly from [args.lower, args.upper).
            ground_truth = ch.nn.Linear(in_features=args.in_features, out_features=1, bias=args.bias)
            ground_truth.weight = ch.nn.Parameter(Uniform(args.lower, args.upper).sample(ch.Size([1, args.in_features])))
            if ground_truth.bias is not None:
                ground_truth.bias = ch.nn.Parameter(Uniform(args.lower, args.upper).sample(ch.Size([1,])))

            # Synthetic data: latent z = <w, x> + b + logistic noise, label y = 1[z > 0].
            # No gradients are needed here, so skip building an autograd graph.
            with ch.no_grad():
                X = Uniform(-100, 100).sample(ch.Size([args.samples, args.in_features]))
                z = ground_truth(X) + logistic.sample(ch.Size([args.samples, 1]))
                y = ch.where(z > 0, ch.ones(1), ch.zeros(1))

                # Truncate: keep only the rows the oracle accepts.
                indices = args.phi(z).nonzero(as_tuple=False).flatten()
            x_trunc, y_trunc = X[indices], y[indices]
            # Survival probability: fraction of samples left after truncation.
            args.__setattr__('alpha', Tensor([y_trunc.size(0) / args.samples]))

            # Truncated logistic regression (delphi).
            trunc_log_reg = truncated_logistic_regression(args.phi, args.alpha, args)
            results = trunc_log_reg.fit(x_trunc.cpu(), y_trunc.cpu())
            w, w0 = results.weight.detach().cpu(), results.bias.detach().cpu()

            # Naive, unpenalised logistic regression on the truncated sample.
            # NOTE(review): newer scikit-learn releases spell "no penalty" as
            # penalty=None — confirm against the installed version.
            naive_log_reg = LogisticRegression(penalty='none')
            naive_log_reg.fit(x_trunc.cpu(), y_trunc.cpu().flatten())

            # Stack [weights | bias] into one row vector per estimator. Everything
            # is moved to the CPU and detached so the comparison works with or
            # without a GPU and carries no autograd history.
            real_params = ch.cat([ground_truth.weight, ground_truth.bias.unsqueeze(0)], dim=1).detach().cpu()
            mle_params = ch.cat([Tensor(naive_log_reg.coef_), Tensor(naive_log_reg.intercept_).unsqueeze(0)], dim=1).cpu()
            delphi_params = ch.cat([w, w0.unsqueeze(0)], dim=1)

            store[STORE_TABLE_NAME].append_row({
                'delphi_cos': ch.nn.functional.cosine_similarity(delphi_params, real_params).item(),
                'mle_cos': ch.nn.functional.cosine_similarity(mle_params, real_params).item(),
                'alpha': float(args.alpha),
                'C': float(C),
            })

            IPython.display.clear_output(wait=False)
finally:
    # Close the store even when a trial fails, so the rows logged so far are
    # not left behind in an open file.
    store.close()

Logging in: /home/gridsan/stefanou/LogisticRegression/10Trials/c51194f7-9952-4ba9-9c93-1feb7e2a5f29
==> Preparing dataset truncated_logistic_regression..


  0%|          | 0/52 [00:00<?, ?it/s]

# Results

In [15]:
# Parameter estimates from the most recent trial, for inspection only.
# Nothing is appended to the store here: when the notebook is run top to
# bottom the experiment cell has already closed it, and that cell has already
# logged this trial.
# Everything is moved to the CPU so the comparison also works without a GPU.
real_params = ch.cat([ground_truth.weight, ground_truth.bias.unsqueeze(0)], dim=1).detach().cpu()
mle_params = ch.cat([Tensor(naive_log_reg.coef_), Tensor(naive_log_reg.intercept_).unsqueeze(0)], dim=1).cpu()
delphi_params = ch.cat([w.detach().cpu(), w0.detach().cpu().unsqueeze(0)], dim=1)

# Same fields as a row of the results table, displayed as the cell output.
last_trial_summary = {
    'delphi_cos': ch.nn.functional.cosine_similarity(delphi_params, real_params).item(),
    'mle_cos': ch.nn.functional.cosine_similarity(mle_params, real_params).item(),
    'alpha': float(args.alpha),
    'C': float(C),
}
last_trial_summary

In [None]:
# Read the results table from the stores under STORE_PATH into a DataFrame.
reader = CollectionReader(STORE_PATH)
try:
    results = reader.df(STORE_TABLE_NAME)
finally:
    # Close the reader even if loading the table fails.
    reader.close()
results.head()

# All Trials

In [None]:
# Cosine similarity between estimated and ground-truth parameters, plotted
# against the truncation parameter C for both estimators (delphi vs. naive MLE).
# Both lines are drawn on one explicit Axes instead of relying on pyplot's
# implicit "current axes".
fig, ax = plt.subplots()
sns.lineplot(data=results, x='C', y='delphi_cos', label='delphi', color='blue', ax=ax)
sns.lineplot(data=results, x='C', y='mle_cos', label='mle', color='red', ax=ax)
ax.set(xlabel='Truncation Parameter C', ylabel='Cosine Similarity with Theta*')
plt.show()

In [17]:
# Number of logged 'mle_cos' values whose truncation parameter C equals -0.25.
len(results.loc[results['C'] == -0.25, 'mle_cos'])

60

In [7]:
# Scratch check: a (1, 10, 1) mask with ones at positions 2 and 5.
filtered = Tensor([0., 0., 1., 0., 0., 1., 0., 0., 0., 0.]).reshape(1, 10, 1)

# Element-wise equality of two 0/1 rows, with a trailing singleton axis added
# so the result lines up with `filtered`.
stuff = ch.eq(
    Tensor([[0, 1, 0, 0, 0, 0, 1, 0, 1, 0]]),
    Tensor([0, 1, 0, 0, 0, 0, 0, 0, 1, 0]),
).unsqueeze(-1)

# The boolean mask multiplied by the float mask is the cell's displayed value.
stuff * filtered

tensor([[[0.],
         [0.],
         [1.],
         [0.],
         [0.],
         [1.],
         [0.],
         [0.],
         [0.],
         [0.]]])

In [8]:
# Display the current value of the boolean equality mask `stuff`.
stuff

tensor([[[ True],
         [ True],
         [ True],
         [ True],
         [ True],
         [ True],
         [False],
         [ True],
         [ True],
         [ True]]])

In [12]:
# Scratch check: evaluate 1 - exp(-x) at x = -0.2534.
1 - ch.exp(Tensor([-0.2534]).neg())

tensor([-0.2884])

In [None]:
# Scratch values: ten 2-d rows with a leading singleton axis, shape (1, 10, 2).
inner_exp = Tensor([
    [-0.2883,  0.5253],
    [-1.5561,  0.0693],
    [ 0.6731,  0.4460],
    [ 0.6190,  0.5776],
    [ 0.8741,  0.7104],
    [ 0.7807, -1.7973],
    [ 0.4094,  0.8161],
    [-0.8624,  0.8666],
    [ 0.9460,  0.8933],
    [ 0.0144,  0.5563],
]).unsqueeze(0)

# (1, 10, 1) mask with ones at positions 2 and 5.
filtered = Tensor([0., 0., 1., 0., 0., 1., 0., 0., 0., 0.]).reshape(1, 10, 1)

# Element-wise equality of two 0/1 rows; the result has shape (1, 10).
stuff = ch.eq(
    Tensor([[0, 1, 0, 0, 0, 0, 1, 0, 1, 0]]),
    Tensor([0, 1, 0, 0, 0, 0, 0, 0, 1, 0]),
)