<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc" style="margin-top: 1em;"><ul class="toc-item"></ul></div>

In [None]:
from esper.prelude import *
import torch
import torch.nn as nn
import pyro
import pyro.distributions as dist
import pyro.optim as optim
import pyro.infer as infer
from torch.utils.data import DataLoader
from transcript_utils import *
from timeit import default_timer as now
from custom_mlp import MLP, Exp
import itertools

In [None]:
# Lookup from n-gram to the score reported by mutual_info for the 'immigration' topic.
mi_dict = dict(mutual_info('immigration'))
# One value per vocabulary entry, in vocabulary order; n-grams without a score get 0.
mi_priors = torch.tensor([mi_dict.get(ngram, 0) for ngram in vocabulary])

In [None]:
# Spot-check the prior value assigned to a few n-grams.
for k in ('immigration', 'border', 'healthcare'):
    prior_value = mi_priors[vocabulary.index(k)].item()
    print('{} {:.4f}'.format(k, prior_value))

In [None]:
# compute_vectors(video_list(), vocabulary, SEGMENT_SIZE, SEGMENT_STRIDE)

In [None]:
class RegressionModel(nn.Module):
    """Bayesian logistic regression over ``p`` input features.

    ``forward`` is a plain sigmoid(linear(x)) classifier. ``model`` and
    ``guide`` are the Pyro model/guide pair that place distributions over
    the weight and bias of ``self.linear``.

    NOTE(review): ``iarange``, ``.independent`` and ``random_module`` are
    kept as in the original; they look like legacy Pyro APIs, so confirm
    against the installed Pyro version before modernizing.
    """

    def __init__(self, p):
        # p = number of features
        super(RegressionModel, self).__init__()
        self.linear = nn.Linear(p, 1)
        self.sigmoid = nn.Sigmoid()
        self.softplus = nn.Softplus()
        self.p = p

    def forward(self, x):
        """Return sigmoid(linear(x)); one output column per input row."""
        return self.sigmoid(self.linear(x))

    def model(self, x, y):
        """Generative model: Normal priors on the parameters, Bernoulli likelihood on ``y``."""
        # Zero-mean Normal priors with scale 10 over the parameters. The
        # weight prior is shaped (1, p) so that a sample has the same shape
        # as nn.Linear(p, 1).weight and as the guide's weight distribution.
        loc, scale = torch.zeros(1, self.p), torch.ones(1, self.p) * 10
        bias_loc, bias_scale = torch.zeros(1), torch.ones(1) * 10
        w_prior = dist.Normal(loc, scale).independent(1)
        b_prior = dist.Normal(bias_loc, bias_scale).independent(1)
        priors = {'linear.weight': w_prior, 'linear.bias': b_prior}
        # lift module parameters to random variables sampled from the priors
        lifted_module = pyro.random_module("module", self, priors)
        # sample a regressor (which also samples w and b)
        lifted_reg_model = lifted_module()
        with pyro.iarange("map", x.shape[0]):
            # run the regressor forward conditioned on data
            prediction_mean = lifted_reg_model(x).squeeze(-1)
            # condition on the observed data
            pyro.sample("obs", dist.Bernoulli(prediction_mean), obs=y)

    def guide(self, x, y):
        """Variational guide; returns a regressor sampled from the guide distributions.

        ``x`` and ``y`` are not read here (callers in this file pass ``None``).
        The weight means are initialized from the module-level ``mi_priors``
        tensor, which therefore has to hold one value per feature.
        """
        # define our variational parameters
        # clone().detach() copies the tensor without the copy-construct
        # warning that torch.tensor(<tensor>) raises in newer PyTorch.
        w_loc = mi_priors.clone().detach()
        # note that we initialize our scales to be pretty narrow
        w_log_sig = -3.0 * torch.ones(1, self.p) + 0.05 * torch.randn(1, self.p)
        b_loc = torch.tensor(0.5) + 0.05 * torch.randn(1)
        b_log_sig = -3.0 * torch.ones(1) + 0.05 * torch.randn(1)
        # register learnable params in the param store
        mw_param = pyro.param("guide_mean_weight", w_loc)
        # softplus keeps the scales positive
        sw_param = self.softplus(pyro.param("guide_log_scale_weight", w_log_sig))
        mb_param = pyro.param("guide_mean_bias", b_loc)
        sb_param = self.softplus(pyro.param("guide_log_scale_bias", b_log_sig))
        # guide distributions for w and b
        w_dist = dist.Normal(mw_param, sw_param).independent(1)
        b_dist = dist.Normal(mb_param, sb_param).independent(1)
        dists = {'linear.weight': w_dist, 'linear.bias': b_dist}
        # overload the parameters in the module with random samples
        # from the guide distributions
        lifted_module = pyro.random_module("module", self, dists)
        # sample a regressor (which also samples w and b)
        return lifted_module()

In [None]:
# Build the unlabeled dataset over all videos, then two labeled views of it
# keyed by the 'labeled_segments' and 'active_labels' entries of pcache.
# NOTE(review): SegmentVectorDataset, LabeledSegmentDataset, video_list,
# vocab_size and pcache are defined outside this file; confirm their contracts.
unsup_dataset = SegmentVectorDataset(video_list(), vocab_size=vocab_size, inmemory=True)
sup_dataset = LabeledSegmentDataset(unsup_dataset, pcache.get('labeled_segments'), categories=2)
active_sup_dataset = LabeledSegmentDataset(unsup_dataset, pcache.get('active_labels'), categories=2)
# Shared DataLoader options: both loaders shuffle, with batches of 100.
loader_params = {'shuffle': True}
unsup_loader = DataLoader(unsup_dataset, batch_size=100, **loader_params)
sup_loader = DataLoader(sup_dataset, batch_size=100, **loader_params)

In [None]:
# Scratch cell: take the first item of the unlabeled dataset and run
# importance sampling (100 samples) on it.
# NOTE(review): model_gen is only assigned by the training cell further down,
# so this cell relies on out-of-order notebook execution; `m` is not used
# again in the visible code.
x, _ = unsup_dataset[0]
m = model_gen()
infer.EmpiricalMarginal(infer.Importance(model_gen(), num_samples=100).run(x))

In [None]:
def get_accuracy(model_gen, x, y, iters=100):
    """Estimate accuracy and error rates of models drawn from ``model_gen``.

    Args:
        model_gen: zero-argument callable returning a model; it is called
            once per iteration, so a stochastic generator yields a spread.
        x: input passed to each model.
        y: target tensor compared element-wise with the rounded predictions.
        iters: number of models to draw.

    Returns:
        A pair ``(means, stds)`` of three-element lists, each ordered as
        ``[accuracy, false-positive rate, false-negative rate]``, all as
        fractions of the number of predictions. With a single iteration the
        standard deviations are reported as 0.0 instead of NaN.
    """
    samples = []
    for _ in range(iters):
        model = model_gen()
        y_pred = model(x).squeeze(-1).round()
        wrong = y_pred != y
        # A wrong prediction of 1 is a false positive, of 0 a false negative.
        fp = torch.sum(wrong & (y_pred == 1)).item()
        fn = torch.sum(wrong & (y_pred == 0)).item()
        acc = torch.sum(y_pred == y).item()
        n = float(y_pred.shape[0])
        samples.append(torch.tensor([acc / n, fp / n, fn / n]))

    stacked = torch.stack(samples)
    if len(samples) > 1:
        spread = torch.std(stacked, dim=0)
    else:
        # The sample standard deviation of one draw is undefined (NaN).
        spread = torch.zeros_like(stacked[0])
    return torch.mean(stacked, dim=0).tolist(), spread.tolist()

In [None]:
def get_labels(dataset):
    """Materialize ``dataset`` and split it in half into train/validation tensors.

    Every item is unpacked as a 3-tuple; the first element is stacked into
    the feature tensor and index 1 of the second element is used as the
    label. The third element is ignored.

    Returns:
        ``(train_x, val_x, train_y, val_y)`` where the train part is the
        first half of the items and the validation part is the remainder.
    """
    features, targets, _ = unzip(list(dataset))
    x_data = torch.stack(features)
    y_data = torch.tensor([target[1] for target in targets])

    half = len(x_data) // 2
    return x_data[:half], x_data[half:], y_data[:half], y_data[half:]

# Train/validation halves of the originally labeled segments.
train_x, val_x, train_y, val_y = get_labels(sup_dataset)

# Train/validation halves of the actively labeled segments.
(active_train_x, active_val_x,
 active_train_y, active_val_y) = get_labels(active_sup_dataset)

# Append the actively labeled halves to the matching original halves.
train_x, train_y = (torch.cat((train_x, active_train_x)),
                    torch.cat((train_y, active_train_y)))
val_x, val_y = (torch.cat((val_x, active_val_x)),
                torch.cat((val_y, active_val_y)))

In [None]:
# Score per vocabulary n-gram taken from mi_dict (0 when the n-gram has no score).
mi_priors_raw = torch.tensor([mi_dict.get(ngram, 0) for ngram in vocabulary])


def baseline_model(x):
    """Score each row of ``x`` by its dot product with ``mi_priors_raw``."""
    weight_column = mi_priors_raw.unsqueeze(1)
    return torch.mm(x, weight_column).squeeze()


# Evaluate the fixed baseline on train and validation data together.
# NOTE(review): get_accuracy rounds these raw scores, which are not squashed
# to [0, 1] here; confirm that is the intended baseline.
baseline_mean, baseline_std = get_accuracy(
    lambda: baseline_model,
    torch.cat((train_x, val_x)),
    torch.cat((train_y, val_y)),
    iters=2)
acc = baseline_mean[0]

print('Baseline accuracy: {:.4f}'.format(acc))

In [None]:
# model_name is interpolated into the checkpoint file names written by the
# training loop below.
model_name = 'regression_active'
# Single shared model instance with one input feature per vocabulary entry.
regression_model = RegressionModel(vocab_size)

def torch_trainer():
    """Build a point-estimate trainer for the module-level ``regression_model``.

    Returns:
        ``(train, model_gen)``: ``train(x, y)`` performs one Adam step and
        returns the loss as a float; ``model_gen()`` returns
        ``regression_model`` itself.
    """
    # size_average=False sums the squared errors instead of averaging them.
    criterion = nn.MSELoss(size_average=False)
    optimizer = torch.optim.Adam(regression_model.parameters(), lr=0.05)

    def train(x, y):
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()
        # Forward pass and loss against the targets.
        loss = criterion(regression_model(x).squeeze(-1), y)
        # Backpropagate, then update the parameters.
        loss.backward()
        optimizer.step()
        return loss.item()

    def current_model():
        return regression_model

    return train, current_model

def pyro_trainer():
    """Build an SVI trainer for the module-level ``regression_model``.

    Clears the Pyro parameter store, so previously learned guide parameters
    are discarded.

    Returns:
        ``(train, model_gen)``: ``train(x, y)`` runs one SVI step and returns
        its result; ``model_gen()`` returns whatever
        ``regression_model.guide(None, None)`` produces.
    """
    pyro.clear_param_store()
    svi = infer.SVI(
        regression_model.model,
        regression_model.guide,
        optim.Adam({"lr": 0.01}),
        loss=infer.Trace_ELBO(),
    )

    def train(x, y):
        return svi.step(x, y)

    def sample_model():
        return regression_model.guide(None, None)

    return train, sample_model
        
num_iterations = 100
train, model_gen = pyro_trainer()

# Each iteration is one training step on the full training set. Every fifth
# iteration reports train/validation metrics and saves the Pyro parameter store.
for epoch in range(num_iterations):
    loss = train(train_x, train_y)
    if epoch % 5 == 0:
        [tacc, tfp, tfn], _ = get_accuracy(model_gen, train_x, train_y)
        [vacc, vfp, vfn], [vaccstd, vfpstd, vfnstd] = get_accuracy(model_gen, val_x, val_y)
        # The fn spread is printed as "+/-", matching the acc and fp fields.
        print("[iteration %04d] loss: %.0f, train: acc %.3f, val: acc %.3f (+/- %.3f) fp %.3f (+/- %.3f) fn %.3f (+/- %.3f)" %
              (epoch, loss, tacc, vacc, vaccstd, vfp, vfpstd, vfn, vfnstd))
        pyro.get_param_store().save(
            '/app/data/models/transcript_{}_weights_epoch{:05d}.pt'.format(model_name, epoch))

In [None]:
def eval_model(path):
    """Load saved Pyro parameters from ``path`` and score the guide on the validation set.

    Loading replaces the contents of the global Pyro parameter store. A fresh
    ``RegressionModel`` is created and its guide is sampled 1000 times.

    Returns:
        The result of ``get_accuracy`` on ``val_x`` / ``val_y``.
    """
    pyro.get_param_store().load(path)
    old_model = RegressionModel(vocab_size)

    def sample_regressor():
        return old_model.guide(None, None)

    return get_accuracy(sample_regressor, val_x, val_y, iters=1000)

In [None]:
# Evaluate the epoch-80 checkpoint saved under the name 'regression'.
eval_model('/app/data/models/transcript_regression_weights_epoch00080.pt')

In [None]:
# Evaluate the epoch-60 checkpoint saved under the name 'regression_active'.
eval_model('/app/data/models/transcript_regression_active_weights_epoch00060.pt')

In [None]:
def model_uncertainty(model_gen, x, iters=5):
    """Return the per-example spread of rounded predictions across sampled models.

    Args:
        model_gen: zero-argument callable returning a model; called once per
            iteration.
        x: input passed to each sampled model.
        iters: number of models to sample.

    Returns:
        Tensor holding, for each example, the standard deviation of the
        rounded predictions over the ``iters`` models.
    """
    rounded = [model_gen()(x).squeeze(-1).round() for _ in range(iters)]
    return torch.stack(rounded).std(dim=0)

# Walk the unlabeled loader batch by batch, recording the prediction spread
# for each batch together with the second element the loader yields.
# NOTE(review): `i` is presumably the dataset index of each example — confirm
# against SegmentVectorDataset.
all_std = []
all_idx = []
for x, i in tqdm(unsup_loader):
    all_std.append(model_uncertainty(model_gen, x))
    all_idx.append(i)
    
# Flatten the per-batch results into one tensor each, in loader order.
all_std = torch.cat(all_std)
all_idx = torch.cat(all_idx)

In [None]:
# Pick the 1000 largest spreads, then shuffle their positions in place.
# NOTE(review): top_idx holds positions within all_std, which follows the
# shuffled loader order; it is not mapped through all_idx here — confirm
# whether all_idx[top_idx] was intended.
top_std, top_idx = all_std.topk(1000)
top_idx = top_idx.tolist()
random.shuffle(top_idx)

In [None]:
# Text view over the same video list, passed to the labeling widget below.
text_dataset = SegmentTextDataset(video_list())

In [None]:
# Open the labeling widget on a single entry: the third element of the first
# labeled item.
# NOTE(review): top_idx from the uncertainty cell is not used here — confirm
# which segments are meant to be labeled.
labels = label_widget(text_dataset, [sup_dataset[0][2]])

In [None]:
# Store the collected labels under the key read back when building
# active_sup_dataset.
pcache.set('active_labels', labels)

In [None]:
# Sample one regressor from the guide and print its raw and rounded predictions.
# NOTE(review): x_data is not assigned at module level anywhere in the visible
# code (it is a local of get_labels) — confirm where it comes from.
data = regression_model.guide(None, None)(x_data).squeeze(-1)
print(data)
print(data.round())

In [None]:
# Show the (name, parameter) pairs registered on the nn.Module itself.
list(regression_model.named_parameters())

In [None]:
# Inspect learned parameters
print("Learned parameters:")
for name, param in regression_model.named_parameters():
    # Only the linear layer's weight vector is reported.
    if name != 'linear.weight':
        continue
    weights = param.data.numpy().squeeze()
    # Order from largest to smallest weight.
    idx = np.argsort(weights)[::-1]
    print(weights[idx])
    # Vocabulary entries for the 100 largest weights.
    print(np.array(vocabulary)[idx][:100])