In [24]:
import argparse
import gym
import numpy as np
from itertools import count

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.autograd as autograd
from torch.autograd import Variable


seed = 12
gamma = 0.95
render = True
log_interval = 5

env = gym.make('CartPole-v0')
env.seed(seed)
torch.manual_seed(seed)


class Policy(nn.Module):
    def __init__(self):
        super(Policy, self).__init__()
        self.affine1 = nn.Linear(4, 128)
        self.affine2 = nn.Linear(128, 2)

        self.saved_actions = []
        self.rewards = []

    def forward(self, x):
        x = F.relu(self.affine1(x))
        action_scores = self.affine2(x)
        return F.softmax(action_scores)


policy = Policy()
optimizer = optim.Adam(policy.parameters(), lr=1e-2)


def select_action(state):
    state = torch.from_numpy(state).float().unsqueeze(0)
    probs = policy(Variable(state))
    action = probs.multinomial()
    policy.saved_actions.append(action)
    return action.data


def finish_episode():
    R = 0
    rewards = []
    for r in policy.rewards[::-1]:
        R = r + gamma * R
        rewards.insert(0, R)
    rewards = torch.Tensor(rewards)
    rewards = (rewards - rewards.mean()) / (rewards.std() + np.finfo(np.float32).eps)
    for action, r in zip(policy.saved_actions, rewards):
        action.reinforce(r)
    optimizer.zero_grad()
    autograd.backward(policy.saved_actions, [None for _ in policy.saved_actions])
    optimizer.step()
    del policy.rewards[:]
    del policy.saved_actions[:]


running_reward = 10

for i_episode in count(1):
    state = env.reset()
    
    for t in range(10000): # Don't infinite loop while learning
        action = select_action(state)
        state, reward, done, _ = env.step(action[0,0])
        if render:
            env.render()
        policy.rewards.append(reward)
        if done:
            break

    running_reward = running_reward * 0.99 + t * 0.01
    finish_episode()
    
    if i_episode % log_interval == 0:
        print('Episode {}\tLast length: {:5d}\tAverage length: {:.2f}'.format(
            i_episode, t, running_reward))
    if running_reward > 200:
        print("Solved! Running reward is now {} and "
              "the last episode runs to {} time steps!".format(running_reward, t))
        break

[2017-07-25 11:46:30,955] Making new env: CartPole-v0


Episode 5	Last length:    13	Average length: 10.29
Episode 10	Last length:    16	Average length: 11.17
Episode 15	Last length:    57	Average length: 14.22
Episode 20	Last length:   108	Average length: 17.45
Episode 25	Last length:    66	Average length: 19.96
Episode 30	Last length:   165	Average length: 23.00
Episode 35	Last length:   199	Average length: 29.26
Episode 40	Last length:   199	Average length: 36.82
Episode 45	Last length:   167	Average length: 43.33
Episode 50	Last length:   199	Average length: 50.39


KeyboardInterrupt: 

In [31]:
policy.saved_actions

[Variable containing:
  1
 [torch.LongTensor of size 1x1], Variable containing:
  0
 [torch.LongTensor of size 1x1], Variable containing:
  0
 [torch.LongTensor of size 1x1], Variable containing:
  1
 [torch.LongTensor of size 1x1], Variable containing:
  0
 [torch.LongTensor of size 1x1]]

In [10]:
# Simple Application 
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F

class Policy(nn.Module):
    def __init__(self):
        super(Policy, self).__init__()
        self.affine1 = nn.Linear(4, 128)
        self.affine2 = nn.Linear(128, 2)

        self.saved_actions = []
        self.rewards = []

    def forward(self, x):
        x = F.relu(self.affine1(x))
        action_scores = self.affine2(x)
        return F.softmax(action_scores)

torch.manual_seed(12)
policy = Policy()
state = Variable(torch.randn(1,4),requires_grad=True)
probs = policy(state)
action = probs.multinomial(2, replace=True)
policy.saved_actions.append(action)
action.reinforce(10)
action.backward()
state.grad

TypeError: multinomial() got an unexpected keyword argument 'replace'

In [11]:
probs

Variable containing:
 0.5387  0.4613
[torch.FloatTensor of size 1x2]

In [None]:
autograd.backward(policy.saved_actions, [None for _ in policy.saved_actions])

In [None]:
torch.manual_seed(12)
A = torch.randn(30,20)
#L = A.mm(A.t())
#vals, vecs = torch.eig(L, eigenvectors=True)
vecs, vals, _ = torch.svd(A)
vecs, vals = Variable(vecs, requires_grad=True), Variable(vals, requires_grad=True)
sample = DPP()

for i in range(5):
    subset = sample(vals, vecs)
    sample.saved_actions.append(subset)

for i in range(5):    
    sample.rewards.append(torch.ones(30))
    
for action, r in zip(sample.saved_actions, sample.rewards):
        action.reinforce(r)    

# okay, strictly as in the reinforcement example, we need to abstract KERNEL = POLICY
# DPP = MULTINOMIAL


In [None]:
from dpp_nets.my_torch.linalg import custom_eig
from collections import defaultdict
from dpp_nets.my_torch.utilities import pad_with_zeros
from dpp_nets.my_torch.utilities import orthogonalize
from dpp_nets.my_torch.utilities import omit_slice
from dpp_nets.my_torch.utilities import compute_baseline
from dpp_nets.my_torch.DPP import DPP
from torch.autograd import Variable
from torch.autograd import Function
from torch.autograd import StochasticFunction
from dpp_nets.my_torch.linalg import custom_eig
import torch
import torch.nn as nn

# Set up data
batch_size = 5
max_set_size = 6
feat_dim = 7
target_dim = 3
alpha_iter = 5
hidden_dim = 10
kernel = nn.Linear(feat_dim, hidden_dim)
predictor = nn.Linear(feat_dim, target_dim)

data = torch.zeros(batch_size, max_set_size, feat_dim)
data[0,:4] = torch.randn(4,feat_dim)
data[1,:3] = torch.randn(3,feat_dim)
data[2,:6] = torch.randn(6,feat_dim)
data[3,:4] = torch.randn(4,feat_dim)
data[4,:5] = torch.randn(5,feat_dim)
data = Variable(data)
target = Variable(torch.randn(batch_size, target_dim))
criterion = nn.MSELoss()

# Step 1: Data is here!
data

# Step 2: Selecting Words for each set of words in the batch
samples = make_selections(data)

# Step 2b: Make a mask
mask1 = prepare_mask(samples)

# Step 3: Making a prediction based on the selection of words in batch
pred = make_predictions(data, mask1)

# Step 4: Evaluate predictions (Define global loss_variable + loss_list)
target = target.unsqueeze(1).expand(batch_size, alpha_iter, target_dim)
loss = criterion(pred, target)
loss_list = list(((pred - target)**2).mean(2).data)
loss_list = [list(i.view(-1)) for i in loss_list]
baseline_list = [compute_baseline(i) for i in loss_list]

# Register rewards
for action_list, reward_list in zip(samples, baseline_list):
    for action, reward in zip(action_list, reward_list):
        action.reinforce(reward*torch.ones(action.size(0)))
    #torch.autograd.backward(action_list, [None for _ in action_list])

In [None]:
samples[0][0].sum().backward(None)

In [None]:
def make_selections(batched_data): 
    
    batch_size, max_set_size, feat_dim = batched_data.size()
    mask = batched_data.abs().sum(2).sign().byte()
    length = mask.sum(1).squeeze()

    batch_kernel = kernel(batched_data.masked_select(mask.expand_as(data)).view(-1, feat_dim))
    s = 0
    samples = [[] for i in range(batch_size)]
    
    for i, e in enumerate(length.data):    
        A = batch_kernel[s:e]
        L = A.mm(A.t())
        vals, vecs = custom_eig()(L)

        for j in range(alpha_iter):
            subset = DPP()(vals, vecs)
            samples[i].append(subset)

    return samples

# one dict contains the samples; key = batch_index, value = 
# Work on mask
samples = make_selections(data)
def prepare_mask(samples):
    mask1 = [pad_with_zeros(torch.stack(i),1,max_set_size).data for i in samples]
    return mask1

def make_predictions(batched_data, mask):
    """
    This may be improved by substituting indexing for matrix multiplication
    Also find a better solution for data structures
    """
    batch_size = batched_data.size(0)
    reps = [Variable(mask1[i]).mm(batched_data[i]) for i in range(batch_size)]
    rationales = torch.cat(reps)
    predictions = predictor(rationales).view(batch_size, alpha_iter, target_dim)
    return predictions

In [None]:
# Set up data
batch_size = 5
max_set_size = 6
feat_dim = 7
target_dim = 3
alpha_iter = 5
hidden_dim = 10
alpha_iter = 2
kernel = nn.Linear(feat_dim, hidden_dim)
predictor = nn.Linear(feat_dim, target_dim)

data = torch.zeros(batch_size, max_set_size, feat_dim)
data[0,:4] = torch.randn(4,feat_dim)
data[1,:3] = torch.randn(3,feat_dim)
data[2,:6] = torch.randn(6,feat_dim)
data[3,:4] = torch.randn(4,feat_dim)
data[4,:5] = torch.randn(5,feat_dim)
data = Variable(data)
target = Variable(torch.randn(batch_size, target_dim))
criterion = nn.MSELoss()

# Forward pass
mask = data.abs().sum(2).sign().byte()
length = mask.sum(1).squeeze()
batch_kernel = kernel(data.masked_select(mask.expand_as(data)).view(-1, feat_dim))
#batch_kernel.sum().backward()
s = 0
samples = [[] for i in range(batch_size)]

for i, e in enumerate(length.data):
    
    A = batch_kernel[s:e]
    L = A.mm(A.t())
    e, v = custom_eig()(L)
    
    for j in range(alpha_iter):
        subset = DPP()(e,v)
        sample = pad_with_zeros(subset, 0,max_set_size)
        samples[i].append(sample)
        
samples = [torch.stack(i) for i in samples]
reps = [samples[i].mm(data[i]) for i in range(batch_size)]
big = torch.cat(reps)
predictions = predictor(big).view(batch_size, alpha_iter, target_dim)

target = target.unsqueeze(1).expand(batch_size, alpha_iter, target_dim)
loss = criterion(predictions, target)
loss_list = list(((predictions - target)**2).mean(2).view(-1).data)
loss_list = list(((predictions - target)**2).mean(2).data)
loss_list = [list(i.view(-1)) for i in loss_list]
loss.backward()

In [None]:
def _assess(target, subset):

    set_size = target.size(0)
    target = target.expand(set_size, set_size)
    target_mat = (target == target.t()).float()
    target_sums = target_mat.sum(1)

    subset_mat = subset.expand_as(target_mat).float()

    loss = ((target_mat * subset_mat).sum(1) - torch.ones(set_size)).abs()
    loss.div_(target_sums)
    loss = loss.sum()**2

    return loss

In [None]:
subset = (0.5*torch.ones(20)).bernoulli()
target = (torch.ones(5)).multinomial(20, replacement=True)
print(_assess(target, subset))
print(torch.cat([subset.double().view(-1,1), target.double().view(-1,1)],dim=1))

In [None]:
target = target.expand(set_size, set_size)

In [None]:
target

In [None]:
a.evaluate(1000)

In [7]:
import torch
from dpp_nets.my_torch.simulator import SimKDPP

network_params = {'set_size': 200, 'n_clusters': 10}
dtype = torch.DoubleTensor
torch.manual_seed(10)
a = SimKDPP(network_params, dtype)
torch.manual_seed(10)
a.evaluate(1000)
torch.manual_seed(10)
a.random_benchmark(1000)

Average Subset Size:  29.14799999999998
Subset Variance:  7.520096000000017
Average Loss 374.16599999999994
n_missed share 0.0
n_one share 0.14000000000000012
n_many share 0.86
Average Subset Size:  10.092999999999996
Subset Variance:  9.992351000000008
Average Loss 58.49699999999999
n_missed share 0.3608
n_one share 0.3764
n_many share 0.2628


In [16]:
torch.manual_seed(14)
a.train(1000,10,1e-3,4)
torch.manual_seed(10)
a.evaluate(1000)

Loss at it  100  is:  160.6249999999997
Loss at it  200  is:  182.37499999999966
Loss at it  300  is:  178.5249999999998
Loss at it  400  is:  178.04999999999967
Loss at it  500  is:  162.44999999999976
Loss at it  600  is:  174.07499999999976
Loss at it  700  is:  163.34999999999985
Loss at it  800  is:  165.5499999999997
Loss at it  900  is:  178.5999999999998
Loss at it  1000  is:  174.47499999999974
Average Subset Size:  22.59799999999999
Subset Variance:  6.354396000000003
Average Loss 165.16
n_missed share 0.00019999999999999996
n_one share 0.27230000000000004
n_many share 0.7274999999999999


In [None]:
A = torch.randn(2,2)

In [12]:
from torch.autograd import StochasticFunction

class STO(StochasticFunction):
    
    def forward(self, x):
        return x
    
    def backward(self, grad_output):
        return grad_output

In [30]:
x = Variable(torch.randn(2), requires_grad=True)
y = STO()(x)
y.reinforce(5*torch.ones(2))
torch.autograd.backward([y], [None])
print(x.grad.data)

RuntimeError: element 46246104 of gradients tuple is None, but the corresponding Variable requires grad

In [36]:
A = torch.randn(2,2)
print(A)


-1.4060  1.1086
-0.6354 -0.3295
[torch.FloatTensor of size 2x2]



In [37]:
A.mul_(2*torch.ones(2,2))


-2.8120  2.2171
-1.2709 -0.6590
[torch.FloatTensor of size 2x2]