In [24]:
import argparse
import gym
import numpy as np
from itertools import count

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.autograd as autograd
from torch.autograd import Variable


seed = 12
gamma = 0.95
render = True
log_interval = 5

env = gym.make('CartPole-v0')
env.seed(seed)
torch.manual_seed(seed)


class Policy(nn.Module):
    def __init__(self):
        super(Policy, self).__init__()
        self.affine1 = nn.Linear(4, 128)
        self.affine2 = nn.Linear(128, 2)

        self.saved_actions = []
        self.rewards = []

    def forward(self, x):
        x = F.relu(self.affine1(x))
        action_scores = self.affine2(x)
        return F.softmax(action_scores)


policy = Policy()
optimizer = optim.Adam(policy.parameters(), lr=1e-2)


def select_action(state):
    state = torch.from_numpy(state).float().unsqueeze(0)
    probs = policy(Variable(state))
    action = probs.multinomial()
    policy.saved_actions.append(action)
    return action.data


def finish_episode():
    R = 0
    rewards = []
    for r in policy.rewards[::-1]:
        R = r + gamma * R
        rewards.insert(0, R)
    rewards = torch.Tensor(rewards)
    rewards = (rewards - rewards.mean()) / (rewards.std() + np.finfo(np.float32).eps)
    for action, r in zip(policy.saved_actions, rewards):
        action.reinforce(r)
    optimizer.zero_grad()
    autograd.backward(policy.saved_actions, [None for _ in policy.saved_actions])
    optimizer.step()
    del policy.rewards[:]
    del policy.saved_actions[:]


running_reward = 10

for i_episode in count(1):
    state = env.reset()
    
    for t in range(10000): # Don't infinite loop while learning
        action = select_action(state)
        state, reward, done, _ = env.step(action[0,0])
        if render:
            env.render()
        policy.rewards.append(reward)
        if done:
            break

    running_reward = running_reward * 0.99 + t * 0.01
    finish_episode()
    
    if i_episode % log_interval == 0:
        print('Episode {}\tLast length: {:5d}\tAverage length: {:.2f}'.format(
            i_episode, t, running_reward))
    if running_reward > 200:
        print("Solved! Running reward is now {} and "
              "the last episode runs to {} time steps!".format(running_reward, t))
        break

[2017-07-25 11:46:30,955] Making new env: CartPole-v0


Episode 5	Last length:    13	Average length: 10.29
Episode 10	Last length:    16	Average length: 11.17
Episode 15	Last length:    57	Average length: 14.22
Episode 20	Last length:   108	Average length: 17.45
Episode 25	Last length:    66	Average length: 19.96
Episode 30	Last length:   165	Average length: 23.00
Episode 35	Last length:   199	Average length: 29.26
Episode 40	Last length:   199	Average length: 36.82
Episode 45	Last length:   167	Average length: 43.33
Episode 50	Last length:   199	Average length: 50.39


KeyboardInterrupt: 

In [31]:
policy.saved_actions

[Variable containing:
  1
 [torch.LongTensor of size 1x1], Variable containing:
  0
 [torch.LongTensor of size 1x1], Variable containing:
  0
 [torch.LongTensor of size 1x1], Variable containing:
  1
 [torch.LongTensor of size 1x1], Variable containing:
  0
 [torch.LongTensor of size 1x1]]

In [10]:
# Simple Application 
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F

class Policy(nn.Module):
    def __init__(self):
        super(Policy, self).__init__()
        self.affine1 = nn.Linear(4, 128)
        self.affine2 = nn.Linear(128, 2)

        self.saved_actions = []
        self.rewards = []

    def forward(self, x):
        x = F.relu(self.affine1(x))
        action_scores = self.affine2(x)
        return F.softmax(action_scores)

torch.manual_seed(12)
policy = Policy()
state = Variable(torch.randn(1,4),requires_grad=True)
probs = policy(state)
action = probs.multinomial(2, replace=True)
policy.saved_actions.append(action)
action.reinforce(10)
action.backward()
state.grad

TypeError: multinomial() got an unexpected keyword argument 'replace'

In [None]:
def gen_matrix_from_cluster_ix(cluster_ix):
	if not isinstance(cluster_ix, np.ndarray):
		cluster_ix = cluster_ix.numpy()
	set_size = cluster_ix.shape[0]
	matrix = np.tile(cluster_ix, (set_size,1))
	matrix = matrix - matrix.T
	matrix = ~matrix.astype(bool)
	return matrix

def plot_matrix(matrix):
	if not isinstance(matrix, np.ndarray):
		matrix = matrix.numpy()

	plt.matshow(matrix, interpolation='nearest')
	plt.show()

def plot_embd(embd):
	"""
	Plots a colormap of the L-kernel given embd,
	such that L = embd * embd.T
	Arguments:
	- embd: numpy array or torch tensor
	"""
	if not isinstance(embd, np.ndarray):
		embd = embd.numpy()
	
	L = embd.dot(embd.T)
	plot_matrix(L)

In [4]:
import torch
from torch.autograd import Variable
A = Variable(torch.randn(2,2), requires_grad=True)
B = Variable(torch.randn(2,2), requires_grad=True)
C = Variable(torch.randn(2,2), requires_grad=True)
my_l = [A, B, C]
loss = 0
for var in my_l:
    lo = torch.sin(var)
    loss += lo
loss.sum().backward()
print(A.grad)
print(B.grad)

Variable containing:
-0.2218  0.8815
-0.0302  0.6539
[torch.FloatTensor of size 2x2]

Variable containing:
-0.0219  0.7302
 0.8764  0.9063
[torch.FloatTensor of size 2x2]



In [49]:
from dpp_nets.dpp import score_dpp
import numpy as np
# Gradient Exploration
set_size = 5
kernel_dim = 10
K = torch.randn(set_size, kernel_dim)
L = K.mm(K.t())
subset1 = torch.ByteTensor([1, 1, 1, 0, 0])
subset2 = torch.ByteTensor([1, 1, 0, 0, 0])
# Gradient
embd = K.numpy()
subset1 = subset1.numpy()
subset2 = subset2.numpy()
print('Grad1 :', np.sign(score_dpp(embd, subset1)))
print('Grad2 :',np.sign(score_dpp(embd, subset2)))
print('Grad Agreement', np.sign(score_dpp(embd, subset1)) == np.sign(score_dpp(embd, subset2)))

Grad1 : [[ 1. -1.  1.  1. -1.  1.  1. -1. -1.  1.]
 [ 1.  1. -1. -1.  1. -1. -1.  1. -1. -1.]
 [-1.  1. -1. -1. -1. -1.  1.  1. -1.  1.]
 [ 1.  1.  1.  1.  1. -1. -1. -1. -1. -1.]
 [-1.  1. -1. -1.  1. -1. -1.  1.  1. -1.]]
Grad2 : [[-1. -1.  1.  1. -1.  1. -1. -1.  1. -1.]
 [-1.  1.  1. -1.  1. -1. -1.  1. -1. -1.]
 [-1. -1. -1. -1. -1. -1. -1.  1.  1. -1.]
 [ 1.  1.  1.  1.  1. -1. -1. -1. -1. -1.]
 [-1.  1. -1. -1.  1. -1. -1.  1.  1. -1.]]
Grad Agreement [[False  True  True  True  True  True False  True False False]
 [False  True False  True  True  True  True  True  True  True]
 [ True False  True  True  True  True False  True False False]
 [ True  True  True  True  True  True  True  True  True  True]
 [ True  True  True  True  True  True  True  True  True  True]]


In [26]:
K


-0.0038  0.2483 -1.4515 -1.5514  0.6025 -0.7342  0.0642 -0.4024  0.5271 -0.6581
 0.5920 -0.4561 -0.0731 -0.8933 -1.2156  1.9095  2.0881  1.5159 -0.1664  0.5997
-0.1518 -0.9442  0.0375  0.9015 -2.3705 -0.0967  1.5697  2.3204 -0.1346  0.0100
-0.7355 -1.9643  0.1382  0.6559  0.4406 -1.2344  1.2915  0.4911 -0.5186  1.2513
-1.4058 -1.1972 -0.9700 -1.5969 -0.0701 -0.0360  1.7563 -0.3487 -0.9293 -0.0764
[torch.FloatTensor of size 5x10]

In [9]:
score_dpp

<function dpp_nets.dpp.score_dpp.score_dpp>

In [55]:
import numpy as np
import matplotlib.pyplot as plt

#pgf.rcfonts : False
    
# set up figure size
plt.figure(figsize=(2, 2))

# do some plotting here
x = np.linspace(-2, 2, 1e3)
plt.plot(x, x**2)

# save to file
plt.savefig('example.pdf')
plt.savefig('example.pgf')

  # Remove the CWD from sys.path while we load stuff.


In [54]:
plt.rcParams

RcParams({'_internal.classic_mode': False,
          'agg.path.chunksize': 0,
          'animation.avconv_args': [],
          'animation.avconv_path': 'avconv',
          'animation.bitrate': -1,
          'animation.codec': 'h264',
          'animation.convert_args': [],
          'animation.convert_path': 'convert',
          'animation.ffmpeg_args': [],
          'animation.ffmpeg_path': 'ffmpeg',
          'animation.frame_format': 'png',
          'animation.html': 'none',
          'animation.mencoder_args': [],
          'animation.mencoder_path': 'mencoder',
          'animation.writer': 'ffmpeg',
          'axes.autolimit_mode': 'data',
          'axes.axisbelow': 'line',
          'axes.edgecolor': 'k',
          'axes.facecolor': 'w',
          'axes.formatter.limits': [-7, 7],
          'axes.formatter.offset_threshold': 4,
          'axes.formatter.use_locale': False,
          'axes.formatter.use_mathtext': False,
          'axes.formatter.useoffset': True,
          'ax