In [13]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np

# Concepts
Check whether `nn.Sequential` returns an ordinary model object, i.e. whether composite and singular modules are the same kind of first-class `Module` citizen.

In [14]:
# Build a small conv feature extractor out of *nested* nn.Sequential
# containers to confirm a Sequential can itself be used as a sub-module.
imh, imw = 8, 12
x = Variable(torch.rand(1, 1, imh, imw))
layer0 = nn.Sequential(
    nn.Conv2d(1,4,3,padding=1),
    nn.ReLU(),
    nn.MaxPool2d(2)
)
layer1 = nn.Sequential(
    nn.Conv2d(4,2,3,padding=1),
    nn.ReLU(),
    nn.MaxPool2d(2)
)
feat = nn.Sequential(layer0, layer1)
y_ = feat(x)
# Number of features per sample once the batch axis is dropped. Reuse the
# forward pass computed above instead of running the network a second time,
# and convert the NumPy scalar to a plain int before handing it to nn.Linear.
N = int(np.prod(y_.size()[1:]))
fc1 = nn.Linear(N, 16)
fc2 = nn.Linear(16, 4)

fconn = nn.Sequential(fc1, fc2)

In [18]:
# Conv features -> flatten to (batch, N) -> fully connected head.
flat_features = feat(x).view(-1, N)
y = fconn(flat_features)

In [19]:
# Function-call form prints the same text on Python 2 and is valid Python 3.
print(y)

Variable containing:
1.00000e-02 *
  5.7332 -1.3607  5.3090 -5.2932
[torch.FloatTensor of size 1x4]



# Unit Tests

## 1. Policy Net

In [34]:
# Architecture hyper-parameters read by PolicyNet below.
INPUT_CHANNELS = 1        # channels of the input image
KSIZE1 = 3                # kernel size of the first conv layer
KNUM1  = 32               # number of kernels in the first conv layer
KSIZE2 = 3                # kernel size of the second conv layer
KNUM2  = 64               # number of kernels in the second conv layer
HIDDEN_UNITS_NUM1 = 256   # width of the hidden fully connected layer
ACTION_NUM = 4            # size of the network output (one entry per action)
class PolicyNet(nn.Module):
    """Two conv/ReLU/max-pool stages followed by two linear layers and a softmax.

    The number of inputs to the first linear layer is not hard-coded: it is
    measured once in ``__init__`` by pushing a dummy image of the requested
    size through the convolutional stages.

    Args:
        im_width: width of the input images.
        im_height: height of the input images.
    """

    def __init__(self, im_width, im_height):
        super(PolicyNet, self).__init__()
        # `//` keeps the padding an integer on Python 3 as well (with `/` it
        # becomes a float there and the convolution rejects it). For odd
        # kernel sizes and the default stride this padding preserves the
        # spatial size, so only the pooling layers shrink the image.
        self._conv_layer1 = nn.Sequential(
            nn.Conv2d(in_channels=INPUT_CHANNELS, out_channels=KNUM1, kernel_size=KSIZE1, padding=(KSIZE1-1)//2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self._conv_layer2 = nn.Sequential(
            nn.Conv2d(in_channels=KNUM1, out_channels=KNUM2, kernel_size=KSIZE2, padding=(KSIZE2-1)//2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self._feature = nn.Sequential(self._conv_layer1, self._conv_layer2)

        # Probe the conv stack with a dummy image (batch, channel, height,
        # width) to find out how many features it emits per sample.
        dummy_input = Variable(torch.rand(1, INPUT_CHANNELS, im_height, im_width))
        dummy_feature = self._feature(dummy_input)
        # Plain int rather than a NumPy scalar for nn.Linear / view().
        nfeat = int(np.prod(dummy_feature.size()[1:]))

        self._fc1 = nn.Linear(in_features=nfeat, out_features=HIDDEN_UNITS_NUM1)
        self._fc2 = nn.Linear(in_features=HIDDEN_UNITS_NUM1, out_features=ACTION_NUM)
        # NOTE(review): Softmax is left without an explicit `dim` to stay
        # compatible with the PyTorch version this notebook was written for;
        # newer releases warn about the implicit dimension - confirm the
        # target version before adding `dim=1`.
        self._fullconn = nn.Sequential(self._fc1, self._fc2, nn.Softmax())

        self._num_features = nfeat

    def forward(self, input):
        """Return the softmax output for a batch of images.

        Args:
            input: image batch laid out like the dummy input used in
                ``__init__``: (batch, INPUT_CHANNELS, im_height, im_width).

        Returns:
            The output of the fully connected head, one row of ACTION_NUM
            values per sample.
        """
        y = self._feature(input)
        # Flatten everything except the batch axis.
        y = y.view(-1, self._num_features)
        y = self._fullconn(y)
        return y

In [35]:
# Image size used for the PolicyNet smoke test below.
IM_WIDTH = 80
IM_HEIGHT = 80
pn = PolicyNet(IM_WIDTH, IM_HEIGHT)

In [36]:
# Image batches are laid out (batch, channel, height, width), matching the
# dummy input PolicyNet builds in __init__. Height must come before width;
# the original order only worked because the test image is square.
input_image = Variable(torch.randn(1, INPUT_CHANNELS, IM_HEIGHT, IM_WIDTH))
av = pn(input_image)

In [50]:
# Each print receives one pre-formatted string, so the cell runs unchanged on
# both Python 2 and Python 3.
print("Action probabilities {} {}".format(av, av.sum()))
rng = np.random.RandomState(0)
# Uniform draw over the actions, for comparison with the weighted draw below.
print("Randomly drawn action {}".format(rng.choice(ACTION_NUM)))
v = av.data.numpy().squeeze()
print("{} {}".format(v, type(v)))
# The network output is float32, so its sum can miss 1.0 by more than
# rng.choice tolerates; renormalise in float64 before sampling.
print("Randomly drawn action {}".format(
    rng.choice(v.size, p=v.astype(np.float64) / v.sum(dtype=np.float64))))

Action probabilities Variable containing:
 0.2127  0.2743  0.2390  0.2740
[torch.FloatTensor of size 1x4]
 Variable containing:
 1
[torch.FloatTensor of size 1]

Randomly drawn action 0
[ 0.21272509  0.2742905   0.23895836  0.27402607] <type 'numpy.ndarray'>
Randomly drawn action 2


### 2. numpy stack (concatenate along a new axis)

In [66]:
# np.stack joins equally shaped arrays along a new leading axis.
rng = np.random.RandomState(0)
a = [rng.rand(3,4) for _ in range(2)]
b = np.stack(a)
print(b.shape)

# what if only one elem? The new leading axis is still added (length 1).
a = [rng.rand(3,4) for _ in range(1)]
b = np.stack(a)
print(b.shape)

(2, 3, 4)
(1, 3, 4)


### 3 Compute future reward

The "advantage" at time $t$ (here simply the discounted future reward) should be
$$
A_t = R_t + \gamma R_{t+1} + \gamma^2 R_{t+2} + \dots
$$
Now, we have a variable $G = R_{t+1} + \gamma R_{t+2} + \dots $, so we do
$$
A_t \leftarrow R_t + \gamma G
$$
Then, to carry on to the next step of the recursion (which is time $t-1$, since the rewards are processed backwards in time), we have
$$
G \leftarrow A_t
$$



### 4 JSON

In [67]:
import json

In [68]:
# Serialise a small dict (one float value, one string value) to tmp.json.
a = dict(a1=100.5, b="bbb")
with open('tmp.json', 'w') as f:
    f.write(json.dumps(a))

In [69]:
# Read tmp.json back and decode its contents into a Python object.
with open('tmp.json') as f:
    b = json.loads(f.read())

In [71]:
# Show the float value that was read back from tmp.json.
b["a1"]

100.5

### 5 torch NLLLoss

In [98]:
# Modulate predicted probability with advantages
adv = Variable(torch.rand(3))
p = Variable(torch.rand(3,2))

In [102]:
# Insert a singleton axis at position 1 so `adv` becomes a column.
adv1 = torch.unsqueeze(adv, 1)
# Intended follow-up, tried in the next cell:
#adv.unsqueeze(1).expand(adv.size(0), p.size(1))

In [106]:
# Repeat the advantage column once per column of `p`.
torch.unsqueeze(adv, 1).expand(adv1.size(0), p.size(1))

Variable containing:
 0.3374  0.3374
 0.7244  0.7244
 0.2928  0.2928
[torch.FloatTensor of size 3x2]

In [86]:
# NOTE(review): this cell was executed out of order (In [86]) and raised the
# RuntimeError recorded below. What `a` held at that point is not visible in
# this notebook - presumably a multi-element tensor/Variable, whose size would
# not match the size-1 result of mean(); confirm, and expand the mean to the
# shape of `a` (as done with `adv` above) if that is the case.
a-a.mean()

RuntimeError: inconsistent tensor size at /Users/soumith/code/builder/wheel/pytorch-src/torch/lib/TH/generic/THTensorMath.c:831