In [24]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

### about shape

In [4]:
x = torch.tensor(1)
y = torch.tensor([1])
z = torch.tensor([[1]])
print(x.shape)
print(y.shape)
print(z.shape)

torch.Size([])
torch.Size([1])
torch.Size([1, 1])


### torch.cat(tensors, dim=0)
Concatenated tensors must have the same shape. <br>
Tensors whose sizes are [] cannot be concatenated.

In [9]:
x = torch.tensor(1)
y = torch.tensor([1])
z = torch.tensor([[1]])
# print(torch.cat([x, x])) Error
print(torch.cat([y, y]))
print(torch.cat([z, z]))

tensor([1, 1])
tensor([[1],
        [1]])


### squeeze and unsqueeze

You should distinguish between tensors of size [x] and tensors of size [1, x]

In [23]:
class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.fc1 = nn.Linear(10, 64)
        self.fc2 = nn.Linear(64, 4)
    
    def forward(self, x):
        h = F.relu(self.fc1(x))
        out = self.fc2(h)
        return out

model = Network()
x = torch.tensor([1,3,2,4,5,1,2,10,2,3]).float()
y = x.unsqueeze(dim=0)
print(f"x's shape : {x.shape}")
print(f"y's shape : {y.shape}")
x_out = model(x)
y_out = model(y)
print(f"x_out : {x_out}")
print(f"y_out : {y_out}")
print(f"x_out's shape : {x_out.shape}")
print(f"y_out's shape : {y_out.shape}")

x's shape : torch.Size([10])
y's shape : torch.Size([1, 10])
x_out : tensor([ 0.3916,  0.9076,  1.0526, -0.3560], grad_fn=<AddBackward0>)
y_out : tensor([[ 0.3916,  0.9076,  1.0526, -0.3560]], grad_fn=<AddmmBackward0>)
x_out's shape : torch.Size([4])
y_out's shape : torch.Size([1, 4])


Especially you should be careful when comparing model's outputs and their target values.

In [31]:
class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.fc1 = nn.Linear(10, 64)
        self.fc2 = nn.Linear(64, 1)
    
    def forward(self, x):
        h = F.relu(self.fc1(x))
        out = self.fc2(h)
        return out

model = Network()
input_data = torch.randint(low=0, high=100, size=(15, 10)).float()
outputs = model(input_data)
print(outputs)

tensor([[-10.7460],
        [-11.1797],
        [ -1.1707],
        [ -3.2383],
        [ -0.7901],
        [ -4.1679],
        [  0.0161],
        [ -2.6506],
        [ -6.7087],
        [ -9.1433],
        [  8.3897],
        [ -1.1377],
        [ -0.3294],
        [-13.0641],
        [  7.9195]], grad_fn=<AddmmBackward0>)


In [33]:
target_values = torch.randint(low=-30, high=30, size=(15,))
print(outputs.shape)
print(target_values.shape)

torch.Size([15, 1])
torch.Size([15])


In [35]:
incorrect_mse_loss = (outputs - target_values) ** 2.0
print(incorrect_mse_loss.shape)

torch.Size([15, 15])


In [36]:
outputs = outputs.squeeze()
print(outputs.shape == target_values.shape)
correct_mse_loss = (outputs - target_values) ** 2.0
print(correct_mse_loss.shape)

True
torch.Size([15])


 ### torch.max

one dimentinal array case

In [14]:
one_dimentinal = torch.randn(size=(4,))
one_dimentinal

tensor([-0.8259,  0.1532,  0.4869,  0.6050])

In [15]:
print(torch.max(one_dimentinal))
print(torch.argmax(one_dimentinal))

tensor(0.6050)
tensor(3)


multi dimentional array case

In [16]:
multi_dimentinal = torch.randn(size=(4, 4))
multi_dimentinal

tensor([[-0.1024,  1.7617,  0.3214, -0.4511],
        [ 0.6514,  0.0096,  0.6269,  0.4008],
        [-0.1541,  0.4636, -0.1261, -0.9660],
        [-0.9941, -0.3250, -2.4723, -1.1183]])

In [17]:
print(torch.max(multi_dimentinal))
print(torch.argmax(multi_dimentinal))

tensor(1.7617)
tensor(1)


In [20]:
print(torch.max(multi_dimentinal, dim=1))

torch.return_types.max(
values=tensor([ 1.7617,  0.6514,  0.4636, -0.3250]),
indices=tensor([1, 0, 1, 1]))


### torch.gather
torch.gather is convenient when you extract specific action values from the outputs of Q-Network.

In [9]:
class QNetwork(nn.Module):
    def __init__(self):
        super(QNetwork, self).__init__()
        self.fc1 = nn.Linear(128, 64)
        self.fc2 = nn.Linear(64, 4)
    
    def forward(self, x):
        h = F.relu(self.fc1(x))
        out = self.fc2(h)
        return out

model = QNetwork()
x = torch.randn(size=(10, 128))
out = model(x)
print(out)

tensor([[-0.1670,  0.2223, -0.2218,  0.1082],
        [ 0.3159,  0.0948, -0.0561, -0.2919],
        [ 0.0075,  0.0884,  0.0200, -0.0119],
        [-0.2024,  0.3445,  0.0845, -0.1729],
        [-0.2071,  0.2636, -0.0842, -0.0267],
        [ 0.4055,  0.1637,  0.1946, -0.1843],
        [-0.0638,  0.0570, -0.1194,  0.0264],
        [ 0.1102,  0.1326, -0.0682, -0.3671],
        [-0.2040,  0.4147, -0.2616,  0.0087],
        [ 0.0892, -0.0922, -0.1161,  0.0331]], grad_fn=<AddmmBackward0>)


In [10]:
action_indexs = torch.tensor([0,1,2,0,3,2,3,2,1,0]).unsqueeze(1)
out.gather(1, action_indexs)

tensor([[-0.1670],
        [ 0.0948],
        [ 0.0200],
        [-0.2024],
        [-0.0267],
        [ 0.1946],
        [ 0.0264],
        [-0.0682],
        [ 0.4147],
        [ 0.0892]], grad_fn=<GatherBackward0>)

### scatter_
scatter_ can be used when you create two-hot representations of tensors as R2D2 or MuZero.

In [3]:
x = torch.tensor([5.6, 2.4, 3.7, 4.1, 1.5])

In [6]:
ceil = x.ceil().long().unsqueeze(1)
floor = x.floor().long().unsqueeze(1)
ceil_values = (x.ceil() - x).unsqueeze(1)
floor_values = (x - x.floor()).unsqueeze(1)

In [7]:
y = torch.zeros(size=(5, 8))
y.scatter_(dim=1, index=floor, src=floor_values)
y.scatter_(dim=1, index=ceil, src=ceil_values)

# x = torch.tensor([5.6, 2.4, 3.7, 4.1, 1.5])
print(y)

tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.6000, 0.4000, 0.0000],
        [0.0000, 0.0000, 0.4000, 0.6000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.7000, 0.3000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.1000, 0.9000, 0.0000, 0.0000],
        [0.0000, 0.5000, 0.5000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]])


### torch.index_select

In [7]:
x = torch.randn(3, 4)
print("x = ")
print(x)
indices = torch.tensor([0, 2])
print("indices = [0, 2]")
print("dimention 0:")
print(torch.index_select(x, 0, indices))
print("dimention 1:")
print(torch.index_select(x, 1, indices))

x = 
tensor([[-0.8281, -0.9005, -0.0318, -0.1816],
        [-1.2648, -1.5191, -0.8421,  0.8288],
        [ 0.4185, -1.0238,  1.4596, -0.7858]])
indices = [0, 2]
dimention 0:
tensor([[-0.8281, -0.9005, -0.0318, -0.1816],
        [ 0.4185, -1.0238,  1.4596, -0.7858]])
dimention 1:
tensor([[-0.8281, -0.0318],
        [-1.2648, -0.8421],
        [ 0.4185,  1.4596]])


### Softmax and Categorical

In [2]:
class Policy(nn.Module):
    def __init__(self):
        super(Policy, self).__init__()
        self.fc1 = nn.Linear(5, 32)
        self.fc2 = nn.Linear(32, 4)
    
    def forward(self, x):
        h = self.fc1(x)
        out = self.fc2(h)
        return out

In [3]:
x = torch.randn(size=(10, 5))
x

tensor([[ 6.0372e-01,  8.0985e-01, -1.9476e+00,  2.4229e+00,  1.8329e-01],
        [ 1.3898e+00, -1.4558e-02, -1.2919e+00,  3.1531e-01,  1.4693e+00],
        [ 8.0232e-01, -1.3846e+00,  6.7568e-02, -9.3594e-01, -1.4392e+00],
        [ 9.6074e-01, -1.2411e+00, -4.4081e-05,  4.6432e-01, -4.5036e-01],
        [ 7.9190e-01,  1.5345e-01, -1.1187e+00,  1.4659e+00, -2.1129e+00],
        [ 9.3293e-01,  8.2537e-01,  4.4583e-01,  9.4266e-01,  7.4855e-01],
        [ 5.8167e-01,  1.3324e+00, -2.8116e-01, -5.7951e-01, -1.1179e+00],
        [ 8.5637e-01,  1.2373e+00,  7.8629e-01,  9.4180e-01, -1.0740e+00],
        [ 2.4080e-01,  3.6027e-01, -6.9321e-01,  1.7567e-01,  1.3894e+00],
        [ 3.3829e-01,  5.7435e-01, -1.3709e+00, -1.0826e+00, -1.4085e-01]])

In [4]:
model = Policy()
logits = model(x)
print(logits)

tensor([[ 0.1465,  0.0506,  0.3270,  0.5210],
        [-0.3177, -0.1910,  0.0661,  0.6125],
        [ 0.5380, -0.1261,  0.3877, -0.0265],
        [ 0.3427,  0.1663,  0.2317,  0.2643],
        [ 0.7497,  0.0257,  0.7787,  0.0279],
        [-0.1567,  0.3012, -0.0839,  0.0313],
        [ 0.2256, -0.2465,  0.3482, -0.3967],
        [ 0.2988,  0.3287,  0.2914, -0.4169],
        [-0.2862, -0.0881, -0.2732,  0.3279],
        [ 0.0261, -0.5530,  0.1829,  0.0539]], grad_fn=<AddmmBackward0>)


In [5]:
softmax = F.softmax(logits, dim=1)
print(softmax)

tensor([[0.2193, 0.1992, 0.2626, 0.3189],
        [0.1629, 0.1849, 0.2391, 0.4130],
        [0.3397, 0.1749, 0.2923, 0.1932],
        [0.2734, 0.2292, 0.2447, 0.2528],
        [0.3333, 0.1616, 0.3431, 0.1620],
        [0.2056, 0.3251, 0.2212, 0.2482],
        [0.3039, 0.1895, 0.3435, 0.1631],
        [0.2848, 0.2934, 0.2826, 0.1392],
        [0.1968, 0.2400, 0.1994, 0.3638],
        [0.2661, 0.1491, 0.3112, 0.2736]], grad_fn=<SoftmaxBackward0>)


In [6]:
m = torch.distributions.categorical.Categorical(softmax)
m.sample()

tensor([1, 2, 2, 3, 0, 1, 0, 2, 0, 1])