In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

### about shape

In [2]:
x = torch.tensor(1)
y = torch.tensor([1])
z = torch.tensor([[1]])
print(x.shape)
print(y.shape)
print(z.shape)

torch.Size([])
torch.Size([1])
torch.Size([1, 1])


### torch.cat(tensors, dim=0)
Concatenated tensors must have the same shape. <br>
Tensors whose sizes are [] cannot be concatenated.

In [3]:
x = torch.tensor(1)
y = torch.tensor([1])
z = torch.tensor([[1]])
# print(torch.cat([x, x])) Error
print(torch.cat([y, y]))
print(torch.cat([z, z]))

tensor([1, 1])
tensor([[1],
        [1]])


### squeeze and unsqueeze

You should distinguish between tensors of size [x] and tensors of size [1, x]

In [4]:
class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.fc1 = nn.Linear(10, 64)
        self.fc2 = nn.Linear(64, 4)
    
    def forward(self, x):
        h = F.relu(self.fc1(x))
        out = self.fc2(h)
        return out

model = Network()
x = torch.tensor([1,3,2,4,5,1,2,10,2,3]).float()
y = x.unsqueeze(dim=0)
print(f"x's shape : {x.shape}")
print(f"y's shape : {y.shape}")
x_out = model(x)
y_out = model(y)
print(f"x_out : {x_out}")
print(f"y_out : {y_out}")
print(f"x_out's shape : {x_out.shape}")
print(f"y_out's shape : {y_out.shape}")

x's shape : torch.Size([10])
y's shape : torch.Size([1, 10])
x_out : tensor([ 2.8576, -1.1859, -0.6926, -0.4234], grad_fn=<AddBackward0>)
y_out : tensor([[ 2.8576, -1.1859, -0.6926, -0.4234]], grad_fn=<AddmmBackward0>)
x_out's shape : torch.Size([4])
y_out's shape : torch.Size([1, 4])


Especially you should be careful when comparing model's outputs and their target values.

In [5]:
class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.fc1 = nn.Linear(10, 64)
        self.fc2 = nn.Linear(64, 1)
    
    def forward(self, x):
        h = F.relu(self.fc1(x))
        out = self.fc2(h)
        return out

model = Network()
input_data = torch.randint(low=0, high=100, size=(15, 10)).float()
outputs = model(input_data)
print(outputs)

tensor([[  0.9639],
        [ -9.2171],
        [ -9.1887],
        [  3.0913],
        [-10.7156],
        [  4.7492],
        [ -1.5500],
        [ -7.6851],
        [ -6.7137],
        [ -5.3190],
        [-10.1170],
        [ -3.2073],
        [-12.6269],
        [ -7.7646],
        [  5.4059]], grad_fn=<AddmmBackward0>)


In [6]:
target_values = torch.randint(low=-30, high=30, size=(15,))
print(outputs.shape)
print(target_values.shape)

torch.Size([15, 1])
torch.Size([15])


In [7]:
incorrect_mse_loss = (outputs - target_values) ** 2.0
print(incorrect_mse_loss.shape)

torch.Size([15, 15])


In [8]:
outputs = outputs.squeeze()
print(outputs.shape == target_values.shape)
correct_mse_loss = (outputs - target_values) ** 2.0
print(correct_mse_loss.shape)

True
torch.Size([15])


 ### torch.max

one dimentinal array case

In [9]:
one_dimentinal = torch.randn(size=(4,))
one_dimentinal

tensor([-0.5484, -0.8904,  2.0223,  0.9601])

In [10]:
print(torch.max(one_dimentinal))
print(torch.argmax(one_dimentinal))

tensor(2.0223)
tensor(2)


multi dimentional array case

In [11]:
multi_dimentinal = torch.randn(size=(4, 4))
multi_dimentinal

tensor([[ 0.9755,  0.3382,  0.4434, -0.8260],
        [-0.3339, -0.2542,  1.7370, -0.3511],
        [ 0.6496,  1.3269,  1.4022,  1.4153],
        [-1.9361, -0.6137, -0.0198,  1.3351]])

In [12]:
print(torch.max(multi_dimentinal))
print(torch.argmax(multi_dimentinal))

tensor(1.7370)
tensor(6)


In [13]:
print(torch.max(multi_dimentinal, dim=1))

torch.return_types.max(
values=tensor([0.9755, 1.7370, 1.4153, 1.3351]),
indices=tensor([0, 2, 3, 3]))


### torch.gather
torch.gather is convenient when you extract specific action values from the outputs of Q-Network.

In [14]:
class QNetwork(nn.Module):
    def __init__(self):
        super(QNetwork, self).__init__()
        self.fc1 = nn.Linear(128, 64)
        self.fc2 = nn.Linear(64, 4)
    
    def forward(self, x):
        h = F.relu(self.fc1(x))
        out = self.fc2(h)
        return out

model = QNetwork()
x = torch.randn(size=(10, 128))
out = model(x)
print(out)

tensor([[-0.0962,  0.2107, -0.0853, -0.1119],
        [-0.0509, -0.1319, -0.0826, -0.1039],
        [-0.2024, -0.2447, -0.4339, -0.2535],
        [-0.1886, -0.3505, -0.0699, -0.4098],
        [-0.2471, -0.1916, -0.1524, -0.4220],
        [-0.1830, -0.0300,  0.1245, -0.2025],
        [-0.5126, -0.0266, -0.1706, -0.0860],
        [ 0.1305, -0.3697, -0.2185, -0.5217],
        [-0.0516, -0.2895, -0.0609, -0.3497],
        [-0.0044, -0.2623, -0.3063, -0.6574]], grad_fn=<AddmmBackward0>)


In [15]:
action_indexs = torch.tensor([0,1,2,0,3,2,3,2,1,0]).unsqueeze(1)
out.gather(1, action_indexs)

tensor([[-0.0962],
        [-0.1319],
        [-0.4339],
        [-0.1886],
        [-0.4220],
        [ 0.1245],
        [-0.0860],
        [-0.2185],
        [-0.2895],
        [-0.0044]], grad_fn=<GatherBackward0>)

### scatter_
scatter_ can be used when you create two-hot representations of tensors as R2D2 or MuZero.

In [16]:
x = torch.tensor([5.6, 2.4, 3.7, 4.1, 1.5])

In [17]:
ceil = x.ceil().long().unsqueeze(1)
floor = x.floor().long().unsqueeze(1)
ceil_values = (x.ceil() - x).unsqueeze(1)
floor_values = (x - x.floor()).unsqueeze(1)

In [18]:
y = torch.zeros(size=(5, 8))
y.scatter_(dim=1, index=floor, src=floor_values)
y.scatter_(dim=1, index=ceil, src=ceil_values)

# x = torch.tensor([5.6, 2.4, 3.7, 4.1, 1.5])
print(y)

tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.6000, 0.4000, 0.0000],
        [0.0000, 0.0000, 0.4000, 0.6000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.7000, 0.3000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.1000, 0.9000, 0.0000, 0.0000],
        [0.0000, 0.5000, 0.5000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]])


### torch.index_select

In [19]:
x = torch.randn(3, 4)
print("x = ")
print(x)
indices = torch.tensor([0, 2])
print("indices = [0, 2]")
print("dimention 0:")
print(torch.index_select(x, 0, indices))
print("dimention 1:")
print(torch.index_select(x, 1, indices))

x = 
tensor([[ 1.5279,  0.4968,  0.8722, -0.1914],
        [ 0.6810,  0.9399,  0.1952,  0.8909],
        [-0.7884,  1.4546, -0.5774,  0.9142]])
indices = [0, 2]
dimention 0:
tensor([[ 1.5279,  0.4968,  0.8722, -0.1914],
        [-0.7884,  1.4546, -0.5774,  0.9142]])
dimention 1:
tensor([[ 1.5279,  0.8722],
        [ 0.6810,  0.1952],
        [-0.7884, -0.5774]])


### Softmax and Categorical

In [20]:
class Policy(nn.Module):
    def __init__(self):
        super(Policy, self).__init__()
        self.fc1 = nn.Linear(5, 32)
        self.fc2 = nn.Linear(32, 4)
    
    def forward(self, x):
        h = self.fc1(x)
        out = self.fc2(h)
        return out

In [21]:
x = torch.randn(size=(10, 5))
x

tensor([[-1.2683, -1.1006,  0.1326, -0.4530, -0.7892],
        [ 0.6995, -0.4313,  1.3323, -0.3492, -2.7021],
        [ 0.5192,  0.0177,  1.1358, -0.3235,  0.3427],
        [ 0.5555, -0.5110,  1.8292,  1.5230,  1.4374],
        [ 0.1958,  2.5710,  1.6723, -0.1605,  1.6718],
        [-0.0480, -1.1800, -0.3729,  1.3253,  0.4322],
        [ 1.3606, -0.4978, -0.3857,  0.4112,  0.2754],
        [-1.0308, -0.3386,  0.2789, -0.3737,  1.0994],
        [ 0.4798,  2.0738,  0.1857,  1.4852,  0.4030],
        [ 0.6676, -2.4768,  0.3229,  0.9359,  0.7048]])

In [22]:
model = Policy()
logits = model(x)
print(logits)

tensor([[ 0.3014, -0.2413, -0.0759, -0.1829],
        [ 0.6713, -0.0614,  0.3755, -0.4795],
        [-0.4424, -0.2120,  0.0260, -0.1894],
        [-0.5145, -0.1500, -0.5337, -0.0333],
        [-1.1807, -0.2945,  0.0552, -0.3313],
        [ 0.0864, -0.1423, -0.2924,  0.1772],
        [-0.2198, -0.1294,  0.1410,  0.1377],
        [-0.4826, -0.3182, -0.2228, -0.0543],
        [-0.3224, -0.1292,  0.1182, -0.1112],
        [-0.0462, -0.1348, -0.4220,  0.2235]], grad_fn=<AddmmBackward0>)


In [23]:
softmax = F.softmax(logits, dim=1)
print(softmax)

tensor([[0.3469, 0.2016, 0.2378, 0.2137],
        [0.3936, 0.1891, 0.2928, 0.1245],
        [0.1944, 0.2448, 0.3105, 0.2503],
        [0.1985, 0.2857, 0.1947, 0.3211],
        [0.1086, 0.2635, 0.3738, 0.2540],
        [0.2797, 0.2225, 0.1915, 0.3063],
        [0.2017, 0.2207, 0.2893, 0.2883],
        [0.1996, 0.2353, 0.2588, 0.3063],
        [0.1999, 0.2425, 0.3106, 0.2469],
        [0.2557, 0.2340, 0.1756, 0.3348]], grad_fn=<SoftmaxBackward0>)


In [24]:
m = torch.distributions.categorical.Categorical(softmax)
m.sample()

tensor([1, 2, 0, 3, 3, 3, 1, 3, 2, 3])