In [19]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np

In [44]:
class Net(nn.Module):
    def __init__(self, w, l):
        super(Net, self).__init__()
        self.w = w
        self.l = l
        self.fc_layers = nn.ModuleList()
        for item in self.w:
            s1, s2, d, b = item
            fc = nn.Linear(s1, s2)
            fc.weight.data = d
            #fc.bias.data = b
            self.fc_layers.append(fc)

    def forward(self, x):
        self.print_w()
        for i in range(len(self.fc_layers)-2):
            x = F.relu(self.fc_layers[i](x))
        a = F.softmax(F.relu(self.fc_layers[-2](x)), dim=-1)
        v = F.relu(self.fc_layers[-1](x))
        return a, v

    def print_w(self):
        for item in self.w:
            s1, s2, d, b = item
            print(s1, s2, np.shape(d), np.shape(b))
        print()
        for i, l in enumerate(self.fc_layers):
            d = l.weight.data.detach().numpy()
            print("fc"+str(i)+" weights: ", d.shape)
            b = l.bias.data.detach().numpy()
            print("fc"+str(i)+" biases: ", b.shape)
        print()
        
    def get_w(self):
        w = []
        for fc in self.fc_layers:
            d = fc.weight.data.detach().numpy()
            d = list(np.ravel(d))
            w.extend(d)
            b = fc.bias.data.detach().numpy()
            b = list(np.ravel(b))
            w.extend(b)
        return w

    def set_w(self, w):
        self.w = w
        for i, item in enumerate(self.w):
            s1, s2, d, b = item
            self.fc_layers[i].weight.data = d
            self.fc_layers[i].bias.data = b

In [45]:
hidden_size = [32,64,32]
action_size = 10
state_size = 20
genome_size = 0
genome_size += state_size*hidden_size[0]
genome_size += hidden_size[0]
if len(hidden_size) > 1:
    for i in range(len(hidden_size)):
        if i+1 < len(hidden_size):
            genome_size += hidden_size[i]*hidden_size[i+1]
            bl = max(hidden_size[i], hidden_size[i+1])
            genome_size += bl
genome_size += action_size*hidden_size[-1]
genome_size += hidden_size[-1]
genome_size += hidden_size[-1]
genome_size += 1
print(genome_size)
genome = np.random.uniform(-1, 1, genome_size)

5281


In [46]:
weights = []
m1 = 0
m2 = state_size * hidden_size[0]
m3 = m2 + hidden_size[0]
w = torch.Tensor(np.reshape(genome[m1:m2], (hidden_size[0], state_size)))
b = torch.Tensor(np.reshape(genome[m2:m3], (hidden_size[0])))
weights.append([state_size, hidden_size[0], w, b])
if len(hidden_size) > 1:
    for i in range(len(hidden_size)):
        if i+1 < len(hidden_size):
            m1 = m3
            m2 = m1 + (hidden_size[i] * hidden_size[i+1])
            m3 = m2 + hidden_size[i]
            w = torch.Tensor(np.reshape(genome[m1:m2],
                             (hidden_size[i+1], hidden_size[i])))
            b = torch.Tensor(np.reshape(genome[m2:m3], (hidden_size[i])))
            weights.append([hidden_size[i], hidden_size[i+1], w, b])
m1 = m3
m2 = m1 + action_size*hidden_size[-1]
m3 = m2 + action_size
w = torch.Tensor(np.reshape(genome[m1:m2], (action_size, hidden_size[-1])))
b = torch.Tensor(np.reshape(genome[m2:m3], (action_size)))
weights.append([hidden_size[-1], action_size, w, b])
m1 = m3
m2 = m1 + hidden_size[-1]
m3 = m2 + 1
w = torch.Tensor(np.reshape(genome[m1:m2], (1, hidden_size[-1])))
b = torch.Tensor(np.reshape(genome[m2:m3], (1)))
weights.append([hidden_size[-1], 1, w, b])

In [47]:
model = Net(weights, True)
state = np.random.rand(state_size)
state = torch.FloatTensor(state).unsqueeze(0)
print(state)
a, v = model(state)
print(a, v)

tensor([[0.4146, 0.1531, 0.7824, 0.6306, 0.8297, 0.6776, 0.1704, 0.5467, 0.4947,
         0.2913, 0.0253, 0.3067, 0.5047, 0.1875, 0.6361, 0.1143, 0.6513, 0.4027,
         0.0687, 0.8890]])
20 32 torch.Size([32, 20]) torch.Size([32])
32 64 torch.Size([64, 32]) torch.Size([32])
64 32 torch.Size([32, 64]) torch.Size([64])
32 10 torch.Size([10, 32]) torch.Size([10])
32 1 torch.Size([1, 32]) torch.Size([1])

fc0 weights:  (32, 20)
fc0 biases:  (32,)
fc1 weights:  (64, 32)
fc1 biases:  (32,)
fc2 weights:  (32, 64)
fc2 biases:  (64,)
fc3 weights:  (10, 32)
fc3 biases:  (10,)
fc4 weights:  (1, 32)
fc4 biases:  (1,)



RuntimeError: The expanded size of the tensor (64) must match the existing size (32) at non-singleton dimension 1.  Target sizes: [1, 64].  Tensor sizes: [32]

In [6]:
class Net2(nn.Module):
    def __init__(self):
        super(Net2, self).__init__()
        self.fc1 = nn.Linear(40, 32)
        self.fc2 = nn.Linear(32, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 8)
        self.fc5 = nn.Linear(32, 1)
 
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))

        a = F.softmax(F.relu(self.fc4(x)), dim=-1)
        v = F.relu(self.fc5(x))
        return a, v

    def get_w(self):
        layers = [self.fc1, self.fc2, self.fc3, self.fc4, self.fc5]
        for i, l in enumerate(layers):
            d = l.weight.data.detach().numpy()
            print("fc"+str(i)+" weights: ", d.shape)
            b = l.bias.data.detach().numpy()
            print("fc"+str(i)+" biases: ", b.shape)
            print()

model2 = Net2()
_ = model2.get_w()

fc0 weights:  (32, 40)
fc0 biases:  (32,)

fc1 weights:  (64, 32)
fc1 biases:  (64,)

fc2 weights:  (32, 64)
fc2 biases:  (32,)

fc3 weights:  (8, 32)
fc3 biases:  (8,)

fc4 weights:  (1, 32)
fc4 biases:  (1,)

