In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np

In [44]:
class Net(nn.Module):
    def __init__(self, w, l):
        super(Net, self).__init__()
        self.w = w
        self.l = l
        self.fc_layers = nn.ModuleList()
        for item in self.w:
            s1, s2, d, b = item
            fc = nn.Linear(s1, s2)
            fc.weight.data = d
            #fc.bias.data = b
            self.fc_layers.append(fc)

    def forward(self, x):
        self.print_w()
        for i in range(len(self.fc_layers)-2):
            x = F.relu(self.fc_layers[i](x))
        a = F.softmax(F.relu(self.fc_layers[-2](x)), dim=-1)
        v = F.relu(self.fc_layers[-1](x))
        return a, v

    def print_w(self):
        for item in self.w:
            s1, s2, d, b = item
            print(s1, s2, np.shape(d), np.shape(b))
        print()
        for i, l in enumerate(self.fc_layers):
            d = l.weight.data.detach().numpy()
            print("fc"+str(i)+" weights: ", d.shape)
            b = l.bias.data.detach().numpy()
            print("fc"+str(i)+" biases: ", b.shape)
        print()
        
    def get_w(self):
        w = []
        for fc in self.fc_layers:
            d = fc.weight.data.detach().numpy()
            d = list(np.ravel(d))
            w.extend(d)
            b = fc.bias.data.detach().numpy()
            b = list(np.ravel(b))
            w.extend(b)
        return w

    def set_w(self, w):
        self.w = w
        for i, item in enumerate(self.w):
            s1, s2, d, b = item
            self.fc_layers[i].weight.data = d
            self.fc_layers[i].bias.data = b

In [45]:
hidden_size = [32,64,32]
action_size = 10
state_size = 20
genome_size = 0
genome_size += state_size*hidden_size[0]
genome_size += hidden_size[0]
if len(hidden_size) > 1:
    for i in range(len(hidden_size)):
        if i+1 < len(hidden_size):
            genome_size += hidden_size[i]*hidden_size[i+1]
            bl = max(hidden_size[i], hidden_size[i+1])
            genome_size += bl
genome_size += action_size*hidden_size[-1]
genome_size += hidden_size[-1]
genome_size += hidden_size[-1]
genome_size += 1
print(genome_size)
genome = np.random.uniform(-1, 1, genome_size)

5281


In [46]:
weights = []
m1 = 0
m2 = state_size * hidden_size[0]
m3 = m2 + hidden_size[0]
w = torch.Tensor(np.reshape(genome[m1:m2], (hidden_size[0], state_size)))
b = torch.Tensor(np.reshape(genome[m2:m3], (hidden_size[0])))
weights.append([state_size, hidden_size[0], w, b])
if len(hidden_size) > 1:
    for i in range(len(hidden_size)):
        if i+1 < len(hidden_size):
            m1 = m3
            m2 = m1 + (hidden_size[i] * hidden_size[i+1])
            m3 = m2 + hidden_size[i]
            w = torch.Tensor(np.reshape(genome[m1:m2],
                             (hidden_size[i+1], hidden_size[i])))
            b = torch.Tensor(np.reshape(genome[m2:m3], (hidden_size[i])))
            weights.append([hidden_size[i], hidden_size[i+1], w, b])
m1 = m3
m2 = m1 + action_size*hidden_size[-1]
m3 = m2 + action_size
w = torch.Tensor(np.reshape(genome[m1:m2], (action_size, hidden_size[-1])))
b = torch.Tensor(np.reshape(genome[m2:m3], (action_size)))
weights.append([hidden_size[-1], action_size, w, b])
m1 = m3
m2 = m1 + hidden_size[-1]
m3 = m2 + 1
w = torch.Tensor(np.reshape(genome[m1:m2], (1, hidden_size[-1])))
b = torch.Tensor(np.reshape(genome[m2:m3], (1)))
weights.append([hidden_size[-1], 1, w, b])

In [47]:
model = Net(weights, True)
state = np.random.rand(state_size)
state = torch.FloatTensor(state).unsqueeze(0)
print(state)
a, v = model(state)
print(a, v)

tensor([[0.4146, 0.1531, 0.7824, 0.6306, 0.8297, 0.6776, 0.1704, 0.5467, 0.4947,
         0.2913, 0.0253, 0.3067, 0.5047, 0.1875, 0.6361, 0.1143, 0.6513, 0.4027,
         0.0687, 0.8890]])
20 32 torch.Size([32, 20]) torch.Size([32])
32 64 torch.Size([64, 32]) torch.Size([32])
64 32 torch.Size([32, 64]) torch.Size([64])
32 10 torch.Size([10, 32]) torch.Size([10])
32 1 torch.Size([1, 32]) torch.Size([1])

fc0 weights:  (32, 20)
fc0 biases:  (32,)
fc1 weights:  (64, 32)
fc1 biases:  (32,)
fc2 weights:  (32, 64)
fc2 biases:  (64,)
fc3 weights:  (10, 32)
fc3 biases:  (10,)
fc4 weights:  (1, 32)
fc4 biases:  (1,)



RuntimeError: The expanded size of the tensor (64) must match the existing size (32) at non-singleton dimension 1.  Target sizes: [1, 64].  Tensor sizes: [32]

In [6]:
class Net2(nn.Module):
    def __init__(self):
        super(Net2, self).__init__()
        self.fc1 = nn.Linear(40, 32)
        self.fc2 = nn.Linear(32, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 8)
        self.fc5 = nn.Linear(32, 1)
 
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))

        a = F.softmax(F.relu(self.fc4(x)), dim=-1)
        v = F.relu(self.fc5(x))
        return a, v

    def get_w(self):
        layers = [self.fc1, self.fc2, self.fc3, self.fc4, self.fc5]
        for i, l in enumerate(layers):
            d = l.weight.data.detach().numpy()
            print("fc"+str(i)+" weights: ", d.shape)
            b = l.bias.data.detach().numpy()
            print("fc"+str(i)+" biases: ", b.shape)
            print()

model2 = Net2()
_ = model2.get_w()

fc0 weights:  (32, 40)
fc0 biases:  (32,)

fc1 weights:  (64, 32)
fc1 biases:  (64,)

fc2 weights:  (32, 64)
fc2 biases:  (32,)

fc3 weights:  (8, 32)
fc3 biases:  (8,)

fc4 weights:  (1, 32)
fc4 biases:  (1,)



In [28]:
class Net3(nn.Module):
    def __init__(self):
        super(Net3, self).__init__()
        self.block = [[4, 8], [5, 8], [4, 8]]
        self.num_actions = 4
        self.num_blocks = len(self.block)
        self.inps = [x[0] for x in self.block]
        self.out_cat = self.num_blocks * self.num_actions
        self.blocks = {}
        for index in range(self.num_blocks):
            self.blocks[index] = nn.ModuleList()
            fc = nn.Linear(self.block[index][0], self.block[index][1])
            self.blocks[index].append(fc)
            fc = nn.Linear(self.block[index][-1], self.num_actions)
            self.blocks[index].append(fc)
        self.action = nn.Linear(self.out_cat, self.num_actions)
        self.value = nn.Linear(self.out_cat, 1)
 
    def forward(self, x):
        print(x)
        block_out = torch.empty((self.num_blocks, self.num_actions))
        current_index = 0
        for index in range(len(self.blocks)):
            print(current_index)
            i = x[0, current_index:current_index+self.inps[index]]
            print(i)
            a = F.relu(self.blocks[index][0](i))
            a = F.relu(self.blocks[index][1](a))
            block_out[index] = a
            current_index = current_index+self.inps[index]
        rc = torch.ravel(torch.tensor(block_out))
        a = F.softmax(F.relu(self.action(rc)), dim=-1)
        v = F.relu(self.value(rc))
        return a, v

    def get_w(self):
        for index in range(self.num_blocks):
            print("Block: " + str(index))
            d = self.blocks[index][0].weight.data.detach().numpy()
            print("fc0 weights: ", d.shape)
            b = self.blocks[index][0].bias.data.detach().numpy()
            print("fc0 biases: ", b.shape)
            d = self.blocks[index][1].weight.detach().numpy()
            print("fc1 weights: ", d.shape)
            b = self.blocks[index][1].bias.data.detach().numpy()
            print("fc1 biases: ", b.shape)
        d = self.action.weight.data.detach().numpy()
        print("action weights: ", d.shape)
        b = self.action.bias.data.detach().numpy()
        print("action biases: ", b.shape)
        d = self.value.weight.data.detach().numpy()
        print("value weights: ", d.shape)
        b = self.value.bias.data.detach().numpy()
        print("value biases: ", b.shape)
        print()

model3 = Net3()
_ = model3.get_w()
state = np.random.rand(13)
state = torch.FloatTensor(state).unsqueeze(0)
a, v = model3(state)
print(a, v)

Block: 0
fc0 weights:  (8, 4)
fc0 biases:  (8,)
fc1 weights:  (4, 8)
fc1 biases:  (4,)
Block: 1
fc0 weights:  (8, 5)
fc0 biases:  (8,)
fc1 weights:  (4, 8)
fc1 biases:  (4,)
Block: 2
fc0 weights:  (8, 4)
fc0 biases:  (8,)
fc1 weights:  (4, 8)
fc1 biases:  (4,)
action weights:  (4, 12)
action biases:  (4,)
value weights:  (1, 12)
value biases:  (1,)

tensor([[0.3013, 0.9713, 0.1598, 0.9770, 0.9258, 0.6427, 0.0125, 0.4924, 0.2813,
         0.0841, 0.4329, 0.4911, 0.3229]])
0
tensor([0.3013, 0.9713, 0.1598, 0.9770])
4
tensor([0.9258, 0.6427, 0.0125, 0.4924, 0.2813])
9
tensor([0.0841, 0.4329, 0.4911, 0.3229])
tensor([0.2360, 0.2114, 0.2374, 0.3153], grad_fn=<SoftmaxBackward>) tensor([0.], grad_fn=<ReluBackward0>)


  rc = torch.ravel(torch.tensor(block_out))


In [122]:
import random

genome_size = []
net_desc = [[4, 8, 4], [5, 8, 4], [4, 8, 4], [6, 16, 4]]
action_size = 4 # final output layer
state_size = sum([x[0] for x in net_desc])
out_cat = sum([x[-1] for x in net_desc])
for item in net_desc:
    gs = 0
    for i in range(len(item)-1):
        gs += item[i] * item[i+1]
    genome_size.append(gs)
action_head = out_cat*action_size
genome_size.append(action_head)
net_desc.append([out_cat, action_size])
value_head = out_cat*1
genome_size.append(value_head)
net_desc.append([out_cat, 1])
state = []
for item in genome_size:
    state.append(np.random.uniform(-1, 1, item))
state = np.array(state)
print(genome_size)
print(sum(genome_size))

[64, 72, 64, 160, 64, 16]
440


In [123]:
weights = []
for index, item in enumerate(state):
    entry = []
    layer_desc = net_desc[index]
    if len(layer_desc) > 2:
        s1, s2, o = layer_desc
        w = torch.Tensor(np.reshape(item[0:s1*s2], (s2, s1)))
        entry.append([s1, s2, w])
        w = torch.Tensor(np.reshape(item[s1*s2:], (o, s2)))
        entry.append([s2, o, w])
    else:
        s1, o = layer_desc
        w = torch.Tensor(np.reshape(item, (o, s1)))
        entry.append([s1, o, w])
    weights.append(entry)
for index, entry in enumerate(weights):
    print("Entry:", index)
    for e in entry:
        print(e[0], e[1], e[2].shape)

Entry: 0
4 8 torch.Size([8, 4])
8 4 torch.Size([4, 8])
Entry: 1
5 8 torch.Size([8, 5])
8 4 torch.Size([4, 8])
Entry: 2
4 8 torch.Size([8, 4])
8 4 torch.Size([4, 8])
Entry: 3
6 16 torch.Size([16, 6])
16 4 torch.Size([4, 16])
Entry: 4
16 4 torch.Size([4, 16])
Entry: 5
16 1 torch.Size([1, 16])


In [129]:
class Net4(nn.Module):
    def __init__(self, weights):
        super(Net4, self).__init__()
        self.weights = weights
        self.block = []
        for item in weights:
            if len(item) > 1:
                self.block.append([item[0][0], item[0][1], item[1][1]])
        print("blocks", self.block)
        self.num_actions = self.weights[-2][0][1]
        print("actions", self.num_actions)
        self.num_blocks = len(self.block)
        print("num blocks", self.num_blocks)
        self.inps = [x[0] for x in self.block]
        print("inps", self.inps)
        self.out_cat = sum([x[-1] for x in self.block])
        print(self.out_cat)
        self.blocks = {}
        for index in range(self.num_blocks):
            weights1 = self.weights[index][0][2]
            weights2 = self.weights[index][1][2]
            self.blocks[index] = nn.ModuleList()
            fc = nn.Linear(self.block[index][0], self.block[index][1])
            fc.weight.data = weights1
            self.blocks[index].append(fc)
            fc = nn.Linear(self.block[index][1], self.block[index][2])
            fc.weight.data = weights2
            self.blocks[index].append(fc)
        self.action = nn.Linear(self.out_cat, self.num_actions)
        self.value = nn.Linear(self.out_cat, 1)
 
    def forward(self, x):
        print(x)
        block_out = torch.empty((self.num_blocks, self.num_actions))
        current_index = 0
        for index in range(len(self.blocks)):
            print(current_index)
            i = x[0, current_index:current_index+self.inps[index]]
            print(i)
            a = F.relu(self.blocks[index][0](i))
            a = F.relu(self.blocks[index][1](a))
            block_out[index] = a
            current_index = current_index+self.inps[index]
        rc = torch.ravel(torch.tensor(block_out))
        a = F.softmax(F.relu(self.action(rc)), dim=-1)
        v = F.relu(self.value(rc))
        return a, v

    def get_w(self):
        for index in range(self.num_blocks):
            print("Block: " + str(index))
            d = self.blocks[index][0].weight.data.detach().numpy()
            print("fc0 weights: ", d.shape)
            b = self.blocks[index][0].bias.data.detach().numpy()
            print("fc0 biases: ", b.shape)
            d = self.blocks[index][1].weight.detach().numpy()
            print("fc1 weights: ", d.shape)
            b = self.blocks[index][1].bias.data.detach().numpy()
            print("fc1 biases: ", b.shape)
        d = self.action.weight.data.detach().numpy()
        print("action weights: ", d.shape)
        b = self.action.bias.data.detach().numpy()
        print("action biases: ", b.shape)
        d = self.value.weight.data.detach().numpy()
        print("value weights: ", d.shape)
        b = self.value.bias.data.detach().numpy()
        print("value biases: ", b.shape)
        print()

model4 = Net4(weights)
_ = model4.get_w()
state = np.random.rand(4 + 5 + 4 + 6)
state = torch.FloatTensor(state).unsqueeze(0)
print(state)
a, v = model4(state)
print(a, v)

blocks [[4, 8, 4], [5, 8, 4], [4, 8, 4], [6, 16, 4]]
actions 4
num blocks 4
inps [4, 5, 4, 6]
16
Block: 0
fc0 weights:  (8, 4)
fc0 biases:  (8,)
fc1 weights:  (4, 8)
fc1 biases:  (4,)
Block: 1
fc0 weights:  (8, 5)
fc0 biases:  (8,)
fc1 weights:  (4, 8)
fc1 biases:  (4,)
Block: 2
fc0 weights:  (8, 4)
fc0 biases:  (8,)
fc1 weights:  (4, 8)
fc1 biases:  (4,)
Block: 3
fc0 weights:  (16, 6)
fc0 biases:  (16,)
fc1 weights:  (4, 16)
fc1 biases:  (4,)
action weights:  (4, 16)
action biases:  (4,)
value weights:  (1, 16)
value biases:  (1,)

tensor([[0.2772, 0.2074, 0.1915, 0.8742, 0.3553, 0.8071, 0.9260, 0.4212, 0.7631,
         0.3541, 0.1544, 0.2500, 0.6699, 0.1226, 0.7051, 0.3138, 0.4757, 0.5638,
         0.1946]])
tensor([[0.2772, 0.2074, 0.1915, 0.8742, 0.3553, 0.8071, 0.9260, 0.4212, 0.7631,
         0.3541, 0.1544, 0.2500, 0.6699, 0.1226, 0.7051, 0.3138, 0.4757, 0.5638,
         0.1946]])
0
tensor([0.2772, 0.2074, 0.1915, 0.8742])
4
tensor([0.3553, 0.8071, 0.9260, 0.4212, 0.7631])
9
ten

  rc = torch.ravel(torch.tensor(block_out))
