In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np

In [None]:
class Net(nn.Module):
    """Fully connected two-headed network whose weights come from a list.

    ``w`` is a list with one ``[s1, s2, d, b]`` item per layer: ``s1``/``s2``
    are the input/output sizes passed to ``nn.Linear``, ``d`` is the weight
    tensor and ``b`` the bias tensor.  The last two items are the heads; every
    item before them forms the shared trunk.
    """

    def __init__(self, w, l):
        """Build one ``nn.Linear`` per item of ``w`` and install its weights.

        Args:
            w: list of ``[s1, s2, d, b]`` layer descriptions (see class doc).
            l: stored on the instance as ``self.l``; not read by this class.

        Raises:
            ValueError: if a weight tensor is not shaped ``(s2, s1)``.
        """
        super(Net, self).__init__()
        self.w = w
        self.l = l
        self.fc_layers = nn.ModuleList()
        for item in self.w:
            s1, s2, d, b = item
            fc = nn.Linear(s1, s2)
            # Assigning ``.data`` accepts any shape, so check it up front
            # instead of failing later inside forward().
            self._check_shape("weight", d, fc.weight)
            fc.weight.data = d
            # The bias ``b`` is not loaded here; the layer keeps the bias
            # nn.Linear initialised it with.  set_w() does install biases.
            self.fc_layers.append(fc)

    @staticmethod
    def _check_shape(kind, tensor, param):
        """Raise ValueError when ``tensor`` cannot replace ``param`` as-is."""
        if tuple(tensor.shape) != tuple(param.shape):
            raise ValueError(
                "%s shape %s does not match layer shape %s"
                % (kind, tuple(tensor.shape), tuple(param.shape)))

    def forward(self, x):
        """Return ``(a, v)`` for input ``x``.

        Trunk layers are applied in order with ReLU.  ``a`` is the softmax
        (over the last dim) of the ReLU of the second-to-last layer and ``v``
        the ReLU of the last layer; both heads read the same trunk output.
        """
        # Prints the layer shapes on every call.
        self.print_w()
        for i in range(len(self.fc_layers)-2):
            x = F.relu(self.fc_layers[i](x))
        a = F.softmax(F.relu(self.fc_layers[-2](x)), dim=-1)
        v = F.relu(self.fc_layers[-1](x))
        return a, v

    def print_w(self):
        """Print the shapes recorded in ``self.w`` and those of the live layers."""
        for item in self.w:
            s1, s2, d, b = item
            print(s1, s2, np.shape(d), np.shape(b))
        print()
        for i, layer in enumerate(self.fc_layers):
            d = layer.weight.data.detach().numpy()
            print("fc"+str(i)+" weights: ", d.shape)
            b = layer.bias.data.detach().numpy()
            print("fc"+str(i)+" biases: ", b.shape)
        print()

    def get_w(self):
        """Return all parameters as one flat list: per layer, weights then biases."""
        w = []
        for fc in self.fc_layers:
            d = fc.weight.data.detach().numpy()
            d = list(np.ravel(d))
            w.extend(d)
            b = fc.bias.data.detach().numpy()
            b = list(np.ravel(b))
            w.extend(b)
        return w

    def set_w(self, w):
        """Install weights and biases from ``w`` into the existing layers.

        Every item is validated before any layer is touched, so a rejected
        ``w`` leaves the network unchanged.  A ``w`` shorter than the layer
        list updates only the leading layers.

        Raises:
            ValueError: if a weight or bias shape does not match its layer.
            IndexError: if ``w`` has more items than there are layers.
        """
        checked = []
        for i, item in enumerate(w):
            s1, s2, d, b = item
            fc = self.fc_layers[i]
            self._check_shape("weight", d, fc.weight)
            self._check_shape("bias", b, fc.bias)
            checked.append((fc, d, b))
        self.w = w
        for fc, d, b in checked:
            fc.weight.data = d
            fc.bias.data = b

In [None]:
# Size of the flat genome for the network
#   state -> hidden_size[0] -> ... -> hidden_size[-1] -> {action head, value head}
# A Linear(n_in, n_out) layer has n_in*n_out weights and n_out biases, i.e.
# one bias per *output* unit.
hidden_size = [32,64,32]
action_size = 10
state_size = 20

trunk_dims = [state_size] + hidden_size
genome_size = 0
for n_in, n_out in zip(trunk_dims[:-1], trunk_dims[1:]):
    genome_size += n_in * n_out + n_out
# Action head: hidden_size[-1] -> action_size
genome_size += hidden_size[-1] * action_size + action_size
# Value head: hidden_size[-1] -> 1
genome_size += hidden_size[-1] + 1
print(genome_size)
genome = np.random.uniform(-1, 1, genome_size)

In [None]:
# Slice the flat genome into per-layer [n_in, n_out, weight, bias] entries.
# Layers are read in order: the trunk (state -> hidden... ), then the action
# head, then the value head.  For each layer the weights come first, reshaped
# to (n_out, n_in) as nn.Linear stores them, followed by n_out biases.
weights = []
layer_dims = list(zip([state_size] + hidden_size[:-1], hidden_size))
layer_dims.append((hidden_size[-1], action_size))
layer_dims.append((hidden_size[-1], 1))

offset = 0
for n_in, n_out in layer_dims:
    w_end = offset + n_in * n_out
    b_end = w_end + n_out
    if b_end > len(genome):
        # Slicing past the end would silently truncate and only fail in reshape.
        raise ValueError("genome has %d values but at least %d are needed"
                         % (len(genome), b_end))
    w = torch.Tensor(np.reshape(genome[offset:w_end], (n_out, n_in)))
    # The bias has one value per output unit of the layer.
    b = torch.Tensor(np.reshape(genome[w_end:b_end], (n_out,)))
    weights.append([n_in, n_out, w, b])
    offset = b_end

In [None]:
# Run the genome-backed network once on a random state of length state_size
# (batch of one) and show the input and both outputs.
model = Net(weights, True)
state = torch.FloatTensor(np.random.rand(state_size)).unsqueeze(0)
print(state)
a, v = model(state)
print(a, v)

In [None]:
class Net2(nn.Module):
    """Fixed-size network: 40 -> 32 -> 64 -> 32 trunk with two heads.

    ``fc4`` maps the trunk output to 8 values and ``fc5`` maps it to 1 value.
    """

    def __init__(self):
        super().__init__()
        # Trunk
        self.fc1 = nn.Linear(40, 32)
        self.fc2 = nn.Linear(32, 64)
        self.fc3 = nn.Linear(64, 32)
        # Heads, both fed by the fc3 output
        self.fc4 = nn.Linear(32, 8)
        self.fc5 = nn.Linear(32, 1)

    def forward(self, x):
        """Return ``(a, v)``: softmax(ReLU(fc4)) and ReLU(fc5) of the trunk output."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = F.relu(layer(hidden))

        a = F.softmax(F.relu(self.fc4(hidden)), dim=-1)
        v = F.relu(self.fc5(hidden))
        return a, v

    def get_w(self):
        """Print the weight and bias shapes of fc1..fc5 (labelled fc0..fc4)."""
        for i, name in enumerate(("fc1", "fc2", "fc3", "fc4", "fc5")):
            layer = getattr(self, name)
            label = "fc"+str(i)
            weight_shape = layer.weight.data.detach().numpy().shape
            print(label+" weights: ", weight_shape)
            bias_shape = layer.bias.data.detach().numpy().shape
            print(label+" biases: ", bias_shape)
            print()

# Instantiate Net2 and print the shapes of its layers; get_w() only prints
# and returns None, so the result is discarded.
model2 = Net2()
_ = model2.get_w()

In [None]:
class Net3(nn.Module):
    """Network made of independent input blocks feeding two shared heads.

    ``self.block`` lists ``[input_size, hidden_size]`` per block.  Each block
    is a pair of linear layers (input -> hidden -> ``num_actions``) applied to
    its own slice of the input.  The block outputs are concatenated and passed
    to the ``action`` and ``value`` heads.
    """

    def __init__(self):
        super(Net3, self).__init__()
        self.block = [[4, 8], [5, 8], [4, 8]]
        self.num_actions = 4
        self.num_blocks = len(self.block)
        self.inps = [x[0] for x in self.block]
        self.out_cat = self.num_blocks * self.num_actions
        # A ModuleList (rather than a plain dict) registers the block layers as
        # submodules, so they appear in parameters()/state_dict() and follow
        # .to()/.eval().  Indexing with self.blocks[index][k] works as before.
        self.blocks = nn.ModuleList()
        for index in range(self.num_blocks):
            pair = nn.ModuleList()
            pair.append(nn.Linear(self.block[index][0], self.block[index][1]))
            pair.append(nn.Linear(self.block[index][-1], self.num_actions))
            self.blocks.append(pair)
        self.action = nn.Linear(self.out_cat, self.num_actions)
        self.value = nn.Linear(self.out_cat, 1)

    def forward(self, x):
        """Return ``(a, v)`` for a single observation.

        Only row 0 of ``x`` is read; it is split into consecutive slices of
        ``self.inps`` sizes, one per block.  ``a`` is softmax(ReLU(action))
        and ``v`` is ReLU(value) of the concatenated block outputs.
        """
        print(x)
        block_out = []
        current_index = 0
        for index in range(len(self.blocks)):
            print(current_index)
            i = x[0, current_index:current_index+self.inps[index]]
            print(i)
            a = F.relu(self.blocks[index][0](i))
            a = F.relu(self.blocks[index][1](a))
            block_out.append(a)
            current_index = current_index+self.inps[index]
        # Concatenating the outputs directly gives the same values as filling
        # a (num_blocks, num_actions) buffer and flattening it, without
        # copy-constructing a tensor from a tensor (which warns and detaches
        # the blocks from the autograd graph).
        rc = torch.cat(block_out)
        a = F.softmax(F.relu(self.action(rc)), dim=-1)
        v = F.relu(self.value(rc))
        return a, v

    def get_w(self):
        """Print the weight and bias shapes of every block layer and both heads."""
        for index in range(self.num_blocks):
            print("Block: " + str(index))
            d = self.blocks[index][0].weight.data.detach().numpy()
            print("fc0 weights: ", d.shape)
            b = self.blocks[index][0].bias.data.detach().numpy()
            print("fc0 biases: ", b.shape)
            d = self.blocks[index][1].weight.detach().numpy()
            print("fc1 weights: ", d.shape)
            b = self.blocks[index][1].bias.data.detach().numpy()
            print("fc1 biases: ", b.shape)
        d = self.action.weight.data.detach().numpy()
        print("action weights: ", d.shape)
        b = self.action.bias.data.detach().numpy()
        print("action biases: ", b.shape)
        d = self.value.weight.data.detach().numpy()
        print("value weights: ", d.shape)
        b = self.value.bias.data.detach().numpy()
        print("value biases: ", b.shape)
        print()

# Build Net3, print its layer shapes, then run it once on a random
# 13-value state (batch of one) and show both outputs.
model3 = Net3()
_ = model3.get_w()
state = torch.FloatTensor(np.random.rand(13)).unsqueeze(0)
a, v = model3(state)
print(a, v)

In [None]:
import random
# Describe a block-structured network and draw one random weight chunk per
# sub-network.  Biases are not part of these chunks: only weight counts are
# summed below.
action_size = 5 # final output layer
net_desc = [[4, 8], [5, 10], [4, 8]]
# Every input block ends in an action_size-wide output layer.
for index in range(len(net_desc)):
    net_desc[index].append(action_size)
state_size = sum([x[0] for x in net_desc])
out_cat = sum([x[-1] for x in net_desc])
out_hidden = int(out_cat*0.5)

# Weight count per input block: sum of n_in*n_out over consecutive layer sizes.
genome_size = [
    sum(n_in * n_out for n_in, n_out in zip(item[:-1], item[1:]))
    for item in net_desc
]

# Shared layers after concatenation: cat -> hidden, hidden -> actions, hidden -> value.
cat_hidden = out_cat*out_hidden
genome_size.append(cat_hidden)
action_head = out_hidden*action_size
genome_size.append(action_head)
net_desc.append([out_cat, out_hidden])
net_desc.append([out_hidden, action_size])
value_head = out_hidden*1
genome_size.append(value_head)
net_desc.append([out_hidden, 1])
print(net_desc)

# NOTE(review): randint's upper bound is exclusive, so every value drawn here
# is -1 or 0 -- confirm that is intended.
chunks = [np.random.randint(-1, 1, item) for item in genome_size]
# The chunks have different lengths.  np.array() on such a ragged list raises
# ValueError on NumPy >= 1.24, so fill an object array element by element.
state = np.empty(len(chunks), dtype=object)
for slot, chunk in enumerate(chunks):
    state[slot] = chunk
print(genome_size)
print(sum(genome_size))

In [None]:
# Turn each genome chunk in `state` into weight tensors, guided by the matching
# entry of `net_desc`.  A three-number description [s1, s2, o] yields two
# layers (s1 -> s2 -> o); a two-number description [s1, o] yields one layer.
# Tensors are shaped (n_out, n_in).
weights = []
for index, item in enumerate(state):
    layer_desc = net_desc[index]
    if len(layer_desc) <= 2:
        s1, o = layer_desc
        entry = [[s1, o, torch.Tensor(np.reshape(item, (o, s1)))]]
    else:
        s1, s2, o = layer_desc
        split = s1*s2  # first s1*s2 values belong to the first layer
        first = torch.Tensor(np.reshape(item[:split], (s2, s1)))
        second = torch.Tensor(np.reshape(item[split:], (o, s2)))
        entry = [[s1, s2, first], [s2, o, second]]
    weights.append(entry)

# Summarise what was decoded: sizes and tensor shape of every layer.
for index, entry in enumerate(weights):
    print("Entry:", index)
    for e in entry:
        print(*e[:2], e[2].shape)

In [None]:
class Net4(nn.Module):
    """Block-structured network whose weights are supplied by the caller.

    ``weights`` is a list of entries.  An entry with two layers
    ``[[s1, s2, w], [s2, o, w]]`` describes one input block; the last three
    entries each hold a single ``[n_in, n_out, w]`` layer and are, in order,
    the ``cat_hidden``, ``action`` and ``value`` layers.  Only weight tensors
    are installed; biases keep the values nn.Linear initialised them with.
    """

    def __init__(self, weights):
        super(Net4, self).__init__()
        self.weights = weights
        self.block = []
        for item in weights:
            if len(item) > 1:
                self.block.append([item[0][0], item[0][1], item[1][1]])
        print("blocks", self.block)
        self.num_actions = self.weights[-2][0][1]
        print("actions", self.num_actions)
        self.num_blocks = len(self.block)
        print("num blocks", self.num_blocks)
        self.inps = [x[0] for x in self.block]
        print("inps", self.inps)
        self.out_cat = sum([x[-1] for x in self.block])
        print('out_cat', self.out_cat)
        self.out_hidden = self.weights[-3][0][1]
        print('out_hidden', self.out_hidden)
        # A ModuleList (rather than a plain dict) registers the block layers as
        # submodules, so they appear in parameters()/state_dict() and follow
        # .to()/.eval().  Indexing with self.blocks[index][k] works as before.
        self.blocks = nn.ModuleList()
        for index in range(self.num_blocks):
            weights1 = self.weights[index][0][2]
            weights2 = self.weights[index][1][2]
            pair = nn.ModuleList()
            fc = nn.Linear(self.block[index][0], self.block[index][1])
            fc.weight.data = weights1
            pair.append(fc)
            fc = nn.Linear(self.block[index][1], self.block[index][2])
            fc.weight.data = weights2
            pair.append(fc)
            self.blocks.append(pair)
        self.cat_hidden = nn.Linear(self.out_cat, self.out_hidden)
        self.cat_hidden.weight.data = self.weights[-3][0][2]
        self.action = nn.Linear(self.out_hidden, self.num_actions)
        self.action.weight.data = self.weights[-2][0][2]
        self.value = nn.Linear(self.out_hidden, 1)
        self.value.weight.data = self.weights[-1][0][2]

    def forward(self, x):
        """Return ``(a, v)`` for a single observation.

        Only row 0 of ``x`` is read; it is split into consecutive slices of
        ``self.inps`` sizes, one per block.  The concatenated block outputs go
        through ``cat_hidden`` (ReLU); ``a`` is softmax(ReLU(action)) and
        ``v`` is ReLU(value) of that hidden vector.
        """
        print(x)
        block_out = []
        current_index = 0
        for index in range(len(self.blocks)):
            print(current_index)
            i = x[0, current_index:current_index+self.inps[index]]
            print(i)
            a = F.relu(self.blocks[index][0](i))
            a = F.relu(self.blocks[index][1](a))
            block_out.append(a)
            current_index = current_index+self.inps[index]
        # Concatenating the outputs directly gives the same values as filling
        # a buffer and flattening it, without copy-constructing a tensor from
        # a tensor (which warns and detaches the blocks from autograd).
        rc = torch.cat(block_out)
        rc = F.relu(self.cat_hidden(rc))
        a = F.softmax(F.relu(self.action(rc)), dim=-1)
        v = F.relu(self.value(rc))
        return a, v

    def get_param_count(self, item):
        """Return the number of elements in ``item`` (product of ``item.shape``)."""
        count = 1
        for c in item.shape:
            count = count * c
        return count

    def get_w(self):
        """Print layer shapes and return the weights as a list of flat arrays.

        Returns:
            list of 1-D numpy arrays: one per block (first-layer weights
            followed by second-layer weights), then ``cat_hidden``, ``action``
            and ``value`` weights.  Biases are printed but not included.
        """
        total_params = 0
        genome = []
        for index in range(self.num_blocks):
            entry = []
            print("Block: " + str(index))
            d1 = self.blocks[index][0].weight.data.detach().numpy()
            print("fc0 weights: ", d1.shape)
            total_params += self.get_param_count(d1)
            d1 = np.ravel(d1)
            entry.extend(list(d1))
            b1 = self.blocks[index][0].bias.data.detach().numpy()
            print("fc0 biases: ", b1.shape)
            d2 = self.blocks[index][1].weight.detach().numpy()
            print("fc1 weights: ", d2.shape)
            d2 = np.ravel(d2)
            entry.extend(list(d2))
            total_params += self.get_param_count(d2)
            b2 = self.blocks[index][1].bias.data.detach().numpy()
            print("fc1 biases: ", b2.shape)
            entry = np.ravel(entry)
            genome.append(entry)
        dc = self.cat_hidden.weight.data.detach().numpy()
        genome.append(np.ravel(dc))
        total_params += self.get_param_count(dc)
        print("cat_hidden weights: ", dc.shape)
        print(self.weights[-3][0][2].shape)
        da = self.action.weight.data.detach().numpy()
        genome.append(np.ravel(da))
        total_params += self.get_param_count(da)
        print("action weights: ", da.shape)
        print(self.weights[-2][0][2].shape)
        ba = self.action.bias.data.detach().numpy()
        print("action biases: ", ba.shape)
        dv = self.value.weight.data.detach().numpy()
        genome.append(np.ravel(dv))
        total_params += self.get_param_count(dv)
        print("value weights: ", dv.shape)
        print(self.weights[-1][0][2].shape)
        bv = self.value.bias.data.detach().numpy()
        print("value biases: ", bv.shape)
        print("total params: ", total_params)
        genome_shape = [len(x) for x in genome]
        print(genome_shape)
        print()
        # The assembled genome used to be dropped; hand it back to the caller.
        return genome

# Build Net4 from the decoded weights, print its layer shapes, then run it
# once on a random state of 4 + 5 + 4 values (batch of one).
model4 = Net4(weights)
_ = model4.get_w()
state = torch.FloatTensor(np.random.rand(4 + 5 + 4)).unsqueeze(0)
print(state)
a, v = model4(state)
print(a, v)

In [None]:
import random
import torch
import numpy as np

# Describe a network with one pyramid of two-layer "neurons" per observation
# group and draw one random weight chunk per neuron.  Only weight counts are
# summed below; biases are not part of the chunks.
action_size = 5 # final output layer
obs = [4, 5, 6]
prev_states = 6

# One description per observation group:
# [number of previous states, input size, hidden size, output size].
net_desc = [[prev_states, item, item*2, action_size] for item in obs]
print(net_desc)

# Level `depth` of a pyramid holds (prev_states - depth) neurons.  Level 0
# neurons take `inps` values; deeper neurons take two `outs`-sized vectors.
genome_size = []
for item in net_desc:
    p, inps, hidden, outs = item
    prev_states = p
    for depth in range(prev_states):
        fan_in = inps if depth == 0 else 2*outs
        size = fan_in*hidden + hidden*outs
        genome_size.extend([size] * (prev_states - depth))
print(net_desc)
print(genome_size)

# Shared layers after concatenation: cat -> hidden, hidden -> actions, hidden -> value.
out_cat = len(obs) * action_size
out_hidden = action_size*2
cat_hidden = out_cat*out_hidden
genome_size.append(cat_hidden)
action_head = out_hidden*action_size
genome_size.append(action_head)
net_desc.append([out_cat, out_hidden])
net_desc.append([out_hidden, action_size])
value_head = out_hidden*1
genome_size.append(value_head)
net_desc.append([out_hidden, 1])

# NOTE(review): randint's upper bound is exclusive, so every value drawn here
# is -1 or 0 -- confirm that is intended.
chunks = [np.random.randint(-1, 1, item) for item in genome_size]
# The chunks have different lengths.  np.array() on such a ragged list raises
# ValueError on NumPy >= 1.24, so fill an object array element by element.
state = np.empty(len(chunks), dtype=object)
for slot, chunk in enumerate(chunks):
    state[slot] = chunk
print(net_desc)
print(genome_size)
print(sum(genome_size))

# Decode the genome chunks in `state` into weight tensors, one `weights` entry
# per `net_desc` description.  Four-number descriptions are pyramids: every
# neuron consumes one chunk and yields two tagged layers
# [block, depth, width, n_in, n_out, tensor].  Two-number descriptions are the
# shared layers and yield a single [n_in, n_out, tensor].
weights = []
state_index = 0
b = 0  # running pyramid (block) number
for index, layer_desc in enumerate(net_desc):
    entry = []
    if len(layer_desc) <= 2:
        s1, o = layer_desc
        item = state[state_index]
        entry.append([s1, o, torch.Tensor(np.reshape(item, (o, s1)))])
        state_index += 1
    else:
        p, s1, s2, o = layer_desc
        prev_states = p
        for depth in range(prev_states):
            # Level 0 reads raw inputs; deeper levels read two o-sized outputs.
            sn = s1 if depth == 0 else o * 2
            split = sn*s2
            for width in range(p):
                item = state[state_index]
                first = torch.Tensor(np.reshape(item[:split], (s2, sn)))
                second = torch.Tensor(np.reshape(item[split:], (o, s2)))
                entry.append([b, depth, width, sn, s2, first])
                entry.append([b, depth, width, s2, o, second])
                state_index += 1
            # Each level is one neuron narrower than the one below it.
            p -= 1
        b += 1
    weights.append(entry)

# Summarise what was decoded.
for index, entry in enumerate(weights):
    print("Entry:", index)
    for e in entry:
        if len(e) > 3:
            print(*e[:5], e[5].shape)
        else:
            print(*e[:2], e[2].shape)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np

class Net5(nn.Module):
    """Network of per-block neuron pyramids feeding shared action/value heads.

    ``weights`` is a list of entries.  Pyramid entries hold items
    ``[block, depth, order, n_in, n_out, w]`` (two consecutive items per
    neuron); the last three entries each hold one ``[n_in, n_out, w]`` layer
    and are, in order, the ``cat_hidden``, ``action`` and ``value`` layers.
    Only weight tensors are installed; biases keep the values nn.Linear
    initialised them with.
    """

    def __init__(self, weights):
        super(Net5, self).__init__()
        self.weights = weights
        # block -> depth -> order -> list of [n_in, n_out, w] layers
        self.inp_blocks = {}
        for entry in self.weights:
            for item in entry:
                if len(item) > 3:
                    block, depth, order = item[0], item[1], item[2]
                    layers = (self.inp_blocks.setdefault(block, {})
                              .setdefault(depth, {})
                              .setdefault(order, []))
                    layers.append([item[3], item[4], item[5]])
        # Input width of each pyramid, read from its first layer.
        self.block_inputs = []
        for entry in self.weights:
            if len(entry[0]) > 3:
                self.block_inputs.append(entry[0][3])
        print("block inputs", self.block_inputs)
        self.num_actions = self.weights[-2][0][1]
        print("actions", self.num_actions)
        self.num_blocks = len(self.inp_blocks)
        print("num blocks", self.num_blocks)
        self.prev_states = len(self.inp_blocks[0])
        print("prev states", self.prev_states)
        self.out_cat = self.num_blocks * self.num_actions
        print('out_cat', self.out_cat)
        self.out_hidden = self.weights[-3][0][1]
        print('out_hidden', self.out_hidden)
        # All pyramid layers in one flat list, in block/depth/order order;
        # forward() walks them with a running index.
        self.neurons = nn.ModuleList()
        for bi, block in self.inp_blocks.items():
            for di, dblock in block.items():
                for oi, oblock in dblock.items():
                    for item in oblock:
                        fc = nn.Linear(item[0], item[1])
                        fc.weight.data = item[2]
                        self.neurons.append(fc)
        self.cat_hidden = nn.Linear(self.out_cat, self.out_hidden)
        self.cat_hidden.weight.data = self.weights[-3][0][2]
        self.action = nn.Linear(self.out_hidden, self.num_actions)
        self.action.weight.data = self.weights[-2][0][2]
        self.value = nn.Linear(self.out_hidden, 1)
        self.value.weight.data = self.weights[-1][0][2]

    def forward(self, x):
        """Return ``(a, v)`` for a single observation.

        Only row 0 of ``x`` is read.  Each block consumes
        ``block_inputs[bi] * prev_states`` consecutive values.  Level 0 of a
        pyramid applies one neuron to each ``block_inputs[bi]``-wide slice;
        at every deeper level, neuron ``k`` reads the concatenated outputs of
        neurons ``k`` and ``k + 1`` of the level directly below.  The output
        of the last neuron of the top level is the block output.
        """
        block_outs = []
        neuron_index = 0
        input_index = 0
        for bi, block in self.inp_blocks.items():
            binps = self.block_inputs[bi]
            span = binps * self.prev_states
            x1 = x[0][input_index:input_index + span]
            prev_level = []
            for di, dblock in block.items():
                level = []
                for oi in dblock:
                    if di == 0:
                        inp = x1[oi*binps:(oi*binps)+binps]
                    else:
                        # Pair adjacent outputs of the previous level only.  A
                        # single running index over all earlier outputs drifts
                        # across level boundaries and mixes different depths.
                        inp = torch.cat((prev_level[oi], prev_level[oi+1]))
                    out = F.relu(self.neurons[neuron_index](inp))
                    out = F.relu(self.neurons[neuron_index + 1](out))
                    neuron_index += 2
                    level.append(out)
                prev_level = level
            block_outs.append(prev_level[-1])
            input_index += span
        # Concatenate directly instead of copy-constructing a tensor from a
        # tensor, which warns and detaches the pyramids from autograd.
        rc = torch.cat(block_outs)
        rc = F.relu(self.cat_hidden(rc))
        a = F.softmax(F.relu(self.action(rc)), dim=-1)
        v = F.relu(self.value(rc))
        return a, v

    def get_param_count(self, item):
        """Return the number of elements in ``item`` (product of ``item.shape``)."""
        count = 1
        for c in item.shape:
            count = count * c
        return count

    def get_w(self):
        """Print layer shapes and return the weights as a list of flat arrays.

        Returns:
            list of 1-D numpy arrays: one per pyramid neuron (first-layer
            weights followed by second-layer weights), then ``cat_hidden``,
            ``action`` and ``value`` weights.  Biases are not included.
        """
        total_params = 0
        genome = []
        for neuron_index in range(0, len(self.neurons), 2):
            entry = []
            # Number the neuron pairs locally rather than reading an `index`
            # name that this method never defines.
            print("Block: " + str(neuron_index // 2))
            d1 = self.neurons[neuron_index].weight.data.detach().numpy()
            print("fc0 weights: ", d1.shape)
            total_params += self.get_param_count(d1)
            d1 = np.ravel(d1)
            entry.extend(list(d1))
            d2 = self.neurons[neuron_index + 1].weight.data.detach().numpy()
            print("fc1 weights: ", d2.shape)
            d2 = np.ravel(d2)
            entry.extend(list(d2))
            total_params += self.get_param_count(d2)
            entry = np.ravel(entry)
            genome.append(entry)
        dc = self.cat_hidden.weight.data.detach().numpy()
        genome.append(np.ravel(dc))
        total_params += self.get_param_count(dc)
        print("cat_hidden weights: ", dc.shape)
        print(self.weights[-3][0][2].shape)
        da = self.action.weight.data.detach().numpy()
        genome.append(np.ravel(da))
        total_params += self.get_param_count(da)
        print("action weights: ", da.shape)
        print(self.weights[-2][0][2].shape)
        ba = self.action.bias.data.detach().numpy()
        print("action biases: ", ba.shape)
        dv = self.value.weight.data.detach().numpy()
        genome.append(np.ravel(dv))
        total_params += self.get_param_count(dv)
        print("value weights: ", dv.shape)
        print(self.weights[-1][0][2].shape)
        bv = self.value.bias.data.detach().numpy()
        print("value biases: ", bv.shape)
        print("total params: ", total_params)
        genome_shape = [len(x) for x in genome]
        print(genome_shape)
        print()
        # The assembled genome used to be dropped; hand it back to the caller.
        return genome

# Build Net5 from the decoded weights, print its layer shapes, then push 1000
# random states through it.  A state holds prev_states values for each of the
# three observation groups (sizes 4, 5 and 6).
model5 = Net5(weights)
print("Done")
_ = model5.get_w()
state_len = 4*prev_states + 5*prev_states + 6*prev_states
for _ in range(1000):
    state = torch.FloatTensor(np.random.rand(state_len)).unsqueeze(0)
    a, v = model5(state)