# RNN / Pytorch / Instruction Offset

### Constants 

In [1]:
from enum import Enum
import numpy as np
import itertools
import torch
import torch.nn.functional as F

class OPSTRING(Enum):
    """Closed set of operations; each member's value is the operation's string token."""
    OR = '|'
    AND = '&'
    NOT = '~'
    MOV = 'mov'
    XOR = 'xor'
    IDENTITY = 'identity'
    
# Callable that implements each operation. MOV and IDENTITY both pass their
# single argument through unchanged.
op_string_to_ops = {
    OPSTRING.OR: np.bitwise_or,
    OPSTRING.AND: np.bitwise_and,
    OPSTRING.NOT: np.bitwise_not,
    OPSTRING.XOR: np.bitwise_xor,
    OPSTRING.MOV: lambda x: x,
    OPSTRING.IDENTITY: lambda x: x,
}

# Number of operands each operation takes.
ARITY_DICT = {
    OPSTRING.OR: 2,
    OPSTRING.AND: 2,
    OPSTRING.NOT: 1,
    OPSTRING.MOV: 1,
    OPSTRING.IDENTITY: 1,
    OPSTRING.XOR: 2,
}

# String tokens of the operations listed here; note that "xor" is not included.
FUNCTION_SET = [
    "&",
    "|",
    "mov",
    "~",
    "identity",
]


  from .autonotebook import tqdm as notebook_tqdm


### Data Loading

In [2]:
import numpy as np  


class Op:
    """An operation: its token, the callable implementing it, and its arity."""

    def __init__(self, op_str, fx, arity):
        """
        :param op_str: token identifying the operation
        :param fx: callable that implements the operation
        :param arity: number of operands the operation takes
        """
        self.op_str, self.fx, self.arity = op_str, fx, arity

    @staticmethod
    def from_string(op_str):
        """
        :param op_str: string token of the operation (a value of OPSTRING)
        :return: Op built from the module-level operation and arity tables
        :raises ValueError: if the token is not a value of OPSTRING
        """
        token = OPSTRING(op_str)
        implementation = op_string_to_ops[token]
        operand_count = ARITY_DICT[token]
        return Op(op_str=token, fx=implementation, arity=operand_count)

class Inst:
    """A single instruction: an operation plus source and destination register indices.

    Instances are value objects: two instructions with the same operation token,
    source and destination compare equal and hash alike, so a freshly built
    instruction can be used to look up an existing dictionary entry.
    """

    def __init__(self, src: int, dst: int, op: str):
        """
        :param src: source register index
        :param dst: destination register index
        :param op: string token of the operation, resolved through Op.from_string
        """
        self.src = src
        self.dst = dst
        self.op = Op.from_string(op)

    @staticmethod
    def from_string(inst_str):
        """
        :param inst_str: instruction in prefix form "<op> <dst> <src>", e.g. "& 1 2"
        :return: Inst instance
        """
        # split() without an argument tolerates repeated or surrounding whitespace.
        tokens = inst_str.split()
        # The string is destination-first while the constructor takes the source first.
        return Inst(src=int(tokens[2]), dst=int(tokens[1]), op=tokens[0])

    def _key(self):
        """Return the tuple of values that defines this instruction's identity."""
        # Compare the operation token, not the Op object: Op defines no __eq__, so
        # two Op instances built from the same token are never equal to each other.
        return (self.op.op_str, self.src, self.dst)

    def __eq__(self, other):
        """Instructions are equal when operation token, source and destination match."""
        if not isinstance(other, Inst):
            return NotImplemented
        return self._key() == other._key()

    def __str__(self):
        """
        :return: string form of a dict holding the operation token, source and destination
        """
        pr_print = {"OP:": self.op.op_str, "SRC": self.src, "DST": self.dst}
        return str(pr_print)

    def __hash__(self):
        """Hash over the same values __eq__ compares, keeping the two consistent."""
        return hash(self._key())

class Task:
    """
    Initialized only once from configuration, task parameterizes search by fixing the machine architecture, as well
    as problem specific characteristics such as max sequence length, function set etc.
    """

    def __init__(self, function_set: list, num_regs: int, num_data_regs:int, output_regs: list, dataset: str, constraints: list,
                 sequence_length:int, arity:dict):
        """
        :param function_set: string tokens of the operations instructions may use
        :param num_regs: number of registers
        :param num_data_regs: number of data registers
        :param output_regs: indices of the output registers (copied into a new list)
        :param dataset: dataset identifier (a file path in this notebook's usage)
        :param constraints: constraints stored on the task
        :param sequence_length: program sequence length
        :param arity: mapping from operation to its number of operands
        """
        self.function_set = function_set
        self.num_regs = num_regs
        self.num_data_regs = num_data_regs
        self.output_regs = list(output_regs)
        self.dataset = dataset
        self.constraints = constraints
        # Both derived values below read only the register counts and function set assigned above.
        self.instruction_shape = self.number_of_possible_insts()
        self.inst_to_vec, self.vec_to_inst = self.index_mappings()
        self.sequence_length = sequence_length
        self.arity = arity

    def index_mappings(self):
        """
        :return: mapping and reverse mapping between instructions and their one-hot encoded index
        note: this is for a very simple one-hot embedding approach
        """
        # TODO: this should be all registers times the number of executable regs - maybe minus introns
        # NOTE(review): this enumerates num_regs * num_data_regs * len(function_set)
        # instructions, while number_of_possible_insts() counts
        # num_regs * (num_regs + num_data_regs) * len(function_set) - confirm which is intended.
        combinations = itertools.product(
            range(self.num_regs), range(self.num_data_regs), self.function_set
        )

        inst_to_vec, vec_to_inst = {}, {}
        for idx, (src, dst, op) in enumerate(combinations):
            instruction = Inst(src, dst, op)
            inst_to_vec[instruction] = idx
            vec_to_inst[idx] = instruction
        return inst_to_vec, vec_to_inst

    def number_of_possible_insts(self):
        """
        :return: number of possible instructions given the machine architecture and the function set
        """
        operand_choices = self.num_regs + self.num_data_regs
        return self.num_regs * operand_choices * len(self.function_set)

    # TODO: improve the instruction embedding
    def inst_to_onehot(self, inst_offset: int) -> torch.Tensor:
        """
        :param inst_offset: index of the instruction, in [0, instruction_shape)
        :return: naive one-hot encoded embedding of any possible instruction given the task,
                 as a boolean tensor of length instruction_shape
        """
        one_hot = F.one_hot(torch.tensor(inst_offset), num_classes=self.instruction_shape)
        # Convert directly to bool: re-wrapping an existing tensor in torch.tensor()
        # copies it and emits a UserWarning, and a float round trip adds nothing.
        return one_hot.to(torch.bool)

    @staticmethod
    def constraint(action):
        """
        :return: True when the given action (instruction) is NOT a semantic intron, False when it is
        this is an in-situ constraint to produce programs with only meaningful instructions
        for now: ones that do not contain instructions that are semantic introns
        """
        # NOTE(review): semantic_intron is not defined in the visible part of this file -
        # confirm it is in scope before calling this.
        return not semantic_intron(action)




In [3]:

def test_task():
    """Build the fixed Task configuration used by the cells of this notebook."""
    config = {
        "function_set": FUNCTION_SET,
        "num_regs": 7,
        "num_data_regs": 7,
        "output_regs": [6],
        "dataset": "test_data/6-bit-parity.csv",
        "constraints": [],
        "sequence_length": 8,
        "arity": ARITY_DICT,
    }
    return Task(**config)

In [4]:
task = test_task()

sequence_length = 4
steps_left = sequence_length
instruction_offsets = [0, 1, 2, 3]

# I think we can train with indices versus full sparse vectors.
# The observation therefore stores instruction offsets, which needs an integer
# dtype: a bool tensor would collapse every offset to True/False.
# It is allocated once, outside the loop, so earlier steps are not wiped.
observation_shape = (sequence_length,)
observation_space = torch.zeros(observation_shape, dtype=torch.long)
action_space = task.vec_to_inst

for i in range(sequence_length):
    # Despite its name this is an instruction offset (an index), not a one-hot vector.
    one_hot_encoded_action = instruction_offsets[steps_left - 1]
    observation_space[sequence_length - steps_left] = one_hot_encoded_action
    # Advance the episode; without this every iteration rewrites slot 0.
    steps_left -= 1

### Initialize the Networks

There are a few characteristics of the input for this RNN. First, since we evaluate only once per episode, we will likely need some kind of padding for the candidate programs, i.e.

INST, PAD, PAD, PAD, PAD
INST, INST, PAD, PAD, PAD
INST, INST, INST, PAD, PAD
INST, INST, INST, INST, PAD
INST, INST, INST, INST, INST

for an episode with a sequence length of 5.

However, as long as it is the case that the network is only fed a program at the end of the episode (along with the discounted reward) this might not be necessary.

There are two interesting problems. One is regressing reward/value, with the actor and critic networks taking a sequence as input and outputting a reward. The other is a toy problem in which compilation is treated as a multi-class classification problem (represented by the output binary vector), which could perhaps be used to learn an embedding.


In [15]:
import torch.nn as nn
from torch.distributions import Categorical

def create_rnn(vocab_size, embedding_dim, hidden_size, n_layers, output_size):
    """Build an embedding layer followed by a unidirectional multi-layer RNN.

    :param vocab_size: number of distinct indices the embedding accepts
    :param embedding_dim: size of each embedding vector (the RNN input size)
    :param hidden_size: size of the RNN hidden state
    :param n_layers: number of stacked RNN layers
    :param output_size: currently unused; kept so existing callers keep working
    :return: nn.Sequential that maps integer indices of shape (batch, seq) to the
             RNN's (output, h_n) pair, where output is (batch, seq, hidden_size)
             and h_n is (n_layers, batch, hidden_size)
    """
    embedding = nn.Embedding(vocab_size, embedding_dim)
    # batch_first=True: the embedded input is (batch, seq, embedding_dim). With the
    # default the RNN would read the first axis as time and recur across the batch.
    recurrent = nn.RNN(
        embedding_dim,
        hidden_size,
        n_layers,
        batch_first=True,
        bidirectional=False,
    )
    return nn.Sequential(embedding, recurrent)

# class ActorCategoricalRNN(nn.Module):
    
#     def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers):
#         super().__init__()      
        
#         #embedding layer
#         self.embedding = nn.Embedding(vocab_size, embedding_dim)
        
#         self.rnn = create_rnn(embedding_dim, hidden_dim, n_layers)
        
#         self.act = nn.ReLU()
        
#     def forward(self, states):
#         embedded = self.embedding(states)
#         #embeddeed = [batch_size, seq_len, embedding_dim]
        
#         #sequences are fixed length so don't need any packing 
#         logits, _ = self.rnn(embedded)
        
#         pi = Categorical(logits=logits)
#         actions = pi.sample()

#         return pi, actions

#vocab is the set of possible instructions 

In [16]:
# The vocabulary is the set of possible instructions, so it sizes both the
# embedding table and the (currently unused) output size.
rnn = create_rnn(
    vocab_size=task.instruction_shape,
    embedding_dim=24,
    hidden_size=8,
    n_layers=8,
    output_size=task.instruction_shape,
)



In [17]:
batch_size = 4
# Build the batch of instruction indices directly as int64, the index dtype the
# embedding lookup is fed here, instead of creating int32 and converting.
states = torch.zeros((batch_size, sequence_length), dtype=torch.int64)
print(states)

# Call the module itself rather than .forward() so that registered hooks run.
outputs, hx = rnn(states)

outputs.shape

tensor([[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]])


torch.Size([4, 4, 8])