# Global

In [1]:
from collections import deque, defaultdict
from functools import partial
from math import ceil, floor
import numpy as np

## Memory

In [2]:
class RollingMemory(object):
    """ A class used for memory which pops its oldest element after
        reaching a certain size """
    def __init__(self, size, keys):
        self.keys = keys
        self.Mem = dict([(k, deque([], maxlen=size)) for k in keys])
        self.MAX = int(size)
        self.L = 0
        
    def __getitem__(self, i):
        """ Returns the ith value for each key """
        if isinstance(i, slice):
            return [self[j] for j in range(*i.indices(self.L))]
        if isinstance(i, (list, tuple)):
            return [self[j] for j in i]
        out = {}
        for k, v in self.Mem.items():
            out[k] = v[i]
        return out
    
    def get_key(self, k):
        return list(self.Mem[k])
    
    def is_full(self):
        return len(self) == self.MAX
    def _pop_if_full(self):
        """ Pops the last item from every key if full and returns. """
        if self.is_full():
            for k in self.Mem:
                out = self.Mem[k].popleft()
            self.L -= 1
        else:
            out = None
        return out
    
    def add(self, d):
        if set(d.keys()) != set(self.Mem.keys()):
            raise KeyError("All keys must be the same.")
        out = self._pop_if_full()
        for k, v in d.items():
            self.Mem[k].append(v)
        self.L += 1
        return out
        
    def copy(self):
        return deepcopy(self.Mem)
        
    def __iter__(self):
        return iter((self[i] for i in range(self.L)))
    
    def __len__(self):
        return self.L
    
    def __repr__(self):
        return str(dict([(k, self.get_key(k)) for k in self.Mem]))

### Test that rolling memory works

In [3]:
test_mem = RollingMemory(3,['a','b','c'])
# Test add and __repr__ and concequenty _pop_if_full
for x in range(5):
    test_mem.add({'a': x, 'b': x, 'c': x})
    print(test_mem, " Full: {}".format(test_mem.is_full()))

# Test __iter__ and concequenty __getitem__ and __len__
for d in test_mem:
    print(d)
    
print(test_mem[0:2]) # Test slice indexing
print(test_mem[[0,1,2]]) # Test list indexing

{'a': [0], 'c': [0], 'b': [0]}  Full: False
{'a': [0, 1], 'c': [0, 1], 'b': [0, 1]}  Full: False
{'a': [0, 1, 2], 'c': [0, 1, 2], 'b': [0, 1, 2]}  Full: True
{'a': [1, 2, 3], 'c': [1, 2, 3], 'b': [1, 2, 3]}  Full: True
{'a': [2, 3, 4], 'c': [2, 3, 4], 'b': [2, 3, 4]}  Full: True
{'a': 2, 'c': 2, 'b': 2}
{'a': 3, 'c': 3, 'b': 3}
{'a': 4, 'c': 4, 'b': 4}
[{'a': 2, 'c': 2, 'b': 2}, {'a': 3, 'c': 3, 'b': 3}]
[{'a': 2, 'c': 2, 'b': 2}, {'a': 3, 'c': 3, 'b': 3}, {'a': 4, 'c': 4, 'b': 4}]


### Reformatting

Rolling memory is in the format of a pandas dataframe, a dictionary containing labels relating lists. We need matrices for our network.

In [4]:
def dict_to_input(d):
    return [d[k] for k in sorted(d.keys())]

# Testing
m0 = {'b': 2, 'a': 1, 'c': 3}
t0 = dict_to_input(m0)
print(m0, t0, t0 == [1,2,3]) 

{'a': 1, 'c': 3, 'b': 2} [1, 2, 3] True


### Types of Memory

We are going to create three types of memory:
1. Action memory will save time windows both before and after actions
2. Reward memory will save time windows before and including rewards
3. Time memory will save a rolling record of most recent states

#### Create types of slices

In [5]:
window_size = 100
causal_slice = lambda index: list(range(index+1-window_size, index+1))
center_slice = lambda index: list(range(index-floor(window_size/2.), index+ceil(window_size/2.)))

# Testing
test_index = 100
m0 = causal_slice(test_index)
print("Causal Slice equals Window Size? {}: {}".format(len(m0) == window_size, len(m0)))
print("Causal Slice max equals index? {}: {}".format(max(m0) == test_index, max(m0)))

m1 = center_slice(test_index)
print("Center Slice equals Window Size? {}: {}".format(len(m1) == window_size, len(m1)))
print("Center Slice middle equals index? {}: {}".format(m1[len(m1)//2] == test_index, m1[len(m1)//2]))

Causal Slice equals Window Size? True: 100
Causal Slice max equals index? True: 100
Center Slice equals Window Size? True: 100
Center Slice middle equals index? True: 100


#### Initialize Memory

In [6]:
action_mem = RollingMemory(200)
reward_mem = RollingMemory(200)
time_mem   = RollingMemory(200)

TypeError: __init__() missing 1 required positional argument: 'keys'

## Simulation Environment

Create a conveyor

In [7]:
class Conveyor(object):
    _alarm_period = 100
    _dehome_period = 10
    _num_pieces = 4
    _home_wait = 6
    def __init__(self):
        self.T, self.Production = 0, 0
        self.begin_running, alarm_triggered, time_step = True, False, False
        self.pressence = [False for i in range(self._num_pieces)]
        self.alarm, self.auto, self.cv_on, self.pusher_home = False, False, True, True
        self.pusher_home_at = None
        self._reset_action()
        self._last_running, self._last_alarm = False, False
    def _auto_ok(self):
        return self.pusher_home and self.cv_on and not self.alarm
    def _shift_right(self):
        self.pressence = [False] + self.pressence[0:-1]
        if self.auto:
            self.Production += 1
    def _drop(self):
        """ Adds a part at beginning of cv, random missing piece. """
        if np.random.randint(0,self._alarm_period) == 0:
            self.pressence[0] = False
        else:
            self.pressence[0] = True
    def set_action(self, key):
        self._reset_action()  # Only one action can be set per time
        if key == 'HomePusher':
            self.home_pusher = True
        elif key == 'ChangeManual':
            self.change_manual = True
        elif key == 'ChangeAuto':
            self.change_auto = True
        elif key == 'CVActivate':
            self.cv_activate = True
        elif key == 'CVDeactivate':
            self.cv_deactivate = True
        elif key == 'ResetAlarm':
            self.reset_alarm = True
        elif key == 'Jog':
            self.jog = True
        elif key == 'Drop':
            self.drop = True
        elif key:
            raise KeyError
    def _reset_action(self):
        self.home_pusher, self.change_manual, self.change_auto = False, False, False 
        self.cv_activate, self.cv_deactivate, self.reset_alarm = False, False, False
        self.jog, self.drop = False, False
    def get_state(self):
        return {'Pressence0': self.pressence[0], 'Pressence1': self.pressence[1],
                'Pressence2': self.pressence[2], 'Pressence3': self.pressence[3],
                'Alarm': self.alarm, 'Auto': self.auto,
                'CVOn': self.cv_on, 'PusherHome': self.pusher_home}
    __repr__ = str(get_state)
    def __next__(self):
        self.T += 1 # Increment time
        
        # Remember past state
        self._last_running = self.auto
        self._last_alarm = self.alarm
        
        # Handle auto switching
        if self.change_auto and self._auto_ok(): self.auto = True
        if self.change_manual: self.auto = False
        if not self._auto_ok(): self.auto = False
        
        # Handle auto operation
        if self.auto:
            self._shift_right()  # Shift pressence sensors right
            
            # A piece is added at the beggining of each pitch
            self._drop()
        
            # Alarm triggers if there is no piece at the end of the conveyor
            if not self.pressence[self._num_pieces-1]:
                self.alarm = True
        
        # Handle Manual operation
        else:
            # Chance to randomly dehome pusher each cycle
            if np.random.randint(0,self._dehome_period) == 0:
                self.pusher_home = False
          
            # Home pusher
            if self.home_pusher: self.pusher_home_at = self.T + self._home_wait
            
            # CV Activate / Deactivate
            if self.cv_activate: self.cv_on = True
            if self.cv_deactivate: self.cv_on = False
                            
            # Handle jog and drop
            if self.jog: self._shift_right()
            if self.drop: self._drop()
                
        # Reset alarm on press
        if self.reset_alarm: self.alarm = False
                
        # Check pusher done homing
        if self.pusher_home_at == self.T: self.pusher_home = True
        
        # Reset buttons at the end
        self._reset_action()
        
        # Return state
        return self.get_state()
    
    def began_running(self):
        return (not self._last_running) and self.auto
    def alarm_triggered(self):
        return (not self._last_alarm) and self.alarm
    

### Expand to include reward, action, and state getters and setters specific to this test environment

### Testing

In [8]:
calc_reward_values = lambda reward_bits: {'began_running':   int(reward_bits['began_running'])    * 1.,
                                          'alarm_triggered': int(reward_bits['alarm_triggered'])  * -1.,
                                          'running':       int(reward_bits['running'])            * 0.01,
                                          'not_running':   int(reward_bits['not_running'])        * -0.01}
Actions = [None, 'HomePusher', 'ChangeManual', 'ChangeAuto',
           'CVActivate', 'CVDeactivate', 'ResetAlarm', 'Jog', 'Drop']
       
class MLConveyor(Conveyor):
    get_reward_bits    = lambda self: {'began_running': self.began_running(),
                                       'alarm_triggered': self.alarm_triggered(), 
                                       'running': self.auto,
                                       'not_running': not self.auto}

    get_next_state = lambda self: next(self)
    set_action_bit = lambda self, a: self.set_action(Actions[a])
   
    time_memory = []
    def save(self, t, a, s, r):
        self.time_memory.append((t,a,s,r))
    
    def run_once(self, a0 = None):
        """ Runs and returns the previous state + action + reward."""
        t = self.T
        s0  = self.get_state()
        a0  = Actions.index(a0) # Assume a0 is the name, not index of the action from Actions
        self.set_action_bit(a0) 
        s1  = self.get_next_state()
        r0  = self.get_reward_bits()
        rv0 = calc_reward_values(r0)
        self.save(t, s0, a0, sum(rv0.values()))
        return t, s0, a0, rv0

In [None]:
CV = MLConveyor()

PRINT = False
def print_state(t, s, a, r):
    if PRINT: 
        print("State{}: {}".format(t, (sorted(list(s.items())))))
        print("Action{}: {}".format(t, Actions[a]))
        print("Reward{}: {}\n".format(t, r))

def run_wait():
    # Run until an alarm
    if not CV.alarm:
        while not CV.alarm:
            t0, s0, a0, r0 = CV.run_once()

        if PRINT: print("---- ALARM!!! ----")
        if PRINT: print("Previous State:")
        print_state(t0,s0,a0,r0)
        if PRINT: print("Current State:")
        print_state(*CV.run_once(None)) # You must be one step ahead to get the action and reward

def handle_startup():
    while not all(CV.pressence):
        for a in ["Jog", "Drop"]:
            CV.run_once(a)
    for a in ["ResetAlarm","ChangeAuto", None]:
        print_state(*CV.run_once(a))
    if CV.auto and PRINT:
        print("------- RUNNING ----------\n")
    
def handle_homing():
    print_state(*CV.run_once("HomePusher"))
    for i in range(CV._home_wait):
        CV.run_once()
    for a in ["ResetAlarm","ChangeAuto", None]:
        print_state(*CV.run_once(a))
        
def catch_error():
    if not CV.pusher_home:
        print("Pusher not home.\n")
        handle_homing()
        handle_startup()
    elif not CV.pressence[-1]:
        if PRINT: print("Part Missing\n")
        handle_startup()
    else:
        if PRINT: print("Other Alarm")
        print_state(*CV.run_once())
        raise Exception

# Get 1000 iterations
while CV.T < 1000:
    handle_startup()
    run_wait()
    catch_error()

print("Done!")
print("Last Item: {}".format(CV.time_memory[-1]))
print("Time: {}, Production: {}, Difference: {}".format(CV.T, CV.Production, np.abs(CV.T-CV.Production)))

# Q Learning

Q(state, action) = R(state, action) + max([Q(next_state(state, action), a) for a in A])

1. next_state(state, action): Prediction Network & Memory
2. R(state, action): G(next_state(state), action) - G(state)

In [None]:
c = .5
Z = c**(window_size-np.arange(window_size))
print(Z)

In [None]:
R = np.array([x[3] for x in CV.time_memory])
print(R)

In [None]:
def apply_Z(t):
    x0, x1 = t-window_size, t+1