In [64]:
# Inject a CSS rule that sets the `.container` element to 90% width
# (cosmetic only; nothing below depends on it).
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [98]:
import math
import random
from PIL import Image as image
from collections import defaultdict
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
import random
import tensorflow as tf
%matplotlib inline

# Instruction set of the generated programs.  The integer is the slot each
# instruction occupies in the one-hot encoding built by generate_programs().
ops = {'go': 0, 'turn_left': 1, 'turn_right': 2}
# Reverse lookup: one-hot slot -> instruction name.
# `.items()` is used instead of the Python-2-only `.iteritems()` so the cell
# runs unchanged on both Python 2 and Python 3.
inv_ops = {v: k for k, v in ops.items()}
# Values a bitmap cell can take.
cells = {'empty': 0, 'visited': 1}

def visited_surrounding(bitmap, x, y):
    """Count the visited cells in the 3x3 neighbourhood centred on (x, y).

    The centre cell itself is included in the count.  `bitmap` is indexed as
    ``bitmap[-y][x]``, i.e. rows are keyed by the negated y coordinate.
    """
    neighbourhood = (-1, 0, 1)
    return sum(
        1
        for delx in neighbourhood
        for dely in neighbourhood
        if bitmap[-(y + dely)][x + delx] == cells['visited']
    )

def try_generate_program(program_length, p_turn):
    """Make a single attempt at generating a random path program.

    Starting at (0, 0) with heading angle 0, `program_length` instructions are
    drawn: with probability ``1 - p_turn`` a 'go', otherwise a left or right
    turn (50/50).  The path is traced into a sparse bitmap as it is built.

    The attempt is abandoned (``None`` is returned) as soon as
      * a 'go' lands on a cell that is not empty,
      * the 3x3 neighbourhood of the new cell holds more than 3 visited cells
        (the path would touch itself), or
      * a turn directly follows the opposite turn.

    Returns
    -------
    (program, bitmap) or None
        `program` is a list of op codes (values of `ops`); `bitmap` is a
        nested defaultdict indexed as ``bitmap[-y][x]`` holding `cells` values.
    """
    bitmap = defaultdict(lambda: defaultdict(lambda: cells['empty']))
    program = []
    x = y = phi = 0
    bitmap[-y][x] = cells['visited']
    for _ in range(program_length):
        if random.random() >= p_turn:
            op = ops['go']
            # cos/sin of a multiple of pi/2 are only approximately 0 or +-1,
            # hence the rounding.  int() keeps the grid keys integral on
            # Python 2 as well, where round() returns a float.
            x += int(round(math.cos(phi)))
            y += int(round(math.sin(phi)))
            if bitmap[-y][x] != cells['empty']:
                return None
            bitmap[-y][x] = cells['visited']
            if visited_surrounding(bitmap, x, y) > 3:
                return None
        elif random.random() < 0.5:
            op = ops['turn_left']
            phi += math.pi/2
            # A left turn right after a right turn would cancel it out.
            if program and program[-1] == ops['turn_right']:
                return None
        else:
            op = ops['turn_right']
            phi -= math.pi/2
            # A right turn right after a left turn would cancel it out.
            if program and program[-1] == ops['turn_left']:
                return None
        program.append(op)
    return program, bitmap

def generate_program(program_length, p_turn, max_attempts=1000):
    """Retry try_generate_program() until it yields a valid program.

    Parameters
    ----------
    program_length, p_turn
        Passed through to try_generate_program().
    max_attempts : int
        Upper bound on the number of attempts (default 1000).

    Returns
    -------
    The ``(program, bitmap)`` pair of the first successful attempt, or
    ``None`` if every attempt was rejected.
    """
    for _ in range(max_attempts):
        attempt = try_generate_program(program_length, p_turn)
        if attempt is not None:
            return attempt
    return None

def generate_programs(num_programs=1000, program_length=24, p_turn=0.3):
    """Generate `num_programs` distinct programs together with their bitmaps.

    Returns
    -------
    programs : ndarray, shape (num_programs, program_length, 3)
        One-hot encoding of every instruction (slot = op code from `ops`).
    bitmaps : ndarray, shape (num_programs, width * height)
        Row-major flattening of the ``width x height`` grid
        (``width = height = 2 * program_length + 1``) with the start cell in
        the centre.  Rows follow the first bitmap key (``-y``), columns the
        second (``x``).  show_bitmap() expects exactly this layout.

    Raises
    ------
    RuntimeError
        If generate_program() gives up without producing a program.

    Note: the loop only ends once `num_programs` *distinct* programs have been
    found, so asking for more than exist for the given length never returns.
    """
    lookup = set()
    width = height = program_length*2+1
    programs = np.zeros((num_programs, program_length, 3))
    bitmaps = np.zeros((num_programs, width * height))
    # Inclusive of +program_length: a path can reach that far from the start,
    # and the buffers above are sized for 2*program_length+1 cells per axis.
    coords = range(-program_length, program_length + 1)
    i = 0
    while i < num_programs:
        result = generate_program(program_length, p_turn)
        if result is None:
            raise RuntimeError(
                'generate_program() found no valid program for '
                'program_length=%r, p_turn=%r' % (program_length, p_turn))
        program, bitmap = result
        program = tuple(program)
        if program in lookup:
            continue  # duplicate, draw again
        lookup.add(program)
        for k, op in enumerate(program):
            programs[i, k, op] = 1 # one-hot
        ai = 0
        for row in coords:
            for col in coords:
                bitmaps[i, ai] = bitmap[row][col]
                ai += 1
        i += 1
        print(i, end='   \r')
    return programs, bitmaps

def to_program(program):
    """Decode a one-hot encoded program into a list of instruction names.

    For every row of `program`, the name of each slot whose value equals 1 is
    appended, in slot order.
    """
    # `.items()` instead of the Python-2-only `.iteritems()`.
    inv_ops = {v: k for k, v in ops.items()}
    return [inv_ops[k]
            for row in program
            for k in range(len(row))
            if row[k] == 1]

def show_bitmap(bitmap, program_length):
    """Draw one flattened bitmap (a row of generate_programs()' `bitmaps`).

    The vector is interpreted as a row-major ``height x width`` grid with
    ``width = height = 2 * program_length + 1`` and shown in grayscale with
    the axes hidden.
    """
    width = height = program_length*2+1
    data = np.asarray(bitmap, dtype=float).reshape(height, width)
    plt.axis('off')
    plt.imshow(data, cmap=plt.cm.gray, aspect='equal')
    
def get_training_batch(programs, bitmaps, batch_programs, batch_bitmaps, batch_size, p_training):
    """Fill the preallocated batch buffers with random training samples.

    Samples are drawn with replacement from the first
    ``int(len(programs) * p_training)`` entries, which is exactly the range
    that get_test_batch() leaves out.  Both buffers are zeroed first, so rows
    beyond `batch_size` end up as zeros.  Nothing is returned; the buffers are
    modified in place.
    """
    batch_programs.fill(0)
    batch_bitmaps.fill(0)
    # Truncate the split point *before* sampling.  Scaling the random number by
    # the untruncated product could yield index int(len * p_training), i.e. the
    # first test sample, whenever len * p_training is not a whole number.
    training_size = int(len(programs) * p_training)
    for i in range(batch_size):
        r = int(random.random() * training_size)
        batch_programs[i] = programs[r]
        batch_bitmaps[i] = bitmaps[r]

def get_test_batch(programs, bitmaps, test_programs, test_bitmaps, test_size, p_training):
    """Fill the preallocated test buffers with held-out samples.

    Copies `test_size` consecutive entries starting at index
    ``int(p_training * len(programs))``.  Both buffers are zeroed first and
    modified in place; nothing is returned.  Indexing past the end of
    `programs` raises IndexError.
    """
    test_programs.fill(0)
    test_bitmaps.fill(0)
    offset = int(p_training * len(programs))
    # range() instead of the Python-2-only xrange().
    for i in range(test_size):
        test_programs[i] = programs[offset+i]
        test_bitmaps[i] = bitmaps[offset+i]

In [9]:
def relu(x):
    """Return the larger of `x` and 0 (rectified linear unit) for a scalar."""
    return 0 if 0 > x else x

In [12]:
# Quick sanity check: a positive input is returned unchanged.
relu(2)

2

In [100]:
from itertools import product

# --- Grid and vocabulary -----------------------------------------------------
program_length = 5
width = height = program_length*2+1
directions = {'up': 0, 'down': 1, 'left': 2, 'right': 3}
# `.items()` instead of the Python-2-only `.iteritems()`.
inv_directions = {v: k for k, v in directions.items()}
offsets = [-1, 0, +1]

n_directions = len(directions)
n_offsets = len(offsets)
n_ops = len(ops)

# --- Activations -------------------------------------------------------------
# Index names below follow the loops that fill and read these arrays:
# d = heading, pd = previous heading, (x, y) = grid cell, (dx, dy) = offset.
i = np.zeros((width, height))                                                    # input bitmap, i[x, y]
s = np.zeros((n_directions, n_directions, width, height, n_offsets, n_offsets))  # s[d, pd, x, y, dx, dy]
h = np.zeros((n_directions, n_directions, width, height))                        # h[d, pd, x, y]
e = np.zeros((n_ops)) # what to emit

# --- Weights (all zero here; filled in by the loops that follow) -------------
# w_sh and w_h each hold (n_directions**2 * width * height)**2 * n_offsets**2
# float64 entries, about 270 MB apiece for program_length = 5.
w_eh = np.zeros((n_ops, n_directions, n_directions, width, height))              # h -> e
w_sh = np.zeros((n_directions, n_directions, width, height, n_offsets, n_offsets,    n_directions, n_directions, width, height))  # h -> s
w_si = np.zeros((n_directions, n_directions, width, height, n_offsets, n_offsets,    width, height))                              # i -> s
w_h = np.zeros((n_directions, n_directions, width, height,     n_directions, n_directions, width, height, n_offsets, n_offsets))  # s -> h

# w_sh (h -> s): every s unit (d, pd, x, y, dx, dy) listens to the h unit with
# the same (d, pd, x, y).  Only those entries are non-zero, so they are set
# directly instead of scanning every (s unit, h unit) pair.
for d, pd, x, y, dx, dy in product(directions.values(), directions.values(), range(width), range(height), offsets, offsets):
    w_sh[d, pd, x, y, dx, dy, d, pd, x, y] = 1

# w_si (i -> s): every s unit (d, pd, x, y, dx, dy) listens to the input pixel
# at (x + dx, y + dy), provided that pixel lies inside the grid.
for d, pd, x, y, dx, dy in product(directions.values(), directions.values(), range(width), range(height), offsets, offsets):
    xx, yy = x + dx, y + dy
    if 0 <= xx < width and 0 <= yy < height:
        w_si[d, pd, x, y, dx, dy, xx, yy] = 1

# w_h (s -> h): target unit h[d, pd, x, y], source unit s[dd, ppd, xx, yy, dx, dy].
# A connection exists only when the target's previous heading equals the
# source's heading (pd == dd) and one of the following holds, with the offset
# (dx, dy) pointing along the target heading d:
#   * "go":   d == dd and the source cell is the one directly behind (x, y);
#   * "turn": same cell, and both dd and ppd are perpendicular to d.
for d, pd, x, y, dd, ppd, xx, yy, dx, dy in product(directions.values(), directions.values(), range(width), range(height), directions.values(), directions.values(), range(width), range(height), offsets, offsets):
    v = 0
    if pd == dd:
        # down
        if d == dd == directions['down'] and yy == y - 1 and xx == x and dy == 1 and dx == 0:
            v = 1
        if d == directions['down'] and (ppd == directions['left'] or ppd == directions['right']) and (dd == directions['left'] or dd == directions['right']) and yy == y and xx == x and dy == 1 and dx == 0:
            v = 1
        # up
        if d == dd == directions['up'] and yy == y + 1 and xx == x and dy == -1 and dx == 0:
            v = 1
        if d == directions['up']and (ppd == directions['left'] or ppd == directions['right']) and (dd == directions['left'] or dd == directions['right']) and yy == y and xx == x and dy == -1 and dx == 0:
            v = 1
        # left
        if d == dd == directions['left'] and yy == y and xx == x + 1 and dy == 0 and dx == -1:
            v = 1
        if d == directions['left'] and (ppd == directions['up'] or ppd == directions['down']) and (dd == directions['up'] or dd == directions['down']) and yy == y and xx == x and dy == 0 and dx == -1:
            v = 1
        # right
        if d == dd == directions['right'] and yy == y and xx == x - 1 and dy == 0 and dx == 1:
            v = 1
        if d == directions['right'] and (ppd == directions['up'] or ppd == directions['down']) and (dd == directions['up'] or dd == directions['down']) and yy == y and xx == x and dy == 0 and dx == 1:
            v = 1
    w_h[d, pd, x, y, dd, ppd, xx, yy, dx, dy] = v

# w_eh (h -> e): which instruction an active h unit (d, pd, x, y) stands for.
# Same heading as before means 'go'; otherwise the (pd -> d) pair decides
# between 'turn_left' and 'turn_right'.  The cell (x, y) plays no role.
for op, d, pd, x, y in product(ops.values(), directions.values(), directions.values(), range(width), range(height)):
    v = 0
    if d == pd and op == ops['go']:
        v = 1
    if op == ops['turn_left']:
        if pd == directions['right'] and d == directions['up']:
            v = 1
        if pd == directions['up'] and d == directions['left']:
            v = 1
        if pd == directions['left'] and d == directions['down']:
            v = 1
        if pd == directions['down'] and d == directions['right']:
            v = 1
    if op == ops['turn_right']:
        if pd == directions['right'] and d == directions['down']:
            v = 1
        if pd == directions['down'] and d == directions['left']:
            v = 1
        if pd == directions['left'] and d == directions['up']:
            v = 1
        if pd == directions['up'] and d == directions['right']:
            v = 1
    w_eh[op, d, pd, x, y] = v
    
step = 0
def advance():
    """Run one time step of the hand-wired network.

    Reads the module-level input `i` and weights `w_sh`, `w_si`, `w_h`, `w_eh`
    and updates the module-level activations in place, in this order:

        s = relu(w_sh . h + w_si . i - 1)      (bias -1)
        h = relu(w_h . s)
        e = relu(w_eh . h)

    where "." contracts the trailing axes of the weight tensor with all axes of
    the activation.  Increments the global `step` counter and prints every
    non-zero h unit and every emitted op.
    """
    global step
    print('Doing ', step)
    step += 1

    # The recurrent term pairs each weight with the h unit it is indexed by,
    # h[dd, ppd, xx, yy].  (Multiplying by h[d, pd, x, y] instead gives the
    # same numbers only while w_sh is the identity.)
    s[...] = np.maximum(np.tensordot(w_sh, h, axes=4) + np.tensordot(w_si, i, axes=2) - 1, 0)
    # if s[d, pd, x, y, dx, dy] != 0:
    #     print('s set: ', step, inv_directions[d], inv_directions[pd], x, y, dx, dy)

    # The sum should be 0 or 1 always, so the rectification isn't really required here.
    h[...] = np.maximum(np.tensordot(w_h, s, axes=6), 0)
    for d, pd, x, y in product(directions.values(), directions.values(), range(width), range(height)):
        if h[d, pd, x, y] != 0:
            print('h set: ', step, inv_directions[d], inv_directions[pd], x, y)

    e[...] = np.maximum(np.tensordot(w_eh, h, axes=4), 0)
    for op in ops.values():
        if e[op] != 0:
            print('emit: ', step, inv_ops[op])

In [101]:
# Start from fresh, all-zero activations (same shapes as in the setup cell).
i = np.zeros((width, height))
s = np.zeros((len(directions), len(directions), width, height, len(offsets), len(offsets)))
h = np.zeros((len(directions), len(directions), width, height))
e = np.zeros(len(ops))

# initial state: cell (5, 5), heading right, previous heading right
h[directions['right'], directions['right'], 5, 5] = 1

# bitmap: the path of  go, go, turn-right, go, go, go, turn-left, go, go
# given as matching lists of x and y coordinates (first entry = start cell)
i[[5, 6, 7,   7, 7, 7,   8, 9],
  [5, 5, 5,   6, 7, 8,   8, 8]] = 1

# One advance() per instruction: 6 x go + 2 turns + ... = 9 steps in total.
step = 0
for _tick in range(9):
    advance()
del _tick

Doing  0
1 w_h * s 3 3 6 5 3 3 5 5 1 0
h set:  1 right right 6 5
emit:  1 go
Doing  1
2 w_h * s 3 3 7 5 3 3 6 5 1 0
h set:  2 right right 7 5
emit:  2 go
Doing  2
3 w_h * s 1 3 7 5 3 3 7 5 0 1
h set:  3 down right 7 5
emit:  3 turn_right
Doing  3
4 w_h * s 1 1 7 6 1 3 7 5 0 1
h set:  4 down down 7 6
emit:  4 go
Doing  4
5 w_h * s 1 1 7 7 1 1 7 6 0 1
h set:  5 down down 7 7
emit:  5 go
Doing  5
6 w_h * s 1 1 7 8 1 1 7 7 0 1
h set:  6 down down 7 8
emit:  6 go
Doing  6
7 w_h * s 3 1 7 8 1 1 7 8 1 0
h set:  7 right down 7 8
emit:  7 turn_left
Doing  7
8 w_h * s 3 3 8 8 3 1 7 8 1 0
h set:  8 right right 8 8
emit:  8 go
Doing  8
9 w_h * s 3 3 9 8 3 3 8 8 1 0
h set:  9 right right 9 8
emit:  9 go
