In [1]:
import json
import pprint
import collections
import requests
import pandas as pd
import numpy as np
from pynpm import NPMPackage

In [2]:
# Pretty-printer configured with a 2-space indent.
pp = pprint.PrettyPrinter(indent=2)

In [3]:
# Start from an empty dict so a failed load cannot leave data from a previous
# run of this cell behind in the kernel.
project_data = {}
project = 'parsing-test-simple'
# Read the project description as UTF-8 explicitly; without `encoding` the
# platform default is used, which is not UTF-8 everywhere.
with open('data/{}/project.json'.format(project), 'r', encoding='utf-8') as f:
    project_data = json.load(f)

In [4]:
# All targets listed in the loaded project file.
targets = project_data['targets']
# Everything after the first target is treated as a sprite
# (assumes targets[0] is the stage -- TODO confirm).
sprites = targets[1:]

In [5]:
# Blocks of the first sprite, keyed by block id.
blks = sprites[0]['blocks']
# Keep only the blocks whose 'topLevel' flag is truthy.
tops = {
    block_id: block
    for block_id, block in blks.items()
    if block['topLevel']
}

In [13]:
# Print every path returned by get_paths for the first sprite's blocks,
# each followed by blank lines.
# NOTE: get_paths is defined in a later cell of this notebook, so that cell has
# to be executed before this one (this cell fails on a fresh top-to-bottom run).
for p in get_paths(blks):
    print(p)
    print("\n")

['motion_movesteps', '_', 'motion_goto', '_', 'looks_thinkforsecs', '_', 'looks_say', '_', 'sound_playuntildone', '_', 'event_broadcast']


['event_whenflagclicked', '_', 'control_repeat', '_', 'sound_stopallsounds', '_', 'data_setvariableto']


['event_whenflagclicked', '_', 'control_repeat', '>', 'motion_goto', '_', 'control_if', '>', 'control_if', '>', 'motion_turnleft']


['event_whenflagclicked', '_', 'control_repeat', '>', 'motion_goto', '_', 'control_if', '_', 'control_if_else', '>', 'motion_movesteps']


['event_whenflagclicked', '_', 'control_repeat', '>', 'motion_goto', '_', 'control_if', '_', 'control_if_else', '_', 'looks_sayforsecs', '_', 'looks_say']


['event_whenflagclicked', '_', 'control_repeat', '>', 'motion_goto', '_', 'control_if', '_', 'control_if_else', '>', 'motion_movesteps', '_', 'motion_goto']




In [7]:
# NOTE: the path extraction below only works for command blocks; blocks plugged into inputs are not handled.

In [8]:
def get_terminal_blocks(blocks):
    """
    Select the blocks that end a path.

    A block is kept when all of the following hold:
    its 'next' is None, its 'shadow' flag is falsy, its opcode does not
    contain the text 'operator', and its inputs contain neither a
    'SUBSTACK' nor a 'SUBSTACK2' entry.

    Returns a new dict mapping block id -> block for the selected blocks.
    """
    terminals = {}
    for block_id in blocks:
        block = blocks[block_id]

        # something follows this block
        if block['next'] is not None:
            continue
        # shadow blocks are skipped
        if block['shadow']:
            continue
        # operators are skipped
        if 'operator' in block['opcode']:
            continue
        # blocks that hold a nested stack are not terminal
        inputs = block['inputs']
        if 'SUBSTACK' in inputs or 'SUBSTACK2' in inputs:
            continue

        terminals[block_id] = block
    return terminals

In [9]:
def get_paths(blks):
    """
    Build one path per terminal block, from the top of its stack down to it.

    For every block returned by ``get_terminal_blocks(blks)`` the chain of
    'parent' ids is followed upwards until a block without a parent is
    reached. The resulting path is a list that alternates opcodes and
    direction symbols, ordered from the topmost block to the terminal:

    * ``'_'`` -- the lower block is the 'next' of the block above it
    * ``'>'`` -- the lower block is not the 'next' of its parent, i.e. it is
      nested in it

    A terminal that has no parent at all (a lone block) yields a path that
    contains only its own opcode.

    Parameters
    ----------
    blks : dict
        Mapping of block id -> block dict; each block provides at least
        'opcode', 'parent' and 'next'.

    Returns
    -------
    list of list of str
        One path per terminal block.
    """
    # symbols for direction in the tree
    _nest = '>'
    _next = '_'

    paths = []

    for terminal_id in get_terminal_blocks(blks):
        # the path starts as just the terminal and grows towards the top
        path = [blks[terminal_id]['opcode']]

        child_id = terminal_id
        parent_id = blks[child_id]['parent']

        # go up the tree; a parent id of None means the top has been reached
        # (this also covers a lone block, for which the loop never runs)
        while parent_id is not None:
            parent = blks[parent_id]

            # if the lower block is its parent's 'next' it is in sequence,
            # otherwise it is nested inside the parent
            path.insert(0, _next if child_id == parent['next'] else _nest)
            path.insert(0, parent['opcode'])

            # step up one level
            child_id = parent_id
            parent_id = parent['parent']

        paths.append(path)

    return paths

In [9]:
def get_top_blocks(blocks):
    """
    Return the subset of ``blocks`` that is flagged as top-level.

    An entry is kept when 'topLevel' is among its members and the value
    stored under 'topLevel' is truthy. The result is a new dict mapping
    block id -> block.
    """
    top_blocks = {}
    for block_id, block in blocks.items():
        # list(block) gives the keys of a dict entry (or the items of a
        # list entry), so entries without a 'topLevel' member are skipped
        if 'topLevel' not in list(block):
            continue
        if block['topLevel']:
            top_blocks[block_id] = block
    return top_blocks

In [1545]:
def traverse_blocks(blocks):
    """
    Build a stack for every top-level block in ``blocks``.

    The top-level blocks are looked up with ``get_top_blocks`` and each of
    their ids is handed to ``build_stack_tree`` together with a fresh, empty
    list. The results are returned as a list, one entry per top-level block,
    in the order the top-level blocks are returned.
    """
    # ids of the top blocks serve as the starting points
    top_ids = list(get_top_blocks(blocks))

    # one stack per starting point, each built from the top downwards
    return [build_stack_tree([], blocks, top_id) for top_id in top_ids]

In [1546]:
# TODO: can this be done directly on the raw blocks dict, without building the tree first?

In [1515]:
def build_stack_tree(curr_stack, all_blocks, block_id):
    """
    Build the nested stack that starts at ``block_id``.

    The block is summarised as a dict with the keys 'id', 'type' (the
    opcode), 'category' (the opcode text before the first underscore),
    'next', 'parent', 'children' and 'isLeaf'. Then:

    1. Stacks found in the block's 'SUBSTACK' / 'SUBSTACK2' inputs are built
       recursively and stored, flattened and in top-to-bottom order, in
       'children' ('SUBSTACK' blocks first, then 'SUBSTACK2' blocks).
    2. The 'next' chain is followed recursively; every block is prepended
       to ``curr_stack`` while the recursion unwinds, so the list ends up
       ordered from top to bottom.

    'isLeaf' is set to True only for a block that has no 'next', is not
    top-level and has no children.

    Parameters
    ----------
    curr_stack : list
        List the summaries are prepended to (mutated in place).
    all_blocks : dict
        Mapping of block id -> block dict; each block provides 'opcode',
        'next', 'parent', 'inputs' and 'topLevel'.
    block_id : hashable
        Id of the block to start from.

    Returns
    -------
    list
        ``curr_stack`` with this block and everything below it prepended.
        A top-level block without a 'next' is the one exception: it is
        returned in a new single-element list and ``curr_stack`` is left
        untouched.
    """
    # Grab the block
    block = all_blocks[block_id]

    # Initialize the object to store the current block
    curr_block = {
        'id': block_id,
        'type': block['opcode'],
        'category': block['opcode'].split('_')[0],
        'next': block['next'],
        'parent': block['parent'],
        'children': [],
        'isLeaf': False
    }

    # First check for substacks (control flow)
    for stackid in ['SUBSTACK', 'SUBSTACK2']:
        if stackid not in block['inputs']:
            continue

        child_id = block['inputs'][stackid][1]
        # The slot can be present without holding a block (presumably after
        # its contents were removed -- verify against real project files);
        # there is nothing to build in that case.
        if child_id is None:
            continue

        # Build every substack into its own list and append the result, so
        # SUBSTACK blocks stay ahead of SUBSTACK2 blocks. Sharing one list
        # between both recursive calls would prepend the second substack in
        # front of the first.
        curr_block['children'].extend(build_stack_tree([], all_blocks, child_id))

    # No next block: this is the end of the current stack
    if block['next'] is None:
        # A lone block is at the top level and has no next
        if block['topLevel']:
            return [curr_block]

        # At the end of a tree
        if len(curr_block['children']) == 0:
            curr_block['isLeaf'] = True

        # At a normal end of a branch. list.insert returns None, so the
        # list itself has to be returned explicitly.
        curr_stack.insert(0, curr_block)
        return curr_stack

    # Otherwise, go to the next block
    build_stack_tree(curr_stack, all_blocks, block['next'])

    # prepend because it's recursively added from the bottom up
    curr_stack.insert(0, curr_block)

    return curr_stack

In [1516]:
def build_stack_sequence(all_blocks, tid):
    """
    Collect the opcodes of one stack, top to bottom.

    Starting at the block with id ``tid``, the 'next' ids are followed until
    a block without a 'next' is reached. Nested blocks are not visited;
    only the 'next' chain is considered.

    Returns the list of opcodes in the order the blocks were visited.
    """
    opcodes = []
    block_id = tid
    while True:
        block = all_blocks[block_id]
        opcodes.append(block['opcode'])

        # move on to the following block, if there is one
        block_id = block['next']
        if block_id is None:
            break
    return opcodes

In [1517]:
def build_rnn_sequences(stack, seq_len):
    """
    Produce ``seq_len`` fixed-length sequences from one stack sequence.

    Sequence number ``i`` (for ``i`` in ``0 .. seq_len - 1``) is
    ``stack[i:seq_len]``, padded on the right with the string 'none' until
    it holds ``seq_len`` entries. Entries of ``stack`` at index ``seq_len``
    or beyond never appear in the result.

    Returns the list of sequences; it is empty when ``seq_len`` is 0.
    """
    sequences = []
    for start in range(seq_len):
        window = stack[start:seq_len]

        # fill the window up to the requested length
        shortfall = seq_len - len(window)
        if shortfall > 0:
            window.extend(['none'] * shortfall)

        sequences.append(window)
    return sequences

In [1518]:
# small helpers for inspecting a built stack
def stack_is_flat(stack):
    """Return True when no block in ``stack`` has any children."""
    child_counts = [len(block['children']) for block in stack]
    return not any(child_counts)

def stack_is_nested(stack):
    """Return True when at least one block in ``stack`` has children."""
    if stack_is_flat(stack):
        return False
    return True

In [1519]:
def clean_block(block):
    """Return what ``clean_key`` produces for ``block`` and the key 'children'."""
    without_children = clean_key(block, 'children')
    return without_children

In [1520]:
def clean_key(d, key):
    """
    Return a new dict with every entry of ``d`` except the one stored
    under ``key``. ``d`` itself is not modified.
    """
    remaining = {}
    for name in d:
        if name != key:
            remaining[name] = d[name]
    return remaining

In [1521]:
# Blocks of the first sprite, used as the input of the stack-building cells below.
og_blocks = sprites[0]['blocks']

In [1522]:
# 1. Use the top-level blocks as reference points
tops = get_top_blocks(og_blocks)
# ids of those blocks, in the order the dict yields them
topids = list(tops)

In [1523]:
# Build one stack per top-level block of the first sprite.
stacks = traverse_blocks(og_blocks)

## main idea
1. do a depth first traversal on each tree for each stack
2. save each path in the search
3. learn to predict the last node in each path! Pad each path to the length of the longest path.