# cleaning ast from project file

In [79]:
import json
import pprint
import collections

In [80]:
pp = pprint.PrettyPrinter(indent=4)

In [81]:
project_data = {}
project = 'parsing-test-simple'
with open('data/{}/project.json'.format(project), 'r') as f:
    project_data = json.load(f)

In [82]:
targets = project_data['targets']
sprites = targets[1:]

In [83]:
def get_top_blocks(blocks):
    return {bid: block for bid,block in blocks.items() if 'topLevel' in list(block) and block['topLevel']}

In [84]:
def traverse_blocks(blocks):
    
    """
    traverses the blocks dictionary by continually traversing the 'next' property of each block
    returns a list of top to bottom paths
    """
    
    # 1. Grab the top blocks as reference points
    tops = get_top_blocks(blocks)
    topids = [bid for bid,block in tops.items()]
    
    # Store each stack
    stacks = []
        
    # start at each top block and traverse down by seeing each next block
    for tid in topids:
        stack = build_stack_tree([], blocks, tid)
        stacks.append(stack)
    
    return stacks

In [85]:
def build_stack_tree(curr_stack, all_blocks, block_id):
    """
    Given a block:
    1. Initialize with its data
    1. Check if it has any children
        2. If it has children,
            recursively traverse each child block
    2. If not, check if it has a next block
    3. Repeat the process with the next block
    """
    
    #Grab the block
    block = all_blocks[block_id]
    
    # Initialize the object to store the current block 
    curr_block = {
        'id': block_id,
        'type': block['opcode'],
        'next': block['next'],
        'parent': block['parent'],
        'children': [],
        'isLeaf': False
    }
    
    # First check for substacks (control flow)
    for stackid in ['SUBSTACK', 'SUBSTACK2']:
        if stackid in block['inputs']:
            child_id = block['inputs'][stackid][1]
            # pass in the child stack array into the new one
            child_stack = build_stack_tree(curr_block['children'], all_blocks, child_id)
            curr_block['children'].append(child_stack)


    # If there's no next block, return the current stack
    # Check if there's a next block
    if block['next'] is None:
        # A lone block is at the top level and has no next 
        if block['topLevel']:
            return [curr_block]
        
        # At the end of a tree
        if len(curr_block['children']) == 0:
            curr_block['isLeaf'] = True
        
        # At a normal end of a branch
        return curr_stack.insert(0,curr_block)

    # Otherwise, go to the next block
    build_stack_tree(curr_stack, all_blocks, block['next'])
    
    # prepend because it's recursively added from the bottom up
    curr_stack.insert(0,curr_block)

    return curr_stack

In [103]:
def build_stack_sequence(all_blocks, tid):
    """
    currently only looks at the next block and doesn't consider nesting.
    refactor to include nested paths/sequences by doing a depth-first traversal
    """
    # This produces ONE depth first traversal from top -> bottom
    stack = []
    curr_id = tid
    stack.append(all_blocks[curr_id]['opcode'])
    while curr_id is not None:
        curr_id = all_blocks[curr_id]['next']
        if curr_id != None:
            stack.append(all_blocks[curr_id]['opcode'])
    return stack

In [105]:
def build_rnn_sequences(stack, seq_len):
    """
    produces a list of rnn-ready sequences given an input stack sequence
    each stack is simply ONE depth first traversal stack
    this produces iterations through the stack
    """
    seqs = []
    for i in range(seq_len):
        seq = stack[i:seq_len]
        if(len(seq)<seq_len):
            seq.extend(['none'] * (seq_len - len(seq)))
        seqs.append(seq)
    return seqs

In [97]:
# some utils
def stack_is_flat(stack):
    return all([len(block['children']) == 0 for block in stack])

def stack_is_nested(stack):
    return not stack_is_flat(stack)

In [98]:
# 1. Grab the top blocks as reference points
tops = get_top_blocks(sprites[0]['blocks'])
topids = [bid for bid,block in tops.items()]

In [115]:
def depth_first_traversal(curr_path,all_blocks,pid):
    curr_block = all_blocks[0]
    if curr_block['next'] is None and len(curr_block['children']) == 0:
        print("Got to the end of a path")
        return curr_path
    
    if len(curr_block['children']) > 0:
        for c in curr_blcok['children']:
            depth_first_traversal(curr_path, all_blocks, c.id)
        
    return curr_path

In [111]:
sample_blocks = traverse_blocks(sprites[0]['blocks'])

In [114]:
depth_first_traversal([],sample_blocks,topids[0])

TypeError: list indices must be integers or slices, not str

In [100]:
for tid in topids:
    print("Stack: {}".format(tid))
    s = build_stack_sequence(sample_blocks, tid)
    seqs = build_rnn_sequences(s,len(s))

Stack: 2U2~47{QSP_bc*?H9{Z/
Stack: ck}JxTe~bq1WN`,gHL#W
Stack: 84:+X?A:D.A,^gF}uA4r


In [101]:
print(topids)

['2U2~47{QSP_bc*?H9{Z/', 'ck}JxTe~bq1WN`,gHL#W', '84:+X?A:D.A,^gF}uA4r']


In [102]:
def extract(stack):
    """
    Given a stack, this will return all of the paths from
    the top node to its leaves.
    
    Performs a depth-first traversal and saves each of the paths.
    Have to consider CHILDREN and NEXT.
    Only after going thorugh all of children do you go to next
     
    """
    paths = []
    
    return paths

In [63]:
def extract_helper(stack, all_paths):
    """
    recursive helper for the extract method
    given a starting node, it'll go through and look for all its children
    """
    path = []
    
    
    all_paths.append(path)
    return all_paths

## main idea
1. do a depth first traversal on each tree for each stack
2. save each path in the search
3. learn to predict the last node in each path! pad it by the max length of each path.