# Match API
Ok, it would really help if we had a named structure for `a` and `b` so that we can more easily make recommendations on matching. Ideally we'll just return a JSON of recommendations, like a list of node names with the changed attributes. That's it:

```
[
{ node: c.34, content: [...], start ... }, <=== this one's changed
{ node: s.14, literal: ... }, <=== this one's changed
{ new: True, node: s.2, literal: ... },   <=== this one's new
]
```

### Stage 0: ??? muck around with setup
Put all the nodes from `a` and from `b` into a dictionary keyed by node name, just like we do in the database. This makes the matching a lot easier, because we can look nodes up directly instead of repeatedly crawling the nested JSON tree.

### Stage 1: isolate what has changed from `a` to `b`
 - goal: diff `a` and `b` and figure out the smallest parts of `b` that are changed 
 - match everything in `a` and `b` that has not changed, sort out changed bits of `b` and unmatched bits of `a` 
 
### Stage 2: make a list of nodes in `b`
 - initially assume that all changed nodes in `b` are new
 - make a list of candidate nodes from `a` for matching


In [153]:
import difflib
import lilgit_parser as lilgit

# a is the old version of the source, b is the new version.
# In this sample, b drops the `if` line and changes the literal 9 to 6.

a = '''for i in range(0,10):
    if i % 2 == 0:
        print(i + 9)'''
b = '''for i in range(0,10):
        print(i + 6)'''

In [154]:
def nodeToDict(node, dic, prefix = "", parent = None):
    """Flatten the tree rooted at `node` into `dic`, keyed by node name.

    A node keeps its own 'name' when it has one; otherwise it is named
    `prefix` followed by the number of entries already in `dic`. A truthy
    `parent` name is stored under the node's 'parent' key. Every child in
    'content' is registered recursively and the 'content' list is replaced
    by the list of child names, so `node` is modified in place.

    Returns the name under which `node` was registered.
    """
    key = node['name'] if 'name' in node else prefix + str(len(dic))
    dic[key] = node

    if parent:
        node['parent'] = parent

    if 'content' in node:
        # The node is registered before its children, so a generated name
        # reflects how many nodes were registered ahead of it.
        node['content'] = [
            nodeToDict(child, dic, prefix, key) for child in node['content']
        ]

    return key

In [155]:
# Parse the old (a) and new (b) source strings into ASTs.
A, B = lilgit.parse(a), lilgit.parse(b)

# Flatten each AST into its own name -> node dictionary; the "a." / "b."
# prefixes are used for the names generated on each side.
dic_a, dic_b = {}, {}
nodeToDict(A, dic_a, "a.")
nodeToDict(B, dic_b, "b.")

'b.0'

In [156]:
# Per-side accumulators that listLiterals appends to: node names in
# literal_*, and the token text for the same index in tokens_*.
literal_a, tokens_a = [], []
literal_b, tokens_b = [], []

def listLiterals(lst, dic, tok_list):
    """Collect the token-bearing nodes of `dic`, in dictionary order.

    A node contributes its 'literal' when that value is truthy; failing that,
    it contributes its 'syntok' if the key is present. For every contributing
    node the name is appended to `lst` and the token to `tok_list`, so the two
    lists stay index-aligned. Nodes with neither are skipped. Returns None.
    """
    for name, node in dic.items():
        if node.get('literal'):
            token = node['literal']
        elif 'syntok' in node:
            token = node['syntok']
        else:
            continue
        lst.append(name)
        tok_list.append(token)
        
def printLiterals(lst, dic):
    """Print the tokens of the nodes named in `lst`, concatenated in order.

    Uses the same rule as listLiterals to pick a node's text: its 'literal'
    when that value is truthy, otherwise its 'syntok'. The previous version
    tested only for the presence of the 'literal' key, so a node carrying
    'literal': None next to a 'syntok' raised TypeError, and an empty literal
    hid the syntok. A node with neither contributes nothing.

    Args:
        lst: node names, in the order they should be printed.
        dic: name -> node dictionary the names are looked up in.
    """
    parts = []
    for name in lst:
        item = dic[name]
        text = item.get('literal')
        if not text:
            # Missing, None or empty literal: fall back to the syntax token.
            text = item.get('syntok') or ''
        parts.append(text)
    print(''.join(parts))
    
# Fill the per-side name/token lists from the flattened dictionaries.
listLiterals(literal_a, dic_a, tokens_a)
listLiterals(literal_b, dic_b, tokens_b)
# Print the collected tokens of a, a banner line, then the tokens of b.
printLiterals(literal_a, dic_a)
print("\n############################\n")
printLiterals(literal_b, dic_b)

for i in range(0,10):
    if i % 2 == 0:
        print(i + 9)

############################

for i in range(0,10):
        print(i + 6)


In [162]:
'''
Identify matching sequences in tokens a and b
'''
from difflib import SequenceMatcher

# The match offsets below index literal_a/literal_b, so the token lists must
# be the full, index-aligned lists produced by listLiterals. Another cell
# rebinds tokens_a/tokens_b to the unmatched tokens only; running this cell
# after that one would silently pair the wrong nodes, so fail loudly instead.
if len(tokens_a) != len(literal_a) or len(tokens_b) != len(literal_b):
    raise ValueError(
        "tokens_a/tokens_b are out of sync with literal_a/literal_b; "
        "re-run the cell that calls listLiterals first"
    )

# autojunk=False: the default heuristic treats frequently repeated items as
# junk once a sequence has 200+ items, which for source tokens means
# whitespace and punctuation, and that degrades the matching.
s = SequenceMatcher(None, tokens_a, tokens_b, autojunk=False)
matchList = s.get_matching_blocks()

# Each block states tokens_a[a_start:a_start+count] == tokens_b[b_start:b_start+count].
# The final block returned by difflib is a zero-length sentinel and is a no-op here.
for match in matchList:
    (a_start, b_start, count) = match
    for i in range(0, count):
        name_a = literal_a[a_start + i]
        name_b = literal_b[b_start + i]
        node_a = dic_a[name_a]
        node_b = dic_b[name_b]
        # Cross-link the pair: each node records its partner's name.
        node_a['match'] = name_b
        node_b['match'] = name_a

In [None]:
'''
Run matches up the hierarchy where possible
'''
# TODO: unfinished, never-executed cell. `literals` and `dic` are not defined
# at notebook scope (they look like the parameter names of listUnmatched), so
# bind them to one side's name list and dictionary before running this.
for name in literals:
    if 'match' in dic[name]:
        # TODO: propagate the match to the parent nodes; not implemented yet.
        pass

In [158]:
'''
Put all unmatched literals in a bucket.
'''

def listUnmatched(literals, dic):
    """Return the unmatched names from `literals` together with their tokens.

    A name is unmatched when its node in `dic` has no 'match' key. Its token
    is the node's 'syntok' when that key is present, otherwise its 'literal'.

    Returns:
        (names, tokens): two index-aligned lists in the order of `literals`.
    """
    pending = [name for name in literals if 'match' not in dic[name]]
    texts = [
        dic[name]['syntok'] if 'syntok' in dic[name] else dic[name]['literal']
        for name in pending
    ]
    return pending, texts

# NOTE(review): these two lines rebind tokens_a / tokens_b to the unmatched
# tokens only. The SequenceMatcher cell reads the same names and uses the
# resulting offsets to index literal_a / literal_b, so re-running that cell
# after this one pairs the wrong nodes. Consider separate names here.
unmatched_a, tokens_a = listUnmatched(literal_a, dic_a)
unmatched_b, tokens_b = listUnmatched(literal_b, dic_b)
# Print what is left over on each side, separated by a banner line.
printLiterals(unmatched_a, dic_a)
print("\n############################\n")
printLiterals(unmatched_b, dic_b)

if i % 2 == 0:
    9

############################

6


In [159]:
'''
Assume all new tokens in B are new. Try to match them.
'''


'\nAssume all new tokens in B are new. Try to match them.\n'

In [160]:
'''
Finalize the recommendation
'''
# Nodes flagged as new by finalizeMatches, in the order they were first reached.
changes = []

def finalizeMatches(node, dic):
    """Walk from `node` up through its ancestors and record the unmatched ones.

    A node without a 'match' key is flagged with 'new': True and appended to
    the module-level `changes` list. A node with a 'match' key gets the
    placeholder name 'TODO'. The walk follows 'parent' names through `dic`
    until it reaches a node without a 'parent' key.

    A node is appended to `changes` at most once. Siblings share ancestors,
    so without this check every shared ancestor was reported once per
    unmatched descendant. The check is by identity against `changes` itself,
    so it stays correct when `changes` is reset and the cell is re-run.

    Args:
        node: the node dictionary to start from.
        dic: name -> node dictionary used to resolve 'parent' names.
    """
    while True:
        if 'match' in node:
            # Placeholder value from the original code; presumably the matched
            # node's real name belongs here. TODO confirm.
            node['name'] = 'TODO'
        else:
            node['new'] = True
            if not any(node is seen for seen in changes):
                changes.append(node)
        if 'parent' not in node:
            break
        node = dic[node['parent']]
        
# Starting from every unmatched literal of b, walk up its ancestor chain so
# that finalizeMatches can flag the unmatched nodes along the way.
for name in unmatched_b:
    node = dic_b[name]
    finalizeMatches(node, dic_b)  
    
# Last expression of the cell: the notebook displays the collected list.
changes

[{'type': 'Num',
  'content': [],
  'literal': '6',
  'start': {'pos': 40, 'line': 1, 'ch': 18},
  'end': {'line': 1, 'ch': 19, 'pos': 41},
  'parent': 'b.32',
  'new': True},
 {'type': 'BinOp',
  'content': ['b.33', 'b.35', 'b.36', 'b.38', 'b.39'],
  'literal': None,
  'start': {'pos': 36, 'line': 1, 'ch': 14},
  'end': {'line': 1, 'ch': 19, 'pos': 41},
  'parent': 'b.28',
  'new': True},
 {'type': 'Call',
  'content': ['b.29', 'b.31', 'b.32', 'b.40'],
  'literal': None,
  'start': {'pos': 30, 'line': 1, 'ch': 8},
  'end': {'line': 1, 'ch': 19, 'pos': 42},
  'parent': 'b.27',
  'new': True},
 {'type': 'Expr',
  'content': ['b.28'],
  'literal': None,
  'start': {'pos': 30, 'line': 1, 'ch': 8},
  'end': {'line': 1, 'ch': 19, 'pos': 42},
  'parent': 'b.1',
  'new': True},
 {'type': 'For',
  'content': ['b.2',
   'b.3',
   'b.4',
   'b.6',
   'b.7',
   'b.8',
   'b.9',
   'b.17',
   'b.18',
   'b.19',
   'b.20',
   'b.21',
   'b.22',
   'b.23',
   'b.24',
   'b.25',
   'b.26',
   'b.27']