# Counting usage of lemmas 



In [1]:
from __future__ import unicode_literals
import re

In [2]:
agda_file_location = 'agda-playground/fingertree/FingerTree-msc-dummy.agda'

#requirements are that all declarations are separated by at least a newline

In [3]:
# regular expressions

#comment in the begining
comment_begining = r'^\s*--'

#imports 
imports_begining = r'^(\s*open)|(\s*open\s*import)|(\s*import)'

#infixation
infix_begining = r'^\s*infix[lr]*'

#instance
instance_begining = r'^\s*instance'

#data
data_begining = r'^\s*data'

#mutual
mutual_begining = r'^\s*mutual'

#module 
module = r'^\s*module'

#declarations
decl = r'^\s*(postulate){0,1}\s*([^\s:]+)\s*:'

#lemmas 
lemmas = [r'refl',
          r'sym',
          r'cong',
          r'trans',
          r'ε-left',
          r'ε-right',
          r'∙-assoc',
          r'[^\s:\(]+lemma[^\s:\)]+']

basic_lemmas = [r'refl',
          r'sym',
          r'cong',
          r'trans',
          r'ε-left',
          r'ε-right',
          r'∙-assoc']

                

In [4]:
import codecs

agda_file = codecs.open(agda_file_location, 'r', encoding='utf-8')
lines = agda_file.readlines()

In [5]:
def remove_all_lines(reg, lines):
    filter_fun = lambda x : (re.match(reg, x) == None)
    return [x for x in lines if filter_fun(x)]

def remove_ending_comment(line):
    return line.split('--')[0]

def strip_ending_comments(lines):
    return [remove_ending_comment(x) for x in lines]


In [6]:
lines = remove_all_lines(comment_begining, lines)
lines = remove_all_lines(imports_begining, lines)
lines = remove_all_lines(infix_begining, lines)
lines = remove_all_lines(mutual_begining, lines)
lines = remove_all_lines(module, lines)
lines = strip_ending_comments(lines)

In [7]:
# at least * all top level declarations 

#these are toplogically sorted
declarations = [re.search(decl, x).group(1) for x in lines if re.search(decl,x) is not None]

In [8]:
#splitting in sections 

new_lines = [i for (i, x) in enumerate(lines) if x == u'\n']

#partition to sections

def partition(new_lines, lines):
    parts = []
    N = len (new_lines)
    for i in range(1, N):
        parts.append(lines[new_lines[i-1] + 1 : new_lines[i]])
    return parts

parts = partition(new_lines, lines)

#remove empty lists 
parts = [x for x in parts if len(x) > 0]

#select only sections that are not data declarations
def select_parts_begining(regex, parts):
    filter_fun = lambda x : (re.match(regex, x[0]) == None) 
    return [x for x in parts if filter_fun(x)]

decl_parts = select_parts_begining(data_begining, parts)

#select all sections that are not instance declarations -- makes things easier
decl_parts = select_parts_begining(instance_begining, decl_parts)

#identify declarations in each part
def parts_dict(parts):
    p_dict = {}
    for part in parts:
        line = part[0]
        decl_match = re.search(decl, line)
        if (decl_match is None):
            print part
        declr = decl_match.group(2)
        assert declr not in p_dict
        p_dict[declr] = part
    return p_dict

def find_lemmas(decl, part):
    results = []
    for regex in lemmas:
        for line in part:
            results += re.findall(regex, line)
    results = [x for x in results if not (x == decl)]
    return results
    

In [9]:
decl_dict = parts_dict(decl_parts)

In [10]:
def lemma_dict(decl_dict):
    result = {}
    for key in decl_dict:
        result[key] = find_lemmas(key, decl_dict[key])
    return result

In [11]:
lemma_usage = lemma_dict(decl_dict)

In [12]:
flattened_lemmas = {}

def flatten_list(l):
    result = []
    if type(element) is str:
        result.append(element)
    else:
        result += element
    return result

def flatten_lemma(decl, lemma_dict):
    if decl in flattened_lemmas:
        return flattened_lemmas[decl]
    
    if decl in lemma_dict:
        used_lemmas = lemma_dict[decl]
    else:
        used_lemmas = []
        
    for (i, lemma) in enumerate(used_lemmas):
        if lemma not in basic_lemmas:
            flat = flatten_lemma(lemma, lemma_dict)
            used_lemmas[i:i+1] = flat
             
    flattened_lemmas[decl] = used_lemmas
    return used_lemmas    

In [13]:
flatten_lemma(u'splitDigit-size-lemma0', lemma_usage)

def flatten_dict(lemma_usage):
    for key in lemma_usage:
        flatten_lemma(key, lemma_usage)

flatten_dict(lemma_usage)

In [14]:
flattened_lemmas

{u'FunExt': [],
 u'FunExt\u2082': [],
 u'_\u25b7_': [u'sym',
  u'cong',
  u'\u03b5-left',
  u'\u03b5-left',
  u'sym',
  u'sym',
  u'cong',
  u'\u2219-assoc',
  u'\u2219-assoc',
  u'\u2219-assoc',
  u'sym',
  u'cong',
  u'\u2219-assoc',
  u'sym',
  u'sym',
  u'cong',
  u'\u2219-assoc',
  u'\u2219-assoc',
  u'\u2219-assoc',
  u'sym',
  u'cong',
  u'\u2219-assoc',
  u'sym',
  u'cong',
  u'\u2219-assoc',
  u'sym',
  u'sym',
  u'cong',
  u'\u2219-assoc',
  u'\u2219-assoc',
  u'\u2219-assoc',
  u'sym',
  u'sym',
  u'sym',
  u'cong',
  u'cong',
  u'\u2219-assoc',
  u'\u2219-assoc',
  u'sym',
  u'cong',
  u'\u2219-assoc',
  u'sym',
  u'sym',
  u'cong',
  u'\u2219-assoc',
  u'\u2219-assoc',
  u'\u2219-assoc'],
 u'_\u25c1_': [u'\u03b5-right',
  u'\u2219-assoc',
  u'\u2219-assoc',
  u'\u2219-assoc',
  u'sym',
  u'cong',
  u'\u03b5-left',
  u'refl',
  u'sym',
  u'cong',
  u'cong',
  u'\u2219-assoc',
  u'\u2219-assoc',
  u'\u2219-assoc'],
 u'append': [u'\u03b5-right'],
 u'append-measure-lemma': [u'

In [15]:
lemma_usage

{u'FunExt': [],
 u'FunExt\u2082': [],
 u'_\u25b7_': [u'sym',
  u'cong',
  u'\u03b5-left',
  u'\u03b5-left',
  u'sym',
  u'sym',
  u'cong',
  u'\u2219-assoc',
  u'\u2219-assoc',
  u'\u2219-assoc',
  u'sym',
  u'cong',
  u'\u2219-assoc',
  u'sym',
  u'sym',
  u'cong',
  u'\u2219-assoc',
  u'\u2219-assoc',
  u'\u2219-assoc',
  u'sym',
  u'cong',
  u'\u2219-assoc',
  u'sym',
  u'cong',
  u'\u2219-assoc',
  u'sym',
  u'sym',
  u'cong',
  u'\u2219-assoc',
  u'\u2219-assoc',
  u'\u2219-assoc',
  u'sym',
  u'sym',
  u'sym',
  u'cong',
  u'cong',
  u'\u2219-assoc',
  u'\u2219-assoc',
  u'sym',
  u'cong',
  u'\u2219-assoc',
  u'sym',
  u'sym',
  u'cong',
  u'\u2219-assoc',
  u'\u2219-assoc',
  u'\u2219-assoc'],
 u'_\u25c1_': [u'\u03b5-right',
  u'\u2219-assoc',
  u'\u2219-assoc',
  u'\u2219-assoc',
  u'sym',
  u'cong',
  u'\u03b5-left',
  u'refl',
  u'sym',
  u'cong',
  u'cong',
  u'\u2219-assoc',
  u'\u2219-assoc',
  u'\u2219-assoc'],
 u'append': [u'\u03b5-right'],
 u'append-measure-lemma': [u'