In [16]:
import os, os.path, qgrid, math
import pandas as pd

from framenet.builder         import build
from framenet.ecg.generation  import unstack_all
from framenet.util            import (aget, iget, compose, groupby, curry, 
                                      groupwise, remove, reduceby, flatmap, merge, isnan)
from framenet.data.annotation import (annoset_for, to_list, Pattern, Graph, 
                                      make_groups, to_layers, to_node)
# from collections             import defaultdict
from pprint                   import pprint, pformat
from collections              import namedtuple


def lus_for(frame_name):
    """Build a two-column pd.DataFrame describing the annotations of `frame_name`.

    Each annotation of the frame contributes one row with the columns
    'annotationSet.ID' and 'annotationSet.LU', filled from the values that
    `aget('ID', 'lu')` extracts from the annotation.
    """
    columns   = ('annotationSet.ID', 'annotationSet.LU')
    id_and_lu = aget('ID', 'lu')   # presumably reads the `ID` and `lu` attributes -- confirm in framenet.util
    fn, _     = build()
    frame     = fn.get_frame(frame_name)

    records = []
    for annotation in frame.annotations:
        records.append(dict(zip(columns, id_and_lu(annotation))))
    return pd.DataFrame(records)
    

def core_FE_for(frame_name):
    """Return the set of integer IDs of the elements of `frame_name` whose
    `coreType` is exactly 'Core'.
    """
    fn, _    = build()
    elements = fn.get_frame(frame_name).elements
    return {int(element.ID) for element in elements if element.coreType == 'Core'}
    
    
def get_frame_df(frame_name):
    """Return the label records of `frame_name` as a pd.DataFrame.

    Rows of the cached LU frame are restricted to the frame's annotation sets
    and outer-joined with `lus_for(frame_name)` on 'annotationSet.ID'.
    Two normalisations are then applied:

    * rows whose 'label.name' is 'Target' get the row's 'annotationSet.LU'
      as their label name;
    * rows whose 'label.feID' is one of the frame's core FE ids get
      'label.coreFE' set to True (other rows are left without a value).
    """
    all_labels = get_lu_df()
    annosets   = annoset_for(frame_name)
    core_ids   = core_FE_for(frame_name)

    in_frame = all_labels.loc[all_labels['annotationSet.ID'].isin(annosets)]
    merged   = in_frame.merge(lus_for(frame_name), how='outer', on='annotationSet.ID')

    # Replace the generic 'Target' label by the lexical unit of the row.
    is_target = merged['label.name'] == 'Target'
    merged.loc[is_target, 'label.name'] = merged['annotationSet.LU']

    # Flag labels that refer to a core frame element.
    is_core = merged['label.feID'].isin(core_ids)
    merged.loc[is_core, 'label.coreFE'] = True

    return merged


def get_lu_df(base='.'):
    """Return the LU records as a pd.DataFrame, cached as a pickle under `base`.

    If `<base>/lu.pkl` is readable it is loaded and returned. Otherwise the
    frame is built from `unstack_all(lu_sents())`, written to that path and
    returned.
    """
    cache_path = os.path.join(base, 'lu.pkl')

    if not os.access(cache_path, os.R_OK):
        # Cache miss: build the frame and persist it for the next call.
        # NOTE(review): `lu_sents` is not among the imports visible in this
        # notebook -- confirm where it is defined, otherwise this path fails.
        built = pd.DataFrame(list(unstack_all(lu_sents())))
        built.to_pickle(cache_path)
        return built

    # Cache hit. Only load pickles this notebook wrote itself: unpickling
    # runs arbitrary code from the file.
    return pd.read_pickle(cache_path)


def test_get_frame_df(frame_name='Cause_motion', count=10788):
    """Smoke test: `get_frame_df(frame_name)` must yield exactly `count` rows.

    Parameters
    ----------
    frame_name : name of the frame to load.
    count      : expected number of rows in the resulting DataFrame.

    Raises
    ------
    AssertionError if the row count differs from `count`.
    """
    cm_df = get_frame_df(frame_name)
    # Compare against the `count` argument (it used to be ignored in favour of
    # a hard-coded literal) and report both numbers on failure.
    assert len(cm_df) == count, 'Test failed: expected %d rows, got %d' % (count, len(cm_df))
    
%time test_get_frame_df()

These lexical units have already been built.
CPU times: user 3.46 s, sys: 1.95 s, total: 5.41 s
Wall time: 5.89 s


In [2]:
cm_df = get_frame_df('Cause_fluidic_motion')

In [7]:
qgrid.show_grid(cm_df, grid_options={'forceFitColumns': False, 'defaultColumnWidth': 100})

In [5]:
gs = make_groups(cm_df.to_dict(orient='records'))

In [7]:
# pprint([layers(g) for g in gs][:10])
# gs[7], to_layers(gs[7])
# ll = [to_layers(g) for g in gs]
# len(gs[0])
gs[0]

[[{'annotationSet.ID': 143329,
   'annotationSet.LU': 'splatter.v',
   'annotationSet.status': 'MANUAL',
   'label.coreFE': True,
   'label.end': 103.0,
   'label.feID': 5802.0,
   'label.itype': nan,
   'label.name': 'Agent',
   'label.start': 103.0,
   'layer.name': 'FE',
   'layer.rank': 1,
   'sentence.ID': 248787,
   'sentence.aPos': 32640926,
   'sentence.corpID': nan,
   'sentence.docID': nan,
   'sentence.paragNo': nan,
   'sentence.sentNo': 0,
   'text.contents': 'Brandon then told one of the other group : ` Order your food and leave if you do n\'t want trouble , or I \'ll splatter your face across the road . "'},
  {'annotationSet.ID': 143329,
   'annotationSet.LU': 'splatter.v',
   'annotationSet.status': 'MANUAL',
   'label.coreFE': nan,
   'label.end': 103.0,
   'label.feID': nan,
   'label.itype': nan,
   'label.name': 'Ext',
   'label.start': 103.0,
   'layer.name': 'GF',
   'layer.rank': 1,
   'sentence.ID': 248787,
   'sentence.aPos': 32640926,
   'sentence.corpID': na

## Patterns

In [9]:
# Ext T Obj Dep+

# Getter for the 'GF' and 'Target' entries of a layer dict (None when absent).
gf_tgt = compose(iget('GF', 'Target', default=None))


def gf_or_target(group):
    """Return the 'GF' value of `group`, falling back to its 'Target' value
    when the 'GF' value is missing or falsy."""
    gf, target = gf_tgt(group)
    return gf if gf else target

# Test
# Hand-built group used as a fixture: one dict per position, where the target
# position only carries a 'Target' key and the others carry FE/GF/PT labels.
g1 = [{'FE': 'Agent', 'GF': 'Ext', 'PT': 'NP'},
      {'Target': 'catapult.v'},
      {'FE': 'Theme', 'GF': 'Obj', 'PT': 'NP'},
      {'FE': 'Path',  'GF': 'Dep', 'PT': 'PP'},
      {'FE': 'Goal',  'GF': 'Dep', 'PT': 'PP'}]    
# Each position maps to its GF, except the target which maps to the LU name.
assert list(map(gf_or_target, g1)) == ['Ext', 'catapult.v', 'Obj', 'Dep', 'Dep']


# g0 = to_layers(gs[0])
# assert list(map(gf_or_target, g0)) == ['Ext', 'catapult.v', 'Obj', 'Dep', 'Dep']

    
def get_matcher(vertices):
    """Build a `Pattern` from a flat sequence of vertex labels.

    Consecutive labels (as produced by `groupwise(2)`) become edges
    `source -> [successor]`. A successor of '+' is turned into a self-edge
    `source -> [source]` instead.
    """
    def to_edge(pair):
        source, successor = pair
        # '+' is not a real successor: it points the vertex back at itself.
        return source, [source if successor == '+' else successor]

    edges = [to_edge(pair) for pair in groupwise(2)(vertices)]
    return Pattern(Graph(vertices=vertices, edges=edges))


matcher = get_matcher(['Ext', '_', 'Obj', 'Dep', '+'])

@curry
def match(matcher, group):
    """Match `group` against `matcher`.

    Every item of `group` is first reduced to its GF-or-target label with
    `gf_or_target`; the resulting (lazy) label sequence is handed to
    `matcher.match`, whose result is returned unchanged.
    """
    labels = map(gf_or_target, group)
    return matcher.match(labels)

# assert match(matcher, g0)
assert match(matcher, g1)

# Fixture in which the FE labels and the GF/PT labels sit in separate dicts.
# The closing bracket must not be followed by a comma: a trailing comma would
# turn `g3` into a 1-tuple wrapping the list instead of the list itself.
g3 = [{'FE': 'Theme'},
      {'FE': 'Source'},
      {'FE': 'Agent'},
      {'GF': 'Ext', 'PT': 'NP'},
      {'GF': 'Dep', 'PT': 'PP'}]

## $Pattern_1$: `Ext _ Obj Dep+`

In [12]:
# Pattern 1: Ext, any target ('_'), Obj, then Dep; the trailing '+' makes
# get_matcher add a Dep -> Dep self-edge.
vertices1 = ['Ext', '_', 'Obj', 'Dep', '+']
pattern1  = get_matcher(vertices1)
# Keep the groups whose layer view is accepted by the pattern.
matching1 = [g for g in gs if match(pattern1, to_layers(g))] 
count_matching1 = len(matching1)

# Report the absolute count and the share of all groups in `gs`.
print('For %s: %d, %.2f%%' % (vertices1, count_matching1, 100.0 * count_matching1 / len(gs)))

For ['Ext', '_', 'Obj', 'Dep', '+']: 65, 45.45%


In [13]:
from framenet.data.annotation import write_csv

write_csv('Cfm_pattern_1_core', matching1)

[(Link(source=Node(id='0:Agent', FE='Agent', GF='Ext', core=True), target=Node(id='1:Fluid', FE='Fluid', GF='Obj', core=True)),
  49),
 (Link(source=Node(id='0:Agent', FE='Agent', GF='Ext', core=True), target=Node(id='1:Goal', FE='Goal', GF='Obj', core=True)),
  3),
 (Link(source=Node(id='0:Cause', FE='Cause', GF='Ext', core=True), target=Node(id='1:Area', FE='Area', GF='Obj', core=True)),
  1),
 (Link(source=Node(id='0:Cause', FE='Cause', GF='Ext', core=True), target=Node(id='1:Fluid', FE='Fluid', GF='Obj', core=True)),
  4),
 (Link(source=Node(id='0:Cause', FE='Cause', GF='Ext', core=True), target=Node(id='1:Goal', FE='Goal', GF='Obj', core=True)),
  1),
 (Link(source=Node(id='1:Area', FE='Area', GF='Obj', core=True), target=Node(id='2:Fluid', FE='Fluid', GF='Dep', core=True)),
  1),
 (Link(source=Node(id='1:Fluid', FE='Fluid', GF='Obj', core=True), target=Node(id='2:Area', FE='Area', GF='Dep', core=True)),
  3),
 (Link(source=Node(id='1:Fluid', FE='Fluid', GF='Obj', core=True), targ

## $Pattern_2$: `Ext _ Dep+`

In [14]:
# Pattern 2 is built from an explicit edge list rather than via get_matcher.
vertices2 = ['Ext', '_', 'Dep']
# Each entry is (label, [labels allowed to follow it]). After 'Dep' and after
# any of INI/CNI/DNI/None only those null-instantiation labels or None may
# follow; 'Dep' may additionally repeat.
# NOTE(review): None presumably stands for a position without a GF label --
# confirm against Pattern.match.
edges2    = (
    ('Ext', ['_']),
    ('_', ['Dep']),
    ('Dep', ['Dep', None, 'INI', 'CNI', 'DNI']),
    ('CNI', ['INI', 'CNI', 'DNI', None]),
    ('DNI', ['INI', 'CNI', 'DNI', None]),
    ('INI', ['INI', 'CNI', 'DNI', None]),
    (None, ['INI', 'CNI', 'DNI', None]),
)
pattern2        = Pattern(Graph(vertices2, edges2))
# Keep the groups whose layer view is accepted by the pattern.
matching2       = [g for g in gs if match(pattern2, to_layers(g))] 
count_matching2 = len(matching2)

# Report the absolute count and the share of all groups in `gs`.
print('For %s: %d, %.2f%%' % (vertices2, count_matching2, 100.0 * count_matching2 / len(gs)))

For ['Ext', '_', 'Dep']: 27, 18.88%


In [29]:
write_csv('Cfm_pattern_2_core', matching2, noncore=False)

Written 16 records.


In [17]:
# All groups accepted by pattern 1 or pattern 2.
m12 = matching1 + matching2
# Node view (via to_node) of every group that neither pattern accepted.
ns = [to_node(g) for g in gs if g not in m12]
# Show the first two as a sample.
ns[:2]

[[Node(id='0:Fluid', FE='Fluid', GF='Ext', core=True),
  Node(id='1:Goal', FE='Goal', GF='Obj', core=True),
  Node(id='2:Agent', FE='Agent', GF='CNI', core=True)],
 [Node(id='0:Agent', FE='Agent', GF='Ext', core=True),
  Node(id='1:Fluid', FE='Fluid', GF='Obj', core=True),
  Node(id='2:Path', FE='Path', GF='Dep', core=True),
  Node(id='3:Goal', FE='Goal', GF='DNI', core=True)]]

In [26]:
from IPython.display import HTML
from collections import Counter, defaultdict


def rec(ns):
    """Summarise a sequence of nodes as a tuple of 'GF: FE' strings, one per
    node, in the original order."""
    labels = []
    for node in ns:
        labels.append('%s: %s' % (node.GF, node.FE))
    return tuple(labels)

gf_fe_tgt = iget('GF', 'FE', 'Target', default='')

def tag(elem):
    """Return a function that wraps its argument in an `<elem>` HTML element.

    The returned function takes the element content `t` (converted with
    `str`) and optional keyword arguments, which are rendered as
    `name="value"` attributes separated by single spaces. Neither content nor
    attribute values are escaped.
    """
    def render(t, **kwargs):
        if not kwargs:
            return '<%s>%s</%s>' % (elem, str(t), elem)
        attributes = ' '.join(
            '%s="%s"' % (name, value) for name, value in kwargs.items()
        )
        return '<%s %s>%s</%s>' % (elem, attributes, str(t), elem)
    return render


# One renderer per HTML element used in this notebook.
table, tr, td, th, b, p, div, ul, li = [
    tag(name)
    for name in ('table', 'tr', 'td', 'th', 'b', 'p', 'div', 'ul', 'li')
]

def gf_or_(group):
    """Render one layer dict as a pattern label.

    A layer with a non-empty 'Target' value is rendered as 'v'; any other
    layer is rendered as '<b>GF</b>: FE' using its 'GF' and 'FE' values.
    """
    gf, fe, target = gf_fe_tgt(group)
    if target:
        return 'v'
    return '%s: %s' % (b(gf), fe)

cs = Counter([rec(n) for n in ns]).most_common()

arrow = ' \u2192 '

def t(cs):
    """Render `(pattern, sentences)` pairs as an HTML table string.

    Parameters
    ----------
    cs : iterable of `(pattern, sentences)` pairs, where `pattern` is a
         sequence of label strings and `sentences` a sized iterable of the
         sentence texts showing that pattern.

    Returns
    -------
    str : a `<table>` with a header row and one row per pair holding the
          1-based rank, the number of sentences, the labels joined by
          `arrow`, and the sentences as a bullet list.

    Sentence texts are HTML-escaped, so characters such as `&` or `<` in the
    corpus text are shown literally instead of corrupting the markup. Pattern
    labels are inserted as-is because callers pass markup in them (e.g. the
    `<b>` elements produced by `gf_or_`).
    """
    from html import escape   # local import: keeps this cell self-contained

    right = 'text-align: right;'
    left  = 'text-align: left;'
    block = 'white-space: nowrap;'

    def row(rank, pattern, sentences):
        # quote=False: the text lands in element content, not in an attribute.
        items = ''.join(li(escape(str(s), quote=False)) for s in sentences)
        return ''.join((td(rank, style=right),
                        td(len(sentences), style=right),
                        td(arrow.join(pattern), style=block),
                        td(ul(items), style=left)))

    rows   = [row(i + 1, pattern, sentences)
              for i, (pattern, sentences) in enumerate(cs)]
    header = tr(''.join(map(th, ('', 'fr.', 'Pattern', 'Text'))))
    return table(header + '\n'.join(map(tr, rows)))

In [27]:
# For every group that neither pattern 1 nor pattern 2 accepted, pair its
# rendered layer pattern (a tuple of gf_or_ labels) with the sentence text
# taken from the group's first record.
gts = [(tuple(gf_or_(l) for l in to_layers(g)), g[0][0]['text.contents']) 
        for g in gs if g not in m12]

def group_patterns(gts):
    """Group sentences by pattern.

    `gts` is an iterable of `(pattern, sentence)` pairs. The result is a
    `defaultdict(list)` mapping each pattern to its sentences, in the order
    in which they were encountered.
    """
    by_pattern = defaultdict(list)
    for pattern, sentence in gts:
        by_pattern[pattern].append(sentence)
    return by_pattern

In [28]:
HTML(t(sorted(group_patterns(gts).items(), key=lambda kv: len(kv[1]), reverse=True)))

Unnamed: 0,fr.,Pattern,Text
1,4,v → Obj: Fluid → Dep: Goal → CNI: Agent,"Fat Trask choked with laughter , spattering half-chewed pasta over another man who slapped him upside the head .Splashing the liquid into a goblet , he carried the drink back to the bed and shoved it brusquely into Isabel 's hand .` When you 've got it full up to the top , splash a bit of Paraffin on the top and put the match to it .Simply spray Hot Shapes onto clean , dry hair before setting to get instant hold with a glossy finish ."
2,2,Ext: Agent → v → Obj: Fluid → INI: Area,"My hand jerked so that I almost spilled my wine .Jenna jumped up , almost spilling her coffee , making a hasty grab for the delicate cup and amusing him even more ."
3,2,Dep: Time → Ext: Fluid → v → Obj: Goal → INI: Cause,"This time a few , reluctant drops spattered the millpond surface but did not disturb it .Then the monstrous green body exploded like a balloon of filth , spattering the walls of the cavern and the cages of the cowering young prisoners -- the last time he/she would set a mark upon them ."
4,2,v → Obj: Fluid → Dep: Goal → DNI: Agent,"It was Barney who returned to it first , dribbling cream into his coffee and watching it swirl into a spiral on the surface .Beyond that , you can usually buy extensions covering accidental damage to the buildings , eg ; by putting your foot through the bedroom ceiling , or to the contents , eg , by spilling paint on a carpet ."
5,2,Obj: Fluid → Obj: Fluid → Ext: Agent → v → Dep: Goal → Dep: Time,"Do n't forget to add the Aquasafe at this stage , to remove the chlorines and chloramines that our water boards are pumping into the tapwater at the moment .It was part of the defoliant Agent Orange , that the US forces sprayed on Vietnamese forests during the 1960s and 1970s ."
6,2,Ext: Fluid → v → Obj: Goal → Dep: Result → INI: Agent,"More bullets spattered the front of the cottage , drilling lines back and forth in the stonework .A roar greeted his effort at authority and dirt spattered him , making him blink and cower ."
7,1,Ext: Agent → Head: Means → v → Obj: Goal → DNI: Fluid,"Then she raised one foot and brought it down with a smack on the water , splashing him , and laughed ."
8,1,Ext: Fluid → v → CNI: Cause → INI: Goal,Rinse additive systems work by having a sensor linked to the rinse cycle actuator so that while rinse water is being pumped a metered dose of rinse additive is delivered .
9,1,Ext: Fluid → Dep: Source → v → Obj: Goal → CNI: Agent,"` Put it out , "" she screamed at Julie as another burst of fire from the UZI spattered the cottage ."
10,1,Dep: Cause → Ext: Fluid → v → Obj: Goal,"It was night , and as the wind gusted down the iron chimney pipe , a shower of metal flakes spattered on to the wooden floor ."
