In [12]:
import xml.etree.ElementTree as et
import re, string, yaml, os

from os.path        import join
from framenet.util  import curry, singleton, update
from abc            import ABC, abstractmethod
from collections    import OrderedDict
from pprint         import pprint

FN_HOME = os.getenv('FN_HOME')

class Tree(ABC):
#     @property
    @abstractmethod
    def children(self): pass
    
#     @property
    @abstractmethod
    def items(self): pass
    
    def __repr__(self):
        ii, cc = self.items(), list(self.children())
        cname  = self.__class__.__name__
        if not cc: 
            return '{0}: {1}'.format(cname, ii)
        else:
            return '{0}: {1} {2}'.format(cname, ii, repr(cc))

class TestTree(Tree):
    def __init__(self, items, children=[]):
        update(self, _items=items, _children=children)
#     @property
    def items(self): 
        return self._items
#     @property
    def children(self):
        return self._children

# A catamorphism

    data Tree a = Node a [Tree a] deriving (Show) 
    
    cat :: (a -> [b] -> b) -> Tree a -> b
    cat f (Node root children) = f root (map (cat f) children)

In [13]:
@curry
def cat(f, tree):
    "A catamorphism. See above."
    return f(tree.items(), map(cat(f), tree.children()))

In [14]:
TT = TestTree

t = TT(1, [TT(2), TT(3)])

def f(items, children):
    return TestTree(items + 1, children)

cat(f, t)

TestTree: 2 [TestTree: 3, TestTree: 4]

In [7]:
def is_whitespace(s):
    for c in s:
        if not c in string.whitespace:
            return False
    return True

class EtTree(Tree):
    def __init__(self, et_root):
        self.et_root = et_root

    def items(self):
        r = self.et_root
        if r.text and not is_whitespace(r.text):
            return ('text', r.text) + r.items()
        else:
            return r.items()
    
    def children(self):
        return map(EtTree, self.et_root.getchildren())

In [9]:
t = EtTree(et.parse(join(FN_HOME, 'frRelation.xml')).getroot())

def to_dict(items, children):
    cc = list(children)
    return dict((k, v) for k, v in items + [('contents', cc)] if v)

d = cat(to_dict, t)

with open('../data/frRelation.yml', 'w+') as sout:
    yaml.dump(d, sout, default_flow_style=False)

In [15]:
from framenet.main import main

fn, fb = main(FN_HOME)
fn.frames[10]

Frame(name=Achieving_first, ID=403, relations=[Inherits from: ['Intentionally_create'], Uses: ['First_experience'], Subframe of: ['Product_development_scenario'], Precedes: ['Product_development']], elements=[FrameElement(frame_name=Achieving_first, name=Cognizer, ID=3725, coreType=Core, semtype=Sentient), FrameElement(frame_name=Achieving_first, name=New_idea, ID=3726, coreType=Core, semtype=None), FrameElement(frame_name=Achieving_first, name=Time, ID=3727, coreType=Peripheral, semtype=Time), FrameElement(frame_name=Achieving_first, name=Place, ID=3728, coreType=Peripheral, semtype=Locative_relation), FrameElement(frame_name=Achieving_first, name=Means, ID=3729, coreType=Peripheral, semtype=State_of_affairs), FrameElement(frame_name=Achieving_first, name=Purpose, ID=3730, coreType=Peripheral, semtype=State_of_affairs), FrameElement(frame_name=Achieving_first, name=Explanation, ID=3731, coreType=Extra-Thematic, semtype=State_of_affairs), FrameElement(frame_name=Achieving_first, name=F

In [1]:
print('a \u27f7 b')

a ‚ü∑ b


In [9]:
fn.get_frame('Event')

Frame(Event) [187]

In [11]:
motion = fn.get_frame('Motion')
motion.individual_valences

[]

In [40]:
fb.build_lus_for_frame("Motion", fn)

In [19]:
from IPython.display import HTML

class H:
    def _repr_html_(self):
        return '<h1>This is H!</h1>'

    def _repr_json_(self):
        return {'this': ['is H!']}

In [18]:
H()

In [2]:
from collections import namedtuple as _namedtuple
from abc import ABCMeta, abstractproperty
from functools import wraps
from sys import version_info

__all__ = ('namedtuple',)


class _NamedTupleABCMeta(ABCMeta):
    '''The metaclass for the abstract base class + mix-in for named tuples.'''
    def __new__(mcls, name, bases, namespace):
        fields = namespace.get('_fields')
        for base in bases:
            if fields is not None:
                break
            fields = getattr(base, '_fields', None)
        if not isinstance(fields, abstractproperty):
            basetuple = _namedtuple(name, fields)
            bases = (basetuple,) + bases
            namespace.pop('_fields', None)
            namespace.setdefault('__doc__', basetuple.__doc__)
            namespace.setdefault('__slots__', ())
        return ABCMeta.__new__(mcls, name, bases, namespace)


exec(
    # Python 2.x metaclass declaration syntax
    """class _NamedTupleABC(object):
        '''The abstract base class + mix-in for named tuples.'''
        __metaclass__ = _NamedTupleABCMeta
        _fields = abstractproperty()""" if version_info[0] < 3 else
    # Python 3.x metaclass declaration syntax
    """class _NamedTupleABC(metaclass=_NamedTupleABCMeta):
        '''The abstract base class + mix-in for named tuples.'''
        _fields = abstractproperty()"""
)


_namedtuple.abc = _NamedTupleABC
#_NamedTupleABC.register(type(version_info))  # (and similar, in the future...)

@wraps(_namedtuple)
def namedtuple(*args, **kwargs):
    '''Named tuple factory with namedtuple.abc subclass registration.'''
    cls = _namedtuple(*args, **kwargs)
    _NamedTupleABC.register(cls)
    return cls

In [4]:
class MyRecord(namedtuple.abc):
        _fields = 'x y z'  # such form will be transformed into ('x', 'y', 'z')
        def __repr__(self):
            return "I'm your new MyRecord NT!"
        
r = MyRecord(1, 2, 3)
r

I'm your new MyRecord NT!

In [12]:
dir(r)

['__abstractmethods__',
 '__add__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getnewargs__',
 '__gt__',
 '__hash__',
 '__init__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmul__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_cache',
 '_abc_negative_cache',
 '_abc_negative_cache_version',
 '_abc_registry',
 '_asdict',
 '_fields',
 '_make',
 '_replace',
 '_source',
 'count',
 'index',
 'x',
 'y',
 'z']

In [11]:
r.__dict__

{}

In [10]:
del r.__dict__

In [3]:
import xml.etree.ElementTree as et
import pandas                as pd
import numpy                 as np

from framenet.utils import take, drop, curry, iget, flatmap
from os.path        import join
from glob           import glob, iglob

@curry
def et_loader(base, path):
#     *start, last = path
    fname = '%s.xml' % path if not path.endswith('.xml') else path
    return et.parse(join(base, fname)).getroot()

URI          = 'http://framenet.icsi.berkeley.edu'
NS           = {'fn': URI} 
base_dir     = os.getenv('FN_HOME')
frame_dir    = join(base_dir, 'frame')
frame_root   = et_loader(frame_dir)
rel_root     = et_loader(base_dir)

def frame_element_relation(uri, root):
    rtypes  = root.iter('{%s}frameRelationType' % uri)
    cap     = lambda s: s[0].upper() + s[1:]
    items   = lambda fer, fr, rt: dict(
        fer.items() 
        + [('relationType', rt.get('name'))]
        + [('relation%s' % cap(k), v) for k, v in fr.items()])
    return [items(fer, fr, rt) for rt in rtypes for fr in rt for fer in fr]

@curry
def frame_element(uri, root):
    fs    = root.iter('{%s}frame' % uri)
    items = lambda fe: [(k, v) for k, v in fe.items() if k in 'ID coreType name'.split()]
    return [dict(items(fe) + [('frameID', f.get('ID'))]) 
            for f in fs
            for fe in f.iter('{%s}FE' % uri)]

ImportError: No module named 'framenet.utils'

In [4]:
fer_df = pd.DataFrame(frame_element_relation(URI, rel_root('frRelation')))

In [5]:
et_roots = (frame_root(fname) for fname in glob('%s/*.xml' % frame_dir))

def frames(uri, roots):
    return flatmap(frame_element(uri), roots)

In [6]:
frames_df = pd.DataFrame(list(frames(URI, et_roots)))

In [7]:
import qgrid
qgrid.nbinstall(overwrite=True)

In [38]:
qgrid.show_grid(fer_df, grid_options={'forceFitColumns': False, 'defaultColumnWidth': 200})

In [33]:
from itertools import chain
attrs = 'ID coreType name'.split()
pairs = lambda f, fe: chain(
    [(k, v) for k, v in fe.items() if k in attrs],
    [('frameID', f.get('ID'))])

from framenet.ecg import it
[(fe.tag, f.tag) for f in it(frame_root('Intentionally_act'), 'frame') for fe in it(f, 'FE')]

[('{http://framenet.icsi.berkeley.edu}FE',
  '{http://framenet.icsi.berkeley.edu}frame'),
 ('{http://framenet.icsi.berkeley.edu}FE',
  '{http://framenet.icsi.berkeley.edu}frame'),
 ('{http://framenet.icsi.berkeley.edu}FE',
  '{http://framenet.icsi.berkeley.edu}frame'),
 ('{http://framenet.icsi.berkeley.edu}FE',
  '{http://framenet.icsi.berkeley.edu}frame'),
 ('{http://framenet.icsi.berkeley.edu}FE',
  '{http://framenet.icsi.berkeley.edu}frame'),
 ('{http://framenet.icsi.berkeley.edu}FE',
  '{http://framenet.icsi.berkeley.edu}frame'),
 ('{http://framenet.icsi.berkeley.edu}FE',
  '{http://framenet.icsi.berkeley.edu}frame'),
 ('{http://framenet.icsi.berkeley.edu}FE',
  '{http://framenet.icsi.berkeley.edu}frame'),
 ('{http://framenet.icsi.berkeley.edu}FE',
  '{http://framenet.icsi.berkeley.edu}frame'),
 ('{http://framenet.icsi.berkeley.edu}FE',
  '{http://framenet.icsi.berkeley.edu}frame'),
 ('{http://framenet.icsi.berkeley.edu}FE',
  '{http://framenet.icsi.berkeley.edu}frame'),
 ('{http:/

In [34]:
[dict(pairs(f, fe)) 
 for f in it(frame_root('Intentionally_act'), 'frame') 
 for fe in f 
 if 'FE' in fe.tag]

[{'ID': '1609',
  'coreType': 'Core-Unexpressed',
  'frameID': '198',
  'name': 'Act'},
 {'ID': '1610', 'coreType': 'Core', 'frameID': '198', 'name': 'Agent'},
 {'ID': '1613', 'coreType': 'Peripheral', 'frameID': '198', 'name': 'Place'},
 {'ID': '1614', 'coreType': 'Peripheral', 'frameID': '198', 'name': 'Purpose'},
 {'ID': '1616', 'coreType': 'Peripheral', 'frameID': '198', 'name': 'Time'},
 {'ID': '2573', 'coreType': 'Peripheral', 'frameID': '198', 'name': 'Means'},
 {'ID': '2574', 'coreType': 'Peripheral', 'frameID': '198', 'name': 'Manner'},
 {'ID': '7982',
  'coreType': 'Extra-Thematic',
  'frameID': '198',
  'name': 'Domain'},
 {'ID': '7983',
  'coreType': 'Extra-Thematic',
  'frameID': '198',
  'name': 'Frequency'},
 {'ID': '11078',
  'coreType': 'Extra-Thematic',
  'frameID': '198',
  'name': 'Period_of_iterations'},
 {'ID': '11079',
  'coreType': 'Extra-Thematic',
  'frameID': '198',
  'name': 'Result'},
 {'ID': '11863',
  'coreType': 'Extra-Thematic',
  'frameID': '198',
  'n

In [4]:
import pandas as pd
import numpy as np

In [1]:
lu_df = pd.read_pickle('lu.pkl')
lu_df.describe()

Unnamed: 0,annotationSet.ID,annotationSet.status,label.feID,label.itype,label.name,label.span,layer.name,layer.rank,sentence.ID,sentence.aPos,sentence.corpID,sentence.docID,sentence.paragNo,sentence.sentNo,text.contents
count,7618695,7618695,447475,51037,6737095,6737095,7618695,7618695,7618695,7618695,3201763,3201763,3201763,7618695,7618695
unique,369745,4,7790,4,1403,77917,17,3,173757,149291,14,3391,3390,452,166707
top,6545999,UNANN,285,INI,NT,"(0, 2)",BNC,1,4102241,0,111,421,5,0,Well if I - I saw on one of the talk shows thi...
freq,5880,5642629,6522,19143,625730,71854,2751250,7613043,6232,2388890,1136420,1043425,150975,4399651,6232


In [6]:
columns = pd.MultiIndex.from_tuples([('A', 'cat'), ('B', 'dog'),
                                     ('B', 'cat'), ('A', 'dog')],
                                    names=['exp', 'animal'])

index = pd.MultiIndex.from_product([('bar', 'baz', 'foo', 'qux'),
                                    ('one', 'two')],
                                   names=['first', 'second'])

df = pd.DataFrame(np.random.randn(8, 4), index=index, columns=columns)
df

Unnamed: 0_level_0,exp,A,B,B,A
Unnamed: 0_level_1,animal,cat,dog,cat,dog
first,second,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
bar,one,-0.435827,-0.061351,-0.670579,0.147002
bar,two,-0.806055,-1.694736,-0.160833,-0.983085
baz,one,0.697155,0.157056,0.096714,-1.335713
baz,two,0.278984,0.394231,0.679809,0.296236
foo,one,-1.830128,-1.755675,-0.330606,0.270147
foo,two,1.053411,1.456888,0.359551,0.236067
qux,one,-0.930444,0.461257,0.096407,-0.359314
qux,two,2.615287,-0.064862,1.27234,1.040811


In [10]:
df.stack()

Unnamed: 0_level_0,Unnamed: 1_level_0,exp,A,B
first,second,animal,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,cat,-0.435827,-0.670579
bar,one,dog,0.147002,-0.061351
bar,two,cat,-0.806055,-0.160833
bar,two,dog,-0.983085,-1.694736
baz,one,cat,0.697155,0.096714
baz,one,dog,-1.335713,0.157056
baz,two,cat,0.278984,0.679809
baz,two,dog,0.296236,0.394231
foo,one,cat,-1.830128,-0.330606
foo,one,dog,0.270147,-1.755675


In [11]:
df.stack().mean(1)

first  second  animal
bar    one     cat      -0.553203
               dog       0.042826
       two     cat      -0.483444
               dog      -1.338910
baz    one     cat       0.396935
               dog      -0.589328
       two     cat       0.479397
               dog       0.345233
foo    one     cat      -1.080367
               dog      -0.742764
       two     cat       0.706481
               dog       0.846478
qux    one     cat      -0.417018
               dog       0.050972
       two     cat       1.943814
               dog       0.487974
dtype: float64

In [12]:
df.stack().mean(1).unstack()

Unnamed: 0_level_0,animal,cat,dog
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.553203,0.042826
bar,two,-0.483444,-1.33891
baz,one,0.396935,-0.589328
baz,two,0.479397,0.345233
foo,one,-1.080367,-0.742764
foo,two,0.706481,0.846478
qux,one,-0.417018,0.050972
qux,two,1.943814,0.487974


In [14]:
df.mean().unstack()

animal,cat,dog
exp,Unnamed: 1_level_1,Unnamed: 2_level_1
A,0.080298,-0.085981
B,0.16785,-0.138399


In [15]:
df

Unnamed: 0_level_0,exp,A,B,B,A
Unnamed: 0_level_1,animal,cat,dog,cat,dog
first,second,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
bar,one,-0.435827,-0.061351,-0.670579,0.147002
bar,two,-0.806055,-1.694736,-0.160833,-0.983085
baz,one,0.697155,0.157056,0.096714,-1.335713
baz,two,0.278984,0.394231,0.679809,0.296236
foo,one,-1.830128,-1.755675,-0.330606,0.270147
foo,two,1.053411,1.456888,0.359551,0.236067
qux,one,-0.930444,0.461257,0.096407,-0.359314
qux,two,2.615287,-0.064862,1.27234,1.040811


In [18]:
df.groupby('animal')

KeyError: 'animal'

In [1]:
from numba import jit