In [None]:
import markhov
import em
import numpy as np
%pylab inline

# Demonstrating EM

Here we demonstrate the use and behaviour of the various functions in our expectation maximisation algorithm for learning rule probabilities for a two-part grammar, made up of a finite state automaton that generates strings of operations and a bigram grammar of legal transitions for the operation Merge.

### Get set up

Some functions for log-transforming the grammar

In [None]:
def ops_log(ops):
    """Return a log-transformed copy of a three-level probability table.

    Parameters
    ----------
    ops : dict
        Nested mapping ``ops[a][b][w]`` whose innermost values are
        probabilities (in this notebook: state -> next state -> operation).

    Returns
    -------
    dict
        A new nested dict with the same keys, each innermost value replaced
        by ``np.log`` of the original. Empty inner dicts stay empty.

    The input is deliberately left untouched: transforming in place meant
    that calling this twice on the same table silently took the log of a log.
    """
    return {
        a: {
            b: {w: np.log(p) for w, p in by_op.items()}
            for b, by_op in by_target.items()
        }
        for a, by_target in ops.items()
    }

def bis_log(bigrams):
    """Return a log-transformed copy of a two-level bigram probability table.

    Parameters
    ----------
    bigrams : dict
        Nested mapping ``bigrams[a][b]`` whose values are probabilities.

    Returns
    -------
    dict
        A new nested dict with the same keys, each value replaced by
        ``np.log`` of the original.

    The input is deliberately left untouched: transforming in place meant
    that calling this twice on the same table silently took the log of a log.
    """
    return {
        a: {b: np.log(p) for b, p in successors.items()}
        for a, successors in bigrams.items()
    }


Make some transitions

In [None]:
# Bigram transition table: each key lists the symbols permitted after it.
# NOTE(review): '[' looks like the left-edge (start-of-string) marker -- confirm.
trans = {
    'a': ['a', 'b'],
    'b': ['b', 'a'],
    '[': ['a', 'b'],
}


In [None]:
# Initial bigram probabilities: uniform (0.5 / 0.5) over the two successors of
# every symbol, handed straight to bis_log so the table is held in log space.
trans_probs = bis_log({
    'a': {'a': 0.5, 'b': 0.5},
    'b': {'b': 0.5, 'a': 0.5},
    '[': {'a': 0.5, 'b': 0.5},
})

Operations FSA

In [None]:
# Operations FSA: ops[state][next_state] lists the operations labelling that arc.
ops = {
    # Start state: the only possible first move is a Merge.
    'S': {'NotCL': ['mg']},
    # NotCL: the most recent "special" operation was *not* Clear (either none has
    # happened yet, or the last one was Copy). Everything is allowed, including end.
    'NotCL': {
        'NotCL': ['mg', 'copy'],
        'CLEAR_S': ['clear'],  # go here to clear the buffer
        'F': ['end'],          # go here to end
    },
    # CLEAR_S: we have just cleared, so the buffer is empty and only Merge is possible.
    'CLEAR_S': {'CLEAR': ['mg']},
    # CLEAR: the last special operation was Clear, so we can Copy or Merge.
    # A Copy makes Copy the last special operation, hence the arc back to NotCL.
    'CLEAR': {
        'CLEAR': ['mg'],
        'NotCL': ['copy'],
    },
    # Final state: no outgoing arcs.
    'F': {},
}


In [None]:
# Initial probabilities for the operations FSA, with the same
# state -> next state -> operation layout as `ops`. The outgoing probabilities
# of every non-final state add up to 1. The table is passed straight to ops_log
# so that it is held in log space.
ops_probs = ops_log({
    # Start state: the only possible first move is a Merge.
    'S': {'NotCL': {'mg': 1.}},
    # NotCL: the most recent "special" operation was *not* Clear (either none has
    # happened yet, or the last one was Copy). Everything is allowed, including end.
    'NotCL': {
        'NotCL': {'mg': 0.3, 'copy': 0.1},
        'CLEAR_S': {'clear': 0.1},  # go here to clear the buffer
        'F': {'end': 0.5},          # go here to end
    },
    # CLEAR_S: we have just cleared, so the buffer is empty and only Merge is possible.
    'CLEAR_S': {'CLEAR': {'mg': 1.}},
    # CLEAR: the last special operation was Clear, so we can Copy or Merge.
    # A Copy makes Copy the last special operation, hence the arc back to NotCL.
    'CLEAR': {
        'CLEAR': {'mg': 0.5},
        'NotCL': {'copy': 0.5},
    },
    # Final state: no outgoing arcs.
    'F': {},
})

A corpus

In [None]:
# Toy corpus: three sentences made of one, two and three space-separated 'a' tokens.
aaa = ['a','a a','a a a']

### Parse a sentence

In [None]:
s=aaa[-1]

In [None]:
# Parse the chosen sentence against the bigram transitions and the operations FSA.
# The result is treated as a sequence of parses by the printing cell that follows.
parses=markhov.parse(s,trans,ops)

In [None]:
# Render each parse of the sentence as text, in the order the parser returned them.
for one_parse in parses:
    print(markhov.parse2string(one_parse))

### Parse the corpus

In [None]:
# Parse the whole corpus with the same transitions and operations FSA.
# Later cells iterate this as (sentence, parse) pairs.
parsed_corpus=em.parse_corpus(aaa,trans,ops)

In [None]:
print(em.parsed_corpus2string(parsed_corpus))

Make a parallel list of parse relative probabilities

In [None]:
# Relative probability of each parse, computed from the log-transformed
# ops_probs / trans_probs tables built earlier in the notebook.
parse_probs=em.get_p_parses(parsed_corpus,ops_probs,trans_probs)

In [None]:
print(parse_probs)

In [None]:
# Expected counts for the bigram grammar. The call returns a pair; the display
# cells index the results as exp_unigrams[u][s] and exp_bigrams[a][b][s].
(exp_unigrams,exp_bigrams) = em.expected_counts_trans(parsed_corpus,parse_probs,trans)

In [None]:
# Report the expected count of every unigram, broken down by sentence.
# print is called with a single parenthesised argument, which behaves the same on
# Python 2 and Python 3 (the bare `print '...'` statement is Python-2-only).
# The inner loop variable is not called `s`, so it does not overwrite the
# sentence `s` chosen earlier in the notebook.
for u in exp_unigrams:
    print('\nExpected count of %s in each sentence:'%u)
    for sentence in exp_unigrams[u]:
        print(' %s: %.4f'%(sentence,exp_unigrams[u][sentence]))

In [None]:
# Report the expected count of every bigram (a, b), broken down by sentence.
# print is called with a single parenthesised argument, which behaves the same on
# Python 2 and Python 3 (the bare `print '...'` statement is Python-2-only).
# The inner loop variable is not called `s`, so it does not overwrite the
# sentence `s` chosen earlier in the notebook.
for a in exp_bigrams:
    for b in exp_bigrams[a]:
        print('\nExpected count of %s %s in each sentence:'%(a,b))
        for sentence in exp_bigrams[a][b]:
            print(' %s: %.4f'%(sentence,exp_bigrams[a][b][sentence]))

In [None]:
# Expected counts for the operations FSA. The call returns a pair; the display
# cells index the results as exp_states[u][s] and exp_tr[a][b][e][s].
(exp_states,exp_tr) = em.expected_counts_fsa(parsed_corpus,parse_probs,ops)

In [None]:
# Report the expected count of every FSA state, broken down by sentence.
# print is called with a single parenthesised argument, which behaves the same on
# Python 2 and Python 3 (the bare `print '...'` statement is Python-2-only).
# The inner loop variable is not called `s`, so it does not overwrite the
# sentence `s` chosen earlier in the notebook.
for u in exp_states:
    print('\nExpected count of %s in each sentence:'%u)
    for sentence in exp_states[u]:
        print(' %s: %.4f'%(sentence,exp_states[u][sentence]))

In [None]:
# Report the expected count of every FSA transition, broken down by sentence.
# exp_tr is keyed [a][b][e]; the header prints the triple in the order (a, e, b).
# print is called with a single parenthesised argument, which behaves the same on
# Python 2 and Python 3 (the bare `print '...'` statement is Python-2-only).
# The inner loop variable is not called `s`, so it does not overwrite the
# sentence `s` chosen earlier in the notebook.
for a in exp_tr:
    for b in exp_tr[a]:
        for e in exp_tr[a][b]:
            print('\nExpected count of (%s %s %s) in each sentence:'%(a,e,b))
            for sentence in exp_tr[a][b][e]:
                print(' %s: %.4f'%(sentence,exp_tr[a][b][e][sentence]))

In [None]:
# Re-estimate the operations FSA from the expected counts. The result is rendered
# with markhov.fsa2string and validated with em.check_fsa in the following cells.
new_ops = em.update_rabbit_fsa(exp_states,exp_tr,ops)

In [None]:
print (markhov.fsa2string(new_ops,False)) # False prints the non-log probs

In [None]:
em.check_fsa(new_ops) # is this a valid probability distribution?

In [None]:
# Re-estimate the bigram transitions from the expected counts. The result is rendered
# with markhov.trans2string and validated with em.check_bis in the following cells.
new_trans = em.update_rabbit_trans(exp_unigrams,exp_bigrams,trans)

In [None]:
print (markhov.trans2string(new_trans))

In [None]:
em.check_bis(new_trans) # check this is a valid probability distribution

## Expectation Maximisation time!

In [None]:
n = 5 # number of EM iterations to do

In [None]:
# `history` is indexed by iteration number in the cells below; each entry is read
# through the keys 'fsa', 'trans_probs' and (from iteration 1 on) 'parse_ps'.
# NOTE(review): later cells assume len(history) >= n -- confirm against em.em_rabbit.
history=em.em_rabbit(aaa,trans,ops,n) # run EM

In [None]:
#for i,step in enumerate(history):
#    print ('\n--------\n--------\nIteration %i'%i)
#    print ('FSA:')
#    print (markhov.fsa2string(step['fsa']))
#    print ('\n------\nTrans probs:')
#    print (markhov.trans2string(step['trans_probs']))

Display the results

In [None]:
import pandas as pd

Make the rules into a simpler list

In [None]:
# Flatten the FSA into (label, key) pairs: the label reads "lhs->op rhs" and the
# key is the (lhs, rhs, op) triple in the order the nested tables are indexed.
rules = [
    ('%s->%s %s' % (src, op, dst), (src, dst, op))
    for src in ops
    for dst in ops[src]
    for op in ops[src][dst]
]

Put them and their updating probabilities into a table

In [None]:
# One record per FSA rule: its label plus the value stored for it at each of the
# first n entries of `history`, under zero-padded "p.iterationNNN" column names.
tab = []
for label, (src, dst, op) in rules:
    record = {'rule': label}
    for step in range(n):
        record["p.iteration%03d" % step] = history[step]['fsa'][src][dst][op]
    tab.append(record)

Use pandas to make the table into a dataframe

In [None]:
df = pd.DataFrame(tab)

In [None]:
df

In [None]:
df.to_csv('ops_%s.csv'%('oct_22_2016_aaa')) # print to file

Same for the transitions

In [None]:
# Flatten the transition table into (label, key) pairs: the label reads
# "lhs,rhs" and the key is the (lhs, rhs) pair used to index the probability tables.
bigrams = [
    ('%s,%s' % (first, second), (first, second))
    for first in trans
    for second in trans[first]
]

In [None]:
bigrams

In [None]:
# One record per bigram: its label plus the value stored for it at each of the
# first n entries of `history`.
tab=[]
for (bi,(lhs,rhs)) in bigrams:
    thisrule={'rule':bi}
    for i in range(n):
        p=history[i]['trans_probs'][lhs][rhs]
        # Zero-padded, matching the "p.iteration%03d" columns of the FSA table:
        # unpadded names sort as iteration1, iteration10, iteration2, ... once n >= 10.
        thisrule["p.iteration%03d"%i]=p
    tab.append(thisrule)

In [None]:
df_bis = pd.DataFrame(tab)

In [None]:
df_bis

In [None]:
df_bis.to_csv('trans_%s.csv'%('oct_22_2016_aaa'))

### Calculate the log likelihood of the corpus given the trained grammar

This is the ll of the corpus at the end of training:

In [None]:
# history[-1] is the last recorded EM step; its transition and FSA tables are
# scored against the already-parsed corpus.
markhov.ll_corpus(parsed_corpus,history[-1]['trans_probs'],history[-1]['fsa'])

Make a dataframe of the LL of the corpus throughout training:

In [None]:
# Corpus log likelihood under the grammar recorded at each of the first n
# entries of `history`, one record per iteration.
tab = [
    {
        'iteration': step,
        'likelihood': markhov.ll_corpus(
            parsed_corpus,
            history[step]['trans_probs'],
            history[step]['fsa'],
        ),
    }
    for step in range(n)
]

In [None]:
df_lls=pd.DataFrame(tab)

In [None]:
df_lls

In [None]:
# Log likelihood against iteration number, drawn as a line with point markers.
# plot / xlabel / ylabel are not imported by name anywhere in this notebook; they
# come from the namespace populated by the `%pylab inline` magic in the first cell.
plot(df_lls["iteration"],df_lls["likelihood"],'o-')
xlabel("Iteration")
ylabel("Log Likelihood")

In [None]:
# One record per parse in the corpus: the sentence, its bigrams, the two
# components of parse['rt'] (stored under 'Q' and 'E'), and the probability
# recorded for that parse at each EM iteration.
tab=[]

for i,(s,parse) in enumerate(parsed_corpus):
    this_parse={'sentence':' '.join(s),'bigrams':' '.join(parse['bis']),'Q':' '.join(parse['rt'][0]),'E':' '.join(parse['rt'][1])}
    # The loop variable is `step`, not `iter`: at notebook scope `iter` would
    # rebind the builtin iter() for every later cell.
    # Parse probabilities are read from iteration 1 onwards.
    # NOTE(review): presumably history[0] carries no 'parse_ps' -- confirm.
    for step in range(1,n):
        this_parse['iteration_%i p'%step]=history[step]['parse_ps'][i]
    tab.append(this_parse)

In [None]:
df.parses=pd.DataFrame(tab)


In [None]:
df.parses
