In [1]:
import numpy as np
import random
import matplotlib.pyplot as plt
import copy
import pickle

% matplotlib inline

from sem_context import SemContext
import event_generation
import encoding_schemes
from event_schemas import BasicRNN, HybridRNN
import hrr

import keras

import sem

Using TensorFlow backend.


### Instantiate Model

In [2]:
# Dimensionality passed to sem.sem_options below.
dim = 100

# Load the pickled context. The `with` block closes the file handle as soon as
# loading finishes instead of leaving it open for the life of the kernel.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open("final_context.p", "rb") as context_file:
    context = pickle.load(context_file)

# Build the option set and the initial model state used by the cells below.
opts = sem.sem_options(None, dim, lambd = 0.0, alpha = 0.7, beta = 1.0)
s = sem.sem_init(opts, context)

### Cluster Events

In [3]:
# Request 100 events from the loaded context using the 'all_property_addition'
# scheme. `generator_indices` presumably records which generator produced each
# event (it is compared against the inferred assignments later) — TODO confirm.
events, generator_indices = event_generation.generate_clustering_events(context, 'all_property_addition', num_events = 100)

In [4]:
from scipy.stats import multivariate_normal as mvnormal

def logsumexp(x):
    """Return log(sum(exp(x))) computed in a numerically stable way.

    The maximum is subtracted before exponentiating so that large-magnitude
    log values do not overflow or underflow.

    Parameters
    ----------
    x : array-like
        Log-domain values; intended for 1-D input.

    Returns
    -------
    float
        The log of the summed exponentials. For empty input 0 is returned
        (kept for backward compatibility with existing callers). If the
        maximum is not finite (all entries ``-inf``, or any ``+inf``/``nan``)
        that maximum is returned directly.
    """
    x = np.asarray(x, dtype=float)
    if x.size == 0:
        return 0

    peak = np.max(x)
    if not np.isfinite(peak):
        # Shifting by a non-finite maximum would evaluate inf - inf = nan;
        # the maximum itself is already the correct result in these cases.
        return peak

    return peak + np.log(np.sum(np.exp(x - peak)))

def cluster_events(events, sem, opts):
    """Greedily assign each event to an event schema, training schemas as it goes.

    The first event is assigned to schema 0. For every later event a prior is
    built from the running event counts (with ``opts['alpha']`` placed on the
    first unused slot), every schema with non-zero prior is scored by its mean
    one-step prediction error over the event's scenes, and the schema with the
    highest posterior is selected, trained on the event with
    ``train_recurrent`` and has its count incremented.

    Parameters
    ----------
    events : sequence
        Events to cluster; each event is an indexable sequence of scenes.
    sem : dict-like
        Model state, mutated in place. ``sem['event_counts']`` is read and
        updated, and ``sem['theta'][i]`` must provide ``predict`` and
        ``train_recurrent``. (This parameter shadows the ``sem`` module
        inside the function.)
    opts : dict-like
        Options; the keys ``'d'``, ``'max_events'``, ``'beta'`` and
        ``'alpha'`` are read.

    Returns
    -------
    list of int
        One schema index per event, in input order. Empty when ``events`` is
        empty, in which case ``sem`` is left untouched.
    """
    n = len(events)
    if n == 0:
        # Nothing to cluster: do not fabricate an assignment or bump counts.
        return []

    d = opts['d']
    k = opts['max_events']
    Sigma = np.eye(d) * opts['beta']

    # Default creation of the first event and assignment of the first scene.
    # NOTE(review): schema 0 is not trained on events[0] here — confirm that
    # this is intended.
    assignments = [0]
    event_counts = sem['event_counts']
    event_counts[0] = 1

    for t in range(1, n):
        event = events[t]
        n_scenes = len(event)

        # CRP-style prior from the event counts. A float copy is taken so the
        # model's counts are not modified and a fractional alpha is not
        # truncated if the counts happen to be stored as integers.
        prior = np.array(event_counts, dtype=float)
        empty = np.where(prior == 0)[0]
        # New-event creation parameter goes on the first unused slot.
        if len(empty) > 0:
            prior[empty[0]] = opts['alpha']

        likelihood = np.zeros(k)
        active = np.nonzero(prior)[0]
        for i in active:
            # Mean distance between each scene and the schema's normalized
            # prediction given the scenes seen so far.
            mean_error = 0.0
            seen = [event[0]]
            for j in range(1, n_scenes):
                prediction = hrr.normalize(sem['theta'][i].predict(seen))
                mean_error += np.linalg.norm(event[j] - prediction) / (n_scenes - 1.0)
                seen.append(event[j])
            # NOTE(review): mean_error is a scalar while the mean is
            # d-dimensional; scipy presumably broadcasts the scalar across all
            # d components — confirm this is the intended likelihood.
            likelihood[i] = mvnormal.logpdf(mean_error, mean=np.zeros(d), cov=Sigma)

        # Posterior over schemas from prior and likelihood, normalized in log
        # space; inactive schemas keep probability zero.
        log_post = np.log(prior[active]) + likelihood[active]
        posteriors = np.zeros(k)
        posteriors[active] = np.exp(log_post - logsumexp(log_post))
        posteriors[np.isnan(posteriors)] = 0

        # argmax returns the first index of the maximum, i.e. ties go to the
        # lowest schema index.
        chosen = int(np.argmax(posteriors))
        sem['theta'][chosen].train_recurrent(event)
        assignments.append(chosen)
        event_counts[chosen] += 1

    return assignments

In [5]:
# Cluster the generated events. cluster_events mutates `s` in place: it updates
# s['event_counts'] and trains the chosen schema in s['theta'] for each event.
assignments = cluster_events(events, s, opts)

In [6]:
# Parenthesized form prints identically on Python 2 and also runs on Python 3.
print(assignments)

[0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]


In [7]:
# Parenthesized form prints identically on Python 2 and also runs on Python 3.
print(generator_indices)

[0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0]


In [10]:
# TODO: account for different possible orderings
def error(a, b):
    """Return the fraction of positions at which ``a`` and ``b`` agree.

    Despite its name this is a match rate: 1.0 means every position agrees
    and 0.0 means none do. Labels are compared positionally and literally, so
    two equivalent clusterings whose labels are permuted will score low.

    Parameters
    ----------
    a, b : sequence
        Label sequences. Positions ``0 .. len(a) - 1`` are compared, so ``b``
        must be at least as long as ``a``.

    Returns
    -------
    float
        Number of matching positions divided by ``len(a)``; 0.0 when ``a`` is
        empty (instead of dividing by zero).
    """
    total = len(a)
    if total == 0:
        return 0.0
    matches = sum(1 for i in range(total) if a[i] == b[i])
    return float(matches) / total

In [11]:
# Fraction of positions where the inferred assignment equals the generator
# index. error() counts matches, so a higher value means closer agreement.
error(assignments, generator_indices)

0.49

In [None]:
# parameter grid search attempt
# Sweeps alpha and beta over np.arange(0, 2, 1.0) (the values 0.0 and 1.0) with
# lambd fixed at 0.0. A fresh model is initialised for every combination and
# res[i][j] stores the value error() returns for the i-th alpha and j-th beta.
# NOTE(review): this loop rebinds the notebook-level `opts`, `s` and
# `assignments`; any cell run afterwards sees the last grid point, not the
# model built at the top of the notebook.
# NOTE(review): with beta = 0, cluster_events builds an all-zero covariance
# (np.eye(d) * opts['beta'], assuming sem_options stores beta under that key);
# mvnormal.logpdf presumably rejects a singular covariance — confirm before
# running this grid.
l = 0.0
res = []
for a in np.arange(0, 2, 1.0):
    # One row of results per alpha value.
    small = []
    for b in np.arange(0, 2, 1.0):
        opts = sem.sem_options(None, dim, lambd = l, alpha = a, beta = b)
        s = sem.sem_init(opts, context)
        # The same `events` list is reused for every parameter combination.
        assignments = cluster_events(events, s, opts)
        small.append(error(assignments, generator_indices))
    res.append(small)