# Hack Zurich - Fringer

In [3]:
%load_ext autoreload
%autoreload 2
from __future__ import division, print_function

import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist, squareform
from sklearn.manifold import MDS, Isomap, TSNE
from sklearn.decomposition import PCA

from fringiness import *
from bokeh.plotting import show
from data_getter import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Create random data to test the method.

In [75]:
def random_data(n, m, sparsity=0.8, mean=2, distribution='poisson'):
    """
    Draw a random (n, m) matrix and force a fraction of its entries to zero.

    Parameters
    ----------
    n : int
        number of samples
    m : int
        number of features
    sparsity : float between 0 and 1
        probability with which each entry is overwritten by zero, i.e. the
        approximate ratio of zero values in the resulting matrix.
    mean : float
        rate of the Poisson distribution, or the offset added to the
        standard-normal draws when ``distribution='normal'``.
    distribution : str
        supported are 'poisson' and 'normal'

    Returns
    -------
    numpy.ndarray of shape (n, m)

    Raises
    ------
    ValueError
        if `distribution` is not one of the supported names.
    """
    if distribution == 'poisson':
        r = np.random.poisson(mean, (n, m))
    elif distribution == 'normal':
        r = np.random.randn(n*m).reshape((n, m)) + mean
    else:
        # Previously an unknown name fell through and crashed later on the
        # unassigned `r`; fail early with an explicit message instead.
        raise ValueError(
            "unsupported distribution %r; expected 'poisson' or 'normal'"
            % (distribution,))
    # Independent uniform draws decide which entries are zeroed out.
    p = np.random.rand(n*m).reshape((n, m))
    r[p < sparsity] = 0
    return r

def res_to_matrix(res):
    """
    Turn an analysis result into a binary key-occurrence matrix.

    Parameters
    ----------
    res : mapping
        must provide ``res['point']`` (an iterable of keys, e.g. a dict) and
        ``res['environs']`` (an iterable of mappings, each with an
        ``'entities'`` iterable of keys, e.g. a dict).

    Returns
    -------
    numpy.ndarray of shape (1 + number of environs, number of distinct keys)
        Row 0 marks the keys of ``res['point']``; each following row marks
        the keys of one environ, in order. Entries are 1.0 where the key is
        present and 0.0 otherwise. Columns follow first-seen key order
        (point keys first, then environ keys), so the layout is reproducible
        across runs.
    """
    # Map every distinct key to a column, in first-seen order. A dict lookup
    # replaces the per-key array scan and keeps the column order deterministic
    # (iteration over a set of strings is not stable between interpreters).
    column_of = {}
    key_groups = [list(res['point'])]
    key_groups.extend(list(env['entities']) for env in res['environs'])
    for keys in key_groups:
        for key in keys:
            column_of.setdefault(key, len(column_of))

    rows = []
    for keys in key_groups:
        row = np.zeros(len(column_of))
        # An explicit integer dtype keeps the fancy index valid even when a
        # group has no keys at all (which previously raised inside hstack).
        columns = np.array([column_of[key] for key in keys], dtype=int)
        row[columns] = 1
        rows.append(row)
    return np.vstack(rows)

def text_to_matrix(text):
    """
    Analyse `text` and return the result as a matrix.

    Passes `text` to `run` and converts what it returns with
    `res_to_matrix`. `run` is not defined in this notebook; presumably it
    comes from one of the star imports -- confirm.
    """
    analysis = run(text)
    return res_to_matrix(analysis)

In [5]:
# Four synthetic groups of 200 samples x 100 features each, stacked row-wise.
# Each entry is (mean, distribution); sparsity is 0.8 for every group.
group_specs = [
    (.5, 'poisson'),
    (10, 'normal'),
    (8, 'poisson'),
    (1, 'normal'),
]
data = np.vstack([
    random_data(200, 100, mean=group_mean, sparsity=.8, distribution=group_dist)
    for group_mean, group_dist in group_specs
])

In [None]:
fig = plt.figure()

# Scale every sample (row of `data`) to unit L2 norm. All-zero samples keep a
# divisor of 1 so they stay zero instead of becoming NaN.
sample_norms = np.linalg.norm(data, axis=1)
sample_norms[sample_norms == 0] = 1

ax_img = fig.add_subplot(121)
ax_img.set_title('Raw data')
# data.T has one row per entity and one column per sample, so entities run
# along the y axis and samples along the x axis.
im = ax_img.imshow(data.T / sample_norms, aspect='auto')
ax_img.set_xlabel('Sample #')
ax_img.set_ylabel('Entity #')

ax_hist = fig.add_subplot(122)
ax_hist.hist(data.flatten(), bins=range(10))
ax_hist.set_title('Entity frequency distribution')

# Tie the colorbar to the image axes explicitly so it never takes its space
# from the histogram panel, whichever axes happens to be current.
fig.colorbar(im, ax=ax_img)
plt.tight_layout()

In [None]:
# Sample news article (Reuters dateline) used as the input text for `run`.
text = """ST. LOUIS (Reuters) - Police using tear gas and rubber bullets clashed with demonstrators in St Louis in the early hours of Saturday after a white former policeman was acquitted of murdering a black suspect.
A peaceful protest over Friday’s not guilty verdict turned violent after police confronted a small group of demonstrators - three years after the police shooting of another black suspect in the nearby suburb of Ferguson stirred nationwide anger and debate.
Officers fired tear gas as people broke windows at a library, a restaurant and a home and threw bricks and water bottles at officers. Eight officers were injured, police said.
Former city policeman Jason Stockley, 36, was found not guilty of the first-degree murder of Anthony Lamar Smith, 24, shot to death on Dec. 20, 2011.
After the ruling, around 600 protesters marched from the courthouse through downtown St. Louis, chanting “No justice, no peace” and “Hey hey! Ho ho! These killer cops have got to go!” Some held “Black Lives Matter” signs."""

In [None]:
# Analyse the sample article. `run` is not defined in this notebook;
# presumably it is provided by one of the star imports -- confirm.
res = run(text)

In [77]:
# Convert the analysis result into a 0/1 matrix (first row: the point itself,
# then one row per environ). NOTE: this rebinds `data`, replacing the random
# test matrix built earlier in the notebook.
data = res_to_matrix(res)

In [78]:
# Run the fringiness analysis on the matrix. 'cosine' is presumably the
# distance metric -- confirm in the `fringiness` module. The result is
# unpacked positionally and indexed by the plotting cells that follow.
fr = fringiness(data, 'cosine')

In [79]:
# Build the embedding plot from all elements of `fr` and render it with
# Bokeh's `show`.
plot = embedding_plot_bokeh(*fr)
show(plot)

In [81]:
# Histogram of the third element of `fr` and render it with Bokeh's `show`.
# What that element holds is defined by `fringiness` (presumably the
# per-sample fringiness scores -- TODO confirm).
histogram = histogram_bokeh(fr[2])
show(histogram)