# Hack Zurich - Fringer

In [1]:
%load_ext autoreload
%autoreload 2
from __future__ import division, print_function

import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist, squareform
from sklearn.manifold import MDS, Isomap, TSNE
from sklearn.decomposition import PCA

from fringiness import *
from bokeh.plotting import show
from data_getter import *

Create random data to test the method.

In [9]:
def random_data(n, m, sparsity=0.8, mean=2, distribution='poisson',
                random_state=None):
    """
    Generate a random ``(n, m)`` matrix and zero out a share of its entries.

    Parameters
    ----------
    n : int
        number of samples (rows)
    m : int
        number of features (columns)
    sparsity : float between 0 and 1
        probability with which each entry is independently set to zero.
        Entries that the distribution itself draws as zero come on top of
        that, so the observed ratio of zeros can exceed `sparsity`.
    mean : float
        rate of the Poisson distribution, or the value added to standard
        normal samples when `distribution` is 'normal'.
    distribution : str
        supported are 'poisson' and 'normal'
    random_state : None, int or numpy.random.RandomState, optional
        None (default) draws from numpy's global generator, exactly as
        before. An int seeds a private generator; an existing RandomState
        instance is used as is.

    Returns
    -------
    numpy.ndarray of shape (n, m)
        Integer-valued for 'poisson', float-valued for 'normal'.

    Raises
    ------
    ValueError
        If `distribution` is neither 'poisson' nor 'normal'.
    """
    if random_state is None:
        # The numpy.random module exposes the same poisson/randn/rand
        # functions, backed by the global generator.
        rng = np.random
    elif isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    if distribution == 'poisson':
        r = rng.poisson(mean, (n, m))
    elif distribution == 'normal':
        r = rng.randn(n * m).reshape((n, m)) + mean
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on `r`.
        raise ValueError(
            "unsupported distribution %r; expected 'poisson' or 'normal'"
            % (distribution,))

    # Independent uniform mask: entries whose draw is below `sparsity`
    # are zeroed.
    p = rng.rand(n * m).reshape((n, m))
    r[p < sparsity] = 0
    return r

def res_to_matrix(res):
    """
    Convert a result dict into a binary sample/entity indicator matrix.

    Parameters
    ----------
    res : dict
        ``res['point']`` is an iterable of entity keys and
        ``res['environs']`` an iterable of dicts, each holding an
        ``'entities'`` dict keyed by entity. Keys must be hashable.

    Returns
    -------
    numpy.ndarray of float
        Row 0 describes ``res['point']``; each following row describes one
        environ, in input order. An entry is 1 where the sample contains
        the entity of that column and 0 otherwise. Environs without any
        entity are skipped, so there may be fewer than
        ``1 + len(res['environs'])`` rows. Columns are ordered by first
        appearance of the entity (point first, then the environs), which
        makes the layout reproducible between runs.

    Raises
    ------
    ValueError
        If ``res['point']`` contains no entities.
    """
    point_keys = list(res['point'])
    if not point_keys:
        raise ValueError("res['point'] contains no entities")

    # Map every entity to a column index in first-seen order. A plain set
    # would give an arbitrary column order that changes between kernels.
    column_of = {}
    for key in point_keys:
        column_of.setdefault(key, len(column_of))
    for env in res['environs']:
        for key in env['entities']:
            column_of.setdefault(key, len(column_of))

    def _indicator(keys):
        """Return a 0/1 row with ones in the columns of `keys`."""
        row = np.zeros(len(column_of))
        row[[column_of[key] for key in keys]] = 1
        return row

    rows = [_indicator(point_keys)]
    for env in res['environs']:
        # Environs without entities would yield an all-zero row; they are
        # dropped, as the original best-effort try/except did.
        if env['entities']:
            rows.append(_indicator(env['entities']))
    return np.vstack(rows)

def text_to_matrix(text):
    """
    Process `text` with `run` and convert the result via `res_to_matrix`.

    `run` is not defined in this notebook; presumably it comes from one of
    the star imports (TODO confirm which module provides it).
    """
    result = run(text)
    return res_to_matrix(result)

In [5]:
# Seed numpy's global generator so that "Restart & Run All" reproduces the
# same synthetic data (and therefore the same plots) every time.
np.random.seed(0)

# Four groups of 200 samples x 100 features each, stacked row-wise. The
# groups differ in distribution and mean; all share sparsity 0.8.
data = np.vstack((
    random_data(200, 100, mean=.5, sparsity=.8, distribution='poisson'),
    random_data(200, 100, mean=10, sparsity=.8, distribution='normal'),
    random_data(200, 100, mean=8, sparsity=.8, distribution='poisson'),
    random_data(200, 100, mean=1, sparsity=.8, distribution='normal')))

In [None]:
# Two-panel overview of `data`: the row-normalised matrix as an image
# (left) and a histogram of all raw values (right).
fig, (ax_raw, ax_hist) = plt.subplots(1, 2)

# Scale every sample (row of `data`) to unit Euclidean length. All-zero
# samples get a divisor of 1 so they stay zero instead of becoming NaN.
sample_norms = np.linalg.norm(data, axis=1)
sample_norms[sample_norms == 0] = 1

ax_raw.set_title('Raw data')
# After the transpose, image rows (y axis) are the columns of `data`
# (entities) and image columns (x axis) are its rows (samples).
image = ax_raw.imshow(data.T / sample_norms, aspect='auto')
ax_raw.set_xlabel('Sample #')
ax_raw.set_ylabel('Entity #')

# bins=range(10) only covers values in [0, 9]; values outside that range
# are not counted in the histogram.
ax_hist.hist(data.flatten(), bins=range(10))
ax_hist.set_title('Entity frequency distribution')

# Attach the colorbar explicitly to the image axes rather than relying on
# whichever axes happens to be current.
fig.colorbar(image, ax=ax_raw)
fig.tight_layout()

In [3]:
# Sample article used as input for the entity extraction below.
text = "WASHINGTON—Saying that the charitable contribution would immediately provide a clear conscience, experts reported Friday that you need to donate $37 to hurricane relief in order to completely forget about what happened. “Concerned individuals have been asking what they can do to stop feeling bad about hurricane victims, so everyone should know that a $37 donation is sufficient to allow you to move on from the disaster once and for all,” said nonprofit analyst Sandra Western, adding that a donation in that amount entitles the giver to avoid any further media coverage of the hurricane and not feel any kind of shame about the degree to which they were helping. “You can donate $25, but the notion that you could have done more might nag at you for a bit. At $37, however, you’ll be able to tell yourself you pitched in when some truly desperate people needed you most and then never think about them again. In addition, if you donate $65 or more, you can also sit out the next catastrophe with no guilt whatsoever.” Western went on to say that the guidelines did not apply to selfish assholes, who could donate nothing and immediately forget the devastation anyway."
# The call was left unterminated (`run(`), which is a SyntaxError. It is
# completed with `text`, the same way text_to_matrix invokes `run`.
res = run(text)

In [4]:
# Report how many environs the result for the sample text contains.
environ_count = len(res['environs'])
print(environ_count)

104


In [10]:
# Turn the result for the sample text into a sample/entity matrix and
# pass it to `fringiness` with the 'cosine' option.
data = res_to_matrix(res)
fr = fringiness(data, 'cosine')

# `fr` is unpacked positionally into the embedding plot; its third element
# alone feeds the histogram. (What each element holds is defined by
# `fringiness`, which is not visible in this notebook.)
plot = embedding_plot_bokeh(*fr)
histogram = histogram_bokeh(fr[2])

# Render both Bokeh figures, embedding plot first.
for bokeh_figure in (plot, histogram):
    show(bokeh_figure)