In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import umap.plot
import bokeh.plotting
from matplotlib import pyplot as plt

from uriel import Uriel

In [3]:
# Shared Uriel instance used by the plotting helpers in this notebook, which
# read `u.language_metadata()` and `u.umap_vectors`.
# NOTE(review): presumably `load=True` restores previously saved data and
# `umap=True` makes the UMAP projection available -- confirm against uriel.Uriel.
u = Uriel(load=True, umap=True)

In [91]:
from matplotlib.colors import LinearSegmentedColormap


def show_languages(languages):
    """Show the interactive UMAP language map with selected languages highlighted.

    Every language known to the module-level ``u`` is drawn as a small point
    with hover metadata and a text-search box; languages whose ``code`` is in
    ``languages`` additionally get a large translucent red marker.

    Args:
        languages: Collection of language codes, matched against the ``code``
            column of ``u.language_metadata()``.

    Returns:
        None. The plot is rendered inline in the notebook.
    """
    all_languages = u.language_metadata()
    layout = umap.plot.interactive(
        u.umap_vectors,
        hover_data=all_languages,
        point_size=2,
        width=600,
        height=600,
        interactive_text_search=True,
        interactive_text_search_alpha_contrast=0.99,
    )

    # Keep only the requested languages and attach their 2-D coordinates.
    # assumes the metadata index labels are row positions in `embedding_` -- TODO confirm
    selected = all_languages[all_languages['code'].isin(languages)]
    coords = u.umap_vectors.embedding_[selected.index]
    selected = selected.assign(x=coords[:, 0], y=coords[:, 1])

    # The second child of the returned layout is used as the Bokeh figure to
    # draw the highlight markers on.
    figure = layout.children[1]
    marker_style = dict(size=10, alpha=0.5, color="#ee6666")
    figure.circle(x='x', y='y', source=selected, **marker_style)

    bokeh.plotting.output_notebook()
    bokeh.plotting.show(layout)
    
def show_diff(score1, score2, comparison='rel'):
    """Show the UMAP language map with a bar per language comparing two scores.

    All languages known to the module-level ``u`` are drawn as small grey
    points. For every language whose ``code`` is a key of ``score1`` a thin
    vertical bar is drawn upwards from its point. The bar height is
    ``abs(1 - score2 / score1)``, rescaled so that the largest change is 2
    plot units. The bar is green when ``score2 / score1 > 1`` and red
    otherwise.

    Args:
        score1: Mapping from language code to the baseline score. Values must
            be non-zero because they are used as divisors.
        score2: Mapping from language code to the score being compared. It
            must contain every plotted language code found in ``score1``.
        comparison: Accepted for interface compatibility; the function body
            does not read it, so only the relative comparison is available.

    Returns:
        None. The plot is rendered inline in the notebook.
    """
    metadata = u.language_metadata()
    p = umap.plot.interactive(
        u.umap_vectors,
        hover_data=metadata,
        width=600,
        height=600,
        interactive_text_search=True,
        point_size=2,
        interactive_text_search_alpha_contrast=0.99,
        # Near-uniform light grey so the coloured bars stand out.
        cmap=LinearSegmentedColormap.from_list('oink', ['#eee', '#ccc']),
    )

    metadata = metadata[metadata['code'].isin(score1)]
    # assumes the metadata index labels are row positions in `embedding_` -- TODO confirm
    coords = u.umap_vectors.embedding_[metadata.index]
    # Compute each ratio once; both the bar height and its colour derive from it.
    ratios = [score2[code] / score1[code] for code in metadata['code']]
    metadata = metadata.assign(
        x=coords[:, 0],
        y=coords[:, 1],
        width=0.05,
        height=[abs(1 - ratio) for ratio in ratios],
        color=['#66ee66' if ratio > 1 else '#ee6666' for ratio in ratios],
    )

    if not metadata.empty:
        tallest = metadata['height'].max()
        # Only rescale when there is something to scale: with no differences
        # at all, dividing by zero would turn every height (and then every y)
        # into NaN.
        if tallest > 0:
            metadata['height'] = metadata['height'] / tallest * 2
        # Shift the centre up by half the height so each bar grows upwards
        # from its language's point.
        metadata['y'] += metadata['height'] / 2

    p.children[1].rect(
        x='x',
        y='y',
        width='width',
        height='height',
        color='color',
        source=metadata,
    )

    bokeh.plotting.output_notebook()
    bokeh.plotting.show(p)


In [92]:
# Parse the results table: the first two whitespace-separated fields of each
# row are dropped and the remaining fields are read as floats. The `with`
# block closes the file handle deterministically, and blank lines are skipped
# so that a stray empty line does not produce a ragged row for np.vstack.
with open('./papers/rahimi_ner.txt') as results_file:
    scores = np.vstack([
        [float(v) for v in line.split()[2:]]
        for line in results_file
        if line.strip()
    ])

# Language codes paired positionally with the rows of `scores`.
# NOTE(review): zip() truncates silently if the row count differs from the
# number of codes -- confirm the file has exactly one row per language.
languages = ['afr', 'arb', 'bul', 'ben', 'bos', 'cat', 'ces', 'dan', 'deu', 'ell', 'spa', 'est', 'pes', 'fin', 'fra', 'heb', 'hin', 'hrv', 'hun', 'ind', 'ita', 'lit', 'lav', 'mkd', 'zlm', 'nld', 'nor', 'pol', 'por', 'ron', 'rus', 'slk', 'slv', 'als', 'swe', 'tam', 'tgl', 'tur', 'ukr', 'vie']

# Compare score column 11 against score column 2 for every language.
# NOTE(review): which systems these two columns correspond to is not visible
# here -- confirm against the layout of rahimi_ner.txt.
show_diff(dict(zip(languages, scores[:,2])), dict(zip(languages, scores[:, 11])))