In [3]:
from IPython.display import display, display_markdown

import pandas as pd
import numpy as np
import subprocess as sp
from filter_cooccurrences import get_terms
from subs2vec.vecs import Vectors
from subs2vec.neighbors import compute_nn

def display_md(md, **kwargs):
    """Show ``md`` as Markdown in the cell output.

    ``md`` is handed to IPython's ``display_markdown`` with ``raw=True``, i.e.
    as already-written Markdown source. Any extra keyword arguments are
    forwarded unchanged, and whatever ``display_markdown`` returns is returned.
    """
    rendered = display_markdown(md, raw=True, **kwargs)
    return rendered

def convert_notebook(title, output='html'):
    """Convert ``{title}.ipynb`` with ``jupyter nbconvert`` and report the outcome.

    Args:
        title: Notebook file name without the ``.ipynb`` extension. May contain
            spaces.
        output: Value passed to nbconvert's ``--to`` option; it is also used as
            the extension of the ``--output`` file name.

    Returns:
        None. A Markdown message is displayed instead: a success message when
        the nbconvert process exits with status 0, an error message otherwise.
    """
    # Build the argv list explicitly instead of splitting a formatted string on
    # spaces, so a title that contains spaces stays a single argument.
    command = [
        'jupyter', 'nbconvert', f'{title}.ipynb',
        '--to', str(output),
        '--output', f'{title}.{output}',
    ]
    convert = sp.run(command)
    if convert.returncode == 0:
        display_md(f'Jupyter notebook `{title}` converted successfully.')
    else:
        display_md(f'Error: encountered problem converting Jupyter notebook `{title}`')

## Compute and store semantic nearest neighbors for colors and adjectives
We'll compute the 100 nearest neighbors in the fiction embeddings for each color and adjective in our stimuli. Some neighbors of colors and adjectives will be _other colors and adjectives_, so we'll need to filter those out and replace them with the next-nearest neighbors; otherwise we could, e.g., remove a color from our corpus entirely and then no longer be able to learn an embedding for that color.

In [16]:
# Load the fiction embeddings that the nearest-neighbor search runs against.
# NOTE(review): n=1e5 presumably limits loading to the first 100,000 vectors and
# normalize=True presumably length-normalizes them — confirm against subs2vec.
embeddings_path = '../embeddings/fic.en.vec'
vecs = Vectors(embeddings_path, n=1e5, normalize=True)

[INFO] loading vectors ../embeddings/fic.en.vec
[INFO] <function Vectors.__init__ at 0x1248e0d30> ran in 2.886 seconds


In [38]:
# get_terms() supplies two groups of stimulus terms. `dimensions` presumably
# holds the adjectives mentioned in the section text — confirm in
# filter_cooccurrences. Both groups are combined with `+` into the single
# `labels` sequence that the following cells query and filter against.
colors, dimensions = get_terms()
labels = colors + dimensions

In [39]:
# Ask for far more neighbors (1000) than are ultimately kept: the next cell
# discards some candidates and keeps only the first 100 survivors per label.
target_labels = np.array(labels)
df = compute_nn(
    vecs,
    target_vecs=None,
    target_labels=target_labels,
    num_neighbors=1000,
    whole_matrix=True,
)

[INFO] <function Vectors.as_dict at 0x1248e0f70> ran in 0.044 seconds
[INFO] computing analogies using whole matrix additive method
[INFO] <function compute_nn at 0x1248e0af0> ran in 0.687 seconds


In [40]:
# For every row of candidates, drop entries that contain an underscore
# (presumably multi-word tokens — confirm) and entries that are themselves
# stimulus labels, then keep the first 100 that remain.
# NOTE(review): the label check is case-sensitive, so capitalised variants such
# as "Purple" are kept as neighbors of "purple" — confirm this is intended.
neighbors = []
for candidates in df.values.tolist():
    kept = [word for word in candidates if not ('_' in word or word in labels)]
    neighbors.append(kept[:100])

In [42]:
# One row per label; the columns hold that label's filtered neighbors in the
# order they were returned.
df = pd.DataFrame(neighbors).assign(labels=labels).set_index('labels')

# index=False means the `labels` index is NOT written: the TSV contains only
# the neighbor columns. The index is still shown in the displayed table below.
df.to_csv('data/neighbors_coca_fic.tsv', sep='\t', index=False)
display(df)

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
labels,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
brown,brownish,gray,browned,browns,browning,browner,blackish,tan,colored,grey,...,rust,amber,lank,tawny,grained,violet,plaid,grayed,staining,houndstooth
blue,gray,bluegreen,pink,violet,blued,crimson,maroon,turquoise,cobalt,purples,...,coloration,ribboned,splotches,fringed,grayish,pinafore,roseate,speckling,hues,yellowish
purple,purples,purpled,pink,purplish,violet,purpling,lavender,scarlet,crimson,Purple,...,puffy,puce,tourmaline,discolored,bluer,flowering,leotard,monochrome,gold,gaudily
white,whited,whitely,whiten,pink,whitening,gray,whitest,whitened,whiter,colored,...,iridescent,voile,purples,ribboned,unwrinkled,gabardine,gaudily,striped,immaculately,painted
green,bluegreen,greenish,greeny,gray,greens,purples,greening,yellows,greened,teal,...,sienna,beige,loopleaves,monochromatic,fuchsia,hyacinths,lucent,iridescence,bioluminescent,ocher
yellow,yellows,yellowing,yellowy,yellowish,yellowed,pink,crimson,greenish,purples,Yellow,...,Reddish,burgundy,staining,leaved,squiggly,cornflowers,pastel,whitely,glowed,lucent
red,crimson,pink,scarlet,bloodred,reddish,vermilion,pinking,redder,splotched,violet,...,golden,tinted,staining,blinking,browned,heartshaped,pale,stripes,colors,dirndl
black,gray,dark,blacked,blackish,Black,collarless,colored,blacker,sharkskin,formfitting,...,leathered,finned,worsted,barechested,multicolored,speckle,color,whitening,eyelet,pink
orange,oranges,pink,crimson,purples,tangerine,yellows,chartreuse,violet,magenta,Orange,...,paisley,discolored,bright,buttercream,ovoid,pomegranate,puce,mauve,fluorescents,turquoise
happy,unhappy,happier,glad,pleased,happiest,delighted,Happy,grateful,contented,content,...,hap,energetic,beautiful,mirthful,interested,expectant,satisfying,beaming,loquacious,depressed


In [4]:
# Export the `get_nearest_neighbors` notebook to HTML, the default output
# format of convert_notebook.
convert_notebook('get_nearest_neighbors')

[NbConvertApp] Converting notebook get_nearest_neighbors.ipynb to html
[NbConvertApp] Writing 615898 bytes to get_nearest_neighbors.html


Jupyter notebook `get_nearest_neighbors` converted successfully.