In [1]:
text = open("luster.txt").read()

In [2]:
conda install -c conda-forge spacy

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.


Note: you may need to restart the kernel to use updated packages.


In [3]:
import json
import spacy
import random

In [4]:
def hex_to_int(s):
    s = s.lstrip("#")
    return np.array([int(s[:2], 16), int(s[2:4], 16), int(s[4:6], 16)])

In [5]:
import math
def distance2d(a, b):
    return math.sqrt((a[0] - b[0])**2 + (a[1] - b[1])**2)

prob_lookup = dict(json.load(open("./wordfreq-en-25000-log.json")))
math.exp(prob_lookup['i'])

0.01290036078727234

In [6]:
import spacy
nlp = spacy.load('en_core_web_md')
n = nlp(open("luster.txt").read())

In [7]:
import numpy as np
from numpy.linalg import norm

def vec(s):
    return nlp.vocab[s].vector

def meanv(vecs):
    total = np.sum(vecs, axis=0)
    return total / len(vecs)

from numpy.linalg import norm
def distance(a, b):
    return norm(a - b)

In [8]:
from simpleneighbors import SimpleNeighbors

lookup = SimpleNeighbors(300)
for word in n:
        if word.text.lower()not in lookup.corpus:
            lookup.add_one(word.text.lower(), word.vector)
lookup.build()

In [9]:
lookup.nearest(vec('eric'))

['eric',
 'david',
 'fritz',
 'duane',
 'jason',
 'kevin',
 'hamish',
 'robert',
 'paul',
 'john',
 'tim',
 'bruce']

In [10]:
lookup.nearest(meanv([vec("eric"), vec("rebecca")]))

['rebecca',
 'eric',
 'becky',
 'michelle',
 'katie',
 'kevin',
 'david',
 'jason',
 'jenna',
 'veronica',
 'james',
 'ivan']

In [11]:
chunk_lookup = SimpleNeighbors(300)
for chunk in n.noun_chunks:
    chunk_text = chunk.text.replace("\n", " ")
    if chunk_text not in chunk_lookup.corpus:
        chunk_lookup.add_one(chunk_text, chunk.vector)
chunk_lookup.build()

In [12]:
chunk_lookup.nearest(nlp("edie").vector)

['Edie',
 'Ellen',
 '“Edie',
 'Suzanne Somers',
 'Rebecca',
 'Gloria Gaynor',
 'Katie',
 'Deborah',
 'Becky Abramov',
 'Eileen',
 'John Ritter',
 'Veronica']

In [13]:
chunk_lookup.nearest(nlp("Yale").vector)

['Yale',
 'Princeton',
 'NYU',
 'Suzanne Somers',
 'Sloan Kettering',
 'John Ritter',
 'Wikipedia',
 'David',
 'Manhattan',
 'SoHo',
 'Flatiron',
 'Robert']

In [14]:
chunk_lookup.nearest(nlp("Akila").vector)

['you',
 'You',
 'the happy parts',
 'just one way',
 'the time',
 'our first real date',
 'the right one',
 'the apartment',
 'And then the worst part',
 'a place',
 'only the diaper',
 'this choice']

In [15]:
sentence_lookup = SimpleNeighbors(300)
for sent in n.sents:
    sentence_lookup.add_one(sent.text.replace("\n", " "), sent.vector)
sentence_lookup.build()

In [16]:
sentence_lookup.nearest(nlp("Does Eric drink when he’s with you?").vector)

['I want to know what he’s said, and when she smiles I know she can see it on my face.  ',
 'I can tell you’ve never owned anything,” she says, and then she withdraws and says it’s time to bring out the cake.',
 'He tells me what he ate for lunch and asks if I can manage to take off my underwear in my cubicle without anyone noticing.',
 'I can’t tell if she is looking at me or the neighbor’s dog.  ',
 'I ask him if something is wrong, and he hoists me up and takes me from behind as a sleepy radio voice is introducing “Come On Eileen.”',
 'He tells me that he’s firing the current receptionist because she is too whimsical with visitors, and I tell him that I thought clowning was supposed to be fun.',
 'and I think she’s going to ask me to put it out, but instead she asks for one.',
 'I know it’s because she can tell I’m nervous, and that makes it worse.  ',
 'He thinks I’m joking, and when he realizes I’m not, his face darkens and he says he doesn’t feel comfortable with that.',
 'and yo

# "parts of speech"

In [17]:
noun_lookup = SimpleNeighbors(300)
for word in n:
    if word.tag_ == 'NN' and word.text.lower() not in noun_lookup.corpus:
        noun_lookup.add_one(word.text.lower(), word.vector)
noun_lookup.build()

In [18]:
noun_lookup.nearest(vec("Edie"))

['fritz',
 'decker',
 'grindhouse',
 'twilight',
 'playboy',
 'aw',
 'shucks',
 'duke',
 'duchy',
 'kiss',
 'lyric',
 'bikini']

In [19]:
adj_lookup = SimpleNeighbors(300)
for word in n:
    if word.tag_ == 'JJ' and word.text.lower() not in adj_lookup.corpus:
        adj_lookup.add_one(word.text.lower(), word.vector)
adj_lookup.build()

In [20]:
adj_lookup.nearest(vec("Akila"))

['sweet',
 '-',
 'quiet',
 'much',
 'great',
 'happy',
 'sensitive',
 'earnest',
 'real',
 'complex',
 'little',
 'hard']

In [21]:
for i in range(3):
    print(random.choice(lookup.nearest(vec('Edie'))))
    print()
    print(random.choice(adj_lookup.nearest(vec('Edie'))))
    print()
    print(random.choice(noun_lookup.nearest(vec('Edie'))))
    print()
    print(random.choice(chunk_lookup.nearest(nlp('Edie').vector)))
    print("...")

deborah

norwegian

duchy

John Ritter
...
donna

british

fritz

Katie
...
donna

norwegian

bikini

“Edie
...


In [22]:
import sys
!{sys.executable} -m pip install pronouncing



In [23]:
import pronouncing

In [24]:
import pronouncing as pr

In [25]:
pr.phones_for_word("about")

['AH0 B AW1 T']

In [26]:
pr.phones_for_word("edie")

['EH1 D IY0']

In [27]:
pr.phones_for_word("eric")

['EH1 R IH0 K']

In [28]:
from collections import Counter                               
text = open("luster.txt").read()
count = Counter()                                             
words = text.split()
for word in words:
    pronunciation_list = pronouncing.phones_for_word(word)
    if len(pronunciation_list) > 0:
      count.update(pronunciation_list[0].split(" "))

count.most_common(5)

[('AH0', 14702), ('N', 13264), ('T', 13027), ('D', 7772), ('S', 7601)]

In [29]:
pronouncing.search("EH1 D IY0")[:5]

['abedi', 'already', 'already', 'bready', 'deady']

In [30]:
pronouncing.rhymes("edie")

['abedi',
 'already',
 'already',
 'bready',
 'deady',
 'dwivedi',
 'eadie',
 'eddie',
 'eddy',
 'edye',
 'freddie',
 'freddy',
 'freddye',
 'geddie',
 'heady',
 'heddy',
 'hedi',
 'keddy',
 'leddy',
 'manfredi',
 'maready',
 'medi',
 'mehdi',
 'peddie',
 'peddy',
 'preddy',
 'ready',
 'reddy',
 'redi',
 'steady',
 'tancredi',
 'tangredi',
 'teddie',
 'teddy',
 'trivedi',
 'unsteady']

In [31]:
text = open("luster_meetings.txt").read()
out = list()
for word in text.split():
    rhymes = pronouncing.rhymes(word)
    if len(rhymes) > 0:
        out.append(random.choice(rhymes))
    else:
        out.append(word)
print(' '.join(out))

the family crance love cluster pty sarajevan jani duree 1 the gerst kime dea halve sex, sieh cisar growth woolly clothed, sadat knauer desks alluring lurking hours, unscathed in achoo polluter light. foresee griz towne processing a you gundel belove seabeach and switaj zahm uptown mishandling predilections roquemore a badu evermore ineffective manuscript. chablis materiels embree scutt blea wait corps bunche and basks diff phi stan manage q yake stauff lai cookware in thai cubicle shout dipanjan noticing. wiz messages schumm with impeccable punctuation. fsi quiz fernand gov words' eich raced and spread. the empty next schield tis schul labove possibilities. shove perforce rely murrey route commit remoting into whereby computer, lore buy marmoset protohistory warranting midyette nother gooseberry completing with HR. somewhat the risk. the pille belove a concurred o'hare labov smithereen eyes. the ia nonfat grandson in the office, with hatt sweet, post-lunch-break optimism, non-white thu

In [32]:
text = sentence_lookup.nearest(nlp("Does Eric drink when he’s with you?").vector)
out = list()
for word in text:
    rhymes = pronouncing.rhymes(word)
    if len(rhymes) > 0:
        out.append(random.choice(rhymes))
    else:
        out.append(word)
print(' '.join(out))

I want to know what he’s said, and when she smiles I know she can see it on my face.   I can tell you’ve never owned anything,” she says, and then she withdraws and says it’s time to bring out the cake. He tells me what he ate for lunch and asks if I can manage to take off my underwear in my cubicle without anyone noticing. I can’t tell if she is looking at me or the neighbor’s dog.   I ask him if something is wrong, and he hoists me up and takes me from behind as a sleepy radio voice is introducing “Come On Eileen.” He tells me that he’s firing the current receptionist because she is too whimsical with visitors, and I tell him that I thought clowning was supposed to be fun. and I think she’s going to ask me to put it out, but instead she asks for one. I know it’s because she can tell I’m nervous, and that makes it worse.   He thinks I’m joking, and when he realizes I’m not, his face darkens and he says he doesn’t feel comfortable with that. and you forget that everything eventually di

In [33]:
text = sentence_lookup.nearest(nlp("black").vector)
out = list()
for word in text:
    rhymes = pronouncing.rhymes(word)
    if len(rhymes) > 0:
        out.append(random.choice(rhymes))
    else:
        out.append(word)
print(' '.join(out))

A black Leia! A black child in a pink wig and a tummy shirt, smoking a candy cigarette.   My mother was number six, smack-dab in the middle of a transition from tall, blue-black boys to bodacious, kinky-haired girls.     Orange, yellow, pink. The president was black. She is a natural black girl, bright and woowoo, a cluster of cloudy amethyst around her neck. There is a modest purple vibrator with three speeds, cotton balls, hydrogen peroxide, hair dye, and black nail polish. A child who is black.”   It depicts a woman crawling through tall, brown grass. The pigments drawn from sand and Canterbury bells, the carbon black drawn from fire and spread onto slick cave walls. I pack a can of black olives for lunch. Then the master takes the other purple belt, a small white girl with dark, sunken eyes, and the class settles down onto their knees as she and Akila spar.


In [34]:
text = 'A black child in a pink wig and a tummy shirt, smoking a candy cigarette.'
out = list()
for word in text.split():
    rhymes = pronouncing.rhymes(word)
    if len(rhymes) > 0:
        out.append(random.choice(rhymes))
    else:
        out.append(word)
print(' '.join(out))

a tacke refiled in a link prig and a mummy shirt, coking a shandy cigarette.


In [35]:
for i in range(10):
    print(random.choice(chunk_lookup.nearest(nlp('black').vector)))
    print("...")

her white coat
...
tall, blue-black boys
...
pierced white boys
...
brown pleather
...
Three black wigs
...
black SUVs
...
Three black wigs
...
dark red
...
black mothers
...
black men
...
