In [1]:
text = open("luster_meetings.txt").read()

In [2]:
conda install -c conda-forge spacy

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.


Note: you may need to restart the kernel to use updated packages.


In [3]:
import json
import spacy
import random

In [4]:
def hex_to_int(s):
    s = s.lstrip("#")
    return np.array([int(s[:2], 16), int(s[2:4], 16), int(s[4:6], 16)])

In [5]:
import math
def distance2d(a, b):
    return math.sqrt((a[0] - b[0])**2 + (a[1] - b[1])**2)

prob_lookup = dict(json.load(open("./wordfreq-en-25000-log.json")))
math.exp(prob_lookup['i'])

0.01290036078727234

In [6]:
import spacy
nlp = spacy.load('en_core_web_md')
n = nlp(open("luster_meetings.txt").read())

In [7]:
import numpy as np
from numpy.linalg import norm

def vec(s):
    return nlp.vocab[s].vector

def meanv(vecs):
    total = np.sum(vecs, axis=0)
    return total / len(vecs)

from numpy.linalg import norm
def distance(a, b):
    return norm(a - b)

In [8]:
from simpleneighbors import SimpleNeighbors

lookup = SimpleNeighbors(300)
for word in n:
        if word.text.lower()not in lookup.corpus:
            lookup.add_one(word.text.lower(), word.vector)
lookup.build()

In [9]:
lookup.nearest(vec('eric'))

['eric',
 'phil',
 'mister',
 'edie',
 'rebecca',
 'collins',
 'rogers',
 'clinton',
 'essex',
 'mae',
 'tweety',
 'taz']

In [10]:
lookup.nearest(meanv([vec("eric"), vec("rebecca")]))

['rebecca',
 'eric',
 'edie',
 'mister',
 'phil',
 'collins',
 'rogers',
 'clinton',
 'tweety',
 'taz',
 'essex',
 'tuesday']

In [11]:
chunk_lookup = SimpleNeighbors(300)
for chunk in n.noun_chunks:
    chunk_text = chunk.text.replace("\n", " ")
    if chunk_text not in chunk_lookup.corpus:
        chunk_lookup.add_one(chunk_text, chunk.vector)
chunk_lookup.build()

In [12]:
chunk_lookup.nearest(nlp("edie").vector)

['“Edie',
 'Rebecca',
 'Eric',
 '“Rebecca',
 'Raven Leilani',
 'how tenderly Eric',
 'Yale',
 'African American',
 'fox hair',
 'I',
 'Eric’s flushed face',
 'Pompeii']

In [13]:
chunk_lookup.nearest(nlp("Yale").vector)

['Yale',
 'Appleton',
 'African American',
 'Milwaukee',
 'Eric',
 'Rebecca',
 'a Phil Collins joint',
 '“Edie',
 'Pompeii',
 '“Rebecca',
 'DMV photos',
 'how tenderly Eric']

In [14]:
chunk_lookup.nearest(nlp("Akila").vector)

[' The Family Romance',
 'p.',
 'The empty text field',
 'possibilities',
 'But the risk',
 'The thrill',
 'The idea',
 'the thread',
 'the sand',
 'the soft recession',
 'the photo',
 'the train']

In [15]:
sentence_lookup = SimpleNeighbors(300)
for sent in n.sents:
    sentence_lookup.add_one(sent.text.replace("\n", " "), sent.vector)
sentence_lookup.build()

In [16]:
sentence_lookup.nearest(nlp("Does Eric drink when he’s with you?").vector)

['“Does Eric drink when he’s with you?”  ',
 'I can tell you’ve never owned anything,” she says, and then she withdraws and says it’s time to bring out the cake.',
 'He tells me what he ate for lunch and asks if I can manage to take off my underwear in my cubicle without anyone noticing.',
 '’s feelings deeply because of something I say or a face I make, which I will of course think about when I ride the train home, and actually, forever, even though I tried to be merry and keep the conversation light, even though I can’t sleep and I can’t shit, and someone is dying but that one song tells you to slide to the left',
 '“I know who you are but I don’t want to discuss it, if that’s all right with you,” she says, dusting herself off.',
 'Eric says he’s sorry, and the apology feels like it is not about only the diaper, but more how this choice of location is turning out.',
 'All I want is for him to have what he wants.',
 'There is a moment when I think she is preparing to punch me.',
 'and

# "parts of speech"

In [17]:
noun_lookup = SimpleNeighbors(300)
for word in n:
    if word.tag_ == 'NN' and word.text.lower() not in noun_lookup.corpus:
        noun_lookup.add_one(word.text.lower(), word.vector)
noun_lookup.build()

In [18]:
noun_lookup.nearest(vec("Edie"))

['fox',
 'moss',
 'aunt',
 'laugh',
 'girl',
 'doll',
 'voodoo',
 'daddy',
 'husk',
 'dress',
 'makeup',
 'haircut']

In [19]:
adj_lookup = SimpleNeighbors(300)
for word in n:
    if word.tag_ == 'JJ' and word.text.lower() not in adj_lookup.corpus:
        adj_lookup.add_one(word.text.lower(), word.vector)
adj_lookup.build()

In [20]:
adj_lookup.nearest(vec("Akila"))

['first',
 'blue',
 'uptown',
 'new',
 'impeccable',
 'fond',
 'empty',
 'full',
 'disciplinary',
 'third',
 'unseen',
 'sweet']

In [21]:
for i in range(3):
    print(random.choice(lookup.nearest(vec('Edie'))))
    print()
    print(random.choice(lookup.nearest(vec('Eric'))))
    print()
    print(random.choice(lookup.nearest(vec('Rebecca'))))
    print()
    print(random.choice(lookup.nearest(vec('Alika'))))

yale

tweety

edie

mae
rebecca

phil

tuesday

sallie
eric

phil

taz

rogers


In [22]:
for i in range(3):
    print(random.choice(adj_lookup.nearest(vec("old"))))
    print()
    print(random.choice(noun_lookup.nearest(vec("animal"))))

old

sex
few

child
long

body


In [24]:
stanza_count = random.randint(5,10)
print(stanza_count)

9


In [25]:
print("Some Meetings")
print()

for i in range(stanza_count):
    print()
    for i in range(random.randint(0,6)): #random chunk count
        print("   ",random.choice(noun_lookup.nearest(vec("blessing")) if random.random() > 0.5 else random.choice(noun_lookup.nearest(vec("curse")))))
              

Some Meetings


    pity
    a
    moment
    dignity
    u

    e
    p

    e
    y
    i
    v
    b

    l
    everyone
    moment
    dignity
    m

    occasion
    yearning
    promise
    everyone
    yearning



    pity
    sympathy
    i

    a
    occasion
    a


In [26]:
stanza_count = random.randint(5,10)
print(stanza_count)

10


In [27]:
print("Some Meetings")
print()

for i in range(stanza_count):
    print()
    print("...")
    for i in range(random.randint(1,5)): #random chunk count
        print("   ",random.choice(noun_lookup.nearest(vec("edie")) if random.random() > 0.5 else random.choice(noun_lookup.nearest(vec("eric")))))
    print("...")
    for i in range(random.randint(1,5)): #random chunk count
        print("   ",random.choice(noun_lookup.nearest(vec("edie")) if random.random() > 0.5 else random.choice(noun_lookup.nearest(vec("rebecca")))))
    print("...")
    for i in range(random.randint(1,5)): #random chunk count
        print("   ",random.choice(noun_lookup.nearest(vec("edie")) if random.random() > 0.5 else random.choice(noun_lookup.nearest(vec("akila")))))
    print("...")
    for i in range(random.randint(1,5)): 
        print("   ",random.choice(noun_lookup.nearest(vec("edie")) if random.random() > 0.5 else random.choice(noun_lookup.nearest(vec("edie")))))

Some Meetings


...
    voodoo
    dress
    girl
    p
    d
...
    a
    v
...
    b
    s
    doll
    dress
    makeup
...
    husk
    u

...
    dress
    doll
    dress
    l
...
    fox
    o
...
    b
    fox
    husk
    dress
    e
...
    t
    o
    aunt
    s

...
    v
    fox
    haircut
    l
...
    aunt
    d
    v
    f
...
    d
    voodoo
    l
    t
    moss
...
    d
    voodoo
    k

...
    aunt
    doll
    k
    l
    t
...
    daddy
    c
...
    n
    moss
...
    o
    makeup
    y

...
    voodoo
    d
    daddy
...
    husk
    r
    doll
    d
...
    i
    s
    s
...
    haircut
    a
    d
    laugh

...
    haircut
...
    moss
...
    moss
    m
    husk
    r
...
    l
    i

...
    husk
    t
    f
    i
    f
...
    e
    a
    fox
    aunt
    o
...
    haircut
    b
    makeup
    t
    d
...
    m
    k

...
    dress
    o
    makeup
    voodoo
...
    d
    fox
...
    t
    girl
    daddy
    aunt
    s
...
    o
    x
    e

...
    m