In [1]:
import spacy
import numpy as np
import pandas as pd

In [2]:
# Name of the medium English spaCy pipeline; `nlp` is used by the cells below.
SPACY_MODEL = 'en_core_web_md'
nlp = spacy.load(SPACY_MODEL)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# load in two "happy" songs and two "sad" songs

# Blue Skies by Frank Sinatra
# The lyrics are a single string literal. Each trailing backslash continues the
# literal onto the next source line, so no newline characters are embedded, but
# the next line's leading indentation DOES become part of the text given to nlp.
blue_skies = nlp(
    "Blue skies, smiling at me \
    Nothing but blue skies do I see \
    Blue days, all of them gone \
    Nothing but blue skies from now on \
    Never saw the sun shining so bright \
    Never saw things looking so right \
    Noticing the days hurrying by \
    When you're in love, my how they fly like \
    Bluebirds singing a song \
    Nothing but bluebirds all day long \
    Blue skies, smiling at me \
    Nothing but blue skies do I see \
    Blue days, all of them gone \
    Nothing but blue skies from now on"
)

# What a Wonderful World by Louis Armstrong
# Single string literal joined with backslash continuations: no newlines are
# embedded, but each continued line's leading indentation is part of the text.
# Double quotes inside the lyrics are escaped (\") because the literal itself
# is double-quoted.
wond_world = nlp(
    "I see trees of green \
    Red roses too \
    I see them bloom \
    For me and you \
    And I think to myself \
    What a wonderful world \
    I see skies of blue \
    And clouds of white \
    The bright blessed day \
    The dark sacred night \
    And I think to myself \
    What a wonderful world \
    The colors of the rainbow \
    So pretty in the sky \
    Are also on the faces \
    Of people going by \
    I see friends shaking hands \
    Saying, \"How do you do?\" \
    They\'re really saying \
    I love you \
    I hear babies cry \
    I watch them grow \
    They\'ll learn much more \
    Than I\'ll ever know \
    And I think to myself \
    What a wonderful world \
    Yes, I think to myself \
    What a wonderful world \
    Ooh, yes"
)

# The Sound of Silence by Simon and Garfunkel
# Single string literal joined with backslash continuations: no newlines are
# embedded, but each continued line's leading indentation is part of the text.
# Note the literal ends with a space before the closing quote.
silence = nlp(
    "Hello darkness, my old friend \
    I\'ve come to talk with you again \
    Because a vision softly creeping \
    Left its seeds while I was sleeping \
    And the vision that was planted in my brain \
    Still remains \
    Within the sound of silence \
    In restless dreams I walked alone \
    Narrow streets of cobblestone \
    \'Neath the halo of a street lamp \
    I turned my collar to the cold and damp \
    When my eyes were stabbed by the flash of a neon light \
    That split the night \
    And touched the sound of silence \
    And in the naked light I saw \
    Ten thousand people, maybe more \
    People talking without speaking \
    People hearing without listening \
    People writing songs that voices never share \
    No one dared \
    Disturb the sound of silence \
    \"Fools\" said I, \"You do not know \
    Silence like a cancer grows \
    Hear my words that I might teach you \
    Take my arms that I might reach you\" \
    But my words like silent raindrops fell \
    And echoed in the wells of silence \
    And the people bowed and prayed \
    To the neon god they made \
    And the sign flashed out its warning \
    In the words that it was forming \
    And the sign said, \"The words of the prophets \
    Are written on the subway walls \
    And tenement halls \
    And whispered in the sounds of silence\" "
)

# Mad World by Tears for Fears
# Single string literal joined with backslash continuations: no newlines are
# embedded, but each continued line's leading indentation is part of the text.
# Apostrophes appear both escaped (\') and unescaped ('); inside a
# double-quoted literal both spellings yield the same character.
mad_world = nlp(
    "All around me are familiar faces \
    Worn out places, worn out faces \
    Bright and early for their daily races \
    Going nowhere, going nowhere \
    Their tears are filling up their glasses \
    No expression, no expression \
    Hide my head I want to drown my sorrow \
    No tomorrow, no tomorrow \
    And I find it kind of funny \
    I find it kind of sad \
    The dreams in which I\'m dying are the best I\'ve ever had \
    I find it hard to tell you \'cause I find it hard to take \
    When people run in circles it\'s a very, very \
    Mad world \
    Mad world \
    Mad world \
    Mad world \
    Children waiting for the day they feel good \
    Happy birthday, happy birthday \
    Made to feel the way that every child should \
    Sit and listen, sit and listen \
    Went to school and I was very nervous \
    No one knew me, no one knew me \
    Hello teacher tell me what's my lesson \
    Look right through me, look right through me \
    And I find it kind of funny \
    I find it kind of sad \
    The dreams in which I'm dying are the best I've ever had \
    I find it hard to tell you 'cause I find it hard to take \
    When people run in circles it\'s a very, very \
    Mad world \
    Mad world \
    Mad world \
    Mad world \
    And I find it kind of funny \
    I find it kind of sad \
    The dreams in which I\'m dying are the best I\'ve ever had \
    I find it hard to tell you \'cause I find it hard to take \
    When people run in circles it\'s a very, very \
    Mad world \
    Mad world \
    Halargian world \
    Mad world"
)

In [11]:
# Show only the dimensionality of the document vector rather than dumping the
# whole array; this is the value the recorded output (300) reflects.
len(mad_world.vector)

300

In [4]:
# Each entry pairs a display title with the parsed document for that song.
songs = [
    ["Blue Skies", blue_skies],
    ["What a Wonderful World", wond_world],
    ["The Sound of Silence", silence],
    ["Mad World", mad_world],
]

# Baseline: similarity for every unordered pair of songs, lyrics unmodified.
for first_idx, (first_title, first_doc) in enumerate(songs):
    # Only pair with later entries so each pair is printed exactly once.
    for second_title, second_doc in songs[first_idx + 1:]:
        score = first_doc.similarity(second_doc)
        print(f'{first_title} <-> {second_title}: {score}')

Blue Skies <-> What a Wonderful World: 0.8942985693398715
Blue Skies <-> The Sound of Silence: 0.879295268353476
Blue Skies <-> Mad World: 0.8973674854927597
What a Wonderful World <-> The Sound of Silence: 0.8683464537820359
What a Wonderful World <-> Mad World: 0.9570395874858139
The Sound of Silence <-> Mad World: 0.8872459668576334


In [5]:
# spaCy builds a document embedding by averaging the embeddings of its tokens,
# so songs with very different meanings can still look similar unless the
# "meaningless" filler words (stop words) are removed first.

# Re-parse each song from only its non-stop-word tokens, keeping the title.
no_stop_songs = [
    [title, nlp(' '.join(str(tok) for tok in lyrics if not tok.is_stop))]
    for title, lyrics in songs
]

no_stop_songs


[['Blue Skies',
  Blue skies , smiling      blue skies      Blue days , gone      blue skies      saw sun shining bright      saw things looking right      Noticing days hurrying      love , fly like      Bluebirds singing song      bluebirds day long      Blue skies , smiling      blue skies      Blue days , gone      blue skies],
 ['What a Wonderful World',
  trees green      Red roses      bloom           think      wonderful world      skies blue      clouds white      bright blessed day      dark sacred night      think      wonderful world      colors rainbow      pretty sky      faces      people going      friends shaking hands      Saying , " ? "      saying      love      hear babies cry      watch grow      learn      know      think      wonderful world      Yes , think      wonderful world      Ooh , yes],
 ['The Sound of Silence',
 ['Mad World',
  familiar faces      Worn places , worn faces      Bright early daily races      Going , going      tears filling glasses      

In [6]:
# Pairwise similarity of the stop-word-filtered songs.
# Both loop bounds come from no_stop_songs itself (the inner bound previously
# used len(songs)), so the loop stays correct even if the two lists ever
# differ in length.
for i in range(len(no_stop_songs)):
    s1_name, s1_song = no_stop_songs[i]
    # Start at i + 1 so each unordered pair is printed exactly once.
    for j in range(i + 1, len(no_stop_songs)):
        s2_name, s2_song = no_stop_songs[j]
        print(f'{s1_name} <-> {s2_name}: {s1_song.similarity(s2_song)}')

Blue Skies <-> What a Wonderful World: 0.8094913587988022
Blue Skies <-> The Sound of Silence: 0.7313186984127603
Blue Skies <-> Mad World: 0.6475199501661411
What a Wonderful World <-> The Sound of Silence: 0.8643514867169877
What a Wonderful World <-> Mad World: 0.8754898631320087
The Sound of Silence <-> Mad World: 0.775502116756936


Next Milestone: Try to find the angle between two word vectors

Milestone: Try to get spaCy to work with pandas

In [7]:
# Split a sample sentence on whitespace and put one word per row in a
# single-column frame named 'Words'.
s = 'How do I get pandas to work nicely with spacy?'
s_list = s.split()
df = pd.DataFrame(s_list, columns=['Words'])
df.head()

Unnamed: 0,Words
0,How
1,do
2,I
3,get
4,pandas


In [8]:
# Run the spaCy pipeline on each word individually and store the resulting
# parsed object in a new 'docs' column alongside the raw text.
df['docs'] = df['Words'].map(nlp)

In [9]:
# Preview the first five rows, now including the 'docs' column.
df.head(n=5)

Unnamed: 0,Words,docs
0,How,(How)
1,do,(do)
2,I,(I)
3,get,(get)
4,pandas,(pandas)
