# Word Vector
Add word vector columns to [use-this-master-lyricsdf-extracted.csv](../../data/conditioned/use-this-master-lyricsdf-extracted.csv) and save the augmented dataframe as `master-lyricsdf-word_vectors.csv`.

In [2]:
%matplotlib inline
import numpy as np
import scipy as sp
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import pandas as pd
# pandas display options: wider text output, up to 100 columns shown,
# and HTML table rendering for DataFrames in the notebook.
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', 100)
pd.set_option('display.notebook_repr_html', True)
import seaborn as sns
# seaborn plot defaults: white background with grid lines, "poster"-scale elements.
sns.set_style("whitegrid")
sns.set_context("poster")

In [3]:
## MLJ: Additional Extras
import os
import time
import itertools
import json
import pickle

## Setup Data For Pipeline
### Load and manipulate with Pandas

In [11]:
# Read the approved "master" lyrics dataframe from the conditioned data directory.
lyrics_pd_df = pd.read_csv(
    "../../data/conditioned/use-this-master-lyricsdf-extracted.csv"
)

## Add Reduced Word and ID Vectors
Add columns:
* `noun_vector` for reduced words (as a string separated by spaces)
* `noun_id_vector` id vector (again as a string separated by spaces)
* `adj_vector` for reduced words (as a string separated by spaces)
* `adj_id_vector` id vector (again as a string separated by spaces)

Note: the cells below currently add only `noun_vector` and `adj_vector`; the ID-vector columns are not created here.

In [13]:
# Read the two word-reduction JSON files (the `corpus_vocabs` directory must exist).

# Noun reductions -> `nreduction`
with open('../../data/conditioned/corpus_vocabs/noun-word-reduction.json') as fp:
    nreduction = json.loads(fp.read())

# Adjective reductions -> `areduction`
with open('../../data/conditioned/corpus_vocabs/adj-word-reduction.json') as fp:
    areduction = json.loads(fp.read())

In [14]:
# Show the first entry of the loaded noun reduction to check its structure.
nreduction[:1]

[[u'time', u'bridge', u'water']]

In [15]:
def reductionAsWordStr(reduction):
    """Collapse each word list in ``reduction`` into one space-separated string.

    Parameters
    ----------
    reduction : iterable of iterables of text strings
        One inner sequence of words per entry,
        e.g. ``[[u'time', u'bridge', u'water']]``.

    Returns
    -------
    list of str
        One native ``str`` per inner sequence, in the same order, with the
        words joined by single spaces. Non-ASCII characters are dropped.
        An empty inner sequence yields ``''``.
    """
    words = []
    for r in reduction:
        # encode(..., 'ignore') strips non-ASCII characters. Decoding back and
        # wrapping in str() gives a native str on both Python 2 and Python 3;
        # joining the raw encoded values fails on Python 3 (bytes vs. str).
        tokens = [str(x.encode('ascii', 'ignore').decode('ascii')) for x in r]
        words.append(' '.join(tokens))
    return words

In [16]:
# Turn each noun / adjective word list into a single space-separated string.
wn_reduction = reductionAsWordStr(nreduction)
wa_reduction = reductionAsWordStr(areduction)

In [17]:
# Number of noun strings produced (one per entry in `nreduction`).
len(wn_reduction)

4500

In [18]:
# Show the first joined noun string.
wn_reduction[:1]

['time bridge water']

In [19]:
# Show the first joined adjective string.
wa_reduction[:1]

['rough troubled']

In [20]:
# Wrap the noun strings in a one-column frame named `noun_vector`.
nvdf = pd.DataFrame(data={'noun_vector': wn_reduction})
# Left join on the row index (DataFrame.join's defaults, spelled out).
# NOTE(review): assumes `wn_reduction` is in the same row order as the
# lyrics CSV -- confirm against whatever produced the reduction file.
lyrics_pd_df1 = lyrics_pd_df.join(nvdf, how='left')

In [21]:
# Wrap the adjective strings in a one-column frame named `adj_vector`.
avdf = pd.DataFrame(data={'adj_vector': wa_reduction})
# Left join on the row index (DataFrame.join's defaults, spelled out).
# This rebinds `lyrics_pd_df` to the augmented frame, so from here on it
# already carries `noun_vector`; keep that in mind before re-running the
# noun-join cell without reloading the CSV.
lyrics_pd_df = lyrics_pd_df1.join(avdf, how='left')

In [22]:
# Display the first rows to check that `noun_vector` and `adj_vector` were added.
lyrics_pd_df.head()

Unnamed: 0,index,position,year,title.href,title,artist,lyrics,decade,song_key,lyrics_url,lyrics_abstract,noun_vector,adj_vector
0,0,1,1970,https://en.wikipedia.org/wiki/Bridge_over_Trou...,Bridge over Troubled Water,Simon and Garfunkel,When you're weary. Feeling small. When tears a...,1970,1970-1,http://lyrics.wikia.com/Simon_And_Garfunkel:Br...,When you're weary. Feeling small. When tears a...,time bridge water,rough troubled
1,1,2,1970,https://en.wikipedia.org/wiki/(They_Long_to_Be...,(They Long to Be) Close to You,The Carpenters,Why do birds suddenly appear. Everytime you ar...,1970,1970-2,http://lyrics.wikia.com/Carpenters:%28They_Lon...,Why do birds suddenly appear. Everytime you ar...,dream starlight eye,true blue
2,2,3,1970,https://en.wikipedia.org/wiki/American_Woman_(...,American Woman,The Guess Who,"Mmm, da da da. Mmm, mmm, da da da. Mmm, mmm, d...",1970,1970-3,http://lyrics.wikia.com/The_Guess_Who:American...,"Mmm, da da da. Mmm, mmm, da da da. Mmm, mmm, d...",woman mess mind mama thing time growin light y...,american important old coloured leave
3,3,4,1970,https://en.wikipedia.org/wiki/Raindrops_Keep_F...,Raindrops Keep Fallin' on My Head,B.J. Thomas,Raindrops are falling on my head. And just lik...,1970,1970-4,http://lyrics.wikia.com/B.J._Thomas:Raindrops_...,Raindrops are falling on my head. And just lik...,guy foot bed happiness step eye,big long red
4,4,5,1970,https://en.wikipedia.org/wiki/War_(Edwin_Starr...,War,Edwin Starr,"War, huh, yeah. What is it good for? Absolutel...",1970,1970-5,http://lyrics.wikia.com/Edwin_Starr:War,"War, huh, yeah. What is it good for? Absolutel...",god destruction life war unrest generation man...,good innocent younger young short precious fig...


## Save Augmented DF

In [23]:
# Write the augmented frame to disk; index=False leaves the row index out of the file.
lyrics_pd_df.to_csv(
    "../../data/conditioned/master-lyricsdf-word_vectors.csv",
    index=False,
)