# This notebook creates a word cloud for every genre

In [None]:
import pandas as pd
from wordcloud import WordCloud,STOPWORDS
from gensim.utils import simple_preprocess

import nltk
nltk.download('stopwords') #once
from nltk.corpus import stopwords

import matplotlib.pyplot as plt

## Get Lyrics from each Genre, initialize Stop Words

In [None]:
# Lyrics dataset enriched with metadata; produced by 'enriched_metadata.ipynb'.
songs = pd.read_csv("songs_enriched.csv", sep=",", engine="python", encoding='utf-8')

# One sub-frame per genre, selected by a case-sensitive substring match on the
# 'genre' column (a song whose genre text mentions several of these names ends
# up in each matching frame).
# na=False treats a missing genre as "no match"; without it str.contains yields
# NaN for such rows and the boolean indexing raises a ValueError.
# regex=False makes the match a plain literal substring test.
hiphop = songs.loc[songs['genre'].str.contains('Hip Hop', regex=False, na=False)]
pop = songs.loc[songs['genre'].str.contains('Pop', regex=False, na=False)]
rock = songs.loc[songs['genre'].str.contains('Rock', regex=False, na=False)]
country = songs.loc[songs['genre'].str.contains('Country', regex=False, na=False)]

# Raw lyric texts per genre. Songs without lyrics are dropped here; otherwise
# the later str() conversion would turn each missing value into the word "nan"
# and that word would be counted in the word clouds.
hiphop_lyrics = hiphop['a_lyrics'].dropna().tolist()
rock_lyrics = rock['a_lyrics'].dropna().tolist()
pop_lyrics = pop['a_lyrics'].dropna().tolist()
country_lyrics = country['a_lyrics'].dropna().tolist()

# Tokenize lyric texts into word lists
def sent_to_words(sentences):
    """Lazily yield one list of tokens per entry of ``sentences``.

    Every entry is converted with ``str()`` first, so non-string values do
    not make the tokenizer fail. Tokenization is delegated to gensim's
    ``simple_preprocess``; ``deacc=True`` asks it to strip accent marks from
    the tokens (punctuation is discarded by the tokenizer itself, per the
    gensim documentation).
    """
    for raw_text in sentences:
        text = str(raw_text)
        tokens = simple_preprocess(text, deacc=True)
        yield tokens

# Stop words excluded from every word cloud: the STOPWORDS collection shipped
# with the wordcloud package.
# NOTE(review): the nltk stopword corpus imported at the top of the notebook is
# not used anywhere in the visible code — confirm whether it was meant to be
# combined with this list.
stop_words = STOPWORDS

## Create WordClouds
### Rock

In [None]:
# Tokenize every rock lyric into a list of words (one list per song).
data_words = list(sent_to_words(rock_lyrics))

# Flatten all songs into one comma-separated string in a single join.
# Every token gets a separator, including across song boundaries; appending
# each song's joined text back to back would glue the last word of one song
# to the first word of the next and create fused pseudo-words in the cloud.
s = ','.join(word for words in data_words for word in words)

# Configure the word cloud.
# NOTE(review): per the wordcloud docs, contour_color is only drawn when a mask
# and contour_width > 0 are supplied, so it presumably has no visible effect here.
wordcloud = WordCloud(stopwords=stop_words, width=1280, height=720, min_font_size=15,
                      max_font_size=160, background_color="white", max_words=2000,
                      contour_color='steelblue')

# Compute the word frequencies and layout from the combined text.
wordcloud.generate(s)

# Visualize the word cloud
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

### Pop

In [None]:
# Tokenize every pop lyric into a list of words (one list per song).
data_words = list(sent_to_words(pop_lyrics))

# Flatten all songs into one comma-separated string in a single join.
# Every token gets a separator, including across song boundaries; appending
# each song's joined text back to back would glue the last word of one song
# to the first word of the next and create fused pseudo-words in the cloud.
s = ','.join(word for words in data_words for word in words)

# Configure the word cloud.
# NOTE(review): per the wordcloud docs, contour_color is only drawn when a mask
# and contour_width > 0 are supplied, so it presumably has no visible effect here.
wordcloud = WordCloud(stopwords=stop_words, width=1280, height=720, min_font_size=15,
                      max_font_size=160, background_color="white", max_words=2000,
                      contour_color='steelblue')

# Compute the word frequencies and layout from the combined text.
wordcloud.generate(s)

# Visualize the word cloud
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

### Hip hop

In [None]:
# Tokenize every hip hop lyric into a list of words (one list per song).
data_words = list(sent_to_words(hiphop_lyrics))

# Flatten all songs into one comma-separated string in a single join.
# Every token gets a separator, including across song boundaries; appending
# each song's joined text back to back would glue the last word of one song
# to the first word of the next and create fused pseudo-words in the cloud.
s = ','.join(word for words in data_words for word in words)

# Configure the word cloud.
# NOTE(review): per the wordcloud docs, contour_color is only drawn when a mask
# and contour_width > 0 are supplied, so it presumably has no visible effect here.
wordcloud = WordCloud(stopwords=stop_words, width=1280, height=720, min_font_size=15,
                      max_font_size=160, background_color="white", max_words=2000,
                      contour_color='steelblue')

# Compute the word frequencies and layout from the combined text.
wordcloud.generate(s)

# Visualize the word cloud
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

### Country

In [None]:
# Tokenize every country lyric into a list of words (one list per song).
data_words = list(sent_to_words(country_lyrics))

# Flatten all songs into one comma-separated string in a single join.
# Every token gets a separator, including across song boundaries; appending
# each song's joined text back to back would glue the last word of one song
# to the first word of the next and create fused pseudo-words in the cloud.
s = ','.join(word for words in data_words for word in words)

# Configure the word cloud.
# NOTE(review): per the wordcloud docs, contour_color is only drawn when a mask
# and contour_width > 0 are supplied, so it presumably has no visible effect here.
wordcloud = WordCloud(stopwords=stop_words, width=1280, height=720, min_font_size=15,
                      max_font_size=160, background_color="white", max_words=2000,
                      contour_color='steelblue')

# Compute the word frequencies and layout from the combined text.
wordcloud.generate(s)

# Visualize the word cloud
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()