## Generate a word cloud from the lyrics of your top Spotify songs

In [None]:
import json
import os
import string
import sys

import better_profanity
import matplotlib.pyplot as plt
import spotipy
from nltk.corpus import stopwords
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOAuth
from wordcloud import WordCloud

# Make the project package importable from this notebook: keep whatever part
# of sys.path[0] comes before the first "spotify-research" and re-append that
# directory name (presumably yielding the repository root -- verify if the
# notebook is ever moved).
path_prefix = sys.path[0].partition("spotify-research")[0]
project_dir = os.path.join(path_prefix, "spotify-research")
sys.path.append(project_dir)

from spotify_research.lyrics_scraping import clean_for_word_cloud, scrape_lyrics

In [None]:
# The first time this cell runs in a notebook session you are redirected to
# another web page and then prompted to paste that page's URL into a text box
# here. After that the API call is made.

# Spotify application credentials (CLIENT_ID, CLIENT_SECRET, REDIRECT_URI).
with open(
    "../credentials/client_credentials.json", "r", encoding="utf-8"
) as credentials_file:
    credentials = json.load(credentials_file)

# Headers handed to scrape_lyrics when the lyrics are fetched.
with open(
    "../credentials/headers.json", "r", encoding="utf-8"
) as headers_file:
    headers = json.load(headers_file)

# Export the client id/secret as environment variables as well as passing
# them explicitly to the auth manager below.
client_id = credentials["CLIENT_ID"]
os.environ["SPOTIPY_CLIENT_ID"] = client_id
client_secret = credentials["CLIENT_SECRET"]
os.environ["SPOTIPY_CLIENT_SECRET"] = client_secret

scope = "user-top-read"
auth_manager = SpotifyOAuth(
    client_id=client_id,
    client_secret=client_secret,
    redirect_uri=credentials["REDIRECT_URI"],
    scope=scope,
)
sp = spotipy.Spotify(auth_manager=auth_manager)

In [None]:
# Possible time_range values: 'short_term', 'medium_term', 'long_term'.
med_results = sp.current_user_top_tracks(limit=50, time_range="medium_term")

# For each returned track keep only the first listed artist and the title.
artist_track_pairs = [
    {"artist": track["artists"][0]["name"], "track_name": track["name"]}
    for track in med_results["items"]
]


In [None]:
# Scraping lyrics from genius.com
#
# `lyrics` accumulates the scraped lines of every track into one flat list.
# Scraping is best-effort: a track whose lookup raises IndexError is skipped,
# but it is recorded in `skipped_tracks` and reported afterwards so that it is
# visible which songs are missing from the word cloud.

lyrics = []
skipped_tracks = []

for song in artist_track_pairs:
    try:
        lyrics.extend(
            scrape_lyrics(
                artist_name=song["artist"],
                track_name=song["track_name"],
                headers=headers,
            )
        )
    except IndexError:
        # NOTE(review): presumably raised by scrape_lyrics when no matching
        # lyrics page is found -- confirm against its implementation.
        skipped_tracks.append(f"{song['artist']} - {song['track_name']}")

if skipped_tracks:
    print(
        f"No lyrics scraped for {len(skipped_tracks)} track(s): "
        + "; ".join(skipped_tracks)
    )

In [None]:
# `lyrics` is a list of lines gathered from every song, so merge it into a
# single space-separated string before cleaning.
full_string = " ".join(lyrics)

# Words to strip before building the cloud: English stopwords, every entry of
# better_profanity's censor word set, and a handful of vocables.
vocables = ["oh", "ooh", "o", "oooh", "da"]

words_to_remove = stopwords.words("english")
words_to_remove.extend(
    str(word) for word in better_profanity.profanity.CENSOR_WORDSET
)
words_to_remove.extend(vocables)

cleaned_text = clean_for_word_cloud(
    full_string,
    words_to_remove=words_to_remove,
)


In [None]:
# Build the cloud from the cleaned words joined back into one string.
# max_words is set to len(cleaned_text) so the word limit scales with the
# amount of cleaned text.
cloud_text = " ".join(cleaned_text)
cloud_builder = WordCloud(
    background_color="white",
    max_words=len(cleaned_text),
)
wordcloud = cloud_builder.generate(cloud_text)

# Render the image without axes, save it to disk, then display it inline.
# savefig is called before show so the saved figure is the one drawn here.
plt.figure(figsize=(12, 9))
plt.imshow(wordcloud, interpolation="bilinear")
plt.axis("off")
plt.savefig("word_cloud.png")
plt.show()