# FINAL PROJECT - SONG LYRICS EXTRACTOR

#### IMPORT LIBRARIES

In [24]:
import ast
import json
import os
import string
import time
import urllib.parse
import urllib.request
from datetime import datetime, date

import lyricsgenius
import nltk
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

In [25]:
#nltk.download('stopwords')
#nltk.download('wordnet')

#### GENIUS API INITIALIZATION

In [3]:
# Credentials handed to the Genius client. The value is read from the
# GENIUS_ACCESS_TOKEN environment variable when it is set and non-empty, so
# the secret does not have to live in the notebook.
# NOTE(review): the fallback literal below is a credential committed to source
# control -- rotate it and remove the fallback once the variable is configured.
dict_credentials = {
    "client_id": os.environ.get("GENIUS_ACCESS_TOKEN")
    or "-q1tRGBZMEOk6JewZCC_KWZBxyFSg9nccGlX11Cb3MxpGpzWG4FBSJIXCJS33D3x",
}

In [4]:
# Base URL of the Genius web API.
# NOTE(review): not referenced by any cell visible here (lookups go through
# the lyricsgenius client) -- confirm whether it is still needed.
base = "https://api.genius.com"

In [5]:
# Module-level Genius client; main() uses it to look up every song.
genius = lyricsgenius.Genius(dict_credentials["client_id"])

genius.verbose = True # Print status messages (e.g. 'Searching for "..." by ...') while searching
genius.remove_section_headers = True # Remove section headers (e.g. [Chorus]) from lyrics when searching
genius.skip_non_songs = False # Include hits thought to be non-songs (e.g. track lists)
genius.excluded_terms = ["(Remix)", "(Live)"] # Exclude songs with these words in their title

#### MAIN FUNCTION
1) Reads all songs from the input dataframe
2) Downloads the lyrics of each song from Genius
3) Generates a new dataframe with the song features and the raw (unprocessed) lyrics
4) Every 100 songs, the function logs the elapsed time and saves the dataframe to a CSV file

In [26]:
def _append_log(log_path, message):
    """Append *message* plus a newline to the log file and close it at once.

    Opening per message keeps everything written so far on disk even if a
    later step of the long-running extraction fails.
    """
    with open(log_path, "a", encoding="utf-8") as log_file:
        log_file.write(message + "\n")


def _timing_summary(elapsed, processed):
    """Return the 'Time spend / Average by song' report line.

    The average is reported as "n/a" when no song has been processed, so an
    empty input does not divide by zero.
    """
    average = elapsed / processed if processed else "n/a"
    return "Time spend: " + str(elapsed) + " - Average by song: " + str(average)


def main(songs_path: str, *, output_dir: str = "../data/") -> None:
    """Download the Genius lyrics of every song listed in a CSV file.

    Steps:
    1) read the songs CSV (it must provide a ``name`` column and an
       ``artists`` column holding a Python-literal list of artist names);
    2) look each song up with the module-level ``genius`` client, using the
       part of the title before the first " - " and the first listed artist;
    3) store the lyrics in a new ``lyrics`` column (``None`` when the lookup
       fails, nothing is found, or the lyrics are empty);
    4) every 100 songs, and once more at the end, save the dataframe to
       ``songs_lyrics_complete.csv`` and log the elapsed time.

    Rows not reached yet hold the placeholder "Pending" in intermediate
    saves. Progress goes to stdout and to ``lyrics_extraction_<date>.log``.

    Args:
        songs_path: Path of the input CSV file.
        output_dir: Directory receiving the log and the output CSV. Defaults
            to "../data/", the location that used to be hard-coded.
    """
    banner = "################################################################"
    log_path = os.path.join(
        output_dir, "lyrics_extraction_" + str(date.today()) + ".log"
    )
    csv_path = os.path.join(output_dir, "songs_lyrics_complete.csv")

    _append_log(log_path, banner)

    songs_df = pd.read_csv(songs_path)
    # Keeps the original row labels as an "index" column in the output.
    songs_df.reset_index(inplace=True)
    total = len(songs_df)

    start = datetime.now()
    _append_log(log_path, "START: " + str(start))

    lyrics = ["Pending"] * total

    rows = zip(songs_df["name"], songs_df["artists"])
    for position, (raw_title, raw_artists) in enumerate(rows):
        progress = "Song: " + str(position + 1) + "/" + str(total)
        print(progress)
        _append_log(log_path, progress)

        # Only the first listed artist is used for the search.
        artist_name = [n.strip() for n in ast.literal_eval(raw_artists)][0]
        # Drop suffixes such as " - 2016 Remaster" that would hurt the search.
        song_name = raw_title.split(" - ")[0]

        try:
            song = genius.search_song(song_name, artist_name)
        except Exception as exc:
            # Best effort: one failed lookup must not abort the whole batch.
            # Unlike a bare ``except``, Ctrl-C and SystemExit still propagate.
            lyrics[position] = None
            _append_log(log_path, "Connection Timeout: " + repr(exc))
            time.sleep(5)  # back off before hitting the API again
        else:
            found = song.lyrics if song is not None else None
            lyrics[position] = found or None

        # Checkpoint every 100 songs, including when the 100th lookup failed.
        if (position + 1) % 100 == 0:
            songs_df["lyrics"] = lyrics
            songs_df.to_csv(csv_path, index=False)
            summary = _timing_summary(datetime.now() - start, position + 1)
            print(summary)
            _append_log(log_path, summary)

    summary = _timing_summary(datetime.now() - start, total)
    print(summary)

    # Save before logging "Finished!!!" so the log never claims success for a
    # CSV that could not be written.
    songs_df["lyrics"] = lyrics
    songs_df.to_csv(csv_path, index=False)

    _append_log(log_path, "\n".join([summary, "Finished!!!", banner]))

#### EXECUTE THE FUNCTION FOR THE DESIRED SONGS

In [9]:
# Run the extraction for the Phil Collins song list; main() writes its log and
# songs_lyrics_complete.csv under ../data/.
main("../data/phil_collins_songs.csv")

Song: 1/90
Searching for "In The Air Tonight" by Phil Collins...
Done.
Song: 2/90
Searching for "You Can't Hurry Love" by Phil Collins...
Done.
Song: 3/90
Searching for "I Don't Care Anymore" by Phil Collins...
Done.
Song: 4/90
Searching for "Sussudio" by Phil Collins...
Done.
Song: 5/90
Searching for "One More Night" by Phil Collins...
Done.
Song: 6/90
Searching for "Take Me Home" by Phil Collins...
Done.
Song: 7/90
Searching for "Don't Lose My Number" by Phil Collins...
Done.
Song: 8/90
Searching for "Another Day in Paradise" by Phil Collins...
Done.
Song: 9/90
Searching for "Do You Remember?" by Phil Collins...
Done.
Song: 10/90
Searching for "I Wish It Would Rain Down" by Phil Collins...
Done.
Song: 11/90
Searching for "You'll Be In My Heart" by Phil Collins...
Done.
Song: 12/90
Searching for "Strangers Like Me" by Phil Collins...
Done.
Song: 13/90
Searching for "Can't Turn Back The Years" by Phil Collins...
Done.
Song: 14/90
Searching for "Can't Turn Back the Years" by Phil Collin

#### LOAD THE DATAFRAME WITH THE LYRICS GENERATED BY THE PREVIOUS FUNCTION

In [11]:
# Reload the CSV written by main() (song features plus the "lyrics" column).
songs_lyrics_df = pd.read_csv("../data/songs_lyrics_complete.csv")

In [12]:
# Preview the first rows to check that the lyrics column was populated.
songs_lyrics_df.head()

Unnamed: 0,index,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,key,...,name,popularity,release_date,speechiness,tempo,valence,year,decade,main_artist,lyrics
0,0,0.551,['Phil Collins'],0.446,336453,0.239,0,18AXbzPzBS8Y3AkgSxzJPb,4e-06,0,...,In The Air Tonight - 2015 Remastered,77,1981-01-01,0.0316,189.507,0.298,1981,80s,Phil Collins,"I can feel it coming in the air tonight, oh Lo..."
1,1,0.0261,['Phil Collins'],0.62,175747,0.936,0,4YwbSZaYeYja8Umyt222Qf,0.0,7,...,You Can't Hurry Love - 2016 Remaster,76,1982-11-05,0.0308,97.527,0.763,1982,80s,Phil Collins,"I need love, love\nOoh, ease my mind\nAnd I ne..."
2,2,0.188,['Phil Collins'],0.703,305707,0.422,0,24IdPCimVnJJDRuB5YSDiS,0.00311,11,...,I Don't Care Anymore - 2016 Remaster,58,1982-11-05,0.0378,140.494,0.442,1982,80s,Phil Collins,Well you can tell everyone I'm a down disgrace...
3,3,0.0529,['Phil Collins'],0.714,263107,0.935,0,4qM461TqtpnP4GLRIXwEnW,0.0934,5,...,Sussudio - 2016 Remaster,66,1985-01-25,0.0849,121.232,0.725,1985,80s,Phil Collins,There's a girl that's been on my mind\nAll the...
4,4,0.788,['Phil Collins'],0.534,288973,0.442,0,6fkvIT9KFKxO8poBeKfaEf,0.000351,3,...,One More Night - 2016 Remaster,69,1985-01-25,0.0308,136.369,0.343,1985,80s,Phil Collins,"One more night, one more night\nOne more night..."
