In [6]:
# Import required libraries
import requests
from bs4 import BeautifulSoup
import re
import os

# Retrieving URLs

In [7]:
GENIUS_API_TOKEN = 'XXXXXXXXXXXXXXXXXXXX'

# Function to make a request to the Genius API for artist information
def request_artist_info(artist_name, page):
    base_url = 'https://api.genius.com'  
    headers = {'Authorization': 'Bearer ' + GENIUS_API_TOKEN}  
    search_url = base_url + '/search?per_page=10&page=' + str(page)  
    data = {'q': artist_name}  
    response = requests.get(search_url, data=data, headers=headers)  
    return response  

# Function to retrieve song URLs for a given artist
def request_song_url(artist_name, song_cap):
    page = 1  # Initialize the page number for pagination
    songs = []  # Initialize an empty list to store song URLs
    
    while True:  # Loop until we reach the desired number of songs
        response = request_artist_info(artist_name, page)  # Request artist info for the current page
        json = response.json() 
        
        # Initialize a list to temporarily hold song information for the current page
        song_info = []
        
        # Filter and collect relevant songs from the response
        for hit in json['response']['hits']:
            if artist_name.lower() in hit['result']['primary_artist']['name'].lower():
                song_info.append(hit) 
    
        # Extract URLs from the song information collected
        for song in song_info:
            if len(songs) < song_cap: 
                url = song['result']['url'] 
                songs.append(url)
            
        # Break the loop if the desired number of songs is reached
        if len(songs) == song_cap:  
            break
        else:
            page += 1 

    # Print Result    
    print('Found {} songs by {}'.format(len(songs), artist_name))  
    return songs  

# Example 
request_song_url('Kanye West', 100)

Found 100 songs by Kanye West


['https://genius.com/Kanye-west-mercy-lyrics',
 'https://genius.com/Jay-z-and-kanye-west-niggas-in-paris-lyrics',
 'https://genius.com/Kanye-west-monster-lyrics',
 'https://genius.com/Kanye-west-father-stretch-my-hands-pt-1-lyrics',
 'https://genius.com/Kanye-west-ultralight-beam-lyrics',
 'https://genius.com/Kanye-west-bound-2-lyrics',
 'https://genius.com/Kanye-west-jay-z-and-big-sean-clique-lyrics',
 'https://genius.com/Lil-pump-and-kanye-west-i-love-it-lyrics',
 'https://genius.com/Jay-z-and-kanye-west-no-church-in-the-wild-lyrics',
 'https://genius.com/Kanye-west-new-slaves-lyrics',
 'https://genius.com/Kanye-west-runaway-lyrics',
 'https://genius.com/Kanye-west-blood-on-the-leaves-lyrics',
 'https://genius.com/Kanye-west-black-skinhead-lyrics',
 'https://genius.com/Drake-kanye-west-lil-wayne-and-eminem-forever-lyrics',
 'https://genius.com/Kanye-west-no-more-parties-in-la-lyrics',
 'https://genius.com/Kanye-west-famous-lyrics',
 'https://genius.com/Kanye-west-power-lyrics',
 'htt

# Scraping Lyrics

In [8]:
# Function to scrape song lyrics from a Genius.com song URL
def scrape_song_lyrics(url):
    page = requests.get(url)
    html = BeautifulSoup(page.text, 'html.parser')
    lyrics = html.find_all('span', class_='ReferentFragmentdesktop__Highlight-sc-110r0d9-1 jAzSMw')
    
    lyrics = '\n'.join([elem.get_text() for elem in lyrics])
    # Remove identifiers like chorus, verse, etc and empty lines
    lyrics = re.sub(r'[\(\[].*?[\)\]]', '', lyrics)
    lyrics = lyrics.replace('\r\n', '\n')
    lyrics = '\n'.join(line.strip() for line in lyrics.split('\n') if line.strip())
    # Result     
    return lyrics
    
# Example
print(scrape_song_lyrics('https://genius.com/Post-malone-rockstar-lyrics'))

Hahahahaha
Tank God
I've been fuckin' hoes and poppin' pilliesMan, I feel just like a rockstar
All my brothers got that gasAnd they always be smokin' like a Rasta
Fuckin' with me, call up on a UziAnd show up, man, them the shottas When my homies pull up on your blockThey make that thing go grra-ta-ta-ta
Switch my whip, came back in blackI'm startin' sayin', "Rest in peace to Bon Scott"
Close that door, we blowin' smokeShe ask me light a fire like I'm Morrison Act a fool on stageProlly leave my fuckin' show in a cop car
Shit was legendaryThrew a TV out the window of the Montage
Cocaine on the table, liquor pourin', don't give a damn
Dude, your girlfriend is a groupie, she just tryna get inSayin', "I'm with the band" Now she actin' outta pocket, tryna grab up on my pantsHundred bitches in my trailer say they ain't got a manAnd they all brought a friend
I've been fuckin' hoes and poppin' pilliesMan, I feel just like a rockstar
All my brothers got that gasAnd they always be smokin' like a 

# Scraping and Saving All the Lyrics in One File

In [5]:
# Function to write the lyrics of a specified number of songs by an artist to a file
def write_lyrics_to_file(artist_name, song_count):
    f = open('lyrics/' + artist_name.lower() + '.txt', 'wb')
    urls = request_song_url(artist_name, song_count)
    # Loop through each URL and scrape the lyrics
    for url in urls:
        lyrics = scrape_song_lyrics(url)
        f.write(lyrics.encode("utf8"))
    f.close()
    num_lines = sum(1 for line in open('lyrics/' + artist_name.lower() + '.txt', 'rb'))
    
    # Results
    print('Wrote {} lines to file from {} songs'.format(num_lines, song_count))
  

  
# DEMO  
write_lyrics_to_file('Post Malone', 100)

Found 100 songs by Post Malone
Wrote 934 lines to file from 100 songs
