In [1]:
import json
import os
import random
import re

import lyricsgenius
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
# Read the Genius API token from the environment so the secret never ends up in
# the notebook or its version history. Set it before starting Jupyter, e.g.
#   export GENIUS_ACCESS_TOKEN="<your token>"
genius_token = os.environ.get("GENIUS_ACCESS_TOKEN")
if not genius_token:
    raise RuntimeError(
        "Set the GENIUS_ACCESS_TOKEN environment variable to your Genius API token."
    )

# Client used by the scraping cell below; timeout=30 is passed to the client as its
# request timeout.
genius = lyricsgenius.Genius(genius_token, timeout=30)

In [3]:
# Genre -> artists whose songs are scraped (five artists per genre).
# Every genre must appear exactly once: a repeated key in a dict literal silently
# replaces the earlier entry instead of raising an error.
# NOTE(review): the merge cell later in this notebook also reads 'rock_lyrics.csv',
# but no "Rock" entry is defined here - confirm where that file comes from.
genre_artists = {
    "Metal": ["Metallica", "Black Sabbath", "Iron Maiden", "Slayer", "Pantera"],
    "Pop": ["Taylor Swift", "Katy Perry", "Justin Bieber", "Ariana Grande", "Maroon 5"],
    "Hip-Hop/Rap": ["Jay-Z", "Kendrick Lamar", "Drake", "Eminem", "Kanye West"],
    "Country": ["Johnny Cash", "Dolly Parton", "Luke Bryan", "Carrie Underwood", "Willie Nelson"],
    "R&B/Soul": ["Aretha Franklin", "Marvin Gaye", "Beyoncé", "Sam Cooke", "Stevie Wonder"],
    "EDM": ["Daft Punk", "Calvin Harris", "Avicii", "The Chainsmokers", "Marshmello"],
    "Jazz": ["Miles Davis", "Louis Armstrong", "Ella Fitzgerald", "Billie Holiday", "John Coltrane"],
    "Blues": ["B.B. King", "Robert Johnson", "Muddy Waters", "Eric Clapton", "Stevie Ray Vaughan"],
    "Reggae": ["Bob Marley", "Peter Tosh", "Toots and the Maytals", "Damian Marley", "Sean Paul"],
}

In [5]:
# Accumulator for the scraped songs: the scraping loop appends one dict per song.
# Running this cell again throws away everything collected so far.
song_data: list = []

In [6]:
# Number of songs requested from Genius - and kept - for every artist.
SONGS_PER_ARTIST = 20


def clean_lyrics(raw_lyrics):
    """Flatten the raw lyrics text of one song into a single cleaned-up line.

    Args:
        raw_lyrics: Lyrics text as returned by the Genius client. ``None`` or an
            empty string is treated as "no lyrics".

    Returns:
        The cleaned lyrics as a ``str`` ('' when there is nothing to clean).
    """
    if not raw_lyrics:
        return ''

    # Drop everything up to and including the first newline (the first row);
    # text without any newline is left untouched by this step.
    text = re.sub(r'^.*?\n', '', raw_lyrics)

    # Drop anything in square brackets that sits on a single line
    # (presumably section tags such as "[Chorus]").
    text = re.sub(r'\[.*?\]', '', text)

    # Collapse every run of whitespace - newlines included - into one space.
    text = re.sub(r'\s+', ' ', text)

    # Remove "Embed" and the number before it.
    text = re.sub(r'\d+\s*Embed', '', text)

    return text.strip()


# Walk every genre and artist, fetch the artist's songs from Genius and append one
# record per song to song_data.
# Re-run the `song_data = []` cell before re-running this one; otherwise every song
# is appended a second time.
for genre, artists in genre_artists.items():
    for artist in artists:
        # Search for the artist's songs using the Genius API
        artist_search = genius.search_artist(artist, max_songs=SONGS_PER_ARTIST)

        # The search yields None when it is unsuccessful; skip the artist instead
        # of crashing on `.songs`.
        if artist_search is None or not artist_search.songs:
            print("No songs found for", artist, "- skipping.")
            continue

        # Fewer songs than requested may come back, and random.sample raises
        # ValueError when asked for more items than exist. When every fetched song
        # is sampled, this simply puts the songs in random order.
        sample_size = min(SONGS_PER_ARTIST, len(artist_search.songs))
        random_songs = random.sample(artist_search.songs, sample_size)

        # Store the title and the cleaned lyrics of each sampled song
        for song in random_songs:
            song_data.append({
                'Genre': genre,
                'Artist': artist,
                'Song Name': song.title,
                'Lyrics': clean_lyrics(song.lyrics),
            })

Searching for songs by Metallica...

Song 1: "Nothing Else Matters"
Song 2: "One"
Song 3: "Enter Sandman"
Song 4: "Master of Puppets"
Song 5: "The Unforgiven"
Song 6: "Fade to Black"
Song 7: "For Whom the Bell Tolls"
Song 8: "Sad But True"
Song 9: "Creeping Death"
Song 10: "Welcome Home (Sanitarium)"
Song 11: "...And Justice for All"
Song 12: "Wherever I May Roam"
Song 13: "Battery"
Song 14: "Ride the Lightning"
Song 15: "Blackened"
Song 16: "The Unforgiven II"
Song 17: "The Four Horsemen"
Song 18: "Whiplash"
Song 19: "Dyers Eve"
Song 20: "Seek & Destroy"

Reached user-specified song limit (20).
Done. Found 20 songs.
Searching for songs by Black Sabbath...

Song 1: "Paranoid"
Song 2: "Iron Man"
Song 3: "War Pigs"
Song 4: "N.I.B."
Song 5: "Black Sabbath"
Song 6: "Heaven and Hell"
Song 7: "Changes"
Song 8: "Planet Caravan"
Song 9: "Sabbath Bloody Sabbath"
Song 10: "Sweet Leaf"
Song 11: "Children of the Grave"
Song 12: "Fairies Wear Boots"
Song 13: "Electric Funeral"
Song 14: "Hand of Doo

In [7]:
# Build a DataFrame from the scraped records. The explicit column list keeps the
# four columns present even when nothing was scraped.
all_songs_df = pd.DataFrame(song_data, columns=['Genre', 'Artist', 'Song Name', 'Lyrics'])

# Keep only the Metal rows: song_data holds every genre listed in genre_artists,
# while this frame is saved as 'metal_lyrics.csv' and later merged with the other
# per-genre files, where rows from other genres would show up twice.
metal_df = all_songs_df[all_songs_df['Genre'] == 'Metal'].reset_index(drop=True)

In [8]:
# Render metal_df as a table for a quick visual check of the scraped songs.
# `display` is not imported by this notebook; it is the helper that IPython makes
# available in notebook sessions.
display(metal_df)

Unnamed: 0,Genre,Artist,Song Name,Lyrics
0,Metal,Metallica,Sad But True,"Hey (Hey), I'm your life, I'm the one who take..."
1,Metal,Metallica,Whiplash,"Late at night, all systems go You've come to s..."
2,Metal,Metallica,Welcome Home (Sanitarium),Welcome to where time stands still No one leav...
3,Metal,Metallica,The Unforgiven,"New blood joins this Earth, and quickly he's s..."
4,Metal,Metallica,Enter Sandman,"Say your prayers, little one, don't forget, my..."
...,...,...,...,...
95,Metal,Pantera,Domination,(First take like a motherfucker) Agony is the ...
96,Metal,Pantera,Floods,A dead issue (A dead issue) Don't wrestle with...
97,Metal,Pantera,I’m Broken,I wonder if we'll smile in our coffins While l...
98,Metal,Pantera,Hollow,What's left inside him? Don't he remember us? ...


In [9]:
# Write the Metal songs to disk; index=False leaves the row index out of the file.
metal_csv_path = 'metal_lyrics.csv'
metal_df.to_csv(metal_csv_path, index=False)

In [10]:
# Per-genre CSV files to combine, in the order their rows appear in the result
file_names = [
    'blues_lyrics.csv',
    'country_lyrics.csv',
    'edm_lyrics.csv',
    'hiphop_rap_lyrics.csv',
    'jazz_lyrics.csv',
    'metal_lyrics.csv',
    'pop_lyrics.csv',
    'reggae_lyrics.csv',
    'RNB_Soul_lyrics.csv',
    'rock_lyrics.csv',
]

# Load each file into its own DataFrame
dfs = [pd.read_csv(file_name) for file_name in file_names]

# Stack the frames on top of each other; ignore_index=True renumbers the rows
# from 0 instead of keeping each file's own index
merged_df = pd.concat(dfs, ignore_index=True)

# Show the combined table
display(merged_df)

# Write the combined table to disk, leaving the row index out of the file
merged_df.to_csv('lyrics.csv', index=False)

Unnamed: 0,Genre,Artist,Song Name,Lyrics
0,Blues,B.B. King,I’m Working On the Building,It's a true foundation I'm lifting up a bloods...
1,Blues,B.B. King,Sweet Little Angel,I love the way she spread her wings Yes got a ...
2,Blues,B.B. King,Everyday I Have the Blues,"Everyday, everyday I have the blues Ooooh, eve..."
3,Blues,B.B. King,You Upset Me Baby,"28 in the waist, 44 in the hips She got a real..."
4,Blues,B.B. King,Three O’Clock Blues,And I can't even close my eyes Three o'clock i...
...,...,...,...,...
995,Rock,Guns N' Roses,Used to Love Her,"I used to love her, but I had to kill her I us..."
996,Rock,Guns N' Roses,This I Love,"And now, I don't know why she wouldn't say goo..."
997,Rock,Guns N' Roses,Don’t Cry (Alternate Lyrics),"If we could see tomorrow, what of your plans? ..."
998,Rock,Guns N' Roses,Patience,"One, two, one, two, three, four *Whistling* Sh..."
