In [18]:
from transformers import pipeline
classifier = pipeline("text-classification",model='bhadresh-savani/distilbert-base-uncased-emotion', return_all_scores=True)
prediction = classifier("I love using transformers. The best part is wide range of support and its easy to use", )
print(prediction)

[[{'label': 'sadness', 'score': 0.00067926972405985}, {'label': 'joy', 'score': 0.9959298968315125}, {'label': 'love', 'score': 0.0009452439262531698}, {'label': 'anger', 'score': 0.0018055178225040436}, {'label': 'fear', 'score': 0.000411103421356529}, {'label': 'surprise', 'score': 0.00022885671933181584}]]


In [19]:
from tqdm import tqdm
tqdm.pandas()

In [5]:
import pandas as pd

In [29]:
song_df = pd.read_csv('spotify_millsongdata.csv')
movie_df = pd.read_csv('wiki_movie_plots_deduped.csv')

In [22]:
def generate_emotion_metrics(text):
    # Define the maximum sequence length
    max_seq_length = 512
    
    # Split the text into chunks
    chunks = [text[i:i+max_seq_length] for i in range(0, len(text), max_seq_length)]
    
    # Initialize dictionaries to store emotion scores
    emotion_scores_combined = {'joy': 0, 'anger': 0, 'love': 0, 'sadness': 0, 'fear': 0, 'surprise': 0}
    
    # Iterate over the chunks and generate emotion scores
    for chunk in chunks:
        classifier = pipeline("text-classification", model='bhadresh-savani/distilbert-base-uncased-emotion', return_all_scores=True)
        emotion_scores = classifier(chunk)
        
        # Aggregate emotion scores from each chunk
        for emotion_score_dict in emotion_scores[0]:
            emotion = emotion_score_dict['label']
            score = emotion_score_dict['score']
            emotion_scores_combined[emotion] += score
    
    # Normalize the aggregated emotion scores
    total_score = sum(emotion_scores_combined.values())
    emotion_metrics = {emotion: score ,  total_score for emotion, score in emotion_scores_combined.items()}
    
    return emotion_metrics


In [23]:
def process_chunk(chunk):
    chunk['emotion_metrics'] = chunk['text'].apply(generate_emotion_metrics)
    return chunk

In [24]:
song_df = song_df.dropna(subset=['text']).drop(columns=['link'])


In [25]:
print(song_df)

             artist                   song  \
0              ABBA  Ahe's My Kind Of Girl   
1              ABBA       Andante, Andante   
2              ABBA         As Good As New   
3              ABBA                   Bang   
4              ABBA       Bang-A-Boomerang   
...             ...                    ...   
57645  Ziggy Marley          Good Old Days   
57646  Ziggy Marley          Hand To Mouth   
57647          Zwan           Come With Me   
57648          Zwan                 Desire   
57649          Zwan              Heartsong   

                                                    text  
0      Look at her face, it's a wonderful face  \r\nA...  
1      Take it easy with me, please  \r\nTouch me gen...  
2      I'll never know why I had to go  \r\nWhy I had...  
3      Making somebody happy is a question of give an...  
4      Making somebody happy is a question of give an...  
...                                                  ...  
57645  Irie days come on play  \r\

In [26]:
chunk_size = 1000

# Calculate the number of chunks
num_chunks = len(song_df) , / chunk_size + 1

# Process the DataFrame in chunks
result_chunks = []
for chunk_idx in tqdm(range(num_chunks)):
    start_idx = chunk_idx * chunk_size
    end_idx = min((chunk_idx + 1) * chunk_size, len(song_df))
    chunk = song_df.iloc[start_idx:end_idx].copy()
    result_chunks.append(process_chunk(chunk))

# Concatenate the result chunks into the final DataFrame
result_df = pd.concat(result_chunks)

100%|██████████| 58/58 [30:58:15<00:00, 1922.34s/it]


In [27]:
print(result_df)

             artist                   song  \
0              ABBA  Ahe's My Kind Of Girl   
1              ABBA       Andante, Andante   
2              ABBA         As Good As New   
3              ABBA                   Bang   
4              ABBA       Bang-A-Boomerang   
...             ...                    ...   
57645  Ziggy Marley          Good Old Days   
57646  Ziggy Marley          Hand To Mouth   
57647          Zwan           Come With Me   
57648          Zwan                 Desire   
57649          Zwan              Heartsong   

                                                    text  \
0      Look at her face, it's a wonderful face  \r\nA...   
1      Take it easy with me, please  \r\nTouch me gen...   
2      I'll never know why I had to go  \r\nWhy I had...   
3      Making somebody happy is a question of give an...   
4      Making somebody happy is a question of give an...   
...                                                  ...   
57645  Irie days come on pl

In [28]:
result_df.to_csv('songEmotion.csv', index=False)

In [31]:
movie_df = movie_df.drop_duplicates(subset=['Title', 'Plot'])
movie_df = movie_df.drop(['Origin/Ethnicity', 'Wiki Page'], axis=1)
movie_df = movie_df.dropna(subset=['Title', 'Plot'])
movie_df = movie_df[movie_df['Release Year'] > 1950]

In [32]:
print(movie_df)

       Release Year                                       Title  \
5285           1951                             The 13th Letter   
5286           1951  Abbott and Costello Meet the Invisible Man   
5287           1951                             Ace in the Hole   
5288           1951                    Across the Wide Missouri   
5289           1951                Adventures of Captain Fabian   
...             ...                                         ...   
34881          2014                           The Water Diviner   
34882          2017                          Çalgı Çengi İkimiz   
34883          2017                                Olanlar Oldu   
34884          2017                            Non-Transferable   
34885          2017                          İstanbul Kırmızısı   

                      Director  \
5285            Otto Preminger   
5286            Charles Lamont   
5287              Billy Wilder   
5288        William A. Wellman   
5289          William Mar

In [38]:
def mov_process_chunk(chunk):
    chunk['emotion_metrics'] = chunk['Plot'].apply(generate_emotion_metrics)
    return chunk

In [39]:
mov_chunk_size = 1000

# Calculate the number of chunks
mov_num_chunks = len(movie_df) , / mov_chunk_size + 1

# Process the DataFrame in chunks
mov_result_chunks = []
for mov_chunk_idx in tqdm(range(mov_num_chunks)):
    mov_start_idx = mov_chunk_idx * mov_chunk_size
    mov_end_idx = min((mov_chunk_idx + 1) * mov_chunk_size, len(movie_df))
    mov_chunk = movie_df.iloc[mov_start_idx:mov_end_idx].copy()
    mov_result_chunks.append(mov_process_chunk(mov_chunk))

# Concatenate the result chunks into the final DataFrame
mov_result_df = pd.concat(mov_result_chunks)

100%|██████████| 28/28 [34:02:44<00:00, 4377.31s/it]


In [13]:
mov_result_df.to_csv('movieEmotion.csv', index=False)

NameError: name 'mov_result_df' is not defined

In [14]:
mov_final_df = pd.read_csv('movieEmotion.csv')
song_final_df = pd.read_csv('songEmotion.csv')

mov_final_df.drop(columns=['Plot'], inplace=True)
song_final_df.drop(columns=['text'], inplace=True)

In [4]:
print(mov_final_df)
print(song_final_df)

       Release Year                                       Title  \
0              1951                             The 13th Letter   
1              1951  Abbott and Costello Meet the Invisible Man   
2              1951                             Ace in the Hole   
3              1951                    Across the Wide Missouri   
4              1951                Adventures of Captain Fabian   
...             ...                                         ...   
27865          2014                           The Water Diviner   
27866          2017                          Çalgı Çengi İkimiz   
27867          2017                                Olanlar Oldu   
27868          2017                            Non-Transferable   
27869          2017                          İstanbul Kırmızısı   

                      Director  \
0               Otto Preminger   
1               Charles Lamont   
2                 Billy Wilder   
3           William A. Wellman   
4             William Mar

In [15]:
mov_final_df.dropna(subset=['emotion_metrics'], inplace=True)
song_final_df.dropna(subset=['emotion_metrics'], inplace=True)

print(mov_final_df)
print(song_final_df)

       Release Year                                       Title  \
0              1951                             The 13th Letter   
1              1951  Abbott and Costello Meet the Invisible Man   
2              1951                             Ace in the Hole   
3              1951                    Across the Wide Missouri   
4              1951                Adventures of Captain Fabian   
...             ...                                         ...   
27865          2014                           The Water Diviner   
27866          2017                          Çalgı Çengi İkimiz   
27867          2017                                Olanlar Oldu   
27868          2017                            Non-Transferable   
27869          2017                          İstanbul Kırmızısı   

                      Director  \
0               Otto Preminger   
1               Charles Lamont   
2                 Billy Wilder   
3           William A. Wellman   
4             William Mar

In [16]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from ast import literal_eval
import random
from scipy.sparse import csr_matrix


In [17]:
def convert_to_dict(emotion_str):
    return literal_eval(emotion_str)

# calculate emotion vector
def calculate_emotion_vector(emotion_dict):
    return np.array(list(emotion_dict.values()))

# calculate emotion vectors 
mov_final_df['Emotion Vector'] = mov_final_df['emotion_metrics'].apply(convert_to_dict).apply(calculate_emotion_vector)
song_final_df['Emotion Vector'] = song_final_df['emotion_metrics'].apply(convert_to_dict).apply(calculate_emotion_vector)


In [18]:
selected_movie = 'In the Mood for Love'
selected_movie_vector = mov_final_df.loc[mov_final_df['Title'] == selected_movie, 'Emotion Vector'].values[0]

# Convert the selected movie's emotion vector to a sparse matrix
selected_movie_vector_sparse = csr_matrix(selected_movie_vector)

# Convert emotion vectors of songs to a sparse matrix
songs_sparse_matrix = csr_matrix(song_final_df['Emotion Vector'].to_list())

# Calculate cosine similarity using sparse matrix operations
cosine_similarities = cosine_similarity(selected_movie_vector_sparse, songs_sparse_matrix)

# Extract top 200 similar songs
top_200_indices = np.argsort(cosine_similarities[0])[::-1][:200]
top_200_songs = song_final_df.iloc[top_200_indices]

# Randomly select 15 songs from the top 200
random.seed(42)  # for reproducibility, remove this line if you want different results each time
selected_songs = top_200_songs.sample(n=15)[['song', 'artist']]

print("Recommended Songs:")
for song, artist in selected_songs.values:
    print(f"{song} by {artist}")

Recommended Songs:
Rock Show by Lady Gaga
Ballad For A Friend by Bob Dylan
Give The Radio Back by Alice Cooper
Don't Leave Me by Green Day
The Fugitive by Iron Maiden
My Impression Now by Guided By Voices
Fear The Voices by Alice In Chains
Take Your Hand by Usher
Fallin' From The Sky by R. Kelly
Dangerous Times by Cher
Istanbul by Morrissey
World I Used To Know by Glen Campbell
Hurts Like Heaven by Coldplay
Somebody Might Wave Back by Waterboys
Scream Until You Like It by W.A.S.P.


In [99]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from config import CLIENT_ID, CLIENT_SECRET
import requests

# Initialize Spotify client
client_credentials_manager = SpotifyClientCredentials(client_id=CLIENT_ID,client_secret=CLIENT_SECRET)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager, requests_timeout=30000)

In [38]:
def get_genre(artist_name):
        # Search for the artist
    results = sp.search(q=f'artist:{artist_name}', type='artist', limit=1)
    
    # Check if any results were found
    if results['artists']['items']:
        artist_info = results['artists']['items'][0]
        if artist_info['genres']:
            return artist_info['genres'][0]  # Returning the first genre found
        else:
            return "Genre not available"
    else:
        return "Artist not found"

In [41]:
from tqdm import tqdm
import time

# Define rate limiting parameters
requests_per_minute_limit = 1000  # Adjust according to Spotify API rate limits
minute_window = 60  # seconds (1 minute)

# Track the time when the last request was made
last_request_time = None
requests_sent_this_minute = 0

# Dictionary to cache genre information for artists
genre_cache = {}

def add_genre_to_df(df):
    global last_request_time, requests_sent_this_minute

    genres = []
    for artist_name in tqdm(df['artist'], desc='Processing artists'):
        # Check if rate limit has been reached
        if requests_sent_this_minute >= requests_per_minute_limit:
            # Calculate the time to delay before making the next request
            time_since_last_request = time.time() - last_request_time
            time_to_wait = max(minute_window - time_since_last_request, 0)
            print(f"Rate limit reached. Waiting for {time_to_wait} seconds before making next request...")
            time.sleep(time_to_wait)
            requests_sent_this_minute = 0  # Reset requests count for the new minute

        # Check if genre information is cached
        if artist_name in genre_cache:
            genre = genre_cache[artist_name]
        else:
            # Make the request
            try:
                genre = get_genre(artist_name)
                genre_cache[artist_name] = genre  # Cache the genre information
            except Exception as e:
                print(f"Error fetching genre for {artist_name}: {e}")
                genre = None  # Set genre to None if retrieval fails

            # Update request tracking variables
            requests_sent_this_minute += 1
            last_request_time = time.time()

        genres.append(genre)

    df['Genre'] = genres

# Call the function with your DataFrame
add_genre_to_df(song_final_df)


Processing artists: 100%|██████████| 57650/57650 [01:31<00:00, 626.78it/s]  


In [42]:
print(song_final_df)

             artist                   song  \
0              ABBA  Ahe's My Kind Of Girl   
1              ABBA       Andante, Andante   
2              ABBA         As Good As New   
3              ABBA                   Bang   
4              ABBA       Bang-A-Boomerang   
...             ...                    ...   
57645  Ziggy Marley          Good Old Days   
57646  Ziggy Marley          Hand To Mouth   
57647          Zwan           Come With Me   
57648          Zwan                 Desire   
57649          Zwan              Heartsong   

                                         emotion_metrics  \
0      {'joy': 0.9988289208827446, 'anger': 0.0001545...   
1      {'joy': 0.6534017489030036, 'anger': 0.0038670...   
2      {'joy': 0.6008874079069335, 'anger': 0.0056411...   
3      {'joy': 0.008357059498882647, 'anger': 0.33226...   
4      {'joy': 0.01621546563623756, 'anger': 0.331428...   
...                                                  ...   
57645  {'joy': 0.9954145554

In [43]:
print(song_final_df['Genre'].unique())

['europop' 'eurodance' 'comic' 'british soul' 'album rock' 'soft rock'
 'opm' 'contemporary country' 'operatic pop' 'alternative metal'
 'bluegrass' 'new romantic' 'christian music' 'classical tenor'
 'adult standards' 'pop' 'classic opm' 'country rock' 'canadian pop'
 'boy band' 'Genre not available' 'baroque pop' 'british invasion'
 'britpop' 'movie tunes' 'disco' 'classic soul' 'alternative rock'
 'classic rock' 'reggae' 'glam metal' 'folk rock' 'dance pop'
 'heartland rock' 'classic oklahoma country' 'british folk' 'r&b' 'celtic'
 'blues' 'permanent wave' 'tin pan alley' 'arkansas country' 'neo mellow'
 'australian rock' 'jam band' 'country' 'art rock' 'big room' 'dance rock'
 'classic indonesian rock' 'desi hip hop' 'classic country pop'
 'classic praise' 'canadian hip hop' 'metal' 'rock-and-roll'
 'indietronica' 'glam rock' 'new wave' 'detroit hip hop' 'banda'
 'classic finnish rock' 'jazz blues' 'melodic drill' 'post-grunge' 'k-pop'
 'funk metal' 'gangster rap' 'punk' 'blues roc

In [46]:
edit_song_df = song_final_df[~song_final_df['Genre'].str.contains('metal', case='False')]

In [47]:
print(edit_song_df)

             artist                   song  \
0              ABBA  Ahe's My Kind Of Girl   
1              ABBA       Andante, Andante   
2              ABBA         As Good As New   
3              ABBA                   Bang   
4              ABBA       Bang-A-Boomerang   
...             ...                    ...   
57645  Ziggy Marley          Good Old Days   
57646  Ziggy Marley          Hand To Mouth   
57647          Zwan           Come With Me   
57648          Zwan                 Desire   
57649          Zwan              Heartsong   

                                         emotion_metrics  \
0      {'joy': 0.9988289208827446, 'anger': 0.0001545...   
1      {'joy': 0.6534017489030036, 'anger': 0.0038670...   
2      {'joy': 0.6008874079069335, 'anger': 0.0056411...   
3      {'joy': 0.008357059498882647, 'anger': 0.33226...   
4      {'joy': 0.01621546563623756, 'anger': 0.331428...   
...                                                  ...   
57645  {'joy': 0.9954145554

In [49]:
rows_to_delete = ['comic' 'opm' 'operatic pop' 'christian music' 'classical tenor' 'classic opm' 'movie tunes' 'glam metal' 'celtic' 'arkansas country'
 'classic indonesian rock' 'desi hip hop' 'banda' 'british comedy' 'flamenco'
 'glee club' 'cosmic american' 'canadian celtic' 'halloween' 'calypso'
 'german metal' 'ccm' 'Artist not found' 'apostolic worship' 'hawaiian' 'gospel' 'nu metal' 'danish metal' 'classic bollywood' 'comic metal' 'horror punk' 'fantasy metal' 'classic israeli pop'  'mahraganat' 'christian power metal' 'broadway'
 'death metal'  'celtic punk' 'celtic rock' 'medieval folk' "canadian children's music" 'worship' 'christian metal' 'boston metal' 'anime' 'black metal' 'classic texas country' 'comedy rock'  'dutch metal' 'old school thrash' 'banjo' 'nasheed' 'groove metal'
 'christian hardcore' 'industrial metal' 
 'christian alternative rock']

filtered_df = edit_song_df[~edit_song_df['Genre'].isin(rows_to_delete)]


In [51]:
filtered_df.to_csv('newSongEdited.csv', index=False)

In [11]:
!pip install --upgrade selenium




In [84]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [91]:
from selenium import webdriver
import time

# Initialize Safari webdriver
driver = webdriver.Safari()

driver.get('https://chartmasters.org/most-streamed-artists-ever-on-spotify/')

time.sleep(5)

# Execute JavaScript to set the value of the dropdown menu to "United States"
driver.execute_script('''
    var dropdown = document.querySelector('select.wdt-multiselect-filter');
    dropdown.value = 'United States';
    dropdown.dispatchEvent(new Event('change', { bubbles: true }));
''')

time.sleep(5)

# Extract artist names
artist_names = driver.execute_script('''
    var names = [];
    document.querySelectorAll("table.dataTable tbody tr").forEach(function(row) {
        var name = row.querySelector("td:nth-child(3)").innerText;
        names.push(name.trim());
    });
    return names;
''')

for artist in artist_names:
    print(artist)

driver.quit()


Taylor Swift  
Up to date data
Drake  
Up to date data
Bad Bunny  
Up to date data
The Weeknd  
Up to date data
Ed Sheeran  
Up to date data
Ariana Grande  
Up to date data
Justin Bieber  
Up to date data
Eminem  
Up to date data
Post Malone  
Up to date data
BTS  
Up to date data
Kanye West  
Up to date data
J Balvin  
Up to date data
Billie Eilish  
Up to date data
Coldplay  
Up to date data
Travis Scott  
Up to date data
Dua Lipa  
Up to date data
Juice WRLD  
Up to date data
Rihanna  
Up to date data
Imagine Dragons  
Up to date data
XXXTENTACION  
Up to date data
Ozuna  
Up to date data
David Guetta  
Up to date data
Maroon 5  
Up to date data
Bruno Mars  
Up to date data
Khalid  
Up to date data


In [79]:
artists_to_add = [
    "Taylor Swift", "Ariana Grande", "Eminem", "Post Malone", "Kanye West", "Billie Eilish", "Travis Scott", 
    "Juice WRLD", "Imagine Dragons", "XXXTENTACION", "Maroon 5", "Bruno Mars", "Khalid", "Kendrick Lamar", 
    "Lana Del Rey", "Beyoncé", "Future", "Lady Gaga", "J. Cole", "Chris Brown", "The Chainsmokers", "Selena Gomez", 
    "Linkin Park", "Doja Cat", "Lil Uzi Vert", "SZA", "Marshmello", "Katy Perry", "Halsey", "Metro Boomin", 
    "21 Savage", "Milev Cyrus", "Olivia Rodrigo", "Twenty One Pilots", "Red Hot Chili Peppers", "Lil Baby", 
    "OneRepublic", "Camila Cabello", "Morgan Wallen", "Michael Jackson", "Jason Derulo", "Suicideboy$", 
    "Tyler, The Creator", "Pitbull", "Mac Miller", "Demi Lovato", "Metallica", "Pop Smoke", "Frank Ocean", 
    "The Neighbourhood", "Lil Wayne", "DaBaby", "YoungBoy Never Broke Again", "Charlie Puth", "Lil Nas X", 
    "PInk", "Young Thug", "Cardi B", "Romeo Santos", "Panic! At The Disco", "Fall Out Boy", "Wiz Khalifa", 
    "Nicky Jam", "NF", "Nirvana", "Luke Combs", "A Boogie Wit da Hoodie", "A$AP Rocky", "G-Eazy", "Major Lazer", 
    "Green Day", "Trippie Redd", "Bebe Rexha", "Tyga", "John Mayer", "Gunna", "Black Eyed Peas", "Fleetwood Mac", 
    "JAY-Z", "Lauv", "Justin Timberlake", "Britney Spears", "Logic", "50 Cent", "Kodak Black", "Playboi Carti", 
    "Guns N' Roses", "Migos", "Polo G", "DJ Khaled", "2Pac", "Machine Gun Kelly", "Ava Max", "Lil Peep", 
    "Kid Cudi", "Macklemore", "Becky G", "Diplo", "Mariah Carey", "Childish Gambino", "Flo Rida", "Usher", 
    "Fuerza Regida", "Melanie Martinez", "Russ", "Elvis Presley", "Foo Fighters", "System Of A Down", 
    "Frank Sinatra", "Meghan Trainor", "blink-182", "The Killers", "Snoop Dogg", "Arcangel", "Paramore", 
    "Madonna", "Slipknot", "blackbear", "Florida Georgia Line", "Creedence Clearwater Revival", 
    "The Notorious B.I.G.", "Bon Jovi", "Ty Dolla Sign", "Kali Uchis", "Bryson Tiller", "Luke Bryan", 
    "Whitney Houston", "Alicia Keys", "Ne-Yo", "Aventura", "Miguel", "Bruce Springsteen", "Kings of Leon", 
    "Bazzi", "Mitski", "Dr. Dre", "Sabrina Carpenter", "Five Finger Death Punch", "Kelly Clarkson", 
    "Aerosmith", "Cigarettes After Sex", "6ix9ine", "Kane Brown", "Brent Faiyaz", "Billy Joel", "Stevie Wonder", "Christina Aguilera", "Roddy Ricch", "The Strokes", 
    "Backstreet Boys", "Johnny Cash", "Disturbed", "6LACK", "Summer Walker", "Cheat Codes", 
    "Chris Stapleton", "Thomas Rhett", "Alec Benjamin", "Avenged Sevenfold", "Kesha", "Jason Aldean", 
    "Akon", "Fifth Harmony", "Prince Royce", "NLE Choppa", "Rod Wave", "Eslabon Armado", "Yeat", 
    "Marc Anthony", "Kehlani", "Steve Lacy", "Chance the Rapper", "Marvin Gaye", "H.E.R.", "Lil Yachty", 
    "Foster The People", "Pentatonix", "Pharrell Williams", "Offset", "Jhené Aiko", "Journey", 
    "Bob Dylan", "Zac Brown Band", "Anderson .Paak", "Jeremy Zucker", "The Doors", "Deftones", 
    "24kGoldn", "Bon Iver", "Sam Hunt", "Tim McGraw", "2 Chainz", "Quavo", "ScHoolboy Q", "Limp Bizkit", 
    "The Black Keys", "Trey Songz", "R.E.M.", "Papa Roach", "Giveon", "T.I.", "Why Don't We", 
    "Van Halen", "Madison Beer", "B.o.B", "TV Girl", "Evanescence", "Jimi Hendrix", "Rage Against The Machine", 
    "Oliver Tree", "Aminé", "The White Stripes", "A$AP Ferg", "Normani", "Shinedown", "KISS", "Chief Keef", 
    "Nas", "Dominic Fike", "Carrie Underwood", "All Time Low", "The Fray", "King Von", "Ella Fitzgerald", 
    "Sublime", "Alan Jackson"
]


In [98]:
import lyricsgenius
import pandas as pd

# Define your access token
token = "ViN1YLPI-LbHfONflY5n8qV03Qry-CQEozwpFvdzCa5wzPvnJYKwfpV4tGd_Vp7u"

# Initialize Genius object with your access token
genius = lyricsgenius.Genius(token)


# Define a list of artists
artists = ["Taylor Swift", "Ariana Grande", "Eminem"]

# Define a function to fetch tracks and lyrics for an artist
def fetch_tracks_and_lyrics(artist_name, max_songs=50):
    # Search for songs by the artist, sorted by popularity
    artist = genius.search_artist(artist_name, max_songs=max_songs, sort="popularity")
    if artist:
        # Create a list to store the data
        data = []

        # Iterate over the songs by the artist
        for song in artist.songs:
            # Append the track details to the data list
            data.append({
                "Track": song.title,
                "Artist": artist_name,
                "Lyrics": song.lyrics
            })

        # Return the data list
        return data
    else:
        print(f"No songs found for {artist_name}")
        return []

# Define a function to fetch tracks and lyrics for multiple artists
def fetch_tracks_and_lyrics_for_artists(artist_list, max_songs_per_artist=50):
    # Create an empty list to store the data
    all_data = []

    # Iterate over the list of artists
    for artist_name in artist_list:
        print(f"Fetching songs for {artist_name}...")
        
        # Fetch tracks and lyrics for the current artist
        artist_data = fetch_tracks_and_lyrics(artist_name, max_songs=max_songs_per_artist)
        
        # Extend the list of all data with the data for the current artist
        all_data.extend(artist_data)

    # Create a DataFrame from the collected data
    df = pd.DataFrame(all_data)

    # Return the DataFrame
    return df

# Call the function to fetch tracks and lyrics for the list of artists
df_tracks_and_lyrics = fetch_tracks_and_lyrics_for_artists(artists)

# Display the DataFrame
print(df_tracks_and_lyrics)

Fetching songs for Taylor Swift...
Searching for songs by Taylor Swift...

Song 1: "All Too Well (10 Minute Version) (Taylor’s Version) [From The Vault]"
Song 2: "All Too Well (10 Minute Version) (Taylor’s Version) [Live Acoustic]"
Song 3: "​cardigan"
Song 4: "​exile"
Song 5: "Lover"
Song 6: "Is It Over Now? (Taylor’s Version) [From The Vault]"
Song 7: "​the 1"
Song 8: "​august"
Song 9: "Cruel Summer"
Song 10: "Look What You Made Me Do"
Song 11: "​tolerate it"
Song 12: "​champagne problems"
Song 13: "​betty"
Song 14: "Anti-Hero"
Song 15: "Blank Space"
Song 16: "Style"
Song 17: "You’re Losing Me (From The Vault)"
Song 18: "​my tears ricochet"
Song 19: "“Slut!” (Taylor’s Version) [From The Vault]"
Song 20: "End Game"
Song 21: "...Ready for It?"
Song 22: "You Need To Calm Down"
Song 23: "​willow"
Song 24: "ME!"
Song 25: "You’re On Your Own, Kid"
Song 26: "​invisible string"
Song 27: "Maroon"
Song 28: "We Are Never Ever Getting Back Together"
Song 29: "All Too Well"


Timeout: Request timed out:
HTTPSConnectionPool(host='genius.com', port=443): Read timed out. (read timeout=5)

In [71]:
import requests

# Define Musixmatch API endpoint and parameters
base_url = "https://api.musixmatch.com/ws/1.1/"
api_key = "cec7f5775a096475379af996a68a6e37"  # Replace with your Musixmatch API key
artist_name = "Taylor Swift"  # Replace with the artist you want to fetch songs for

# Construct the API request URL
url = f"{base_url}artist.search?q_artist={artist_name}&page_size=5&page=1&apikey={api_key}"

# Make the API request
response = requests.get(url)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Print the raw JSON response
    print(response.json())
else:
    print(f"Error: {response.status_code} - {response.reason}")


{'message': {'header': {'status_code': 200, 'execute_time': 0.024214029312134, 'available': 203}, 'body': {'artist_list': [{'artist': {'artist_id': 259675, 'artist_name': 'Taylor Swift', 'artist_name_translation_list': [{'artist_name_translation': {'language': 'JA', 'translation': 'テイラー・スウィフト'}}], 'artist_comment': '', 'artist_country': 'US', 'artist_alias_list': [{'artist_alias': 'テイラー・スウィフト'}, {'artist_alias': '泰勒·史薇芙特'}, {'artist_alias': 'Taylor Alison Swift'}, {'artist_alias': 'fu ci'}, {'artist_alias': 'da Z'}, {'artist_alias': 'Taylor Swift'}, {'artist_alias': 'Lew'}], 'artist_rating': 100, 'artist_twitter_url': 'https://twitter.com/taylorswift13', 'artist_credits': {'artist_list': []}, 'restricted': 0, 'updated_time': '2024-03-07T04:07:57Z', 'begin_date_year': '1989', 'begin_date': '1989-00-00', 'end_date_year': '', 'end_date': '0000-00-00'}}, {'artist': {'artist_id': 34690605, 'artist_name': 'Taylor Swift feat. Ed Sheeran & Future', 'artist_name_translation_list': [], 'artist_c

In [102]:
df_tracks.to_csv('topArtistsDF.csv', index='False')

In [39]:
df_tracks = pd.read_csv('topArtistsDF.csv')

In [40]:
df_tracks = df_tracks.drop_duplicates(subset=['Track', 'Artist'], keep='first')
version_keywords = ['live', 'acoustic', 'studio session', 'remix', 'demo', 'unplugged', 'the long pond studio sessions']

# Filter out rows containing version keywords in the 'Track' column
for keyword in version_keywords:
    df_tracks = df_tracks[~df_tracks['Track'].str.lower().str.contains(keyword.lower())]


In [9]:
import sys
sys.executable


'/Users/meenakshigopalakrishnan/anaconda3/envs/characterPlaylistGenerator/bin/python'

In [47]:
!pip install musixmatch
import musixmatch


Collecting musixmatch
  Downloading musixmatch-0.9.zip (31 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: musixmatch
  Building wheel for musixmatch (setup.py) ... [?25ldone
[?25h  Created wheel for musixmatch: filename=musixmatch-0.9-py3-none-any.whl size=15642 sha256=a98891245ed86a62f88ec5fb07ae72626b6932b659ec1ea2e6d23f4c1e886bd5
  Stored in directory: /Users/meenakshigopalakrishnan/Library/Caches/pip/wheels/93/4f/e9/cd1d516e9eb1f12d85a2f14d4c1b2bd007b927d802ef177875
Successfully built musixmatch
Installing collected packages: musixmatch
Successfully installed musixmatch-0.9


In [2]:
artists_to_add = [
    "Taylor Swift", "Ariana Grande", "Eminem", "Post Malone", "Kanye West", "Billie Eilish", "Travis Scott", 
    "Juice WRLD", "Imagine Dragons", "XXXTENTACION", "Maroon 5", "Bruno Mars", "Khalid", "Kendrick Lamar", 
    "Lana Del Rey", "Beyoncé", "Future", "Lady Gaga", "J. Cole", "Chris Brown", "The Chainsmokers", "Selena Gomez", 
    "Linkin Park", "Doja Cat", "Lil Uzi Vert", "SZA", "Marshmello", "Katy Perry", "Halsey", "Metro Boomin", 
    "21 Savage", "Milev Cyrus", "Olivia Rodrigo", "Twenty One Pilots", "Red Hot Chili Peppers", "Lil Baby", 
    "OneRepublic", "Camila Cabello", "Morgan Wallen", "Michael Jackson", "Jason Derulo", "Suicideboy$", 
    "Tyler, The Creator", "Pitbull", "Mac Miller", "Demi Lovato", "Metallica", "Pop Smoke", "Frank Ocean", 
    "The Neighbourhood", "Lil Wayne", "DaBaby", "YoungBoy Never Broke Again", "Charlie Puth", "Lil Nas X", 
    "PInk", "Young Thug", "Cardi B", "Romeo Santos", "Panic! At The Disco", "Fall Out Boy", "Wiz Khalifa", 
    "Nicky Jam", "NF", "Nirvana", "Luke Combs", "A Boogie Wit da Hoodie", "A$AP Rocky", "G-Eazy", "Major Lazer", 
    "Green Day", "Trippie Redd", "Bebe Rexha", "Tyga", "John Mayer", "Gunna", "Black Eyed Peas", "Fleetwood Mac", 
    "JAY-Z", "Lauv", "Justin Timberlake", "Britney Spears", "Logic", "50 Cent", "Kodak Black", "Playboi Carti", 
    "Guns N' Roses", "Migos", "Polo G", "DJ Khaled", "2Pac", "Machine Gun Kelly", "Ava Max", "Lil Peep", 
    "Kid Cudi", "Macklemore", "Becky G", "Diplo", "Mariah Carey", "Childish Gambino", "Flo Rida", "Usher", 
    "Fuerza Regida", "Melanie Martinez", "Russ", "Elvis Presley", "Foo Fighters", "System Of A Down", 
    "Frank Sinatra", "Meghan Trainor", "blink-182", "The Killers", "Snoop Dogg", "Arcangel", "Paramore", 
    "Madonna", "Slipknot", "blackbear", "Florida Georgia Line", "Creedence Clearwater Revival", 
    "The Notorious B.I.G.", "Bon Jovi", "Ty Dolla Sign", "Kali Uchis", "Bryson Tiller", "Luke Bryan", 
    "Whitney Houston", "Alicia Keys", "Ne-Yo", "Aventura", "Miguel", "Bruce Springsteen", "Kings of Leon", 
    "Bazzi", "Mitski", "Dr. Dre", "Sabrina Carpenter", "Five Finger Death Punch", "Kelly Clarkson", 
    "Aerosmith", "Cigarettes After Sex", "6ix9ine", "Kane Brown", "Brent Faiyaz", "Billy Joel", "Stevie Wonder", 
    "Christina Aguilera", "Roddy Ricch", "The Strokes", "Backstreet Boys", "Johnny Cash", "Disturbed", "6LACK", 
    "Summer Walker", "Cheat Codes", "Chris Stapleton", "Thomas Rhett", "Alec Benjamin", "Avenged Sevenfold", 
    "Kesha", "Jason Aldean", "Akon", "Fifth Harmony", "Prince Royce", "NLE Choppa", "Rod Wave", "Eslabon Armado", 
    "Yeat", "Marc Anthony", "Kehlani", "Steve Lacy", "Chance the Rapper", "Marvin Gaye", "H.E.R.", "Lil Yachty", 
    "Foster The People", "Pentatonix", "Pharrell Williams", "Offset", "Jhené Aiko", "Journey", "Bob Dylan", 
    "Zac Brown Band", "Anderson .Paak", "Jeremy Zucker", "The Doors", "Deftones", "24kGoldn", "Bon Iver", 
    "Sam Hunt", "Tim McGraw", "2 Chainz", "Quavo", "ScHoolboy Q", "Limp Bizkit", "The Black Keys", "Trey Songz", 
    "R.E.M.", "Papa Roach", "Giveon", "T.I.", "Why Don't We", "Van Halen", "Madison Beer", "B.o.B", "TV Girl", 
    "Evanescence", "Jimi Hendrix", "Rage Against The Machine", "Oliver Tree", "Aminé", "The White Stripes", 
    "A$AP Ferg", "Normani", "Shinedown", "KISS", "Chief Keef", "Nas", "Dominic Fike", "Carrie Underwood", 
    "All Time Low", "The Fray", "King Von", "Ella Fitzgerald", "Sublime", "Alan Jackson"
]

In [11]:
import requests
import pandas as pd

def get_artist_songs(artist_name):
    # Search for the artist's songs using the Genius API
    search_url = "https://genius-song-lyrics1.p.rapidapi.com/search/"
    headers = {
        "X-RapidAPI-Key": "8a9689b1eamshdd4a22bf92a824ap1fc9c8jsn30056e3b6560",
        "X-RapidAPI-Host": "genius-song-lyrics1.p.rapidapi.com"
    }
    params = {
        "q": artist_name,
        "per_page": 50,  # Number of results per page
        "page": 1        # Page number
    }
    response = requests.get(search_url, headers=headers, params=params)
    data = response.json()
    
    # Extract song titles and lyrics
    songs = []
    for hit in data.get("hits", []):
        result = hit.get("result", {})
        song_artist = result.get("primary_artist", {}).get("name", "")
        if song_artist.lower() == artist_name.lower():
            song_title = result.get("title", "")
            song_id = result.get("id", "")
            song_lyrics = get_song_lyrics(song_id)
            if song_lyrics:
                songs.append((song_artist, song_title, song_lyrics))
    
    return songs

def get_song_lyrics(song_id):
    # Get lyrics for a specific song using the Genius API
    lyrics_url = f"https://genius-song-lyrics1.p.rapidapi.com/song/lyrics/"
    headers = {
        "X-RapidAPI-Key": "8a9689b1eamshdd4a22bf92a824ap1fc9c8jsn30056e3b6560",
        "X-RapidAPI-Host": "genius-song-lyrics1.p.rapidapi.com"
    }
    params = {
        "id": song_id
    }
    response = requests.get(lyrics_url, headers=headers, params=params)
    data = response.json()
    
    return data.get("response", {}).get("lyrics", {}).get("lyrics", "")

# List of artists to fetch songs for
artists_to_add = ["Taylor Swift", "Ed Sheeran", "Adele"]

# Create an empty DataFrame to store the data
df = pd.DataFrame(columns=["Artist", "Song", "Lyrics"])

# Fetch songs for each artist and add them to the DataFrame
for artist in artists_to_add:
    songs = get_artist_songs(artist)
    if songs:
        df = df.append(pd.DataFrame(songs, columns=["Artist", "Song", "Lyrics"]), ignore_index=True)

# Display the DataFrame
print(df)


Empty DataFrame
Columns: [Artist, Song, Lyrics]
Index: []


In [26]:
import requests
import unicodedata

def get_artist_songs(artist_name):
    search_url = "https://genius-song-lyrics1.p.rapidapi.com/search/"
    headers = {
        "X-RapidAPI-Key": "8a9689b1eamshdd4a22bf92a824ap1fc9c8jsn30056e3b6560",
        "X-RapidAPI-Host": "genius-song-lyrics1.p.rapidapi.com"
    }
    params = {
        "q": artist_name,
        "per_page": 10,
        "page": 1
    }
    response = requests.get(search_url, headers=headers, params=params)
    data = response.json()

    # Extracting relevant information
    results = []
    for hit in data.get("hits", []):
        result = hit.get("result", {})
        track_name = sanitize_string(result.get("title"))
        track_info = {
            "track_name": track_name,
            "track_id": result.get("id"),
            "artist_name": result.get("primary_artist", {}).get("name"),
            "artist_id": result.get("primary_artist", {}).get("id")
        }
        results.append(track_info)

    return results

def sanitize_string(s):
    # Remove non-printable characters
    return ''.join(ch for ch in s if unicodedata.category(ch)[0] != 'C')

# Test with one artist
artist_songs = get_artist_songs("Taylor Swift")
for song in artist_songs:
    print(song)


{'track_name': 'All Too Well (10 Minute Version) (Taylor’s Version) [From The Vault]', 'track_id': 7076626, 'artist_name': 'Taylor Swift', 'artist_id': 1177}
{'track_name': 'All Too Well (10 Minute Version) (Taylor’s Version) [Live Acoustic]', 'track_id': 7394358, 'artist_name': 'Taylor Swift', 'artist_id': 1177}
{'track_name': 'cardigan', 'track_id': 5793984, 'artist_name': 'Taylor Swift', 'artist_id': 1177}
{'track_name': 'exile', 'track_id': 5793983, 'artist_name': 'Taylor Swift', 'artist_id': 1177}
{'track_name': 'Lover', 'track_id': 4508914, 'artist_name': 'Taylor Swift', 'artist_id': 1177}
{'track_name': 'the 1', 'track_id': 5794073, 'artist_name': 'Taylor Swift', 'artist_id': 1177}
{'track_name': 'Is It Over Now? (Taylor’s Version) [From The Vault]', 'track_id': 9538404, 'artist_name': 'Taylor Swift', 'artist_id': 1177}
{'track_name': 'august', 'track_id': 5793977, 'artist_name': 'Taylor Swift', 'artist_id': 1177}
{'track_name': 'Cruel Summer', 'track_id': 4712978, 'artist_name'