In [None]:
import os
from dotenv import load_dotenv
import lyricsgenius
import re
import pandas as pd
import asyncio
import time
import aiohttp
import nest_asyncio

In [None]:
# Pull secrets from the project-level .env file into the process environment.
dotenv_path = "../.env"
load_dotenv(dotenv_path=dotenv_path)

# Build the shared Genius client from the access token stored in the environment.
token = os.environ.get("GENIUS_CLIENT_ACCESS")
genius = lyricsgenius.Genius(token)

# Client options (see the lyricsgenius docs for their exact semantics):
genius.verbose = False                # keep the client quiet
genius.remove_section_headers = True  # drop section headers from returned lyrics
genius.timeout = 10                   # request timeout

# Using the Synchronous Method (one request at a time)

In [None]:
# def getLyrics(artist, song):
#     start_time = time.time()
#     time.sleep(1)  
#     try:
#         result = genius.search_song(song, artist)
#         if result:
#             # Extract lyrics after the word 'Lyrics' (if present)
#             lyrics = result.lyrics
#             lyrics_start = lyrics.lower().find("lyrics")
#             if lyrics_start != -1:
#                 lyrics = lyrics[lyrics_start + len("Lyrics"):].strip()
            
#             # Print the time taken for the request
#             elapsed_time = time.time() - start_time
#             print(f"Time taken for '{song}' by {artist}: {elapsed_time:.2f} seconds")
            
#             return lyrics
#         else:
#             print(f"Lyrics not found for {song} by {artist}")
#             return "Lyrics not found"
#     except Exception as e:
#         print(f"Error fetching lyrics for {song} by {artist}: {e}")
#         return f"Error: {e}"

In [None]:
# file_path = "../spotify_tracks_50.csv"  
# df = pd.read_csv(file_path)

# # Ensure column names match your CSV
# if "artist_name" not in df.columns or "track_name" not in df.columns:
#     raise ValueError("CSV must have 'artist_name' and 'track_name' columns!")

# # Fetch lyrics for each row
# df["lyrics"] = df.apply(lambda row: getLyrics(row["artist_name"], row["track_name"]), axis=1)
# print(df.head(5))
# df.to_csv("spotify_tracks_50_with_lyrics.csv", index=False)

# Using Async (Not working well currently)

In [None]:
# Load the track list. The async helpers below read its "artist_name" and
# "track_name" columns.
file_path = "../spotify_tracks_50.csv"  
df = pd.read_csv(file_path)

# nest_asyncio patches asyncio so that an event loop which is already running
# (as in a Jupyter kernel) can be re-entered.
nest_asyncio.apply()

# Function to fetch lyrics asynchronously
async def fetch_lyrics(artist, song):
    """Fetch one song's lyrics from Genius without blocking the event loop.

    The blocking ``genius.search_song`` call is run in the event loop's
    default executor, after a fixed 2-second pause.

    Args:
        artist: Artist name passed to the Genius search.
        song: Song title passed to the Genius search.

    Returns:
        str: One of
            * the lyrics with everything up to and including the first
              "lyrics" marker (ASCII case-insensitive) removed and the
              remainder stripped of surrounding whitespace;
            * the lyrics unchanged when no such marker is present;
            * ``"Lyrics not found"`` when the search returns nothing;
            * ``"Error fetching lyrics"`` when any exception is raised
              (the error is printed, not propagated).
    """
    loop = asyncio.get_running_loop()
    # The pause is per call: if many calls are started together, their
    # pauses overlap, so the caller has to bound concurrency for this to
    # actually space requests out.
    await asyncio.sleep(2)  # Add delay to avoid rate limit

    try:
        result = await loop.run_in_executor(None, genius.search_song, song, artist)
        if not result:
            return "Lyrics not found"

        lyrics = result.lyrics
        # Search the ORIGINAL string. Taking the offset from lyrics.lower()
        # is wrong because lower() can change the string's length (e.g.
        # "İ" lowers to two code points), which shifts the offset and cuts
        # into the real lyrics. re.ASCII keeps the case folding to plain
        # A-Z, matching what lower().find("lyrics") would locate.
        header = re.search(r"lyrics", lyrics, flags=re.IGNORECASE | re.ASCII)
        if header is not None:
            lyrics = lyrics[header.end():].strip()
        return lyrics
    except Exception as e:
        # Best effort: one failed lookup must not abort the whole batch.
        print(f"Error fetching lyrics for {song} by {artist}: {e}")
        return "Error fetching lyrics"
    
# Function to process the dataset asynchronously
async def process_dataset(df, max_concurrency=5):
    """Fetch lyrics for every row of ``df`` and store them in a "lyrics" column.

    At most ``max_concurrency`` lookups are in flight at any moment. Without
    this bound every ``fetch_lyrics`` coroutine starts at once, so their
    delays all overlap and the requests go out as a single burst.

    Args:
        df: DataFrame with "artist_name" and "track_name" columns. It is
            modified in place: a "lyrics" column is added or overwritten.
        max_concurrency: Maximum number of simultaneous ``fetch_lyrics``
            calls. Must be at least 1.

    Returns:
        The same ``df`` object, with the "lyrics" column filled in row order.

    Raises:
        ValueError: If ``max_concurrency`` is less than 1.
        KeyError: If ``df`` lacks "artist_name" or "track_name".
    """
    if max_concurrency < 1:
        # A zero-slot semaphore would block every fetch forever.
        raise ValueError("max_concurrency must be at least 1")

    limiter = asyncio.Semaphore(max_concurrency)

    async def fetch_with_limit(artist, song):
        # Hold a slot for the whole lookup so new lookups start only as
        # earlier ones finish.
        async with limiter:
            return await fetch_lyrics(artist, song)

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # wall-clock adjustments.
    start_time = time.perf_counter()
    tasks = [
        fetch_with_limit(artist, song)
        for artist, song in zip(df["artist_name"], df["track_name"])
    ]
    # gather returns results in task order, which is the row order of df.
    lyrics_results = await asyncio.gather(*tasks)
    elapsed_time = time.perf_counter() - start_time
    print(f"Time taken for {len(df)} songs: {elapsed_time:.2f} seconds")

    df["lyrics"] = lyrics_results
    return df

# If running in Jupyter, use 'await' instead of 'asyncio.run()'
# NOTE(review): `loop` is not used by any code visible here — confirm whether
# a later cell needs it before removing.
loop = asyncio.get_event_loop()
# process_dataset writes the "lyrics" column into `df` and returns that same
# object, so `updated_df` and `df` refer to one DataFrame.
updated_df = await process_dataset(df)