In [None]:
# pip install sanction
# pip install lyricsgenius
# pip install python-dotenv

In [7]:
import lyricsgenius as genius
import pandas as pd
import os
from dotenv import load_dotenv

In [8]:
# Read the .env file (if any) into the process environment, then pull out
# the Genius credentials. Each name is None when its variable is not set.
load_dotenv()

client_id, secret, API_TOKEN = (
    os.getenv(variable) for variable in ("CLIENT_ID", "SECRET", "API_TOKEN")
)

In [12]:
# Artist names exactly as they appear on Genius.com; each entry is passed
# verbatim to the artist search, so spelling and the "(GRC)" style suffixes matter.
#
# Every element must be followed by a comma: Python silently concatenates
# adjacent string literals, so a missing comma merges two artists into one
# bogus name (previously "BTK (GRC)" and "Chemical B" were fused this way).
FULL_ARTISTS = [
    "Bloody Hawk", "LEX (GRC)", "Logos Timis", "Eisvoleas", "Ethismos", "Negros tou Moria", "RICTA", "Immune (GRC)",
    "12os Pithikos", "Vlospa", "Mikros Kleftis", "Tsaki", "Wang (GRC)",
    "Dani Gambino", "Katohos", "Gxhan (GRC)", "Novel 729", "Kareem Kalokoh", "Anser (GRC)", "Hawk (GRC)", "TOQUEL",
    "Sidarta", "Jaul", "HGEMONA$", "FANN", "Voreia Asteria", "Mpelafon", "Buzz (GRC)", "Trouf", "Mad Clip",
    "Light", "Sadam (GRC)", "Saske (GRC)", "GRIZZLE (GRC)", "Avi (GRC)", "Nume (GRC)",
    "Logos Apeili", "Sifu Versus", "Detro (GRC)", "Hatemost", "Lobo (GRC)", "ΔΠΘ | Deltapithita", "Sponty", "FLY LO (GRC)",
    "Βέβηλος (Vevilos)", "Bong Da City", "Mente Fuerte", "Trannos", "Kidd (GRC)", "HermesHermes", "Ivan Greko", "Arab (GRC)",
    "DAIMA", "Roi 6/12", "FY", "Lava (GRC)", "Ropex Laterio", "Strat", "RACK", "LILA (GRC)", "Sicario (GRC)", "Snik",
    "Moose (GRC)", "Thug Slime", "YungKapa", "Half Quickie", "OGE (GRC)", "XRS (GRC)", "Skez", "Yanek (GRC)",
    "Ghetto Queen (GRC)", "Tzamal", "Sin Boy", "APOF", "Diff (GRC)", "Sigma (GRC)", "ZiZZi", "Bossikan", "Taff",
    "Taf Lathos", "Jitano", "KOAS", "MG (GRC)", "Dirty Harry (CYP)", "Mc Daddy", "GMARK", "ATC Coco (GRC)", "Yolte (GRC)",
    "Onad (GRC)", "Block '93", "Scar (YM)", "Alecc (GRC)", "Long3", "Jolly Roger", "ATC Toro", "ATC Nico", "Ggreco (GRC)",
    "Iratus", "Zinon", "Kanon", "RNS (GRC)", "SUPREME (GRC)", "Mani (GRC)",
    "Rio (GRC)", "Μηδενιστής (Midenistis)", "Zontani Nekri", "Ypo", "Kataxthonios",
    "Taki Tsan", "Xarmanis", "Billy Sio", "Fi Vita Sigma", "Inka (GRC)", "Sakir",
    "X.T.P. (GRC)", "Joker/Two-Face", "Styl Mo", "Nesok", "ΙΖΩ (IZW)", "Arrwsto Pneuma", "Jako (GRC)",
    "Split (GRC)", "Ai Tsavouras (GRC)", "LADELE", "Expe (GRC)", "Aria (GR)", "AEON (GRC)", "Above The Hood",
    "Bitapeis", "Ημισκούμπρια (Imiskoubria)", "Anapoda Kapela", "BTK (GRC)",
    "Chemical B", "Apethantos", "Άσαρκος (Asarkos)", "Kako (GRC)", "E.P. (GRC)", "$ulee (GRC)",
]

In [10]:
# Build the Genius API client and bind it to `genius`, the name the scraping
# code uses for its searches.
#
# The package is imported at the top of the notebook under the alias `genius`,
# so the previous `genius = genius.Genius(...)` replaced the module with the
# client object and the cell raised AttributeError when run a second time.
# Importing the package under its real name keeps this cell re-runnable.
import lyricsgenius

genius = lyricsgenius.Genius(
    API_TOKEN,
    remove_section_headers=False,  # keep section headers such as [Chorus] in the lyrics text
    skip_non_songs=True,  # Skip pages that aren't actual songs
    verbose=False,
    timeout=15,  # per-request timeout, in seconds
    retries=3,
)

In [None]:
# Run configuration -- adjust these for each manual run.
# START/END pick a slice of FULL_ARTISTS and only take effect when
# RUN_FULL_LIST is False; with RUN_FULL_LIST = True every artist is processed.
START = 40
END = 50
RUN_FULL_LIST = True
FILENAME = 'genius_songs_data.csv'


def _song_to_row(song):
    """Flatten one song from `artist.songs` into a dict of CSV columns."""
    # NOTE(review): only the first featured artist is kept even though the
    # column is named 'featured_artists' -- confirm this is intended.
    featured_artist_name = None
    if song.featured_artists:
        featured_artist_name = song.featured_artists[0]['name']

    # Use .to_dict() to get raw data
    song_details = song.to_dict()

    # Safe access for pageviews (avoids crash if 'stats' is missing)
    stats = song_details.get('stats')

    return {
        'artist': song.artist,
        'title': song.title,
        'album': song.album['name'] if song.album else None,
        'annotation_count': song.annotation_count,
        'featured_artists': featured_artist_name,
        'lyrics': song.lyrics,
        'date': song_details.get('release_date'),
        'pageviews': stats.get('pageviews') if stats else None,
    }


# Pick the artists for this run and report what is actually being processed.
if RUN_FULL_LIST:
    current_batch = FULL_ARTISTS
    print(f"Processing all {len(current_batch)} artists...")
else:
    current_batch = FULL_ARTISTS[START:END]
    print(f"Processing batch from {START} to {END}...")

data = []

for artist_name in current_batch:
    # Best-effort scraping: a failure for one artist is reported and the loop
    # moves on. Rows already collected for that artist are kept.
    try:
        print(f"Searching for {artist_name}...")
        artist = genius.search_artist(artist_name, sort='popularity', get_full_info=True)

        # search_artist returns None when nothing matches; report that
        # explicitly instead of failing on `None.songs`.
        if artist is None:
            print(f"No artist found for {artist_name}, skipping.")
            continue

        for song in artist.songs:
            data.append(_song_to_row(song))

    except Exception as e:
        print(f"Error processing {artist_name}: {e}")

if data:
    df = pd.DataFrame(data)

    # Write the header only when starting a fresh file. A file that exists
    # but is empty counts as fresh, otherwise it would never get a header.
    needs_header = not os.path.isfile(FILENAME) or os.path.getsize(FILENAME) == 0

    # mode='a' appends, so re-running this cell for the same artists adds
    # duplicate rows to FILENAME.
    df.to_csv(FILENAME, mode='a', index=False, header=needs_header)

    print(f"Successfully appended {len(df)} songs to {FILENAME}")
else:
    print("No data found in this batch.")