In [1]:
import json
from pathlib import Path
import lyricsgenius

def load_config(config_path='config.json'):
    """Load the Genius section of a JSON configuration file.

    Parameters
    ----------
    config_path : str or pathlib.Path, optional
        Location of the configuration file. Defaults to ``'config.json'``.
        A relative path is resolved against the current working directory,
        not against the location of this notebook.

    Returns
    -------
    The value stored under the top-level ``'genius'`` key of the file.

    Raises
    ------
    FileNotFoundError
        If the configuration file does not exist.
    json.JSONDecodeError
        If the file is not valid JSON.
    KeyError
        If the file has no top-level ``'genius'`` key.
    """
    config_path = Path(config_path)

    # Explicit encoding so parsing does not depend on the platform's locale default
    with config_path.open('r', encoding='utf-8') as f:
        config = json.load(f)
    return config['genius']

# Genius credentials: the 'genius' section of config.json; later cells read creds['access_token']
creds = load_config()

In [2]:
# Look up the artist on Genius (fetching a single song is enough here) and print the matched name
genius = lyricsgenius.Genius(creds['access_token'], verbose=False, remove_section_headers=True)
artist = genius.search_artist('Sin Bandera', max_songs=1)
# search_artist returns None when nothing matches; report that instead of
# failing with AttributeError on `artist.name`
print(artist.name if artist is not None else 'Artist not found')

Sin Bandera


In [3]:
# Same lookup for the second artist: fetch a single song and print the matched name
genius = lyricsgenius.Genius(creds['access_token'], verbose=False, remove_section_headers=True)
artist = genius.search_artist('Loveless', max_songs=1)
# search_artist returns None when nothing matches; report that instead of
# failing with AttributeError on `artist.name`
print(artist.name if artist is not None else 'Artist not found')

Loveless


In [None]:
# First draft (not run), kept for comparison: it handles missing keys, None values
# and short result lists poorly
import pandas as pd

def get_lyrics_with_api(artist, max_songs=15):
    """Fetch songs for `artist` from Genius and return one DataFrame row per song.

    Known weaknesses of this draft:
    - `artist.songs` is read unconditionally, so a search that yields no
      artist object fails with AttributeError.
    - The loop indexes `result[i]` for every `i` in `range(max_songs)`, which
      raises IndexError when fewer than `max_songs` songs come back.
    - Every field is read with `[...]`, so a missing key raises KeyError and a
      None under 'stats' or 'album' raises TypeError.
    """
    genius = lyricsgenius.Genius(creds['access_token'], verbose=False, remove_section_headers=True, timeout=15)
    # NOTE: rebinds the `artist` parameter (the name searched for) to the search result
    artist = genius.search_artist(artist, max_songs=max_songs)
    result = [song.to_dict() for song in artist.songs]
    
    data = []
    # Assumes exactly `max_songs` entries are available in `result`
    for i in range(max_songs):
        data.append({
            'artist': result[i]['primary_artist_names'],
            'title': result[i]['title'],
            'release_date': result[i]['release_date'],
            'pageviews': result[i]['stats']['pageviews'],
            'album': result[i]['album']['name'],
            'lyrics': result[i]['lyrics']
        })
    
    return pd.DataFrame(data)

In [None]:
# Refined version: tolerates missing keys, None values and searches with no result
import pandas as pd

# Column order of the returned DataFrame (kept stable even when there are no rows)
_LYRICS_COLUMNS = ['artist', 'title', 'release_date', 'pageviews', 'album', 'lyrics']


def _song_to_row(song):
    """Flatten one song dict into a row, substituting defaults for missing, None or empty values."""
    # `dict.get(key, default)` only falls back when the key is absent; `or`
    # also covers keys that are present but hold None (or another empty value).
    primary_artist = song.get('primary_artist') or {}
    stats = song.get('stats') or {}
    album = song.get('album') or {}
    return {
        'artist': primary_artist.get('name') or 'Unknown',
        'title': song.get('title') or 'Untitled',
        'release_date': song.get('release_date') or 'No date',
        'pageviews': stats.get('pageviews') or 0,
        'album': album.get('name') or 'No album',
        'lyrics': song.get('lyrics') or 'No lyrics available',
    }


def get_lyrics_with_api(artist, max_songs=10, client=None):
    """Fetch songs for an artist and return them as a DataFrame, one row per song.

    Parameters
    ----------
    artist : str
        Artist name passed to the client's ``search_artist``.
    max_songs : int, optional
        Upper bound on the number of songs requested and on the rows returned.
    client : object, optional
        Object exposing ``search_artist(name, max_songs=...)``. When omitted, a
        ``lyricsgenius.Genius`` client is built from ``creds['access_token']``.
        Passing one allows the same client to be reused across calls.

    Returns
    -------
    pandas.DataFrame
        Columns: artist, title, release_date, pageviews, album, lyrics.
        Missing, None or empty fields are replaced by placeholder values
        ('Unknown', 'Untitled', 'No date', 0, 'No album',
        'No lyrics available'). The frame has these columns and zero rows
        when the search yields nothing.
    """
    if client is None:
        client = lyricsgenius.Genius(creds['access_token'],
                                     verbose=False,
                                     remove_section_headers=True,
                                     timeout=15)

    found = client.search_artist(artist, max_songs=max_songs)

    # A falsy search result (e.g. None for an unknown artist) means "no songs"
    songs = [song.to_dict() for song in found.songs] if found else []

    # Slicing never pads: with 3 songs and max_songs=4, songs[:4] still has 3 elements
    rows = [_song_to_row(song) for song in songs[:max_songs]]

    return pd.DataFrame(rows, columns=_LYRICS_COLUMNS)

# Get the dataframes: one per artist, using the function's default max_songs
sin_bandera = get_lyrics_with_api('Sin Bandera')
loveless = get_lyrics_with_api('Loveless')

In [3]:
# Inspection
# DataFrame.info() prints its report itself and returns None, so call it directly
# (wrapping it in print() adds a stray "None" line after the report)
sin_bandera.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   artist        10 non-null     object
 1   title         10 non-null     object
 2   release_date  10 non-null     object
 3   pageviews     10 non-null     int64 
 4   album         10 non-null     object
 5   lyrics        10 non-null     object
dtypes: int64(1), object(5)
memory usage: 612.0+ bytes
None


In [4]:
# DataFrame.info() prints its report itself and returns None; no print() needed
loveless.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   artist        10 non-null     object
 1   title         10 non-null     object
 2   release_date  10 non-null     object
 3   pageviews     10 non-null     int64 
 4   album         10 non-null     object
 5   lyrics        10 non-null     object
dtypes: int64(1), object(5)
memory usage: 612.0+ bytes
None


In [5]:
# Save the data
# to_csv does not create missing folders, so make sure the output directory exists first
output_dir = Path('data')
output_dir.mkdir(parents=True, exist_ok=True)
sin_bandera.to_csv(output_dir / 'sin_bandera.csv', index=False)
loveless.to_csv(output_dir / 'loveless.csv', index=False)