# Genius API Testing Notebook
Clean start for testing the lyricsgenius library with proper rate limiting.

In [1]:
# Install the lyricsgenius library
!pip install lyricsgenius



In [2]:
# Test if the library is installed and token is valid
import requests

# Your token
token = 'LMPk6C40OW27My1LmWJH6fczQjP9FobMs2gUpNzy9f63JUcFFLcKvEqULoo9F8dQ'

# Test 1: Check if we can import lyricsgenius
try:
    import lyricsgenius
    print("✓ lyricsgenius library imported successfully")
    # Try to get version info if available
    try:
        version = getattr(lyricsgenius, '__version__', 'version not available')
        print(f"Library version: {version}")
    except:
        print("Library version: not available")
except ImportError as e:
    print(f"✗ Cannot import lyricsgenius: {e}")
    print("Make sure it's installed: !pip install lyricsgenius")

# Test 2: Validate token with direct API call
print("\nTesting token validity...")
headers = {'Authorization': f'Bearer {token}'}
try:
    response = requests.get('https://api.genius.com/account', headers=headers)
    if response.status_code == 200:
        data = response.json()
        print("✓ Token is VALID!")
        print(f"Account: {data['response']['user']['name']}")
    elif response.status_code == 401:
        print("✗ Token is INVALID or EXPIRED")
    else:
        print(f"✗ API returned status code: {response.status_code}")
except Exception as e:
    print(f"✗ Error testing token: {e}")

✓ lyricsgenius library imported successfully


AttributeError: module 'lyricsgenius' has no attribute '__version__'

In [3]:
import lyricsgenius
import time

# Your Genius API access token
GENIUS_ACCESS_TOKEN = 'LMPk6C40OW27My1LmWJH6fczQjP9FobMs2gUpNzy9f63JUcFFLcKvEqULoo9F8dQ'

# Initialize with conservative rate limiting for educational use
# IMPORTANT: These settings prevent API blocking and ensure reliable downloads
genius = lyricsgenius.Genius(GENIUS_ACCESS_TOKEN)
genius.timeout = 20          # Wait up to 20 seconds for responses
genius.sleep_time = 2        # Wait 2 seconds between requests (REQUIRED)
genius.retries = 2           # Retry failed requests twice

# RATE LIMITING NOTES FOR ACADEMIC USE:
# - Current settings (2 sec delays) are SAFE for educational projects
# - Genius API allows faster requests, but slower = more reliable
# - If downloading large datasets (50+ songs), consider increasing delays
# - For small projects (< 20 songs), you could reduce to 1 second
# - NEVER remove rate limiting completely - you'll get blocked!

print("✓ Genius API client initialized with educational rate limiting")

✓ Genius API client initialized with rate limiting


In [None]:
# Simple test - search for one song
try:
    print("Testing API connection...")
    song = genius.search_song("Hello", "Adele")
    
    if song:
        print(f"✓ Found: {song.title} by {song.artist}")
        print("API is working!")
    else:
        print("Song not found, but API responded")
        
except Exception as e:
    print(f"✗ Error: {e}")

In [4]:
# If the simple test works, try searching for an artist
try:
    print("Searching for Taylor Swift (limited to 2 songs)...")
    time.sleep(3)  # Wait 3 seconds before making request
    
    artist = genius.search_artist("Taylor Swift", max_songs=2, sort="popularity")
    
    if artist:
        print(f"✓ Found artist: {artist.name}")
        print(f"Found {len(artist.songs)} songs:")
        for i, song in enumerate(artist.songs, 1):
            print(f"  {i}. {song.title}")
    else:
        print("Artist not found")
        
except Exception as e:
    print(f"✗ Error: {e}")
    print("This might be a rate limiting issue. Wait a few minutes and try again.")

Searching for Taylor Swift (limited to 2 songs)...
Searching for songs by Taylor Swift...

Song 1: "All Too Well (10 Minute Version) (Taylor’s Version) [From The Vault]"
Song 2: "Wood"

Reached user-specified song limit (2).
Done. Found 2 songs.
✓ Found artist: Taylor Swift
Found 2 songs:
  1. All Too Well (10 Minute Version) (Taylor’s Version) [From The Vault]
  2. Wood


In [5]:
# Get lyrics for a specific Taylor Swift song
try:
    print("Searching for a Taylor Swift song with lyrics...")
    time.sleep(2)  # Rate limiting delay
    
    # Search for a specific song
    song = genius.search_song("Shake It Off", "Taylor Swift")
    
    if song:
        print(f"✓ Found: {song.title} by {song.artist}")
        print(f"Release date: {song.year}")
        print(f"Album: {song.album}")
        
        # Get basic info about the lyrics (without displaying full text)
        if song.lyrics:
            lyrics_length = len(song.lyrics)
            word_count = len(song.lyrics.split())
            print(f"Lyrics retrieved: {lyrics_length} characters, ~{word_count} words")
            
            # Show just the first few words as a sample (respecting copyright)
            first_line = song.lyrics.split('\n')[0][:50] + "..."
            print(f"First line sample: {first_line}")
            print("\n✓ Lyrics successfully retrieved!")
            print("Note: Full lyrics not displayed to respect copyright")
        else:
            print("No lyrics found for this song")
    else:
        print("Song not found")
        
except Exception as e:
    print(f"✗ Error retrieving lyrics: {e}")
    print("This might be due to rate limiting. Try again in a moment.")

Searching for a Taylor Swift song with lyrics...
Searching for "Shake It Off" by Taylor Swift...
Searching for "Shake It Off" by Taylor Swift...
Done.
✓ Found: Shake It Off by Taylor Swift
✗ Error retrieving lyrics: 'Song' object has no attribute 'year'
This might be due to rate limiting. Try again in a moment.
Done.
✓ Found: Shake It Off by Taylor Swift
✗ Error retrieving lyrics: 'Song' object has no attribute 'year'
This might be due to rate limiting. Try again in a moment.


In [None]:
# Distant Reading Analysis of Lyrics
import re
from collections import Counter

def analyze_lyrics(song_obj):
    """
    Perform distant reading analysis on song lyrics
    Returns analytical data without displaying copyrighted content
    """
    if not song_obj or not song_obj.lyrics:
        return None
    
    lyrics = song_obj.lyrics.lower()
    
    # Basic metrics
    total_chars = len(lyrics)
    total_words = len(lyrics.split())
    lines = [line.strip() for line in lyrics.split('\n') if line.strip()]
    total_lines = len(lines)
    
    # Clean text for analysis (remove punctuation, etc.)
    clean_text = re.sub(r'[^\w\s]', '', lyrics)
    words = clean_text.split()
    
    # Word frequency analysis
    word_freq = Counter(words)
    most_common = word_freq.most_common(10)
    
    # Sentiment indicators (simple keyword counting)
    positive_words = ['love', 'happy', 'good', 'beautiful', 'amazing', 'wonderful', 'joy']
    negative_words = ['sad', 'hurt', 'pain', 'crying', 'broken', 'lonely', 'dark']
    
    positive_count = sum(word_freq[word] for word in positive_words if word in word_freq)
    negative_count = sum(word_freq[word] for word in negative_words if word in word_freq)
    
    # Repetition analysis
    unique_words = len(set(words))
    repetition_ratio = len(words) / unique_words if unique_words > 0 else 0
    
    return {
        'title': song_obj.title,
        'artist': song_obj.artist,
        'total_chars': total_chars,
        'total_words': total_words,
        'total_lines': total_lines,
        'unique_words': unique_words,
        'repetition_ratio': round(repetition_ratio, 2),
        'most_common_words': most_common,
        'positive_sentiment': positive_count,
        'negative_sentiment': negative_count,
        'avg_words_per_line': round(total_words / total_lines if total_lines > 0 else 0, 2)
    }

# Test the analysis function
try:
    print("Performing distant reading analysis on Taylor Swift song...")
    time.sleep(2)
    
    # Use the song from previous cell if available, or search for a new one
    if 'song' in locals():
        analysis = analyze_lyrics(song)
    else:
        song = genius.search_song("Anti-Hero", "Taylor Swift")
        analysis = analyze_lyrics(song)
    
    if analysis:
        print(f"\n=== Distant Reading Analysis: {analysis['title']} by {analysis['artist']} ===")
        print(f"Total characters: {analysis['total_chars']}")
        print(f"Total words: {analysis['total_words']}")
        print(f"Total lines: {analysis['total_lines']}")
        print(f"Unique words: {analysis['unique_words']}")
        print(f"Repetition ratio: {analysis['repetition_ratio']}")
        print(f"Average words per line: {analysis['avg_words_per_line']}")
        
        print(f"\nSentiment indicators:")
        print(f"Positive words: {analysis['positive_sentiment']}")
        print(f"Negative words: {analysis['negative_sentiment']}")
        
        print(f"\nMost common words:")
        for word, count in analysis['most_common_words']:
            print(f"  '{word}': {count} times")
            
        print(f"\n✓ Analysis complete - suitable for distant reading research!")
    else:
        print("Could not analyze lyrics")
        
except Exception as e:
    print(f"✗ Error in analysis: {e}")

In [10]:
# Download and access full lyrics for research
def download_lyrics_for_research(artist_name, song_title):
    """
    Download full lyrics for academic research purposes
    Returns the complete lyrics text for analysis
    """
    try:
        print(f"Downloading lyrics for research: {song_title} by {artist_name}")
        time.sleep(2)  # Rate limiting
        
        song = genius.search_song(song_title, artist_name)
        
        if song and song.lyrics:
            print(f"✓ Successfully downloaded lyrics for '{song.title}'")
            print(f"Lyrics length: {len(song.lyrics)} characters")
            
            # Return the full lyrics for your research
            # Use getattr with defaults for optional attributes
            return {
                'title': song.title,
                'artist': song.artist,
                'lyrics': song.lyrics,
                'album': getattr(song, 'album', 'Unknown'),
                'year': getattr(song, 'year', 'Unknown'),
                'url': getattr(song, 'url', '')
            }
        else:
            print("No lyrics found")
            return None
            
    except Exception as e:
        print(f"Error downloading lyrics: {e}")
        return None

# Example: Download lyrics for research
try:
    # Download a song for analysis
    song_data = download_lyrics_for_research("Taylor Swift", "Anti-Hero")
    
    if song_data:
        lyrics_text = song_data['lyrics']
        
        # Now you have the full lyrics text for your distant reading analysis
        print(f"\n✓ Lyrics ready for research analysis")
        print(f"You can now perform any text analysis on the variable 'lyrics_text'")
        print(f"Example operations:")
        print(f"- Word counts: len(lyrics_text.split())")
        print(f"- Line analysis: lyrics_text.split('\\n')")
        print(f"- Sentiment analysis, topic modeling, etc.")
        
        # Store for further analysis
        research_lyrics = lyrics_text
        
    else:
        print("Failed to download lyrics data")
        
except Exception as e:
    print(f"Error: {e}")

Downloading lyrics for research: Anti-Hero by Taylor Swift
Searching for "Anti-Hero" by Taylor Swift...
Done.
✓ Successfully downloaded lyrics for 'Anti-Hero'
Lyrics length: 1894 characters

✓ Lyrics ready for research analysis
You can now perform any text analysis on the variable 'lyrics_text'
Example operations:
- Word counts: len(lyrics_text.split())
- Line analysis: lyrics_text.split('\n')
- Sentiment analysis, topic modeling, etc.


In [12]:
# Save lyrics to text files for research
import os
import re
from datetime import datetime

def save_lyrics_to_file(song_data, folder_path="lyrics_corpus"):
    """
    Save downloaded lyrics to a .txt file for research purposes
    """
    if not song_data or not song_data['lyrics']:
        print("No lyrics data to save")
        return None
    
    # Create folder if it doesn't exist
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
        print(f"Created folder: {folder_path}")
    
    # Create safe filename (remove special characters)
    safe_title = re.sub(r'[^\w\s-]', '', song_data['title'])
    safe_artist = re.sub(r'[^\w\s-]', '', song_data['artist'])
    filename = f"{safe_artist} - {safe_title}.txt"
    filepath = os.path.join(folder_path, filename)
    
    try:
        with open(filepath, 'w', encoding='utf-8') as f:
            # Write metadata header
            f.write(f"Title: {song_data['title']}\n")
            f.write(f"Artist: {song_data['artist']}\n")
            f.write(f"Album: {song_data.get('album', 'Unknown')}\n")
            f.write(f"Year: {song_data.get('year', 'Unknown')}\n")
            f.write(f"Genius URL: {song_data.get('url', '')}\n")
            f.write(f"Downloaded: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
            f.write("=" * 50 + "\n\n")
            
            # Write the lyrics
            f.write(song_data['lyrics'])
        
        print(f"✓ Lyrics saved to: {filepath}")
        return filepath
        
    except Exception as e:
        print(f"✗ Error saving file: {e}")
        return None

# Example: Save the previously downloaded lyrics
try:
    if 'song_data' in locals() and song_data:
        # Save to a text file
        saved_file = save_lyrics_to_file(song_data)
        
        if saved_file:
            print(f"\n✓ Lyrics corpus file created!")
            print(f"File location: {saved_file}")
            print(f"You can now use this file for:")
            print(f"- Text analysis software")
            print(f"- Import into other research tools")
            print(f"- Building larger corpora")
    else:
        print("No song data available. Run the previous cell first.")
        
except Exception as e:
    print(f"Error: {e}")

✓ Lyrics saved to: lyrics_corpus/Taylor Swift - Anti-Hero.txt

✓ Lyrics corpus file created!
File location: lyrics_corpus/Taylor Swift - Anti-Hero.txt
You can now use this file for:
- Text analysis software
- Import into other research tools
- Building larger corpora


In [None]:
# Simple function to download lyrics for multiple songs from an album
def download_album_lyrics(artist_name, album_name, max_songs=10):
    """
    Download lyrics for songs from a specific album
    """
    print(f"Searching for {album_name} by {artist_name}...")
    time.sleep(2)
    
    try:
        # Get artist with songs
        artist = genius.search_artist(artist_name, max_songs=max_songs, sort="popularity")
        
        if not artist:
            print(f"Artist {artist_name} not found")
            return []
        
        album_songs = []
        for song in artist.songs:
            # Check if song is from the specified album
            song_album = getattr(song, 'album', '')
            if album_name.lower() in song_album.lower():
                song_data = {
                    'title': song.title,
                    'artist': song.artist,
                    'lyrics': song.lyrics,
                    'album': song_album,
                    'year': getattr(song, 'year', 'Unknown'),
                    'url': getattr(song, 'url', '')
                }
                album_songs.append(song_data)
                print(f"✓ Found: {song.title}")
                time.sleep(3)  # Rate limiting
        
        print(f"\n✓ Downloaded {len(album_songs)} songs from {album_name}")
        return album_songs
        
    except Exception as e:
        print(f"Error: {e}")
        return []

# Example usage (uncomment to use):
# album_songs = download_album_lyrics("Taylor Swift", "Midnights", max_songs=8)
# if album_songs:
#     save_lyrics_corpus(album_songs)  # Save all songs to files

## Rate Limiting Guidelines for Students

**Why We Use Rate Limiting:**
- Prevents API access from being blocked or suspended
- Shows respect for Genius API's server resources  
- Ensures reliable downloads for academic research
- Models responsible data collection practices

**Current Settings:**
- **2-3 second delays** between requests (conservative for education)
- **20 second timeouts** (handles slower connections)
- **Automatic retries** for failed requests

**When You Might Adjust Rates:**
- **Small projects (1-10 songs)**: Could reduce to `genius.sleep_time = 1`
- **Large datasets (50+ songs)**: Increase delays to `genius.sleep_time = 3-5`
- **Multiple students using same network**: Keep conservative settings
- **Personal research projects**: Could experiment with faster rates

**Important:** Always test with a few songs first before running large downloads!