In [5]:
from spotify_functions import *
from get_album_counts import *

artist_name = "Beck"
artist_id = get_artist_id(artist_name, access_token)
if artist_id:
    album_count, total_tracks = get_album_counts(artist_id, access_token)
    print(f"Artist: {artist_name}")
    print(f"Number of Albums: {album_count}")
    print(f"Total Number of Tracks: {total_tracks}")
else:
    print(f"Artist {artist_name} not found.")

KeyError: 'artists'

In [2]:
from get_album_counts import *
from spotify_functions import *

def get_all_artists_album_count(csv_path, min_pause=0.5, max_pause=2.0, token_refresh_minutes=45):
    # Read the CSV file
    artists_df = pd.read_csv(csv_path)

    # Initialize list to store results
    album_data = []

    # Get initial access token and track time
    access_token = get_access_token()
    token_start_time = time.time()
    token_refresh_seconds = token_refresh_minutes * 60

    # Iterate through each artist with progress bar
    for idx, row in tqdm(artists_df.iterrows(), total=len(artists_df), desc="Fetching Album Counts"):
        # Check if we need to refresh the access token
        if time.time() - token_start_time > token_refresh_seconds:
            print(f"\nRefreshing access token after {token_refresh_minutes} minutes...")
            access_token = get_access_token()
            token_start_time = time.time()
            print("Access token refreshed successfully!")

        #artist_name = row['artist']
        artist_id = row['artist_id']

        # Get follower count
        try:
            albums = get_album_count(artist_id, access_token)
            album_data.append({
                #'artist': artist_name,
                'artist_id': artist_id,
                'album_count': album_count,
                'total_tracks': total_tracks
            })
        except Exception as e:
            print(f"Error fetching albums for {artist_id}: {e}")
            album_data.append({
                #'artist': artist_name,
                'artist_id': artist_id,
                'album_count': None,
                'total_tracks': None
            })

        # Random pause between requests (skip on last iteration)
        if idx < len(artists_df) - 1:
            pause_time = random.uniform(min_pause, max_pause)
            time.sleep(pause_time)

    # Create DataFrame from results
    followers_df = pd.DataFrame(album_data)

    return followers_df

In [7]:
#from spotify_functions import *
from get_album_counts import *

artist_id = get_all_artists_album_count("artist_followers.csv", access_token)

if artist_id:
    album_count, total_tracks = get_album_counts(artist_id, access_token)
    print(f"Artist: {artist_name}")
    print(f"Number of Albums: {album_count}")
    print(f"Total Number of Tracks: {total_tracks}")
else:
    print(f"Artist {artist_name} not found.")

Fetching Album Counts:   0%|          | 0/5121 [00:00<?, ?it/s]

Error fetching albums for 2rDxtYUzTAYJJE3Bl3Z5IN: get_album_count() takes 1 positional argument but 2 were given





TypeError: unsupported operand type(s) for -: 'float' and 'str'

### Get Album Counts for Artists in Batch (Final Code)

In [4]:
# python
import pandas as pd
import time
import random
import os
from tqdm import tqdm

from spotify_functions import get_access_token, get_artist_id
from get_album_counts import get_album_counts

access_token = get_access_token()

# Batch function with checkpoint support
def get_all_artists_album_count(csv_path, 
                                  checkpoint_file="album_counts_checkpoint.csv",
                                  checkpoint_interval=10,
                                  min_pause=0.5, 
                                  max_pause=2.0, 
                                  token_refresh_minutes=45):
    """
    Fetch album counts for all artists with checkpoint support.
    
    Args:
        csv_path: Path to CSV with artist data
        checkpoint_file: Path to save checkpoint data
        checkpoint_interval: Save checkpoint every N artists (default: 10)
        min_pause: Minimum pause between requests (default: 0.5s)
        max_pause: Maximum pause between requests (default: 2.0s)
        token_refresh_minutes: Refresh token every N minutes (default: 45)
    
    Returns:
        DataFrame with artist_id, album_count, total_tracks
    """
    # Read the CSV file
    artists_df = pd.read_csv(csv_path)
    
    # Check if checkpoint exists and load it
    start_idx = 0
    album_data = []
    
    if os.path.exists(checkpoint_file):
        print(f"Found checkpoint file: {checkpoint_file}")
        checkpoint_df = pd.read_csv(checkpoint_file)
        album_data = checkpoint_df.to_dict('records')
        start_idx = len(album_data)
        print(f"Resuming from artist #{start_idx} (already processed {start_idx} artists)")
    else:
        print("No checkpoint found. Starting from beginning.")
    
    # Get initial access token and track time
    access_token = get_access_token()
    token_start_time = time.time()
    token_refresh_seconds = token_refresh_minutes * 60
    
    # Iterate through each artist with progress bar
    total_artists = len(artists_df)
    
    for idx, row in tqdm(artists_df.iterrows(), 
                         total=total_artists, 
                         initial=start_idx,
                         desc="Fetching Album Counts"):
        
        # Skip already processed artists
        if idx < start_idx:
            continue
        
        # Refresh token if needed
        if time.time() - token_start_time > token_refresh_seconds:
            print(f"\nRefreshing access token after {token_refresh_minutes} minutes...")
            access_token = get_access_token()
            token_start_time = time.time()
            print("Access token refreshed successfully!")
        
        # Get artist_id
        artist_id = row.get('artist_id') if 'artist_id' in row.index else None
        if not artist_id:
            artist_name = row.get('artist') if 'artist' in row.index else None
            if artist_name:
                try:
                    artist_id = get_artist_id(artist_name, access_token)
                except Exception as e:
                    print(f"Error getting artist_id for {artist_name}: {e}")
        
        # Fetch album counts
        try:
            if artist_id:
                album_count, total_tracks = get_album_counts(artist_id, access_token)
            else:
                album_count, total_tracks = None, None
            
            album_data.append({
                'artist_id': artist_id,
                'album_count': album_count,
                'total_tracks': total_tracks
            })
        except Exception as e:
            print(f"Error fetching albums for {artist_id}: {e}")
            album_data.append({
                'artist_id': artist_id,
                'album_count': None,
                'total_tracks': None
            })
        
        # Save checkpoint every N artists
        if (len(album_data) % checkpoint_interval == 0):
            checkpoint_df = pd.DataFrame(album_data)
            checkpoint_df.to_csv(checkpoint_file, index=False)
            print(f"\nCheckpoint saved: {len(album_data)} artists processed")
        
        # Pause between requests (skip on last iteration)
        if idx < total_artists - 1:
            time.sleep(random.uniform(min_pause, max_pause))
    
    # Final save
    albums_df = pd.DataFrame(album_data)
    albums_df.to_csv(checkpoint_file, index=False)
    print(f"\nFinal save: {len(album_data)} artists processed")
    
    return albums_df

# Run batch and inspect results
albums_df = get_all_artists_album_count("artist_followers.csv")
print(f"\nTotal artists processed: {len(albums_df)}")
print(f"Successful: {albums_df['album_count'].notna().sum()}")
print(f"Failed: {albums_df['album_count'].isna().sum()}")
print("\nFirst few results:")
print(albums_df.head(10))

No checkpoint found. Starting from beginning.


Fetching Album Counts:   0%|          | 0/5121 [00:00<?, ?it/s]

Error fetching albums for 2rDxtYUzTAYJJE3Bl3Z5IN: Expecting value: line 1 column 1 (char 0)


Fetching Album Counts:   0%|          | 1/5121 [00:01<2:07:44,  1.50s/it]

Error fetching albums for 0WCo84qtCKfbyIf1lqQWB4: Expecting value: line 1 column 1 (char 0)


Fetching Album Counts:   0%|          | 2/5121 [00:03<2:18:17,  1.62s/it]

Error fetching albums for 0mneo6UHjcOtZBm1Tw8t67: Expecting value: line 1 column 1 (char 0)


Fetching Album Counts:   0%|          | 3/5121 [00:04<1:59:38,  1.40s/it]

Error fetching albums for 2z6qOkQVyn7h3XzUuNlRlM: Expecting value: line 1 column 1 (char 0)


Fetching Album Counts:   0%|          | 4/5121 [00:06<2:20:00,  1.64s/it]

Error fetching albums for 0dgJbQ0bKPyUXco8hEXN7X: Expecting value: line 1 column 1 (char 0)


Fetching Album Counts:   0%|          | 5/5121 [00:07<2:11:13,  1.54s/it]

Error fetching albums for 6R1kfr0GIWnwxY4zW11Vag: Expecting value: line 1 column 1 (char 0)


Fetching Album Counts:   0%|          | 6/5121 [00:09<2:22:13,  1.67s/it]

Error fetching albums for 4mw86zm4QZIL8SksdyE6OU: Expecting value: line 1 column 1 (char 0)


Fetching Album Counts:   0%|          | 7/5121 [00:10<1:54:50,  1.35s/it]

Error fetching albums for 4kYSro6naA4h99UJvo89HB: Expecting value: line 1 column 1 (char 0)


Fetching Album Counts:   0%|          | 8/5121 [00:11<1:38:51,  1.16s/it]

Error fetching albums for 1TQhvHMVoECTNs3Xxo3RMv: Expecting value: line 1 column 1 (char 0)


Fetching Album Counts:   0%|          | 8/5121 [00:11<2:05:19,  1.47s/it]


KeyboardInterrupt: 

In [None]:
# python
# When complete, save the final results with a permanent name
# (The checkpoint file can be deleted after this)

import pandas as pd
import os

checkpoint_file = "album_counts_checkpoint.csv"

if os.path.exists(checkpoint_file):
    albums_df = pd.read_csv(checkpoint_file)
    
    # Save to permanent file
    output_file = "artists_album_counts_final.csv"
    albums_df.to_csv(output_file, index=False)
    
    print(f"✓ Final results saved to: {output_file}")
    print(f"Total artists: {len(albums_df)}")
    print(f"Successful: {albums_df['album_count'].notna().sum()}")
    print(f"Failed: {albums_df['album_count'].isna().sum()}")
    
    # Optionally delete checkpoint after saving
    # os.remove(checkpoint_file)
    # print(f"✓ Checkpoint file deleted")
else:
    print("No checkpoint file found. Run the batch function first.")

## Checkpoint Management

The updated batch function includes automatic checkpoint functionality:

**Features:**
- Saves progress every 10 artists by default (configurable via `checkpoint_interval`)
- Automatically resumes from last checkpoint if interrupted
- Default checkpoint file: `album_counts_checkpoint.csv`

**Usage:**
```python
# Start fresh (will create checkpoint)
albums_df = get_all_artists_album_count("artist_followers.csv")

# If interrupted, just re-run - it automatically resumes
albums_df = get_all_artists_album_count("artist_followers.csv")

# Use custom checkpoint settings
albums_df = get_all_artists_album_count(
    "artist_followers.csv",
    checkpoint_file="my_checkpoint.csv",
    checkpoint_interval=20  # Save every 20 artists
)
```

**To start completely fresh:**
Delete the checkpoint file first:
```python
import os
if os.path.exists("album_counts_checkpoint.csv"):
    os.remove("album_counts_checkpoint.csv")
```

In [None]:
# python
# Helper functions for checkpoint management

import pandas as pd
import os

def check_checkpoint_status(checkpoint_file="album_counts_checkpoint.csv"):
    """Check the status of the current checkpoint."""
    if os.path.exists(checkpoint_file):
        df = pd.read_csv(checkpoint_file)
        print(f"Checkpoint file: {checkpoint_file}")
        print(f"Artists processed: {len(df)}")
        print(f"Successful: {df['album_count'].notna().sum()}")
        print(f"Failed: {df['album_count'].isna().sum()}")
        print(f"\nLast 5 artists processed:")
        print(df.tail())
        return df
    else:
        print(f"No checkpoint file found at: {checkpoint_file}")
        return None

def delete_checkpoint(checkpoint_file="album_counts_checkpoint.csv"):
    """Delete the checkpoint file to start fresh."""
    if os.path.exists(checkpoint_file):
        os.remove(checkpoint_file)
        print(f"Checkpoint deleted: {checkpoint_file}")
    else:
        print(f"No checkpoint file found at: {checkpoint_file}")

# Check current checkpoint status
check_checkpoint_status()

In [11]:
# python
import pandas as pd
import time
import random
from tqdm import tqdm

from spotify_functions import get_access_token, get_artist_id
from get_album_counts import get_album_counts

# Single-artist example
artist_name = "Beck"
access_token = get_access_token()
artist_id = get_artist_id(artist_name, access_token)
if artist_id:
    album_count, total_tracks = get_album_counts(artist_id, access_token)
    print(f"Artist: {artist_name}")
    print(f"Number of Albums: {album_count}")
    print(f"Total Number of Tracks: {total_tracks}")
else:
    print(f"Artist {artist_name} not found.")

# Batch function: read CSV of artists and return DataFrame with album counts
def get_all_artists_album_count(csv_path, min_pause=0.5, max_pause=2.0, token_refresh_minutes=45):
    artists_df = pd.read_csv(csv_path)
    album_data = []

    access_token = get_access_token()
    token_start_time = time.time()
    token_refresh_seconds = token_refresh_minutes * 60

    for idx, row in tqdm(artists_df.iterrows(), total=len(artists_df), desc="Fetching Album Counts"):
        # Refresh token if needed
        if time.time() - token_start_time > token_refresh_seconds:
            access_token = get_access_token()
            token_start_time = time.time()

        # Prefer explicit access via .get to avoid KeyError
        artist_id = row.get('artist_id') if 'artist_id' in row.index else None
        if not artist_id:
            artist_name = row.get('artist') if 'artist' in row.index else None
            if artist_name:
                artist_id = get_artist_id(artist_name, access_token)

        try:
            if artist_id:
                album_count, total_tracks = get_album_counts(artist_id, access_token)
            else:
                album_count, total_tracks = None, None

            album_data.append({
                'artist_id': artist_id,
                'album_count': album_count,
                'total_tracks': total_tracks
            })
        except Exception as e:
            # Keep row but mark as failed
            album_data.append({
                'artist_id': artist_id,
                'album_count': None,
                'total_tracks': None
            })

        # Pause between requests except after last
        if idx < len(artists_df) - 1:
            time.sleep(random.uniform(min_pause, max_pause))

    return pd.DataFrame(album_data)

# Run batch and inspect
albums_df = get_all_artists_album_count('artist_followers.csv')
print(albums_df.head())

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [13]:
# python
# Fixed versions for two files: `spotify_functions.py` and `get_album_counts.py`

# --- File: `spotify_functions.py` ---
import requests
import json

SPOTIFY_API_BASE = "https://api.spotify.com/v1"

def get_artist_albums(artist_id, access_token, include_groups="album,single", limit=50):
    """
    Return list of album objects for the given artist_id.
    Validates HTTP status and JSON content to avoid JSONDecodeError.
    """
    url = f"{SPOTIFY_API_BASE}/artists/{artist_id}/albums"
    headers = {"Authorization": f"Bearer {access_token}"}
    params = {
        "include_groups": include_groups,
        "limit": limit
    }

    response = requests.get(url, headers=headers, params=params, timeout=10)

    # Non-OK responses -> descriptive errors
    if not response.ok:
        if response.status_code == 401:
            raise RuntimeError("Unauthorized: access token may be invalid or expired")
        if response.status_code == 429:
            retry = response.headers.get("Retry-After")
            raise RuntimeError(f"Rate limited (HTTP 429). Retry-After: {retry}")
        raise RuntimeError(f"HTTP {response.status_code} error when fetching artist albums: {response.text}")

    # Empty body (e.g. 200 with no content) -> treat as no items
    if not response.text or not response.text.strip():
        return []

    # Parse JSON safely
    try:
        data = response.json()
    except json.JSONDecodeError as e:
        # Include a snippet of the response text for debugging
        snippet = (response.text[:200] + "...") if len(response.text) > 200 else response.text
        raise RuntimeError(f"Failed to parse JSON response: {e}. Response snippet: {snippet}")

    # Return items or empty list if not present
    return data.get("items", []) if isinstance(data, dict) else []

# --- File: `get_album_counts.py` ---
def get_album_counts(artist_id, access_token):
    """
    Get number of albums and total tracks for an artist.
    Returns (album_count, total_tracks) or (None, None) on failure.
    """
    try:
        albums = get_artist_albums(artist_id, access_token)
    except Exception:
        # Returning None tuple keeps callers safe without raising JSONDecodeError
        return None, None

    if not isinstance(albums, list):
        return None, None

    album_count = len(albums)
    total_tracks = 0

    # Sum track counts, guard against missing fields
    for alb in albums:
        tracks = alb.get("total_tracks") if isinstance(alb, dict) else None
        if isinstance(tracks, int):
            total_tracks += tracks

    return album_count, total_tracks

In [14]:
# python
import pandas as pd
import time
import random
from tqdm import tqdm

from spotify_functions import get_access_token, get_artist_id
from get_album_counts import get_album_counts

# Single-artist example
artist_name = "Beck"
access_token = get_access_token()
artist_id = get_artist_id(artist_name, access_token)
if artist_id:
    album_count, total_tracks = get_album_counts(artist_id, access_token)
    print(f"Artist: {artist_name}")
    print(f"Number of Albums: {album_count}")
    print(f"Total Number of Tracks: {total_tracks}")
else:
    print(f"Artist {artist_name} not found.")

# Batch function
def get_all_artists_album_count(csv_path, min_pause=0.5, max_pause=2.0, token_refresh_minutes=45):
    artists_df = pd.read_csv(csv_path)
    album_data = []

    access_token = get_access_token()
    token_start_time = time.time()
    token_refresh_seconds = token_refresh_minutes * 60

    for idx, row in tqdm(artists_df.iterrows(), total=len(artists_df), desc="Fetching Album Counts"):
        # Refresh token if needed
        if time.time() - token_start_time > token_refresh_seconds:
            access_token = get_access_token()
            token_start_time = time.time()

        artist_id = row.get('artist_id') if 'artist_id' in row.index else None
        if not artist_id:
            artist_name = row.get('artist') if 'artist' in row.index else None
            if artist_name:
                artist_id = get_artist_id(artist_name, access_token)

        try:
            if artist_id:
                album_count, total_tracks = get_album_counts(artist_id, access_token)
            else:
                album_count, total_tracks = None, None

            album_data.append({
                'artist_id': artist_id,
                'album_count': album_count,
                'total_tracks': total_tracks
            })
        except Exception as e:
            print(f"Error fetching albums for {artist_id}: {e}")
            album_data.append({
                'artist_id': artist_id,
                'album_count': None,
                'total_tracks': None
            })

        if idx < len(artists_df) - 1:
            time.sleep(random.uniform(min_pause, max_pause))

    return pd.DataFrame(album_data)

# Run batch and inspect
albums_df = get_all_artists_album_count('artist_followers.csv')
print(albums_df.head())

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [15]:
# python
# File: `spotify_functions.py`
import requests
import json

SPOTIFY_API_BASE = "https://api.spotify.com/v1"

def get_artist_albums(artist_id, access_token, include_groups="album,single", limit=50):
    """
    Return list of album objects for the given artist_id.
    Validates HTTP status and JSON content to avoid JSONDecodeError.
    """
    url = f"{SPOTIFY_API_BASE}/artists/{artist_id}/albums"
    headers = {"Authorization": f"Bearer {access_token}"}
    params = {"include_groups": include_groups, "limit": limit}

    response = requests.get(url, headers=headers, params=params, timeout=10)

    # Non-OK responses -> descriptive errors
    if not response.ok:
        if response.status_code == 401:
            raise RuntimeError("Unauthorized: access token may be invalid or expired")
        if response.status_code == 429:
            retry = response.headers.get("Retry-After")
            raise RuntimeError(f"Rate limited (HTTP 429). Retry-After: {retry}")
        raise RuntimeError(f"HTTP {response.status_code} error when fetching artist albums: {response.text}")

    # Empty body (e.g. 200 with no content) -> treat as no items
    if not response.text or not response.text.strip():
        return []

    # Parse JSON safely
    try:
        data = response.json()
    except json.JSONDecodeError as e:
        snippet = (response.text[:200] + "...") if len(response.text) > 200 else response.text
        raise RuntimeError(f"Failed to parse JSON response: {e}. Response snippet: {snippet}")

    return data.get("items", []) if isinstance(data, dict) else []


# File: `get_album_counts.py`
from spotify_functions import get_artist_albums

def get_album_counts(artist_id, access_token):
    """
    Get number of albums and total tracks for an artist.
    Returns (album_count, total_tracks) or (None, None) on failure.
    """
    try:
        albums = get_artist_albums(artist_id, access_token)
    except Exception:
        return None, None

    if not isinstance(albums, list):
        return None, None

    album_count = len(albums)
    total_tracks = 0

    for alb in albums:
        tracks = alb.get("total_tracks") if isinstance(alb, dict) else None
        if isinstance(tracks, int):
            total_tracks += tracks

    return album_count, total_tracks