In [None]:
import requests
import base64
import json
import pandas as pd
from datetime import datetime

CLIENT_ID = ''
CLIENT_SECRET = ''

def get_access_token(client_id, client_secret, timeout=10):
    """Request an access token using Spotify's client-credentials grant.

    Args:
        client_id: Spotify application client ID.
        client_secret: Spotify application client secret.
        timeout: Seconds to wait for the token endpoint before giving up.

    Returns:
        The ``access_token`` field of the JSON response.

    Raises:
        requests.exceptions.HTTPError: If the endpoint answers with an error status.
        requests.exceptions.Timeout: If no response arrives within ``timeout``.
    """
    auth_url = 'https://accounts.spotify.com/api/token'
    # HTTP Basic credentials are base64("client_id:client_secret").
    auth_header = base64.b64encode(f"{client_id}:{client_secret}".encode('utf-8')).decode('utf-8')
    headers = {'Authorization': f'Basic {auth_header}'}
    data = {'grant_type': 'client_credentials'}
    # Without an explicit timeout, requests waits indefinitely on a stalled server.
    response = requests.post(auth_url, headers=headers, data=data, timeout=timeout)
    response.raise_for_status()
    token_info = response.json()
    return token_info['access_token']

def get_artist_details(artist_id, headers):
    """Look up one artist and return ``(follower_total, genre_list)``.

    Args:
        artist_id: Spotify ID of the artist to query.
        headers: Request headers carrying the bearer authorization.

    Returns:
        A tuple of the artist's total follower count and the list of genres
        (an empty list when the response has no ``genres`` key).

    Raises:
        requests.exceptions.HTTPError: If the API answers with an error status.
    """
    reply = requests.get(f"https://api.spotify.com/v1/artists/{artist_id}", headers=headers)
    reply.raise_for_status()
    payload = reply.json()
    return payload['followers']['total'], payload.get('genres', [])

def get_decade(release_date):
    """Map a release date string to a decade label such as ``'1990s'``.

    Args:
        release_date: Date string whose first ``-``-separated field is the
            year, e.g. ``'2020-01-15'``, ``'2020-01'`` or ``'2020'``.

    Returns:
        The decade label (``'2020s'``), or ``None`` when ``release_date`` is
        missing or does not start with an integer year.
    """
    try:
        year = int(release_date.split('-')[0])
    except (AttributeError, ValueError):
        # AttributeError: release_date is None / not a string.
        # ValueError: the leading field is not a number (e.g. empty string).
        return None
    decade = (year // 10) * 10
    return f"{decade}s"

def fetch_spotify_data(access_token, num_tracks=700):
    """Collect up to ``num_tracks`` track rows from Spotify's search endpoint.

    Pages through ``/v1/search`` with a page size of 50 and, for each track,
    looks up the first listed artist for follower count and primary genre.

    Args:
        access_token: Bearer token for the Spotify Web API.
        num_tracks: Maximum number of rows to collect.

    Returns:
        A list of dicts with keys ``artist``, ``track``, ``genre``,
        ``popularity``, ``followers`` and ``decade``. The list can be shorter
        than ``num_tracks`` when the search results run out or the search
        request keeps failing.
    """
    search_url = 'https://api.spotify.com/v1/search'
    headers = {'Authorization': f'Bearer {access_token}'}

    all_tracks_data = []
    # artist_id -> (followers, primary_genre); only successful lookups are
    # stored, so each artist costs at most one successful request.
    artist_cache = {}
    limit = 50
    offset = 0
    max_offset = 1000
    max_consecutive_errors = 5
    consecutive_errors = 0

    while len(all_tracks_data) < num_tracks:
        if offset > max_offset:
            # The query never changes, so wrapping back to offset 0 would only
            # re-collect rows that are already in the result.
            print(f"Reached offset {max_offset}; stopping with {len(all_tracks_data)} tracks.")
            break

        params = {
            # NOTE(review): this searches for the literal keyword "year"; if a
            # year filter was intended the query needs a value — confirm.
            'q': 'year',
            'type': 'track',
            'market': 'US',
            'limit': limit,
            'offset': offset
        }

        response = requests.get(search_url, headers=headers, params=params)

        if response.status_code != 200:
            consecutive_errors += 1
            print(f"Error fetching data: {response.status_code}. Skipping.")
            if consecutive_errors >= max_consecutive_errors:
                # A persistent failure would otherwise spin forever.
                print(f"{consecutive_errors} consecutive search errors; stopping with {len(all_tracks_data)} tracks.")
                break
            offset += limit
            continue
        consecutive_errors = 0

        search_results = response.json()
        tracks = search_results.get('tracks', {}).get('items', [])

        if not tracks:
            # Results are exhausted; restarting at offset 0 would duplicate rows
            # (or loop forever if the very first page is empty).
            print(f"No more search results; stopping with {len(all_tracks_data)} tracks.")
            break

        for track in tracks:
            if len(all_tracks_data) >= num_tracks:
                break

            track_name = track['name']
            popularity = track['popularity']
            release_date = track.get('album', {}).get('release_date')
            # A track without a release date gets no decade instead of crashing.
            decade = get_decade(release_date) if release_date else None

            artist_name = "N/A"
            followers = 0
            primary_genre = 'N/A'

            if track['artists']:
                artist_info = track['artists'][0]
                artist_name = artist_info['name']
                artist_id = artist_info['id']

                if artist_id not in artist_cache:
                    try:
                        artist_followers, genres = get_artist_details(artist_id, headers)
                    except requests.exceptions.HTTPError as err:
                        # Keep the best-effort defaults, but make the failure
                        # visible: a silent 0 is indistinguishable from real data.
                        print(f"Artist lookup failed for {artist_name}: {err}. Using defaults.")
                    else:
                        artist_cache[artist_id] = (
                            artist_followers,
                            genres[0] if genres else 'N/A',
                        )
                followers, primary_genre = artist_cache.get(artist_id, (0, 'N/A'))

            all_tracks_data.append({
                'artist': artist_name,
                'track': track_name,
                'genre': primary_genre,
                'popularity': popularity,
                'followers': followers,
                'decade': decade
            })

            print(f"Collected ({len(all_tracks_data)}/{num_tracks}): {track_name} by {artist_name} [{primary_genre}]")

        offset += limit

    return all_tracks_data

# Authenticate first: every later request needs the bearer token.
print(" Getting access token...")
token = get_access_token(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)

# Collect the rows, then persist them so the EDA cells can reuse the CSV.
print(" Fetching 700 tracks for EDA...")
spotify_data = fetch_spotify_data(access_token=token, num_tracks=700)
df = pd.DataFrame(spotify_data)
df.to_csv('spotify_data_for_eda.csv', index=False)

# One print call with a newline separator emits the same three lines of output.
print(
    "\nData collected and saved to 'spotify_data_for_eda.csv'",
    "First 5 rows of your data:",
    df.head(),
    sep="\n",
)

 Getting access token...
 Fetching 700 tracks for EDA...
Collected (1/700): Year 3000 by Jonas Brothers [N/A]
Collected (2/700): Years by Sierra Ferrell [americana]
Collected (3/700): Year Zero by Ghost [metal]
Collected (4/700): Year 3000 by Busted [N/A]
Collected (5/700): Year 3000 by Jonas Brothers [N/A]
Collected (6/700): Year of the Cat by Al Stewart [yacht rock]
Collected (7/700): COMË N GO by Yeat [rage rap]
Collected (8/700): Yearning For Your Love by The Gap Band [funk]
Collected (9/700): Years by Sierra Ferrell [americana]
Collected (10/700): Years by John Anderson [classic country]
Collected (11/700): A Year Without Rain by Selena Gomez & The Scene [N/A]
Collected (12/700): Years Go By by Bryson Tiller [trap soul]
Collected (13/700): Yearnin’ For You by 49 Winchester [red dirt]
Collected (14/700): Year of the Optimist by Beach Bunny [N/A]
Collected (15/700): YEARNING by Nilson's [N/A]
Collected (16/700): Monëy so big by Yeat [rage rap]
Collected (17/700): Year to Be Young 19

In [None]:
# Preview the first 50 rows of the collected DataFrame.
print(df.head(50))

                      artist  \
0               Taylor Swift   
1               Taylor Swift   
2               Taylor Swift   
3               Taylor Swift   
4               Taylor Swift   
5               Taylor Swift   
6                 Tate McRae   
7          Sabrina Carpenter   
8               Taylor Swift   
9                       Rumi   
10                   HUNTR/X   
11              Taylor Swift   
12               Olivia Dean   
13              Taylor Swift   
14                   HUNTR/X   
15                 Saja Boys   
16         Sabrina Carpenter   
17                 Saja Boys   
18            Dream Supplier   
19              Taylor Swift   
20                   HUNTR/X   
21        Tyler, The Creator   
22              Taylor Swift   
23                   HUNTR/X   
24              Taylor Swift   
25                    Eminem   
26               Linkin Park   
27              Furacão 2000   
28                      Sade   
29                  Coldplay   
30      

In [None]:
# Print the column names, non-null counts and dtypes of the DataFrame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 700 entries, 0 to 699
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   artist      700 non-null    object
 1   track       700 non-null    object
 2   genre       700 non-null    object
 3   popularity  700 non-null    int64 
 4   followers   700 non-null    int64 
 5   decade      700 non-null    object
dtypes: int64(2), object(4)
memory usage: 32.9+ KB


In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
df.describe()

Unnamed: 0,popularity,followers
count,700.0,700.0
mean,59.167143,3986602.0
std,13.21125,20104170.0
min,30.0,0.0
25%,50.0,0.0
50%,59.0,0.0
75%,67.0,0.0
max,100.0,144684800.0


In [None]:
# Report the DataFrame dimensions as (rows, columns).
print('shape:', df.shape)

shape: (700, 6)


In [None]:
# Count the missing values in each column.
df.isnull().sum()

Unnamed: 0,0
artist,0
track,0
genre,0
popularity,0
followers,0
decade,0


others


In [None]:
import requests
import base64
import json
import pandas as pd
from datetime import datetime

CLIENT_ID = ''
CLIENT_SECRET = ''

def get_access_token(client_id, client_secret, timeout=10):
    """Gets the access token from Spotify API.

    Uses the client-credentials grant with HTTP Basic authentication.

    Args:
        client_id: Spotify application client ID.
        client_secret: Spotify application client secret.
        timeout: Seconds to wait for the token endpoint before giving up.

    Returns:
        The ``access_token`` field of the JSON response.

    Raises:
        requests.exceptions.HTTPError: If the endpoint answers with an error status.
        requests.exceptions.Timeout: If no response arrives within ``timeout``.
    """
    auth_url = 'https://accounts.spotify.com/api/token'

    # HTTP Basic credentials are base64("client_id:client_secret").
    auth_header = base64.b64encode(f"{client_id}:{client_secret}".encode('utf-8')).decode('utf-8')
    headers = {
        'Authorization': f'Basic {auth_header}'
    }
    data = {
        'grant_type': 'client_credentials'
    }

    # Without an explicit timeout, requests waits indefinitely on a stalled server.
    response = requests.post(auth_url, headers=headers, data=data, timeout=timeout)

    response.raise_for_status()

    token_info = response.json()
    return token_info['access_token']

def get_artist_followers(artist_id, headers):
    """Return the total follower count reported for a single artist.

    Args:
        artist_id: Spotify ID of the artist to query.
        headers: Request headers carrying the bearer authorization.

    Raises:
        requests.exceptions.HTTPError: If the API answers with an error status.
    """
    endpoint = f"https://api.spotify.com/v1/artists/{artist_id}"
    reply = requests.get(endpoint, headers=headers)
    reply.raise_for_status()
    return reply.json()['followers']['total']

def get_decade(release_date):
    """Map a release date string to a decade label such as ``'1990s'``.

    Args:
        release_date: Date string whose first ``-``-separated field is the
            year, e.g. ``'2020-01-15'`` or ``'2020'``.

    Returns:
        The decade label (``'2020s'``), or ``None`` when ``release_date`` is
        missing or does not start with an integer year.
    """
    try:
        # Handle different date formats (e.g., '2020-01-15', '2020')
        year = int(release_date.split('-')[0])
        decade = (year // 10) * 10
        return f"{decade}s"
    except (ValueError, IndexError, AttributeError):
        # AttributeError covers a missing (None) or non-string release date.
        return None


def fetch_spotify_data(access_token, num_tracks=700):
    """Collect up to ``num_tracks`` track rows from Spotify's search endpoint.

    Each request searches ``year:<search_year>`` with a page size of 50. The
    year is derived from the number of rows collected so far, counting back
    from the current year and wrapping before 1960. For each track the first
    listed artist is looked up for its follower count.

    Args:
        access_token: Bearer token for the Spotify Web API.
        num_tracks: Maximum number of rows to collect.

    Returns:
        A list of dicts with keys ``artist``, ``track``, ``popularity``,
        ``followers`` and ``decade``. The list can be shorter than
        ``num_tracks`` when the search request keeps failing or returns
        nothing.
    """
    search_url = 'https://api.spotify.com/v1/search'
    headers = {
        'Authorization': f'Bearer {access_token}'
    }

    all_tracks_data = []
    # artist_id -> followers; only successful lookups are stored, so each
    # artist costs at most one successful request.
    follower_cache = {}
    limit = 50
    offset = 0
    max_consecutive_errors = 5
    consecutive_errors = 0

    # Loop-invariant: the year window does not change while collecting.
    current_year = datetime.now().year
    year_span = current_year - 1960

    while len(all_tracks_data) < num_tracks:
        if offset > 1000:
            offset = 0  # reset

        search_year = current_year - (len(all_tracks_data) % year_span)

        params = {
            'q': f'year:{search_year}',
            'type': 'track',
            'market': 'US',
            'limit': limit,
            'offset': offset
        }

        response = requests.get(search_url, headers=headers, params=params)

        if response.status_code != 200:
            consecutive_errors += 1
            print(f"Error fetching data: {response.status_code}. Skipping this request.")
            if consecutive_errors >= max_consecutive_errors:
                # A persistent failure would otherwise spin forever.
                print(f"{consecutive_errors} consecutive search errors; stopping with {len(all_tracks_data)} tracks.")
                break
            offset += limit  # Move to the next page even if there's an error
            continue
        consecutive_errors = 0

        search_results = response.json()
        tracks = search_results.get('tracks', {}).get('items', [])

        if not tracks:
            if offset == 0:
                # The year depends only on the row count, which has not changed,
                # so the same empty first page would be requested forever.
                print(f"No results for year:{search_year}; stopping with {len(all_tracks_data)} tracks.")
                break
            offset = 0
            continue  # retry the same year from its first page

        for track in tracks:
            if len(all_tracks_data) >= num_tracks:
                break

            track_name = track['name']
            popularity = track['popularity']
            album_info = track.get('album', {})
            release_date = album_info.get('release_date')
            # A track without a release date gets no decade instead of crashing.
            decade = get_decade(release_date) if release_date else None

            artist_name = "N/A"
            followers = 0

            if track['artists']:
                artist_info = track['artists'][0]
                artist_name = artist_info['name']
                artist_id = artist_info['id']

                if artist_id not in follower_cache:
                    try:
                        follower_cache[artist_id] = get_artist_followers(artist_id, headers)
                    except requests.exceptions.HTTPError as err:
                        # Keep the best-effort default of 0, but make the failure
                        # visible: a silent 0 is indistinguishable from real data.
                        print(f"Artist lookup failed for {artist_name}: {err}. Using 0 followers.")
                followers = follower_cache.get(artist_id, 0)

            all_tracks_data.append({
                'artist': artist_name,
                'track': track_name,
                'popularity': popularity,
                'followers': followers,
                'decade': decade
            })

            print(f"Collected ({len(all_tracks_data)}/{num_tracks}): {track_name} by {artist_name}")

        offset += limit

    return all_tracks_data

if __name__ == "__main__":
    # Credentials that are empty or still the template placeholders cannot
    # authenticate, so report that up front instead of sending a doomed request.
    credentials_missing = (
        not CLIENT_ID
        or not CLIENT_SECRET
        or CLIENT_ID == "YOUR_CLIENT_ID"
        or CLIENT_SECRET == "YOUR_CLIENT_SECRET"
    )
    if credentials_missing:
        print(" Error: Please replace 'YOUR_CLIENT_ID' and 'YOUR_CLIENT_SECRET' with your actual Spotify credentials.")
    else:
        try:
            print(" Getting access token...")
            token = get_access_token(CLIENT_ID, CLIENT_SECRET)

            print(" Fetching 700 tracks for EDA...")
            spotify_data = fetch_spotify_data(token, num_tracks=700)

            df = pd.DataFrame(spotify_data)

            # Persist the rows so later analysis can reuse them without re-fetching.
            df.to_csv('spotify_data_for_eda.csv', index=False)

            print("\nData collected and saved to 'spotify_data_for_eda.csv'")
            print("First 5 rows of your data:")
            print(df.head())

        except requests.exceptions.HTTPError as e:
            print(f"An HTTP error occurred: {e}")
        except Exception as e:
            # Top-level boundary: report anything unexpected instead of a raw traceback.
            print(f"An unexpected error occurred: {e}")

 Getting access token...
 Fetching 700 tracks for EDA...
Collected (1/700): The Fate of Ophelia by Taylor Swift
Collected (2/700): Honey by Taylor Swift
Collected (3/700): Opalite by Taylor Swift
Collected (4/700): Actually Romantic by Taylor Swift
Collected (5/700): Wi$h Li$t by Taylor Swift
Collected (6/700): Wood by Taylor Swift
Collected (7/700): TIT FOR TAT by Tate McRae
Collected (8/700): Tears by Sabrina Carpenter
Collected (9/700): CANCELLED! by Taylor Swift
Collected (10/700): Free by Rumi
Collected (11/700): How It’s Done by HUNTR/X
Collected (12/700): Eldest Daughter by Taylor Swift
Collected (13/700): Man I Need by Olivia Dean
Collected (14/700): Father Figure by Taylor Swift
Collected (15/700): Takedown by HUNTR/X
Collected (16/700): Soda Pop by Saja Boys
Collected (17/700): When Did You Get Hot? by Sabrina Carpenter
Collected (18/700): Your Idol by Saja Boys
Collected (19/700): Clean Baby Sleep White Noise (Loopable no fade) by Dream Supplier
Collected (20/700): Elizabeth