In [None]:
import requests
import random
import string
import pandas as pd
import base64
import time

# Spotify API credentials
client_id = 'f63f501d60b148f0b4836424ff5ca45f'
client_secret = 'e885b2a05c86409b868093bb60779ed4'

# Step 1: Encode Client ID and Client Secret
def get_access_token(client_id, client_secret):
    client_creds = f"{client_id}:{client_secret}"
    client_creds_b64 = base64.b64encode(client_creds.encode())
    
    token_url = 'https://accounts.spotify.com/api/token'
    headers = {
        'Authorization': f'Basic {client_creds_b64.decode()}',
    }
    data = {
        'grant_type': 'client_credentials',
    }

    r = requests.post(token_url, headers=headers, data=data)
    if r.status_code == 200:
        token = r.json()['access_token']
        return token
    else:
        raise Exception("Failed to get access token", r.status_code, r.text)

# Get the access token
access_token = get_access_token(client_id, client_secret)

# Function to generate random search queries
def get_random_query():
    return random.choice(string.ascii_lowercase)

# Function to search for random tracks
def search_random_tracks(access_token, query, limit=10):
    url = "https://api.spotify.com/v1/search"
    headers = {
        'Authorization': f'Bearer {access_token}'
    }
    params = {
        'q': query,
        'type': 'track',
        'limit': limit
    }
    
    response = requests.get(url, headers=headers, params=params)
    
    if response.status_code == 200:
        return response.json()['tracks']['items']
    else:
        print(f"Error searching tracks: {response.status_code} - {response.text}")
        return None

# Function to get track metadata
def get_track_data(track_id, access_token):
    url = f"https://api.spotify.com/v1/tracks/{track_id}"
    headers = {
        'Authorization': f'Bearer {access_token}'
    }
    
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error fetching track data: {response.status_code} - {response.text}")
        return None

# Function to get audio features
def get_audio_features(track_id, access_token):
    url = f"https://api.spotify.com/v1/audio-features/{track_id}"
    headers = {
        'Authorization': f'Bearer {access_token}'
    }
    
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error fetching audio features: {response.status_code} - {response.text}")
        return None

# Function to handle API rate limits and retry after a delay
def handle_rate_limit(response):
    if response.status_code == 429:
        retry_after = int(response.headers.get('Retry-After', 1))
        print(f"Rate limit exceeded. Retrying after {retry_after} seconds.")
        time.sleep(retry_after)
        return True
    return False

# Function to get artist metadata (genres, popularity, followers, image)
def get_artist_metadata(artist_id, access_token):
    url = f"https://api.spotify.com/v1/artists/{artist_id}"
    headers = {
        'Authorization': f'Bearer {access_token}'
    }
    
    while True:
        response = requests.get(url, headers=headers)
        if response.status_code == 200:
            artist_data = response.json()
            artist_image_url = artist_data['images'][0]['url'] if artist_data['images'] else None
            return {
                'genres': artist_data.get('genres', []),
                'artist_popularity': artist_data.get('popularity'),
                'artist_followers': artist_data['followers']['total'],
                'artist_external_url': artist_data['external_urls']['spotify'],
                'artist_image_url': artist_image_url
            }
        elif handle_rate_limit(response):
            continue
        else:
            print(f"Error fetching artist metadata: {response.status_code} - {response.text}")
            return {
                'genres': [],
                'artist_popularity': None,
                'artist_followers': None,
                'artist_external_url': None,
                'artist_image_url': None
            }

# Function to combine track metadata and audio features
def get_combined_data(track, access_token):
    track_id = track['id']
    audio_features = get_audio_features(track_id, access_token)
    
    if audio_features:
        artist_id = track['artists'][0]['id']
        artist_metadata = get_artist_metadata(artist_id, access_token)
        album_image_url = track['album']['images'][0]['url'] if track['album']['images'] else None
        
        track_info = {
            'track_id': track_id,
            'artists': ', '.join([artist['name'] for artist in track['artists']]),
            'album_name': track['album']['name'],
            'release_date': track['album']['release_date'],
            'album_image_url': album_image_url,
            'track_name': track['name'],
            'popularity': track['popularity'],
            'duration_ms': track['duration_ms'],
            'explicit': track['explicit'],
            'available_markets': ', '.join(track.get('available_markets', [])),
            'track_external_url': track['external_urls']['spotify'],
            'track_genre': ', '.join(artist_metadata['genres']),
            'artist_popularity': artist_metadata['artist_popularity'],
            'artist_followers': artist_metadata['artist_followers'],
            'artist_image_url': artist_metadata['artist_image_url'],
            'artist_external_url': artist_metadata['artist_external_url']
        }

        combined_data = {**track_info, **audio_features}
        return combined_data
    else:
        return None

# Initialize a list to store DataFrames
df_list = []

# Collect all possible columns from Spotify data
def collect_random_tracks(df_list, access_token, num_tracks=50):
    total_collected = 0
    
    while total_collected < num_tracks:
        query = get_random_query()
        random_tracks = search_random_tracks(access_token, query, limit=10)
        
        if random_tracks:
            for track in random_tracks:
                combined_data = get_combined_data(track, access_token)
                if combined_data:
                    df_list.append(combined_data) # Add combined data to df_list
                    total_collected += 1
                    if total_collected % 1000 == 0:
                        print(f"{total_collected} tracks collected.")
                    if len(df_list) >= num_tracks:
                        break
        
# Example of collecting tracks
collect_random_tracks(df_list, access_token, num_tracks=250000)

# Check if df_list is storing the data
print(f"Number of DataFrames in df_list: {len(df_list)}")

Error fetching audio features: 429 - {
  "error" : {
    "status" : 429
  }
}
Error fetching audio features: 429 - {
  "error" : {
    "status" : 429
  }
}
Error fetching audio features: 429 - {
  "error" : {
    "status" : 429
  }
}
Error fetching audio features: 429 - {
  "error" : {
    "status" : 429
  }
}
Error fetching audio features: 429 - {
  "error" : {
    "status" : 429
  }
}
Error fetching audio features: 429 - {
  "error" : {
    "status" : 429
  }
}
Error fetching audio features: 429 - {
  "error" : {
    "status" : 429
  }
}
Error fetching audio features: 429 - {
  "error" : {
    "status" : 429
  }
}
Error fetching audio features: 429 - {
  "error" : {
    "status" : 429
  }
}
Error fetching audio features: 429 - {
  "error" : {
    "status" : 429
  }
}
Error fetching audio features: 429 - {
  "error" : {
    "status" : 429
  }
}
Error fetching audio features: 429 - {
  "error" : {
    "status" : 429
  }
}
Error fetching audio features: 429 - {
  "error" : {
    "status

In [43]:
file_path = r"C:\Users\keith\OneDrive\Desktop\OMSA\CSE6242\Project\random_tracks.xlsx"
random_tracks_df = pd.DataFrame(df_list)

# Display the DataFrame
display(random_tracks_df)

# Check if the file exists
if os.path.exists(file_path):
    # Import the existing file into a dataframe
    existing_data = pd.read_excel(file_path)
    
    # Append the new dataframe to the existing one
    updated_data = pd.concat([existing_data, random_tracks_df], ignore_index=True)
    
    # Export the updated dataframe to the same file
    updated_data.to_excel(file_path, index=False)
    print(f"Data has been appended and exported to {file_path}")
else:
    # If the file doesn't exist, create it with the new data
    new_data.to_excel(file_path, index=False)
    print(f"File did not exist, created {file_path} with new data")

Unnamed: 0,track_id,artists,album_name,release_date,album_image_url,track_name,popularity,duration_ms,explicit,available_markets,...,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,time_signature
0,21B4gaTWnTkuSh77iWEXdS,Sabrina Carpenter,Short n' Sweet,2024-08-23,https://i.scdn.co/image/ab67616d0000b273fd8d7a...,Juno,88,223192,True,"AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, CY...",...,0.000000,0.2050,0.640,130.504,audio_features,21B4gaTWnTkuSh77iWEXdS,spotify:track:21B4gaTWnTkuSh77iWEXdS,https://api.spotify.com/v1/tracks/21B4gaTWnTku...,https://api.spotify.com/v1/audio-analysis/21B4...,4
1,0QkWikH5Z3U0f79T9iuF6c,Lady Gaga,Born This Way,2011-01-01,https://i.scdn.co/image/ab67616d0000b2734ba15b...,Judas,73,249067,False,"CA, MX, US",...,0.000016,0.3550,0.535,130.999,audio_features,0QkWikH5Z3U0f79T9iuF6c,spotify:track:0QkWikH5Z3U0f79T9iuF6c,https://api.spotify.com/v1/tracks/0QkWikH5Z3U0...,https://api.spotify.com/v1/audio-analysis/0QkW...,4
2,2mWfVxEo4xZYDaz0v7hYrN,Clairo,Charm,2024-07-12,https://i.scdn.co/image/ab67616d0000b273193c2f...,Juna,82,195110,False,"AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, CY...",...,0.098000,0.1060,0.448,141.512,audio_features,2mWfVxEo4xZYDaz0v7hYrN,spotify:track:2mWfVxEo4xZYDaz0v7hYrN,https://api.spotify.com/v1/tracks/2mWfVxEo4xZY...,https://api.spotify.com/v1/audio-analysis/2mWf...,4
3,1aXV8GrmQLvgoFtBPERP7E,Eyedress,Jealous,2019-12-06,https://i.scdn.co/image/ab67616d0000b2734de4da...,Jealous,83,122339,False,"AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, CY...",...,0.850000,0.4760,0.696,93.010,audio_features,1aXV8GrmQLvgoFtBPERP7E,spotify:track:1aXV8GrmQLvgoFtBPERP7E,https://api.spotify.com/v1/tracks/1aXV8GrmQLvg...,https://api.spotify.com/v1/audio-analysis/1aXV...,4
4,7KoyXL9zghiNpXkb5iVDyj,Junior H,Atrapado en un Sueño,2020-03-27,https://i.scdn.co/image/ab67616d0000b2739c076f...,Jueves 10,69,287929,True,"AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, CY...",...,0.000005,0.1060,0.538,155.293,audio_features,7KoyXL9zghiNpXkb5iVDyj,spotify:track:7KoyXL9zghiNpXkb5iVDyj,https://api.spotify.com/v1/tracks/7KoyXL9zghiN...,https://api.spotify.com/v1/audio-analysis/7Koy...,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
480,0WbMK4wrZ1wFSty9F7FCgu,Chappell Roan,"Good Luck, Babe!",2024-04-05,https://i.scdn.co/image/ab67616d0000b27391b4bc...,"Good Luck, Babe!",95,218424,False,"AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, CY...",...,0.000000,0.0881,0.785,116.712,audio_features,0WbMK4wrZ1wFSty9F7FCgu,spotify:track:0WbMK4wrZ1wFSty9F7FCgu,https://api.spotify.com/v1/tracks/0WbMK4wrZ1wF...,https://api.spotify.com/v1/audio-analysis/0WbM...,4
481,3FU6urUVsgXa6RBuV2PdRk,"Diplo, Morgan Wallen",Diplo Presents Thomas Wesley: Chapter 1 - Snak...,2020-05-29,https://i.scdn.co/image/ab67616d0000b273d79c10...,Heartless (feat. Morgan Wallen),80,169227,True,"AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, CY...",...,0.000000,0.0824,0.274,111.033,audio_features,3FU6urUVsgXa6RBuV2PdRk,spotify:track:3FU6urUVsgXa6RBuV2PdRk,https://api.spotify.com/v1/tracks/3FU6urUVsgXa...,https://api.spotify.com/v1/audio-analysis/3FU6...,4
482,487JCvAXVblflZyDJn3fSp,Real Boston Richey,Help Me,2024-05-31,https://i.scdn.co/image/ab67616d0000b2735844da...,Help Me,74,181166,True,"AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, CY...",...,0.000000,0.3290,0.564,78.530,audio_features,487JCvAXVblflZyDJn3fSp,spotify:track:487JCvAXVblflZyDJn3fSp,https://api.spotify.com/v1/tracks/487JCvAXVblf...,https://api.spotify.com/v1/audio-analysis/487J...,4
483,4EWCNWgDS8707fNSZ1oaA5,Kanye West,808s & Heartbreak,2008-11-24,https://i.scdn.co/image/ab67616d0000b273346d77...,Heartless,84,211000,False,"AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, CY...",...,0.000000,0.2480,0.654,87.999,audio_features,4EWCNWgDS8707fNSZ1oaA5,spotify:track:4EWCNWgDS8707fNSZ1oaA5,https://api.spotify.com/v1/tracks/4EWCNWgDS870...,https://api.spotify.com/v1/audio-analysis/4EWC...,4
