In [51]:
import requests
import random
import string
import pandas as pd
import base64
import time

# Spotify API credentials
client_id = 'f63f501d60b148f0b4836424ff5ca45f'
client_secret = 'e885b2a05c86409b868093bb60779ed4'

# Step 1: Encode Client ID and Client Secret
def get_access_token(client_id, client_secret):
    client_creds = f"{client_id}:{client_secret}"
    client_creds_b64 = base64.b64encode(client_creds.encode())
    
    token_url = 'https://accounts.spotify.com/api/token'
    headers = {
        'Authorization': f'Basic {client_creds_b64.decode()}',
    }
    data = {
        'grant_type': 'client_credentials',
    }

    r = requests.post(token_url, headers=headers, data=data)
    if r.status_code == 200:
        token = r.json()['access_token']
        return token
    else:
        raise Exception("Failed to get access token", r.status_code, r.text)

# Get the access token
access_token = get_access_token(client_id, client_secret)

# Function to generate random search queries
def get_random_query():
    return random.choice(string.ascii_lowercase)

# Function to search for random tracks
def search_random_tracks(access_token, query, limit=10):
    url = "https://api.spotify.com/v1/search"
    headers = {
        'Authorization': f'Bearer {access_token}'
    }
    params = {
        'q': query,
        'type': 'track',
        'limit': limit
    }
    
    response = requests.get(url, headers=headers, params=params, timeout=10)
    
    if response.status_code == 200:
        return response.json()['tracks']['items']
    else:
        print(f"Error searching tracks: {response.status_code} - {response.text}")
        return None

# Function to get track metadata
def get_track_data(track_id, access_token):
    url = f"https://api.spotify.com/v1/tracks/{track_id}"
    headers = {
        'Authorization': f'Bearer {access_token}'
    }
    
    response = requests.get(url, headers=headers, timeout=10)
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error fetching track data: {response.status_code} - {response.text}")
        return None

# Function to get audio features
def get_audio_features(track_id, access_token):
    url = f"https://api.spotify.com/v1/audio-features/{track_id}"
    headers = {
        'Authorization': f'Bearer {access_token}'
    }
    
    response = requests.get(url, headers=headers, timeout=10)
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error fetching audio features: {response.status_code} - {response.text}")
        return None

# Function to handle API rate limits and retry after a delay
def handle_rate_limit(response):
    if response.status_code == 429:
        retry_after = int(response.headers.get('Retry-After', 5))
        print(f"Rate limit exceeded. Retrying after {retry_after} seconds.")
        time.sleep(retry_after)
        return True
    return False

# Function to get artist metadata (genres, popularity, followers, image)
def get_artist_metadata(artist_id, access_token):
    url = f"https://api.spotify.com/v1/artists/{artist_id}"
    headers = {
        'Authorization': f'Bearer {access_token}'
    }
    
    while True:
        response = requests.get(url, headers=headers, timeout=10)
        if response.status_code == 200:
            artist_data = response.json()
            artist_image_url = artist_data['images'][0]['url'] if artist_data['images'] else None
            return {
                'genres': artist_data.get('genres', []),
                'artist_popularity': artist_data.get('popularity'),
                'artist_followers': artist_data['followers']['total'],
                'artist_external_url': artist_data['external_urls']['spotify'],
                'artist_image_url': artist_image_url
            }
        elif handle_rate_limit(response):
            continue
        else:
            print(f"Error fetching artist metadata: {response.status_code} - {response.text}")
            return {
                'genres': [],
                'artist_popularity': None,
                'artist_followers': None,
                'artist_external_url': None,
                'artist_image_url': None
            }

# Function to combine track metadata and audio features
def get_combined_data(track, access_token):
    track_id = track['id']
    audio_features = get_audio_features(track_id, access_token)
    
    if audio_features:
        artist_id = track['artists'][0]['id']
        artist_metadata = get_artist_metadata(artist_id, access_token)
        album_image_url = track['album']['images'][0]['url'] if track['album']['images'] else None
        
        track_info = {
            'track_id': track_id,
            'artists': ', '.join([artist['name'] for artist in track['artists']]),
            'album_name': track['album']['name'],
            'release_date': track['album']['release_date'],
            'album_image_url': album_image_url,
            'track_name': track['name'],
            'popularity': track['popularity'],
            'duration_ms': track['duration_ms'],
            'explicit': track['explicit'],
            'available_markets': ', '.join(track.get('available_markets', [])),
            'track_external_url': track['external_urls']['spotify'],
            'track_genre': ', '.join(artist_metadata['genres']),
            'artist_popularity': artist_metadata['artist_popularity'],
            'artist_followers': artist_metadata['artist_followers'],
            'artist_image_url': artist_metadata['artist_image_url'],
            'artist_external_url': artist_metadata['artist_external_url']
        }

        combined_data = {**track_info, **audio_features}
        return combined_data
    else:
        return None

# Initialize a list to store DataFrames
df_list = []

# Collect all possible columns from Spotify data
def collect_random_tracks(df_list, access_token, num_tracks=50):
    total_collected = 0
    
    while total_collected < num_tracks:
        query = get_random_query()
        random_tracks = search_random_tracks(access_token, query, limit=10)
        
        if random_tracks:
            for track in random_tracks:
                combined_data = get_combined_data(track, access_token)
                if combined_data:
                    df_list.append(combined_data) # Add combined data to df_list
                    total_collected += 1
                    if total_collected % 1000 == 0:
                        print(f"{total_collected} tracks collected.")
                    if len(df_list) >= num_tracks:
                        break
        
# Example of collecting tracks
collect_random_tracks(df_list, access_token, num_tracks=250000)

# Check if df_list is storing the data
print(f"Number of DataFrames in df_list: {len(df_list)}")

Error fetching audio features: 429 - {
  "error" : {
    "status" : 429
  }
}
Error fetching audio features: 429 - {
  "error" : {
    "status" : 429
  }
}
Error fetching audio features: 429 - {
  "error" : {
    "status" : 429
  }
}
Error fetching audio features: 429 - {
  "error" : {
    "status" : 429
  }
}
Error fetching audio features: 429 - {
  "error" : {
    "status" : 429
  }
}
Error fetching audio features: 429 - {
  "error" : {
    "status" : 429
  }
}
Error fetching audio features: 429 - {
  "error" : {
    "status" : 429
  }
}
Error fetching audio features: 429 - {
  "error" : {
    "status" : 429
  }
}
Error fetching audio features: 429 - {
  "error" : {
    "status" : 429
  }
}
Error fetching audio features: 429 - {
  "error" : {
    "status" : 429
  }
}
Error fetching audio features: 429 - {
  "error" : {
    "status" : 429
  }
}
Error fetching audio features: 429 - {
  "error" : {
    "status" : 429
  }
}
Error fetching audio features: 429 - {
  "error" : {
    "status

ReadTimeout: HTTPSConnectionPool(host='api.spotify.com', port=443): Read timed out. (read timeout=10)

In [55]:
import os

file_path = r"C:\Users\keith\OneDrive\Desktop\OMSA\CSE6242\Project\random_tracks.xlsx"
random_tracks_df = pd.DataFrame(df_list)

# Display the DataFrame
display(random_tracks_df)

# Check if the file exists
if os.path.exists(file_path):
    # Import the existing file into a dataframe
    existing_data = pd.read_excel(file_path)
    
    # Append the new dataframe to the existing one
    updated_data = pd.concat([existing_data, random_tracks_df], ignore_index=True)
    
    # Export the updated dataframe to the same file
    updated_data.to_excel(file_path, index=False)
    print(f"Data has been appended and exported to {file_path}")
else:
    # If the file doesn't exist, create it with the new data
    new_data.to_excel(file_path, index=False)
    print(f"File did not exist, created {file_path} with new data")

Unnamed: 0,track_id,artists,album_name,release_date,album_image_url,track_name,popularity,duration_ms,explicit,available_markets,...,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,time_signature
0,7221xIgOnuakPdLqT0F3nP,"Post Malone, Morgan Wallen",I Had Some Help,2024-05-10,https://i.scdn.co/image/ab67616d0000b273973069...,I Had Some Help (Feat. Morgan Wallen),86,178206,True,"AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, CY...",...,0.0,0.245,0.731,127.986,audio_features,7221xIgOnuakPdLqT0F3nP,spotify:track:7221xIgOnuakPdLqT0F3nP,https://api.spotify.com/v1/tracks/7221xIgOnuak...,https://api.spotify.com/v1/audio-analysis/7221...,4
1,6me7F0aaZjwDo6RJ5MrfBD,Richy Mitch & The Coal Miners,RMCM,2017-05-17,https://i.scdn.co/image/ab67616d0000b2731cc17b...,Evergreen,85,87000,False,"AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, CY...",...,0.00416,0.109,0.504,79.109,audio_features,6me7F0aaZjwDo6RJ5MrfBD,spotify:track:6me7F0aaZjwDo6RJ5MrfBD,https://api.spotify.com/v1/tracks/6me7F0aaZjwD...,https://api.spotify.com/v1/audio-analysis/6me7...,3
2,2qSkIjg1o9h3YT9RAgYN75,Sabrina Carpenter,Espresso,2024-04-12,https://i.scdn.co/image/ab67616d0000b273659cd4...,Espresso,93,175459,True,"AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, CY...",...,6.5e-05,0.185,0.69,103.969,audio_features,2qSkIjg1o9h3YT9RAgYN75,spotify:track:2qSkIjg1o9h3YT9RAgYN75,https://api.spotify.com/v1/tracks/2qSkIjg1o9h3...,https://api.spotify.com/v1/audio-analysis/2qSk...,4
3,6gBFPUFcJLzWGx4lenP6h2,Travis Scott,Birds In The Trap Sing McKnight,2016-09-16,https://i.scdn.co/image/ab67616d0000b273f54b99...,goosebumps,86,243837,True,"AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, CY...",...,0.0,0.149,0.43,130.049,audio_features,6gBFPUFcJLzWGx4lenP6h2,spotify:track:6gBFPUFcJLzWGx4lenP6h2,https://api.spotify.com/v1/tracks/6gBFPUFcJLzW...,https://api.spotify.com/v1/audio-analysis/6gBF...,4


Data has been appended and exported to C:\Users\keith\OneDrive\Desktop\OMSA\CSE6242\Project\random_tracks.xlsx
