Spotify app redirect URI: http://localhost:8888/callback

### Key Components of Data Science

- **Data Collection**: Gathering information from various sources.
- **Data Preprocessing**: Cleaning and organizing raw data into a suitable format for analysis.
- **Exploratory Data Analysis (EDA)**: Analyzing the data to summarize main characteristics and visualize trends.
- **Machine Learning**: Building models to make predictions or understand patterns.
- **Data Visualization**: Presenting data in graphical form to help stakeholders understand the findings.

In [23]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
import requests
import os

client_id = os.getenv('SPOTIFY_CLIENT_ID')
client_secret = os.getenv('SPOTIFY_CLIENT_SECRET')

In [31]:
import requests
import base64

def get_spotify_token(client_id, client_secret):
    client_creds = f"{client_id}:{client_secret}"
    # client_creds_b64 = base64.b64encode(client_creds.encode()).decode()  # Ensure this is decoded back to string
    client_creds_b64 = "ZWUxYzg1OWZhMTgwNDIyMzhiODIwM2IxMWNlOTMzMjU6ZDUwMjgxOTM2OWJhNDVjYWExYWJlYWRmYjIxYTc3YWU="   # TODO

    url = 'https://accounts.spotify.com/api/token'
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Authorization': f'Basic {client_creds_b64}'
    }
    payload = {'grant_type': 'client_credentials'}

    response = requests.post(url, headers=headers, data=payload)
    if response.status_code != 200:
        print("Failed to fetch token:")
        print("Status Code:", response.status_code)
        print("Response:", response.text)
    else:
        print(response.json())
    token = response.json().get('access_token')
    return token

token = get_spotify_token(client_id, client_secret)

{'access_token': 'BQAkck3Sv219zqQEXlA1D6Jf-bo2M7eztDmoV-LknvsRnxfry0h0CE6IStcInWhcuCf_QoS6UpastRFir6iW4_kqbKthq6xZuULyfMmsCRY2OaDbToQ', 'token_type': 'Bearer', 'expires_in': 3600}


In [32]:
def fetch_tracks(token, search_query):
    url = f"https://api.spotify.com/v1/search?q={search_query}&type=track&limit=20"
    headers = {'Authorization': f'Bearer {token}'}
    response = requests.get(url, headers=headers)
    tracks = response.json()
    return tracks

tracks = fetch_tracks(token, 'Beatles')
print(tracks)

{'tracks': {'href': 'https://api.spotify.com/v1/search?query=Beatles&type=track&offset=0&limit=20', 'items': [{'album': {'album_type': 'album', 'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/3WrFJ7ztbogyGnTHbHJFl2'}, 'href': 'https://api.spotify.com/v1/artists/3WrFJ7ztbogyGnTHbHJFl2', 'id': '3WrFJ7ztbogyGnTHbHJFl2', 'name': 'The Beatles', 'type': 'artist', 'uri': 'spotify:artist:3WrFJ7ztbogyGnTHbHJFl2'}], 'available_markets': ['AR', 'AU', 'AT', 'BE', 'BO', 'BR', 'BG', 'CA', 'CL', 'CO', 'CR', 'CY', 'CZ', 'DK', 'DO', 'DE', 'EC', 'EE', 'SV', 'FI', 'FR', 'GR', 'GT', 'HN', 'HK', 'HU', 'IS', 'IE', 'IT', 'LV', 'LT', 'LU', 'MY', 'MT', 'MX', 'NL', 'NZ', 'NI', 'NO', 'PA', 'PY', 'PE', 'PH', 'PL', 'PT', 'SG', 'SK', 'ES', 'SE', 'CH', 'TW', 'TR', 'UY', 'US', 'GB', 'AD', 'LI', 'MC', 'ID', 'JP', 'TH', 'VN', 'RO', 'IL', 'ZA', 'SA', 'AE', 'BH', 'QA', 'OM', 'KW', 'EG', 'MA', 'DZ', 'TN', 'LB', 'JO', 'PS', 'IN', 'BY', 'KZ', 'MD', 'UA', 'AL', 'BA', 'HR', 'ME', 'MK', 'RS', 'SI', '

In [None]:
import requests
import pandas as pd

# Function to get an access token from Spotify
def get_spotify_token(client_id, client_secret):
    url = 'https://accounts.spotify.com/api/token'
    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
    payload = {'grant_type': 'client_credentials'}
    response = requests.post(url, headers=headers, data=payload, auth=(client_id, client_secret))
    token = response.json().get('access_token')
    return token

# Function to fetch track details and audio features
def fetch_track_data(track_id, token):
    headers = {'Authorization': f'Bearer {token}'}
    track_url = f"https://api.spotify.com/v1/tracks/{track_id}"
    features_url = f"https://api.spotify.com/v1/audio-features/{track_id}"
    
    # Fetch track details
    track_response = requests.get(track_url, headers=headers)
    track_details = track_response.json()
    
    # Fetch audio features
    features_response = requests.get(features_url, headers=headers)
    features_details = features_response.json()
    
    # Combine both responses
    track_data = {
        'track_id': track_id,
        'artist_name': track_details.get('artists', [{}])[0].get('name', ''),
        'track_name': track_details.get('name', ''),
        'album_name': track_details.get('album', {}).get('name', ''),
        'acousticness': features_details.get('acousticness', 0),
        'danceability': features_details.get('danceability', 0),
        'energy': features_details.get('energy', 0),
        'instrumentalness': features_details.get('instrumentalness', 0),
        'liveness': features_details.get('liveness', 0),
        'loudness': features_details.get('loudness', 0),
        'speechiness': features_details.get('speechiness', 0),
        'tempo': features_details.get('tempo', 0),
        'valence': features_details.get('valence', 0)
    }
    return track_data

# Main script to process playlists and build the dataset
def build_dataset(client_id, client_secret, playlist_data, max_songs=5000):
    token = get_spotify_token(client_id, client_secret)
    unique_tracks = set()
    data = []

    # Iterate through playlists in the dataset
    for playlist in playlist_data:
        if len(data) >= max_songs:
            break
        for track in playlist['tracks']:
            track_id = track['track_uri'].split(':')[-1]
            if track_id not in unique_tracks:
                unique_tracks.add(track_id)
                track_data = fetch_track_data(track_id, token)
                data.append(track_data)
                if len(data) >= max_songs:
                    break

    return pd.DataFrame(data)

# Example usage
client_id = 'your_client_id_here'
client_secret = 'your_client_secret_here'
playlist_data = []  # Load your Spotify Million Playlist Dataset here

# Limit to 5000 songs and build the dataset
data = build_dataset(client_id, client_secret, playlist_data)
print(data.head())