# Spotify Data Collection API

In [3]:
# Import necessary libraries
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import time

In [4]:
# Perform authentication (ADD YOUR CREDENTIALS HERE)
client_id = "cbf123007b33453cb4218e9a25e13f4c"
client_secret = "4398a3bfb2c044f595841eba30b63556"

client_credentials_manager = SpotifyClientCredentials(client_id, client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

## Utility Functions 

In [22]:
# Function to get the features of the track based on ID
def get_single_track_features(track_id, playlist_id):
    """
    Function to get the features and other information of a single track.
    """

    meta = sp.track(track_id)
    features = sp.audio_features(track_id)

    # Get the metadata of the track
    name = meta["name"]
    album = meta["album"]["name"]
    artist = meta["album"]["artists"][0]["name"]
    release_date = meta["album"]["release_date"]
    length = meta["duration_ms"]
    popularity = meta["popularity"]
    genres = meta["artists"]

    # Get the audio Features of the track
    acousticness = features[0]["acousticness"]
    key = features[0]["key"]
    danceability = features[0]["danceability"]
    energy = features[0]["energy"]
    instrumentalness = features[0]["instrumentalness"]
    liveness = features[0]["liveness"]
    loudness = features[0]["loudness"]
    speechiness = features[0]["speechiness"]
    tempo = features[0]["tempo"]
    time_signature = features[0]["time_signature"]
    mode = features[0]["mode"]
    valence = features[0]["valence"]
    # Tracks with high valence sound more positive (e.g. happy, cheerful, euphoric), while tracks with low valence sound more negative (e.g. sad, depressed, angry).

    return [
        name,
        album,
        artist,
        release_date,
        length,
        popularity,
        key,
        danceability,
        acousticness,
        danceability,
        energy,
        instrumentalness,
        liveness,
        loudness,
        speechiness,
        tempo,
        valence,
        time_signature,
        mode,
        playlist_id,
    ]

In [23]:
# Function to create a dataframe of track information
def create_track_features_dataframe(tracks: list, playlist_id):
    """Function to generate a dataframe of song features from a list of songs

    Args:
        tracks (list): List of songs to make a dataframe of

    Returns:
        pandas.DataFrame: Dataframe of song features
    """

    # Iterate through the list of tracks
    all_track_data = []
    for track in tracks:
        time.sleep(0.5)
        track_data = get_single_track_features(track, playlist_id)
        all_track_data.append(track_data)

    return pd.DataFrame(
        all_track_data,
        columns=[
            "name",
            "album",
            "artist",
            "release_date",
            "length",
            "popularity",
            "key",
            "danceability",
            "acousticness",
            "danceability",
            "energy",
            "instrumentalness",
            "liveness",
            "loudness",
            "speechiness",
            "tempo",
            "valence",
            "time_signature",
            "mode",
            "playlist_id",
        ],
    )

In [24]:
# Function to get the tracks from a playlist
def get_playlist_tracks(playlist_id):
    """Function to get the tracks from a playlist

    Args:
        playlist_id (str): ID of the playlist

    Returns:
        list: List of track IDs
    """

    playlist_tracks = sp.playlist(playlist_id, additional_types=("track",))

    track_ids = []
    for item in playlist_tracks["tracks"]["items"]:
        track = item["track"]
        track_ids.append(track["id"])

    return track_ids

### Perform Data Collection using playlists from Spotify. 

In [25]:
# Import the playlist data
playlist_df = pd.read_csv("data/decade_wise_playlist.csv")
playlist_df.shape

(26, 5)

We are looking at 26 different playlists from various different decades. 

In [26]:
playlist_df.head()

Unnamed: 0,playlist_name,playlist_id,decade,user_id,language
0,60s Bollywood Hits,6mx7rVYF6ed2JTMegQ8SY0,1960,12163512469,hindi
1,60s Hindi Hit Songs,3dKv6gpADy34FI6rcP7DAT,1960,nq6nh207vo927cq4wpquchuor,hindi
2,70s Bollywood Hits,5p88YPX4eSead8hq0OCKAp,1970,12163512469,hindi
3,70s Evergreen Hindi,1arIwnl806bdxvrgTEuvLw,1970,juhere,hindi
4,All out 70s,37i9dQZF1DX9kVlnA5Si6s,1970,spotify,hindi


In [27]:
# Make a list of playlist IDs
playlist_id_list = playlist_df["playlist_id"].to_list()
playlist_id_list

['6mx7rVYF6ed2JTMegQ8SY0',
 '3dKv6gpADy34FI6rcP7DAT',
 '5p88YPX4eSead8hq0OCKAp',
 '1arIwnl806bdxvrgTEuvLw',
 '37i9dQZF1DX9kVlnA5Si6s',
 '37i9dQZF1DX5rOEFf3Iycd',
 '37i9dQZF1DXa1eCiO3E6Rr',
 '2o7YgEXIRPoQGHyFTuoJCS',
 '4Ja9naAu5wU8LC4xanc0pK',
 '37i9dQZF1DXa2huSXaKVkW',
 '70VQP8EHThS1uGVRq0qN9Z',
 '34nx68uXAiYvsrDdD1giJa',
 '1zpooNgRto3yMMDslyB1tn',
 '2poyZdIZgxPXymgKgH45Mu',
 'qi4jxs8hlj44noibkzc8p7epu',
 '37i9dQZF1DWZNJXX2UeBij',
 '3kRi9OHXNYJeSxYjqK2qfJ',
 '2Lw2dIL7pg5haDmtUSjzPq',
 '2Kj5NUtVetggUDHPIGC9U7',
 '4z6K8rFn6rjR5gddqWs0D2',
 '1pZmVlfPBUNMS19bBy3Qap',
 '4wJLkwU84uscxJ7SOlmUX1',
 '37i9dQZF1DX7BXvxNJkwzB',
 '37i9dQZF1DX0XUfTFmNBRM',
 '37i9dQZF1DX5q67ZpWyRrZ',
 '4PS7niVvIybq4KFGLDcy6D']

In [50]:
%%time

# Create a dataframe with the first playlist ID \
print("Started the function for the first playlist..")
song_ids = get_playlist_tracks(playlist_id_list[0])
complete_playlist_data = create_track_features_dataframe(song_ids, playlist_id_list[0])


# Iterate through the playlist IDs
for playlist_id in playlist_id_list[1:]:
    print(f"Collecting data for playlist - {playlist_id}")

    try:
        # Get the tracks for a particular playlist
        song_ids = get_playlist_tracks(playlist_id)

        # Create a dataset for those tracks
        single_playlist_data = create_track_features_dataframe(song_ids, playlist_id)

        print(f"Collected {single_playlist_data.shape[0]} songs for this playlist")

        # Append to the existing data
        complete_playlist_data = pd.concat(
            [complete_playlist_data, single_playlist_data], axis=0
        )

    except Exception as e:
        print("Encountered an error so skipping.")
        print(f"Error message: {e}")
        continue

print(f"Total songs collected - {complete_playlist_data.shape[0]}")

Started the function for the first playlist..
Collecting data for playlist - 3dKv6gpADy34FI6rcP7DAT
Collected 100 songs for this playlist
Collecting data for playlist - 5p88YPX4eSead8hq0OCKAp
Collected 57 songs for this playlist
Collecting data for playlist - 1arIwnl806bdxvrgTEuvLw
Collected 45 songs for this playlist
Collecting data for playlist - 37i9dQZF1DX9kVlnA5Si6s
Collected 100 songs for this playlist
Collecting data for playlist - 37i9dQZF1DX5rOEFf3Iycd
Collected 100 songs for this playlist
Collecting data for playlist - 37i9dQZF1DXa1eCiO3E6Rr
Collected 100 songs for this playlist
Collecting data for playlist - 2o7YgEXIRPoQGHyFTuoJCS
Collected 90 songs for this playlist
Collecting data for playlist - 4Ja9naAu5wU8LC4xanc0pK
Collected 42 songs for this playlist
Collecting data for playlist - 37i9dQZF1DXa2huSXaKVkW
Collected 100 songs for this playlist
Collecting data for playlist - 70VQP8EHThS1uGVRq0qN9Z
Collected 22 songs for this playlist
Collecting data for playlist - 34nx68uX

HTTP Error for GET to https://api.spotify.com/v1/playlists/qi4jxs8hlj44noibkzc8p7epu with Params: {'fields': None, 'market': None, 'additional_types': 'track'} returned 404 due to Invalid playlist Id


Collected 100 songs for this playlist
Collecting data for playlist - qi4jxs8hlj44noibkzc8p7epu
Encountered an error so skipping.
Error message: http status: 404, code:-1 - https://api.spotify.com/v1/playlists/qi4jxs8hlj44noibkzc8p7epu?additional_types=track:
 Invalid playlist Id, reason: None
Collecting data for playlist - 37i9dQZF1DWZNJXX2UeBij
Collected 100 songs for this playlist
Collecting data for playlist - 3kRi9OHXNYJeSxYjqK2qfJ
Collected 100 songs for this playlist
Collecting data for playlist - 2Lw2dIL7pg5haDmtUSjzPq
Collected 100 songs for this playlist
Collecting data for playlist - 2Kj5NUtVetggUDHPIGC9U7
Collected 67 songs for this playlist
Collecting data for playlist - 4z6K8rFn6rjR5gddqWs0D2
Collected 100 songs for this playlist
Collecting data for playlist - 1pZmVlfPBUNMS19bBy3Qap
Collected 100 songs for this playlist
Collecting data for playlist - 4wJLkwU84uscxJ7SOlmUX1
Collected 75 songs for this playlist
Collecting data for playlist - 37i9dQZF1DX7BXvxNJkwzB
Collected 

In [52]:
complete_playlist_data.to_csv("complete_playlist_data.csv", index=False)

### Process the Complete songs data

In [5]:
song_data = pd.read_csv("../data/complete_playlist_data.csv")
song_data.shape

(1903, 20)

In [6]:
# Convert song data dates to datetime
song_data

Unnamed: 0,name,album,artist,release_date,length,popularity,key,danceability,acousticness,danceability.1,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,time_signature,mode,playlist_id
0,Ehsaan Tera Hoga Mujh Par,Junglee (Original Motion Picture Soundtrack),Various Artists,2014-01-28,208986,11,4,0.380,0.985,0.380,0.389,0.000602,0.151,-7.241,0.0419,175.505,0.490,4,1,6mx7rVYF6ed2JTMegQ8SY0
1,Chahe Mujhe Koi Junglee Kahe,Junglee (Original Motion Picture Soundtrack),Various Artists,2014-01-28,213720,12,3,0.485,0.925,0.485,0.847,0.000000,0.110,-6.312,0.0736,147.565,0.803,4,1,6mx7rVYF6ed2JTMegQ8SY0
2,Mera Saaya Saath Hoga,Mera Saaya (Original Motion Picture Soundtrack),Madan Mohan,1966-12-01,363200,12,2,0.557,0.982,0.557,0.422,0.100000,0.229,-10.230,0.0285,95.235,0.583,4,1,6mx7rVYF6ed2JTMegQ8SY0
3,Jhoomka Gira Re,Mera Saaya (Original Motion Picture Soundtrack),Madan Mohan,1966-12-01,296825,13,8,0.598,0.901,0.598,0.787,0.000191,0.812,-4.920,0.1600,93.159,0.964,4,1,6mx7rVYF6ed2JTMegQ8SY0
4,Lag Ja Gale Se Phir,Woh Kaun Thi? (Original Motion Picture Soundtr...,Madan Mohan,2013-11-01,257613,37,8,0.312,0.796,0.312,0.452,0.000874,0.286,-11.872,0.0391,132.485,0.391,3,1,6mx7rVYF6ed2JTMegQ8SY0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1898,Pyar Do Pyar Lo - Janbaaz / Soundtrack Version,Janbaaz,Various Artists,1986-01-01,332293,34,11,0.460,0.357,0.460,0.874,0.000805,0.146,-8.898,0.2980,127.471,0.646,4,0,4PS7niVvIybq4KFGLDcy6D
1899,Dance Dance,Kasam Paida Karnewale Ki (Original Motion Pict...,Bappi Lahiri,1984-11-23,339304,21,7,0.604,0.173,0.604,0.637,0.000370,0.115,-12.944,0.1990,165.033,0.384,4,1,4PS7niVvIybq4KFGLDcy6D
1900,Aap Ke Aa Jane Se,Khudgarz (Original Motion Picture Soundtrack),Rajesh Roshan,1987-08-19,432293,45,1,0.648,0.170,0.648,0.569,0.000089,0.159,-9.185,0.1500,123.614,0.678,4,0,4PS7niVvIybq4KFGLDcy6D
1901,Yeh Mera Dil Yaar Ka Diwana,Don (Original Motion Picture Soundtrack),Kalyanji-Anandji,1977-12-30,253773,34,7,0.497,0.830,0.497,0.476,0.018400,0.332,-11.248,0.0624,141.460,0.521,4,0,4PS7niVvIybq4KFGLDcy6D


In [11]:
# Add year to the data
song_data["year"] = pd.to_datetime(
    song_data["release_date"], dayfirst=True, errors="coerce"
).dt.year

# Drop the null values
song_data.dropna(inplace=True)

# Add the decade to the data
song_data["decade"] = song_data["year"].apply(lambda x: x // 10 * 10)

In [15]:
# Save the data with the new values
song_data.to_csv("../data/complete_playlist_data.csv", index=False)

In [14]:
# Create a dataframe with the decade wise data
decade_wise_data = (
    song_data.groupby(["decade"])
    .agg(
        {
            "popularity": "mean",
            "danceability": "mean",
            "acousticness": "mean",
            "energy": "mean",
            "loudness": "mean",
            "speechiness": "mean",
            "valence": "mean",
            "tempo": "mean",
            "time_signature": "mean",
            "mode": "mean",
        }
    )
    .reset_index()
)
decade_wise_data

Unnamed: 0,decade,popularity,danceability,acousticness,energy,loudness,speechiness,valence,tempo,time_signature,mode
0,1940.0,26.5,0.4295,0.9245,0.3945,-12.281,0.05325,0.652,90.0525,4.0,0.5
1,1950.0,19.375,0.446,0.933,0.466375,-10.853375,0.0554,0.68325,105.409375,3.75,0.625
2,1960.0,24.684211,0.470332,0.895984,0.472692,-10.237,0.070979,0.653656,111.418239,3.797571,0.688259
3,1970.0,21.222892,0.448536,0.8,0.519331,-9.829169,0.10099,0.581602,110.961289,3.795181,0.644578
4,1980.0,37.927273,0.503118,0.649983,0.56547,-9.837691,0.069704,0.621176,115.179477,3.895455,0.572727
5,1990.0,44.44523,0.57664,0.481292,0.595647,-11.064389,0.058888,0.680965,117.650095,3.911661,0.4947
6,2000.0,49.028698,0.649448,0.305977,0.681386,-7.980609,0.072127,0.632962,115.015132,3.969095,0.615894
7,2010.0,36.208219,0.611874,0.396002,0.666252,-7.487499,0.070099,0.621488,116.847077,3.882192,0.594521
8,2020.0,58.228758,0.626209,0.482247,0.555,-7.958405,0.065522,0.455584,114.275778,3.882353,0.705882
