In [78]:
import requests
import base64

In [79]:
'''add id client and client secret
(get from settings account in spotify developer dashboard after creating an app) '''

client_id = 'ebcd74a7e3cf4c7e85a95e0f1eb2141c'
client_secret = 'bbddcf5be9964144a2bb1404a3c14d23'

These credentials are obtained when a developer registers their application with Spotify’s developer dashboard. The Client ID identifies the application, while the Client Secret is a confidential key used for authentication.

In [80]:
# Base64 encode client ID and client secret
client_credentials = f'{client_id}:{client_secret}'
client_credentials_base64 = base64.b64encode(client_credentials.encode())

The client ID and secret are combined in the client_credentials variable, separated by a colon (:). Then, this string is encoded using Base64 encoding to create a secure representation of the credentials. We then proceed to request an access token from the Spotify API.

In [81]:
# request access token

token_url = 'https://accounts.spotify.com/api/token'
headers = {
    'Authorization' : f'Basic {client_credentials_base64.decode()}'
}

data = {
    'grant_type' : 'client_credentials'
}

response = requests.post(token_url, data=data, headers=headers)

It sends a POST request to the token_url (https://accounts.spotify.com/api/token) with the client credentials in the Authorization header, which is required for client authentication. The grant_type parameter is set to ‘client_credentials’ to indicate that the application is requesting an access token for the client credentials flow.

In [82]:
# obtain the access token

if response.status_code == 200:
  access_token = response.json()['access_token']
  print('Access token obtained successfully')
else:
  print('Error obtaining acces token')
  exit()

Access token obtained successfully


With the access token, the application can now make authorized requests to retrieve music data, such as tracks, albums, artists, and user information, which is fundamental for building a music recommendation system using the Spotify API and Python

In [83]:
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyOAuth

def get_trending_playlist_data(playlist_id, access_token):
    #setup spotipy with the access token
    sp = spotipy.Spotify(auth=access_token)

    #get the playlist track
    playlist_tracks = sp.playlist_tracks(playlist_id, fields='items(track(id, name, artists, album(id, name)))')

    #extract relevant information and store in a list of dictionaries
    music_data = []
    for track_info in playlist_tracks['items']:
        track = track_info['track']
        track_name = track['name']
        artists = ', '.join([artist['name'] for artist in track['artists']])
        album_name = track['album']['name']
        album_id = track['album']['id']
        track_id = track['id']
    
    #get audio features for track
    audio_features = sp.audio_features(track_id)[0] if track_id != 'Not available' else None

    #get release date of the album
    try:
        album_info = sp.album(album_id) if album_id != 'Not available' else None
        release_date = album_info['release_date'] if album_info else None
    except:
        release_date = None
    
    #get popularity of the track
    try:
        track_info = sp.track(track_id) if track_id != "Not available" else None
        popularity = track_info['popularity'] if track_info else None
    except:
        popularity = None
    
    #add additional track information to the track data
    track_data = {
        'Track Name': track_name,
        'Artists': artists,
        'Album Name' : album_name,
        'Album ID': album_id,
        'Track ID': track_id,
        'Popularity': popularity,
        'Release Date': release_date,
        'Duration (ms)': audio_features['duration_ms'] if audio_features else None,
        'Explicit': track_info.get('explicit', None),
        'External URLs' : track_info.get('external_url', {}).get('spotify', None),
        'Danceability' : audio_features['danceability'] if audio_features else None,
        'Energy': audio_features['energy'] if audio_features else None, 
        'Key': audio_features['key'] if audio_features else None,
        'Loudness': audio_features['loudness'] if audio_features else None,
        'Mode': audio_features['mode'] if audio_features else None,
        'Speechiness': audio_features['speechiness'] if audio_features else None,
        'Acousticness' : audio_features['acousticness'] if audio_features else None,
        'Instrumentalness' : audio_features['instrumentalness'] if audio_features else None,
        'Liveness': audio_features['liveness'] if audio_features else None,
        'Valence': audio_features['valence'] if audio_features else None,
        'Tempo': audio_features['tempo'] if audio_features else None
    }

    music_data.append(track_data)

    # create pandas dataframe from the list of dictionary
    df = pd.DataFrame(music_data)
    return df

In [84]:
playlist_id = '37i9dQZF1DX76Wlfdnj7AP'

#call function to get the music data from playlist

music_df = get_trending_playlist_data(playlist_id, access_token)

In [85]:
print(music_df)

         Track Name               Artists        Album Name  \
0  PLAYA DEL INGLÉS  Quevedo, Myke Towers  PLAYA DEL INGLÉS   

                 Album ID                Track ID  Popularity Release Date  \
0  1MgW79L1nRyxWHOCu4nxR9  2t6IxTASaSFkZEt61tQ6W6          75   2022-12-15   

   Duration (ms)  Explicit External URLs  ...  Energy  Key  Loudness  Mode  \
0         237525     False          None  ...   0.736    7    -3.254     0   

   Speechiness  Acousticness  Instrumentalness  Liveness  Valence    Tempo  
0       0.0469        0.0822                 0     0.109    0.656  112.993  

[1 rows x 21 columns]


In [86]:
print(music_df.isnull().sum())

Track Name          0
Artists             0
Album Name          0
Album ID            0
Track ID            0
Popularity          0
Release Date        0
Duration (ms)       0
Explicit            0
External URLs       1
Danceability        0
Energy              0
Key                 0
Loudness            0
Mode                0
Speechiness         0
Acousticness        0
Instrumentalness    0
Liveness            0
Valence             0
Tempo               0
dtype: int64


In [102]:
#build the music recommendation system using Python.

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from sklearn.metrics.pairwise import cosine_similarity

data = music_df

In [88]:
# function to calculate weighted popularity scores based on release date
def calculate_weighted_popularity(release_date):
    #convert the release date to datetime object
    release_date = datetime.strptime(release_date, '%Y-%m-%d')

    #calculate the time span between release date and today's date
    time_span = datetime.now() - release_date

    #calculate the weighted popularity score based on time span
    weight = 1 / (time_span.days + 1)
    return weight

The above function takes the release date of a music track as input, which is provided in the format ‘YYYY-MM-DD’. It then uses the datetime.strptime function from the Python datetime module to convert the release date string to a datetime object. This conversion allows us to perform arithmetic operations with dates. The function then calculates the time span between the release date of the track and the current date (today’s date) using datetime.now() – release_date. This results in a timedelta object representing the time difference between the two dates.

In [89]:
# Normalize the music features using Min-Max scaling
scaler = MinMaxScaler()

music_features = music_df[['Danceability', 'Energy', 'Key', 
                           'Loudness', 'Mode', 'Speechiness', 'Acousticness',
                           'Instrumentalness', 'Liveness', 'Valence', 'Tempo']].values
music_features_scaled = scaler.fit_transform(music_features)

We will create a hybrid recommendation system for music recommendations. The first approach will be based on recommending music based on music audio features, and the second approach will be based on recommending music based on weighted popularity.

In [90]:
def content_based_recommendations(input_song_name, num_recommendations=5):
    if input_song_name not in music_df['Track Name'].values:
        print(f"'{input_song_name}' not found in the dataset. Please enter a valid song name")
        return
    
    #get the index of the input song in the music dataframe
    input_song_index = music_df[music_df['Track Name'] == input_song_name].index[0]

    #calculate the similarity scores based on music features
    similarity_scores = cosine_similarity([music_features[input_song_index]], music_features_scaled)

    #get the indices of the most similar songs
    similar_song_indices = similarity_scores.argsort()[0][::-1][1:num_recommendations + 1]
    
    # Get the names of the most similar songs based on content-based filtering
    content_based_recommendations = music_df.iloc[similar_song_indices][['Track Name', 'Artists', 'Album Name', 'Release Date', 'Popularity']]

    return content_based_recommendations


In [91]:
# a function to get hybrid recommendations based on weighted popularity
def hybrid_recommendation(input_song_name, num_recommendations=5, alpha=0.5):
    if input_song_name not in music_df["Track Name"].values:
        print(f"'{input_song_name}' not found in the dataset. Please enter a valid song")
        return
    
    #get content-based recommendation
    content_based_recs = content_based_recommendations(input_song_name, num_recommendations)

    #get the popularity score of the input song
    popularity_score = music_df.loc[music_df['Track Name'] == input_song_name, 'Popularity'].values[0]

    #calculate the weighted popularity score
    weighted_popularity_score = popularity_score * calculate_weighted_popularity(music_df.loc[music_df['Track Name'] == input_song_name, 'Release Date'].values[0])
    
    #combine content-based and popularity-based recommendation based on weighted popularity
    hybrid_recommendation = content_based_recs
    hybrid_recommendation = hybrid_recommendation.append(
        {
            'Track Name': input_song_name,
            'Artists' : music_df.loc[music_df['Track Name'] == input_song_name, 'Artists'].values[0],
            'Album Name' : music_df.loc[music_df['Track Name'] == input_song_name, 'Album Name'].values[0],
            'Release Date' : music_df.loc[music_df['Track Name'] == input_song_name, 'Release Date'].values[0],
            'Popularity': weighted_popularity_score
        }, ignore_index = True
    )
    
    #sort the hybrid recommendation based on weighted popularity score
    hybrid_recommendation = hybrid_recommendation.sort_values(by='Popularity', ascending=False)

    #remove the input song from the recommendation
    hybrid_recommendation = hybrid_recommendation[hybrid_recommendation['Track Name'] != input_song_name]

    return hybrid_recommendation

The hybrid approach aims to provide more personalized and relevant recommendations by considering both the content similarity of songs and their weighted popularity. The function takes input_song_name as the input, representing the name of the song for which recommendations are to be generated. The function first calls the content_based_recommendations function to get content-based recommendations for the input song. The num_recommendations parameter determines the number of content-based recommendations to be retrieved.

In [100]:
#Test

input_song_name = "Quevedo"
recommendation = hybrid_recommendation(input_song_name, num_recommendations=5)
print(f"Hybrid recommended songs for '{input_song_name}':")
print(recommendation)

'Quevedo' not found in the dataset. Please enter a valid song
Hybrid recommended songs for 'Quevedo':
None
