<a href="https://colab.research.google.com/github/ovietite/Music-Recommendation-System-using-Python/blob/main/Music_Recommendation_System_using_Python.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
import requests
import base64

In [17]:
CLIENT_ID = 'f3ac1efc083b41378b198d09ecc5bfc4'
CLIENT_SECRET = '5a52d054946e42bc8ff5245dbfd3fed6'

In [18]:
#USE BASE64 TO ENCODE THE VARIABLES HOLDING THE AUTHORIZATION CREDENTILAS
client_credentials = f"{CLIENT_ID}:{CLIENT_SECRET}"
client_credentials_base64 = base64.b64encode(client_credentials.encode())

In [19]:
#REQUEST THE ACCESS TOKEN
token_url = 'https://accounts.spotify.com/api/token'
headers = {'Authorization' : f'Basic {client_credentials_base64.decode()}'}
data = {'grant_type':'client_credentials'}
response = requests.post(token_url, data = data, headers=headers)

if response.status_code == 200:
  access_token = response.json()['access_token']
  print("Access token obtained successfully.")
else:
  print("Error obtaining access token.")
  exit()

Access token obtained successfully.


In [20]:

pip install spotipy



In [21]:
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyOAuth

def get_trending_playlist_data(playlist_id, access_token):
  #set up spotipy with access token
  sp = spotipy.Spotify(auth=access_token)

  #Get the tracks from the playlist
  playlist_tracks = sp.playlist_tracks(playlist_id, fields='items(track(id,name, artists, album(id, name)))')

  #Extract relevant information and store in a list of dictionaries
  music_data = []
  for track_info in playlist_tracks['items']:
    track = track_info['track']
    track_name = track['name']
    artists = ','.join([artist['name'] for artist in track['artists']])
    album_name = track['album']['name']
    album_id = track['album']['id']
    track_id = track['id']

    #get audio features for the track
    audio_features = sp.audio_features(track_id)[0] if track_id != 'Not available' else None

    #get release date of the album \
    try:
      album_info = sp.album(album_id) if album_id != 'Not available' else None
      release_date = album_info['release_date'] if album_info else None
    except:
      release_date = None

    #get the popularity of the track
    try:
      track_info = sp.track(track_id) if track_id != 'Not available' else None
      popularity = track_info['popularity'] if track_info else None
    except:
      popularity = None

    #Add additional track information to the track data
    track_data={
        'Track Name': track_name,
        'Artists' : artists,
        'Album Name' : album_name,
        'Track ID': track_id,
        'Album ID': album_id,
        'Popularity': popularity,
        'Release Date': release_date,
        'Duration (ms)':audio_features['duration_ms'] if audio_features else None,
        'Explicit': track_info.get('explicit', None),
        'External URLs': track_info.get('external_urls', {}).get('spotify', None),
        'Danceability': audio_features['danceability'] if audio_features else None,
        'Energy': audio_features['energy'] if audio_features else None,
        'Key': audio_features['key'] if audio_features else None,
        'Loudness': audio_features['loudness'] if audio_features else None,
        'Mode': audio_features['mode'] if audio_features else None,
        'Speechiness': audio_features['speechiness'] if audio_features else None,
        'Acousticness': audio_features['acousticness'] if audio_features else None,
        'Instrumentalness': audio_features['instrumentalness'] if audio_features else None,
        'Liveness': audio_features['liveness'] if audio_features else None,
        'Valence': audio_features['valence'] if audio_features else None,
        'Tempo': audio_features['tempo'] if audio_features else None,
    }
    music_data.append(track_data)



  #create a df from the list of dictionaries
  df = pd.DataFrame(music_data)

  return df





In [24]:
#we then supply the code with our target playlist
playlist_id = '37i9dQZF1EQqFPe2ux3rbj'

music_df = get_trending_playlist_data(playlist_id, access_token)

print(music_df)

                                       Track Name  \
0     UNAVAILABLE (feat. Musa Keys) - Latto Remix   
1                                           Charm   
2                                       recognize   
3                        KU LO SA - A COLORS SHOW   
4                                       Last Last   
5                                            Sere   
6        Soweto (with Don Toliver, Rema & Tempoe)   
7                                   Sip (Alcohol)   
8                                             Lie   
9                                            soso   
10                                          Ngozi   
11               Sungba (feat. Burna Boy) - Remix   
12                                           Dior   
13                                    Electricity   
14                                          Alone   
15                                         Jolene   
16                                           Baby   
17                                          Pe

In [25]:
#checking if there is null data
print(music_df.isnull().sum())

Track Name          0
Artists             0
Album Name          0
Track ID            0
Album ID            0
Popularity          0
Release Date        0
Duration (ms)       0
Explicit            0
External URLs       0
Danceability        0
Energy              0
Key                 0
Loudness            0
Mode                0
Speechiness         0
Acousticness        0
Instrumentalness    0
Liveness            0
Valence             0
Tempo               0
dtype: int64


In [26]:
#importing the neccessary libraries
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from sklearn.metrics.pairwise import cosine_similarity

data = music_df

In [27]:
# Function to calculate weighted popularity scores based on release date
def calculate_weighted_popularity(release_date):
    # Convert the release date to datetime object
    release_date = datetime.strptime(release_date, '%Y-%m-%d')

    # Calculate the time span between release date and today's date
    time_span = datetime.now() - release_date

    # Calculate the weighted popularity score based on time span (e.g., more recent releases have higher weight)
    weight = 1 / (time_span.days + 1)
    return weight

In [28]:
# Normalize the music features using Min-Max scaling
scaler = MinMaxScaler()
music_features = music_df[['Danceability', 'Energy', 'Key',
                           'Loudness', 'Mode', 'Speechiness', 'Acousticness',
                           'Instrumentalness', 'Liveness', 'Valence', 'Tempo']].values
music_features_scaled = scaler.fit_transform(music_features)

In [29]:
# a function to get content-based recommendations based on music features
def content_based_recommendations(input_song_name, num_recommendations=5):
    if input_song_name not in music_df['Track Name'].values:
        print(f"'{input_song_name}' not found in the dataset. Please enter a valid song name.")
        return

    # Get the index of the input song in the music DataFrame
    input_song_index = music_df[music_df['Track Name'] == input_song_name].index[0]

    # Calculate the similarity scores based on music features (cosine similarity)
    similarity_scores = cosine_similarity([music_features_scaled[input_song_index]], music_features_scaled)

    # Get the indices of the most similar songs
    similar_song_indices = similarity_scores.argsort()[0][::-1][1:num_recommendations + 1]

    # Get the names of the most similar songs based on content-based filtering
    content_based_recommendations = music_df.iloc[similar_song_indices][['Track Name', 'Artists', 'Album Name', 'Release Date', 'Popularity']]

    return content_based_recommendations

In [30]:
# a function to get hybrid recommendations based on weighted popularity
def hybrid_recommendations(input_song_name, num_recommendations=5, alpha=0.5):
    if input_song_name not in music_df['Track Name'].values:
        print(f"'{input_song_name}' not found in the dataset. Please enter a valid song name.")
        return

    # Get content-based recommendations
    content_based_rec = content_based_recommendations(input_song_name, num_recommendations)

    # Get the popularity score of the input song
    popularity_score = music_df.loc[music_df['Track Name'] == input_song_name, 'Popularity'].values[0]

    # Calculate the weighted popularity score
    weighted_popularity_score = popularity_score * calculate_weighted_popularity(music_df.loc[music_df['Track Name'] == input_song_name, 'Release Date'].values[0])

    # Combine content-based and popularity-based recommendations based on weighted popularity
    hybrid_recommendations = content_based_rec
    hybrid_recommendations = hybrid_recommendations.append({
        'Track Name': input_song_name,
        'Artists': music_df.loc[music_df['Track Name'] == input_song_name, 'Artists'].values[0],
        'Album Name': music_df.loc[music_df['Track Name'] == input_song_name, 'Album Name'].values[0],
        'Release Date': music_df.loc[music_df['Track Name'] == input_song_name, 'Release Date'].values[0],
        'Popularity': weighted_popularity_score
    }, ignore_index=True)

    # Sort the hybrid recommendations based on weighted popularity score
    hybrid_recommendations = hybrid_recommendations.sort_values(by='Popularity', ascending=False)

    # Remove the input song from the recommendations
    hybrid_recommendations = hybrid_recommendations[hybrid_recommendations['Track Name'] != input_song_name]


    return hybrid_recommendations

In [32]:
#test scenerio

input_song_name = "Lonely At The Top"
recommendations = hybrid_recommendations(input_song_name, num_recommendations=3)
print(f"Hybrid recommended songs for '{input_song_name}':")
print(recommendations)

Hybrid recommended songs for 'Lonely At The Top':
                                      Track Name  \
1                                          Charm   
2                                         PARIWO   
0  PAMI (feat. Wizkid, Adekunle Gold & Omah Lay)   

                                  Artists  \
1                                    Rema   
2                    Mohbad,Bella Shmurda   
0  DJ Tunez,Wizkid,Adekunle Gold,Omah Lay   

                                      Album Name Release Date  Popularity  
1                             Rave & Roses Ultra   2023-04-27        77.0  
2                                        Blessed   2023-06-30        66.0  
0  PAMI (feat. Wizkid, Adekunle Gold & Omah Lay)   2020-08-13        58.0  


  hybrid_recommendations = hybrid_recommendations.append({
