In [1]:
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyOAuth
from access_token import get_token

In [2]:
def _has_spotify_id(spotify_id):
    """Return True when ``spotify_id`` can be used for a Web API lookup."""
    return bool(spotify_id) and spotify_id != 'Not available'


def _lookup_or_none(fetch, spotify_id):
    """Best-effort lookup: return ``fetch(spotify_id)``, or None if it cannot be done.

    None is returned when the ID is missing or when the lookup raises.
    """
    if not _has_spotify_id(spotify_id):
        return None
    try:
        return fetch(spotify_id)
    except Exception:
        # Deliberately best-effort: one failed lookup only blanks that track's
        # fields. Unlike a bare ``except``, KeyboardInterrupt/SystemExit still
        # propagate, so a long-running fetch can be interrupted.
        return None


def _fetch_audio_features(sp, track_id):
    """Return the audio-features dict for one track, or None if unavailable.

    Errors raised by the client propagate (as in the original code) so that a
    systematic failure is visible instead of silently yielding empty features.
    """
    if not _has_spotify_id(track_id):
        return None
    features = sp.audio_features(track_id)
    return features[0] if features else None


def _iter_playlist_items(sp, playlist_id, page_size=100):
    """Yield every playlist item, requesting successive pages by offset."""
    offset = 0
    while True:
        page = sp.playlist_tracks(
            playlist_id,
            fields='items(track(id, name, artists, album(id, name)))',
            limit=page_size,
            offset=offset,
        )
        items = (page or {}).get('items') or []
        yield from items
        # A short (or empty) page means there is nothing left to request.
        if len(items) < page_size:
            break
        offset += len(items)


def get_playlist_data(playlist_id, access_token):
    """Collect metadata and audio features for every track in a playlist.

    Parameters
    ----------
    playlist_id : str
        Spotify playlist ID.
    access_token : str
        Token passed to ``spotipy.Spotify(auth=...)``.

    Returns
    -------
    pandas.DataFrame
        One row per track with the columns listed in ``columns`` below. The
        columns are always present, even when the playlist yields no rows.
        Fields that could not be looked up are None/NaN.

    Notes
    -----
    * Playlist items without a track object are skipped.
    * Album and track lookups are best-effort; audio-feature errors propagate.
    * Three API calls are made per track, so large playlists are slow.
    """
    sp = spotipy.Spotify(auth=access_token)

    # DataFrame column -> key in the audio-features payload.
    audio_feature_keys = {
        'Duration (ms)': 'duration_ms',
        'Danceability': 'danceability',
        'Energy': 'energy',
        'Key': 'key',
        'Loudness': 'loudness',
        'Mode': 'mode',
        'Speechiness': 'speechiness',
        'Acousticness': 'acousticness',
        'Instrumentalness': 'instrumentalness',
        'Liveness': 'liveness',
        'Valence': 'valence',
        'Tempo': 'tempo',
    }
    columns = [
        'Track Name', 'Artists', 'Album Name', 'Album ID', 'Track ID',
        'Popularity', 'Release Date', 'Duration (ms)', 'Explicit', 'External URLs',
        'Danceability', 'Energy', 'Key', 'Loudness', 'Mode', 'Speechiness',
        'Acousticness', 'Instrumentalness', 'Liveness', 'Valence', 'Tempo',
    ]

    music_data = []
    for item in _iter_playlist_items(sp, playlist_id):
        track = item.get('track') if item else None
        if not track:
            # Nothing to describe when the item carries no track object.
            continue

        album = track.get('album') or {}
        track_id = track.get('id')
        album_id = album.get('id')

        # Separate names for each payload: the original reused the loop
        # variable for the sp.track() result and then called .get() on it even
        # when it was None.
        audio_features = _fetch_audio_features(sp, track_id) or {}
        album_details = _lookup_or_none(sp.album, album_id) or {}
        track_details = _lookup_or_none(sp.track, track_id) or {}

        track_data = {
            'Track Name': track.get('name'),
            'Artists': ', '.join(artist['name'] for artist in track.get('artists') or []),
            'Album Name': album.get('name'),
            'Album ID': album_id,
            'Track ID': track_id,
            'Popularity': track_details.get('popularity'),
            'Release Date': album_details.get('release_date'),
            'Explicit': track_details.get('explicit'),
            'External URLs': (track_details.get('external_urls') or {}).get('spotify'),
        }
        for column, feature_key in audio_feature_keys.items():
            track_data[column] = audio_features.get(feature_key)

        music_data.append(track_data)

    # Passing ``columns`` fixes the column order and keeps the schema stable
    # when ``music_data`` is empty.
    return pd.DataFrame(music_data, columns=columns)

In [4]:
# Spotify playlist whose tracks make up the dataset.
playlist_id = '36rpTUKlOWpyLEQRyviwPP'

# Obtain a token through get_token (imported from the access_token module).
access_token = get_token()

# Fetch the playlist's track data into a DataFrame.
music_df = get_playlist_data(playlist_id=playlist_id, access_token=access_token)

# Preview the first five rows.
music_df.head(n=5)

Access token obtained successfully.


Unnamed: 0,Track Name,Artists,Album Name,Album ID,Track ID,Popularity,Release Date,Duration (ms),Explicit,External URLs,...,Energy,Key,Loudness,Mode,Speechiness,Acousticness,Instrumentalness,Liveness,Valence,Tempo
0,Every Breath You Take,The Police,Synchronicity (Remastered 2003),5W9OT0a5iZlBr83a9WMKFY,1JSTJqkT5qHq8MDJnJbRE1,89,1983-06-17,253920,False,https://open.spotify.com/track/1JSTJqkT5qHq8MD...,...,0.452,1,-9.796,1,0.0348,0.543,0.00294,0.0714,0.74,117.401
1,Africa,TOTO,Toto IV,62U7xIHcID94o20Of5ea4D,2374M0fQpWi3dLnB54qaLX,87,1982-04-08,295893,False,https://open.spotify.com/track/2374M0fQpWi3dLn...,...,0.373,9,-18.064,1,0.0323,0.257,8e-05,0.0481,0.732,92.718
2,Wind Of Change,Scorpions,Crazy World,3dLKM8bD8R3H3XnSOXGjTF,3ovjw5HZZv43SxTwApooCM,82,1990-01-01,312360,False,https://open.spotify.com/track/3ovjw5HZZv43SxT...,...,0.524,0,-10.108,1,0.0351,0.315,0.0,0.103,0.246,151.505
3,Time After Time,Cyndi Lauper,She's So Unusual,1FvdZ1oizXwF9bxogujoF0,7o9uu2GDtVDr9nsR7ZRN73,80,1983-10-14,241333,False,https://open.spotify.com/track/7o9uu2GDtVDr9ns...,...,0.449,0,-9.206,1,0.0286,0.487,1e-06,0.0824,0.294,130.388
4,Can't Turn Back The Years - 2015 Remastered,Phil Collins,Both Sides (Deluxe Edition),3a43NX3hFXATIa89aNk6AF,3R3hLQX2u6lZIORZQsJmtD,48,1993-10-19,280507,False,https://open.spotify.com/track/3R3hLQX2u6lZIOR...,...,0.194,5,-13.318,0,0.037,0.476,0.0102,0.09,0.304,173.231


In [5]:
# Count missing values per column (isna is pandas' alias for isnull).
print(music_df.isna().sum())

Track Name          0
Artists             0
Album Name          0
Album ID            0
Track ID            0
Popularity          0
Release Date        0
Duration (ms)       0
Explicit            0
External URLs       0
Danceability        0
Energy              0
Key                 0
Loudness            0
Mode                0
Speechiness         0
Acousticness        0
Instrumentalness    0
Liveness            0
Valence             0
Tempo               0
dtype: int64


In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from sklearn.metrics.pairwise import cosine_similarity

In [7]:
# Function to calculate weighted popularity scores based on release date
def calculate_weighted_popularity(release_date, reference_date=None):
    """Return a recency weight for a release date: ``1 / (days_since_release + 1)``.

    Despite the name, this returns only the weight; the caller multiplies it
    by the track's popularity.

    Parameters
    ----------
    release_date : str
        Release date as 'YYYY-MM-DD', 'YYYY-MM' or 'YYYY'. The shorter forms
        resolve to the first day of that month or year.
    reference_date : datetime.datetime, optional
        The "today" the age is measured against. Defaults to
        ``datetime.now()``; pass a fixed value for reproducible results.

    Returns
    -------
    float
        A weight in (0, 1]; newer releases score higher. Dates on or after
        ``reference_date`` get the maximum weight 1.0.

    Raises
    ------
    ValueError
        If ``release_date`` matches none of the accepted formats.
    """
    # NOTE(review): assumes partial dates such as '1990' or '1990-01' can occur
    # in the 'Release Date' column — confirm against the Spotify album schema.
    for date_format in ('%Y-%m-%d', '%Y-%m', '%Y'):
        try:
            released = datetime.strptime(release_date, date_format)
            break
        except ValueError:
            continue
    else:
        raise ValueError(
            f"release_date {release_date!r} is not in 'YYYY-MM-DD', 'YYYY-MM' or 'YYYY' format"
        )

    if reference_date is None:
        reference_date = datetime.now()

    # Clamp at zero: a future date would otherwise give a negative weight, or
    # divide by zero when the release is exactly one day ahead.
    days_since_release = max((reference_date - released).days, 0)
    return 1 / (days_since_release + 1)

In [8]:
    # Normalize the music features using Min-Max scaling
# Pull the audio-feature columns out as a plain 2-D array (rows = tracks).
music_features = music_df.loc[:, [
    'Danceability', 'Energy', 'Key',
    'Loudness', 'Mode', 'Speechiness', 'Acousticness',
    'Instrumentalness', 'Liveness', 'Valence', 'Tempo',
]].to_numpy()

# Fit the scaler on those features and keep the rescaled matrix.
scaler = MinMaxScaler()
music_features_scaled = scaler.fit_transform(music_features)

In [9]:
# a function to get content-based recommendations based on music features
def content_based_recommendations(input_song_name, num_recommendations=5):
    """Recommend the tracks whose scaled audio features are closest to a song.

    Similarity is the cosine similarity between rows of the module-level
    ``music_features_scaled`` matrix, which must be row-aligned with
    ``music_df``.

    Parameters
    ----------
    input_song_name : str
        Exact 'Track Name' of the seed song. If several rows share the name,
        the first one is used.
    num_recommendations : int, default 5
        Maximum number of rows to return; values below 1 give an empty frame.

    Returns
    -------
    pandas.DataFrame or None
        Up to ``num_recommendations`` rows of ``music_df`` (columns 'Track
        Name', 'Artists', 'Album Name', 'Release Date', 'Popularity'), most
        similar first and never including the seed row itself. None, after
        printing a message, when the song is not in ``music_df``.
    """
    name_matches = (music_df['Track Name'] == input_song_name).to_numpy().nonzero()[0]
    if name_matches.size == 0:
        print(f"'{input_song_name}' not found in the dataset. Please enter a valid song name.")
        return

    # Row *position*, not index label: the feature matrix is a plain array, so
    # a label only lines up when music_df happens to have a default RangeIndex.
    input_position = int(name_matches[0])

    # Similarity of the seed row against every row (cosine_similarity returns 1 x n).
    similarity_scores = cosine_similarity(
        [music_features_scaled[input_position]], music_features_scaled
    )[0]

    # Rank best-first and drop the seed row explicitly. Discarding "the first
    # entry" is wrong when another track ties with the seed at similarity 1.0.
    ranked_positions = similarity_scores.argsort()[::-1]
    ranked_positions = ranked_positions[ranked_positions != input_position]
    top_positions = ranked_positions[:max(int(num_recommendations), 0)]

    return music_df.iloc[top_positions][
        ['Track Name', 'Artists', 'Album Name', 'Release Date', 'Popularity']
    ]

In [14]:
# a function to get hybrid recommendations based on weighted popularity
def hybrid_recommendations(input_song_name, num_recommendations=5, alpha=0.5):
    """Return content-based recommendations for a song, ordered by popularity.

    Parameters
    ----------
    input_song_name : str
        Exact 'Track Name' of the seed song in ``music_df``. If several rows
        share the name, the first one is used.
    num_recommendations : int, default 5
        Passed through to ``content_based_recommendations``.
    alpha : float, default 0.5
        Currently unused; kept so existing callers keep working.

    Returns
    -------
    pandas.DataFrame or None
        The content-based recommendations sorted by 'Popularity' (descending),
        with rows named like the seed song removed. The frame has a fresh
        0..n index (pre-sort order) and a float 'Popularity' column. None,
        after printing a message, when the song is not in ``music_df``.

    Notes
    -----
    NOTE(review): the recency-weighted score is attached only to the seed
    song's row, and that row is removed before returning. The ranking
    therefore depends on raw popularity alone. This is preserved as-is;
    confirm whether a real blend using ``alpha`` was intended.
    """
    input_rows = music_df.loc[music_df['Track Name'] == input_song_name]
    if input_rows.empty:
        print(f"'{input_song_name}' not found in the dataset. Please enter a valid song name.")
        return

    # Get content-based recommendations
    content_based_rec = content_based_recommendations(input_song_name, num_recommendations)

    # Look the seed row up once instead of re-filtering music_df for every field.
    input_row = input_rows.iloc[0]

    # Seed song's popularity scaled by its recency weight.
    weighted_popularity_score = (
        input_row['Popularity'] * calculate_weighted_popularity(input_row['Release Date'])
    )

    input_entry = pd.DataFrame([{
        'Track Name': input_song_name,
        'Artists': input_row['Artists'],
        'Album Name': input_row['Album Name'],
        'Release Date': input_row['Release Date'],
        'Popularity': weighted_popularity_score,
    }])

    # DataFrame.append was deprecated and then removed in pandas 2.0;
    # pd.concat with ignore_index=True is the supported equivalent.
    combined = pd.concat([content_based_rec, input_entry], ignore_index=True)

    # Sort by popularity, then drop the seed song from its own recommendations.
    combined = combined.sort_values(by='Popularity', ascending=False)
    return combined[combined['Track Name'] != input_song_name]

In [17]:
input_song_name = "So Far Away"
recommendations = hybrid_recommendations(input_song_name, num_recommendations=5)
# Header and table in one call, newline-separated.
print(f"Hybrid recommended songs for '{input_song_name}':", recommendations, sep="\n")

Hybrid recommended songs for 'So Far Away':
                         Track Name        Artists  \
3             Every Breath You Take     The Police   
4              Laughter In The Rain    Neil Sedaka   
2  Do You Remember? - 2016 Remaster   Phil Collins   
1                    You're My Home     Billy Joel   
0                        Everywhere  Fleetwood Mac   

                          Album Name Release Date  Popularity  
3    Synchronicity (Remastered 2003)   1983-06-17        89.0  
4          The Definitive Collection   2007-04-24        67.0  
2  ...But Seriously (Deluxe Edition)   1989-11-20        54.0  
1                          Piano Man   1973-11-09        47.0  
0                 Tango In The Night   1987-04-13         0.0  


  hybrid_recommendations = hybrid_recommendations.append({
