In [1]:
pip install spotipy

Collecting spotipy
  Downloading spotipy-2.24.0-py3-none-any.whl.metadata (4.9 kB)
Collecting redis>=3.5.3 (from spotipy)
  Downloading redis-5.0.8-py3-none-any.whl.metadata (9.2 kB)
Downloading spotipy-2.24.0-py3-none-any.whl (30 kB)
Downloading redis-5.0.8-py3-none-any.whl (255 kB)
Installing collected packages: redis, spotipy
Successfully installed redis-5.0.8 spotipy-2.24.0
Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyOAuth

def get_trending_playlist_data(playlist_id, access_token):
    # Set up Spotipy with the access token
    sp = spotipy.Spotify(auth=access_token)

    # Get the tracks from the playlist
    playlist_tracks = sp.playlist_tracks(playlist_id, fields='items(track(id, name, artists, album(id, name)))')

    # Extract relevant information and store in a list of dictionaries
    music_data = []
    for track_info in playlist_tracks['items']:
        track = track_info['track']
        track_name = track['name']
        artists = ', '.join([artist['name'] for artist in track['artists']])
        album_name = track['album']['name']
        album_id = track['album']['id']
        track_id = track['id']

        # Get audio features for the track
        audio_features = sp.audio_features(track_id)[0] if track_id != 'Not available' else None

        # Get release date of the album
        try:
            album_info = sp.album(album_id) if album_id != 'Not available' else None
            release_date = album_info['release_date'] if album_info else None
        except:
            release_date = None

        # Get popularity of the track
        try:
            track_info = sp.track(track_id) if track_id != 'Not available' else None
            popularity = track_info['popularity'] if track_info else None
        except:
            popularity = None

        # Add additional track information to the track data
        track_data = {
            'Track Name': track_name,
            'Artists': artists,
            'Album Name': album_name,
            'Album ID': album_id,
            'Track ID': track_id,
            'Popularity': popularity,
            'Release Date': release_date,
            'Duration (ms)': audio_features['duration_ms'] if audio_features else None,
            'Explicit': track_info.get('explicit', None),
            'External URLs': track_info.get('external_urls', {}).get('spotify', None),
            'Danceability': audio_features['danceability'] if audio_features else None,
            'Energy': audio_features['energy'] if audio_features else None,
            'Key': audio_features['key'] if audio_features else None,
            'Loudness': audio_features['loudness'] if audio_features else None,
            'Mode': audio_features['mode'] if audio_features else None,
            'Speechiness': audio_features['speechiness'] if audio_features else None,
            'Acousticness': audio_features['acousticness'] if audio_features else None,
            'Instrumentalness': audio_features['instrumentalness'] if audio_features else None,
            'Liveness': audio_features['liveness'] if audio_features else None,
            'Valence': audio_features['valence'] if audio_features else None,
            'Tempo': audio_features['tempo'] if audio_features else None,
            # Add more attributes as needed
        }

        music_data.append(track_data)

    # Create a pandas DataFrame from the list of dictionaries
    df = pd.DataFrame(music_data)

    return df

In [4]:
playlist_id = '37i9dQZF1DX76Wlfdnj7AP'
access_token= 'BQCR8JB-G2bjqs2q0suzYV45SdIOvAUvtP3iDpfsfd9glf2zUhU9HAYCj_NfEeIESd7OoPwc05X-2w2tc_oyXZPtdVZb21FubU3t3Tjh2jjpt3L2eOc'
# Call the function to get the music data from the playlist and store it in a DataFrame
music_df = get_trending_playlist_data(playlist_id, access_token)

# Display the DataFrame
print(music_df)

                               Track Name                     Artists  \
0                             Not Like Us              Kendrick Lamar   
1                               Big Dawgs          Hanumankind, Kalmi   
2           Guess featuring Billie Eilish   Charli xcx, Billie Eilish   
3                      I Don't Wanna Wait   David Guetta, OneRepublic   
4   I Had Some Help (Feat. Morgan Wallen)  Post Malone, Morgan Wallen   
..                                    ...                         ...   
95         Everybody (feat. Lil Uzi Vert)   Nicki Minaj, Lil Uzi Vert   
96                                Players                   Coi Leray   
97                          Where You Are          John Summit, HAYLA   
98                             Area Codes                      Kaliii   
99                                   BOTH      Tiësto, 21 Savage, BIA   

                       Album Name                Album ID  \
0                     Not Like Us  5JjnoGJyOxfSZUZtk2rRwZ   
1

In [5]:
music_df.head(3)

Unnamed: 0,Track Name,Artists,Album Name,Album ID,Track ID,Popularity,Release Date,Duration (ms),Explicit,External URLs,...,Energy,Key,Loudness,Mode,Speechiness,Acousticness,Instrumentalness,Liveness,Valence,Tempo
0,Not Like Us,Kendrick Lamar,Not Like Us,5JjnoGJyOxfSZUZtk2rRwZ,6AI3ezQ4o3HUoP6Dhudph3,93,2024-05-04,274192,True,https://open.spotify.com/track/6AI3ezQ4o3HUoP6...,...,0.472,1,-7.001,1,0.0776,0.0107,0.0,0.141,0.214,101.061
1,Big Dawgs,"Hanumankind, Kalmi",Big Dawgs,6Yw4204wbgmpsGTzjXBhYD,0OA00aPt3BV10qeMIs3meW,92,2024-07-09,190667,True,https://open.spotify.com/track/0OA00aPt3BV10qe...,...,0.745,4,-3.202,0,0.161,0.0235,0.0,0.363,0.262,180.098
2,Guess featuring Billie Eilish,"Charli xcx, Billie Eilish",Guess featuring Billie Eilish,3ThlxfLSy4bfKzxWqmC7VN,3WOhcATHxK2SLNeP5W3v1v,92,2024-08-01,145219,False,https://open.spotify.com/track/3WOhcATHxK2SLNe...,...,0.667,7,-6.622,1,0.0983,0.0146,0.3,0.0761,0.618,130.019


In [8]:
music_df.to_csv("music_df.csv")

In [9]:
music_df.to_excel("music_df.xlsx")

In [10]:
print(music_df.isnull().sum())

Track Name          0
Artists             0
Album Name          0
Album ID            0
Track ID            0
Popularity          0
Release Date        0
Duration (ms)       0
Explicit            0
External URLs       0
Danceability        0
Energy              0
Key                 0
Loudness            0
Mode                0
Speechiness         0
Acousticness        0
Instrumentalness    0
Liveness            0
Valence             0
Tempo               0
dtype: int64


In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from sklearn.metrics.pairwise import cosine_similarity

data = music_df

In [14]:
# Function to calculate weighted popularity scores based on release date
def calculate_weighted_popularity(release_date):
    # Convert the release date to datetime object
    release_date = datetime.strptime(release_date, '%Y-%m-%d')

    # Calculate the time span between release date and today's date
    time_span = datetime.now() - release_date

    # Calculate the weighted popularity score based on time span (e.g., more recent releases have higher weight)
    weight = 1 / (time_span.days + 1)
    return weight

In [15]:
# Normalize the music features using Min-Max scaling
scaler = MinMaxScaler()
music_features = music_df[['Danceability', 'Energy', 'Key', 
                           'Loudness', 'Mode', 'Speechiness', 'Acousticness',
                           'Instrumentalness', 'Liveness', 'Valence', 'Tempo']].values
music_features_scaled = scaler.fit_transform(music_features)

In [16]:
# a function to get content-based recommendations based on music features
def content_based_recommendations(input_song_name, num_recommendations=5):
    if input_song_name not in music_df['Track Name'].values:
        print(f"'{input_song_name}' not found in the dataset. Please enter a valid song name.")
        return

    # Get the index of the input song in the music DataFrame
    input_song_index = music_df[music_df['Track Name'] == input_song_name].index[0]

    # Calculate the similarity scores based on music features (cosine similarity)
    similarity_scores = cosine_similarity([music_features_scaled[input_song_index]], music_features_scaled)

    # Get the indices of the most similar songs
    similar_song_indices = similarity_scores.argsort()[0][::-1][1:num_recommendations + 1]

    # Get the names of the most similar songs based on content-based filtering
    content_based_recommendations = music_df.iloc[similar_song_indices][['Track Name', 'Artists', 'Album Name', 'Release Date', 'Popularity']]

    return content_based_recommendations

In [19]:
import pandas as pd

def hybrid_recommendations(input_song_name, num_recommendations=5, alpha=0.5):
    if input_song_name not in music_df['Track Name'].values:
        print(f"'{input_song_name}' not found in the dataset. Please enter a valid song name.")
        return

    content_based_rec = content_based_recommendations(input_song_name, num_recommendations)

    popularity_score = music_df.loc[music_df['Track Name'] == input_song_name, 'Popularity'].values[0]

    weighted_popularity_score = popularity_score * calculate_weighted_popularity(
        music_df.loc[music_df['Track Name'] == input_song_name, 'Release Date'].values[0]
    )

    new_entry = pd.DataFrame({
        'Track Name': [input_song_name],
        'Artists': [music_df.loc[music_df['Track Name'] == input_song_name, 'Artists'].values[0]],
        'Album Name': [music_df.loc[music_df['Track Name'] == input_song_name, 'Album Name'].values[0]],
        'Release Date': [music_df.loc[music_df['Track Name'] == input_song_name, 'Release Date'].values[0]],
        'Popularity': [weighted_popularity_score]
    })

    hybrid_recommendations = pd.concat([content_based_rec, new_entry], ignore_index=True)

    hybrid_recommendations = hybrid_recommendations.sort_values(by='Popularity', ascending=False)

    hybrid_recommendations = hybrid_recommendations[hybrid_recommendations['Track Name'] != input_song_name]

    return hybrid_recommendations

In [20]:
input_song_name = "I'm Good (Blue)"
recommendations = hybrid_recommendations(input_song_name, num_recommendations=5)
print(f"Hybrid recommended songs for '{input_song_name}':")
print(recommendations)

Hybrid recommended songs for 'I'm Good (Blue)':
                     Track Name                                     Artists  \
3                       KEEP UP                                     Odetari   
4  It's Not Right But It's Okay                            Mr. Belt & Wezol   
1                         REACT  Switch Disco, Ella Henderson, Robert Miles   
0                          BOTH                      Tiësto, 21 Savage, BIA   
2                 Where You Are                          John Summit, HAYLA   

                     Album Name Release Date  Popularity  
3          KEEP UP // FROSTBITE   2024-07-17        85.0  
4  It's Not Right But It's Okay   2024-02-23        82.0  
1                         REACT   2023-01-13        73.0  
0                          BOTH   2023-08-29        70.0  
2                 Where You Are   2023-03-03        70.0  


In [21]:
playlist_id = '5Kjyqt6gNEiDM0xKhYb1nx'
access_token= 'BQCR8JB-G2bjqs2q0suzYV45SdIOvAUvtP3iDpfsfd9glf2zUhU9HAYCj_NfEeIESd7OoPwc05X-2w2tc_oyXZPtdVZb21FubU3t3Tjh2jjpt3L2eOc'
# Call the function to get the music data from the playlist and store it in a DataFrame
music_df = get_trending_playlist_data(playlist_id, access_token)

# Display the DataFrame
print(music_df)

                                           Track Name  \
0                                          Heat Waves   
1                                                Hope   
2                                            Infinity   
3                                           On My Way   
4                                               Faded   
..                                                ...   
95                                            Whoopty   
96                                           Skechers   
97                                       Love Is Gone   
98  La Casa De Papel (Money Heist) - My Life Is Go...   
99                                        At My Worst   

                                    Artists  \
0                             Glass Animals   
1                              XXXTENTACION   
2                              Jaymes Young   
3   Alan Walker, Sabrina Carpenter, Farruko   
4                               Alan Walker   
..                               

In [22]:
music_df.to_csv("music_df2.csv")

In [23]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from sklearn.metrics.pairwise import cosine_similarity

data = music_df

In [25]:
input_song_name = "Heat Waves"
recommendations = hybrid_recommendations(input_song_name, num_recommendations=5)
print(f"Hybrid recommended songs for '{input_song_name}':")
print(recommendations)

Hybrid recommended songs for 'Heat Waves':
                                          Track Name  \
2                             Mood (feat. iann dior)   
3  Hey Mama (feat. Nicki Minaj, Bebe Rexha & Afro...   
4                              Gangnam Style (강남스타일)   
1                                         Fight Back   
0                        Mask Off - Marshmello Remix   

                                           Artists  \
2                              24kGoldn, iann dior   
3  David Guetta, AFROJACK, Bebe Rexha, Nicki Minaj   
4                                              PSY   
1                                           NEFFEX   
0                               Future, Marshmello   

                    Album Name Release Date  Popularity  
2                    El Dorado   2021-03-26        81.0  
3                       Listen   2014-11-10        73.0  
4        Gangnam Style (강남스타일)   2012-01-01        73.0  
1   Fight Back: The Collection   2018-11-30        71.0  
0  Ma