In [None]:
import os

import numpy as np
import pandas as pd
import requests

try:
    # scikit-learn >= 0.23 no longer ships a vendored joblib; use the standalone package.
    import joblib
except ImportError:
    # Fallback for old scikit-learn installs that still vendor joblib.
    from sklearn.externals import joblib

# URL of the saved pipeline on GitHub.
# NOTE(review): the user/repository segments look like placeholders -- confirm the
# real location before running.
url_pipeline = 'https://raw.githubusercontent.com/your_username/your_repository/master/data_cluster_pipeline.pkl'

# Download the pipeline file. Fail fast on HTTP errors so that an error page is
# never written to disk and later handed to the unpickler; the timeout keeps a
# stalled connection from hanging the kernel indefinitely.
response = requests.get(url_pipeline, timeout=30)
response.raise_for_status()
with open('data_cluster_pipeline.pkl', 'wb') as f:
    f.write(response.content)

# Load the pipeline.
# SECURITY: joblib.load unpickles the file, and unpickling can execute arbitrary
# code. Only load pickles downloaded from a source you control and trust.
data_cluster_pipeline = joblib.load('data_cluster_pipeline.pkl')


In [None]:
# Read the song dataset directly from its raw GitHub URL into a DataFrame.
url_data = (
    'https://raw.githubusercontent.com/username/repository/branch/filename.csv'
)
data_pd = pd.read_csv(filepath_or_buffer=url_data)

In [None]:
!pip install spotipy

In [None]:
import getpass
import os
from collections import defaultdict

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Spotify API credentials are taken from the environment instead of being
# committed in the notebook. When a variable is missing, prompt for it without
# echoing and keep it in os.environ for the rest of the session.
# SECURITY: any client id/secret that was ever committed to this notebook must
# be treated as leaked and rotated in the Spotify developer dashboard.
for _var in ('SPOTIFY_CLIENT_ID', 'SPOTIFY_CLIENT_SECRET'):
    if not os.environ.get(_var):
        os.environ[_var] = getpass.getpass('{}: '.format(_var))

# Shared Spotify client used by find_song.
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=os.environ['SPOTIFY_CLIENT_ID'],
                                                           client_secret=os.environ['SPOTIFY_CLIENT_SECRET']))

def find_song(name, year):
    """Look up a track on Spotify and return its features as a one-row DataFrame.

    Parameters
    ----------
    name : str
        Track title to search for.
    year : int
        Release year used to narrow the search.

    Returns
    -------
    pandas.DataFrame or None
        A single-row frame holding ``name``, ``year``, ``explicit``,
        ``duration_ms``, ``popularity`` plus every key of the track's audio
        features (audio-feature keys overwrite same-named search fields).
        ``None`` when the search yields no track or when no audio features
        are available for the track.
    """
    # NOTE(review): Spotify's documented field filters are written without a
    # space after the colon (``track:NAME year:YYYY``) -- confirm this query
    # matches as intended before changing it.
    results = sp.search(q= 'track: {} year: {}'.format(name,year), limit=1)
    items = results['tracks']['items']
    if not items:
        return None

    track = items[0]
    audio_features = sp.audio_features(track['id'])[0]
    if audio_features is None:
        # The features list can contain a null entry for a track; treat that
        # as "not found" rather than crashing on ``None.items()``.
        return None

    song_data = {
        'name': [name],
        'year': [year],
        'explicit': [int(track['explicit'])],
        'duration_ms': [track['duration_ms']],
        'popularity': [track['popularity']],
    }
    # Scalar audio-feature values broadcast against the length-1 lists above.
    song_data.update(audio_features)

    return pd.DataFrame(song_data)

In [None]:
from collections import defaultdict
from sklearn.metrics import euclidean_distances
from scipy.spatial.distance import cdist
import difflib

# Numeric feature columns, in the order they are selected from the dataset
# before scaling and distance computation.
number_cols = [
    'valence',
    'year',
    'acousticness',
    'danceability',
    'duration_ms',
    'energy',
    'explicit',
    'instrumentalness',
    'key',
    'liveness',
    'loudness',
    'mode',
    'popularity',
    'speechiness',
    'tempo',
]


def get_song_data(song, spotify_data):
    """Return the record for ``song``, preferring the local dataset.

    ``song`` is a mapping with ``'name'`` and ``'year'`` keys. The first row of
    ``spotify_data`` whose ``name`` and ``year`` both match is returned; when
    no row matches, the lookup is delegated to ``find_song`` and its result is
    returned unchanged.
    """
    same_name = spotify_data['name'] == song['name']
    same_year = spotify_data['year'] == song['year']
    matches = spotify_data[same_name & same_year]

    # No local hit: fall back to the Spotify lookup.
    if len(matches) == 0:
        return find_song(song['name'], song['year'])

    return matches.iloc[0]


def get_mean_vector(song_list, spotify_data):
    """Average the ``number_cols`` feature vectors of the given songs.

    Parameters
    ----------
    song_list : list of dict
        Songs to average; each is passed to ``get_song_data``.
    spotify_data : pandas.DataFrame
        Dataset forwarded to ``get_song_data``.

    Returns
    -------
    numpy.ndarray
        1-D float array with one entry per column in ``number_cols``.

    Raises
    ------
    ValueError
        If none of the songs could be resolved, since there is nothing to
        average.
    """
    song_vectors = []

    for song in song_list:
        song_data = get_song_data(song, spotify_data)
        if song_data is None:
            print('Warning: {} does not exist in Spotify or in database'.format(song['name']))
            continue
        # get_song_data may return either a single row (1-D after selection) or
        # a one-row frame (2-D); normalise both to a flat float vector so that
        # songs from the two sources can be stacked together.
        features = np.asarray(song_data[number_cols], dtype=float)
        song_vectors.append(np.atleast_2d(features)[0])

    if not song_vectors:
        raise ValueError('None of the given songs could be found in Spotify or in database')

    return np.mean(np.vstack(song_vectors), axis=0)


def flatten_dict_list(dict_list):
    """Turn a list of dicts into a dict of lists.

    Every key seen in any of the dictionaries maps to the list of values it
    had, in input order. An empty ``dict_list`` yields an empty mapping, and
    dictionaries are not required to share the same keys.

    Returns a ``defaultdict`` without a default factory, so missing keys still
    raise ``KeyError`` exactly like a plain dict.
    """
    flattened_dict = defaultdict()

    for dictionary in dict_list:
        for key, value in dictionary.items():
            # setdefault creates the list on first sight of a key, so neither an
            # empty input nor a key absent from the first dict is an error.
            flattened_dict.setdefault(key, []).append(value)

    return flattened_dict


def recommend_songs(song_list, spotify_data, n_songs=10):
    """Recommend the songs in ``spotify_data`` closest to the given seed songs.

    The seed songs are averaged into one feature vector, both that vector and
    the dataset's ``number_cols`` are passed through the first step of
    ``data_cluster_pipeline`` (presumably a fitted scaler -- confirm), and
    dataset rows are ranked by cosine distance to the averaged vector.

    Parameters
    ----------
    song_list : list of dict
        Seed songs, each with ``'name'`` and ``'year'`` keys.
    spotify_data : pandas.DataFrame
        Dataset containing ``number_cols`` and the ``name``, ``year`` and
        ``artists`` columns.
    n_songs : int, default 10
        Number of recommendations to return.

    Returns
    -------
    list of dict
        Up to ``n_songs`` records with ``name``, ``year`` and ``artists``,
        nearest first, never including a song whose name is among the seeds.
    """
    metadata_cols = ['name', 'year', 'artists']
    song_dict = flatten_dict_list(song_list)

    song_center = get_mean_vector(song_list, spotify_data)
    scaler = data_cluster_pipeline.steps[0][1]
    scaled_data = scaler.transform(spotify_data[number_cols])
    scaled_song_center = scaler.transform(song_center.reshape(1, -1))
    distances = cdist(scaled_song_center, scaled_data, 'cosine')[0]

    # Rank every row, drop the seed songs, and only then truncate, so that
    # removing seeds does not shrink the result below n_songs.
    order = np.argsort(distances)
    not_seed = ~spotify_data['name'].isin(song_dict['name']).to_numpy()
    index = order[not_seed[order]][:n_songs]

    rec_songs = spotify_data.iloc[index]
    return rec_songs[metadata_cols].to_dict(orient='records')

In [None]:
# Example: recommendations seeded by a single track, using the default n_songs.
recommend_songs(
    [{'name': 'Meltdown', 'year': 2023}],
    data_pd,
)

[{'name': 'ROXANNE', 'year': 2019, 'artists': 'Arizona Zervas'},
 {'name': 'Hate The Way (feat. blackbear)',
  'year': 2020,
  'artists': "G-Eazy', 'blackbear"},
 {'name': 'All The Way Up (Remix)',
  'year': 2016,
  'artists': "Fat Joe', 'Remy Ma', 'JAY-Z', 'French Montana', 'InfaRed"},
 {'name': 'Show & Tell', 'year': 2019, 'artists': 'Melanie Martinez'},
 {'name': 'Lost In The World',
  'year': 2010,
  'artists': "Kanye West', 'Bon Iver"},
 {'name': 'What That Speed Bout!?',
  'year': 2020,
  'artists': "Mike WiLL Made-It', 'Nicki Minaj', 'YoungBoy Never Broke Again"},
 {'name': 'Da Rockwilder', 'year': 1999, 'artists': "Method Man', 'Redman"},
 {'name': 'Emotionally Scarred', 'year': 2020, 'artists': 'Lil Baby'},
 {'name': 'Lucid Dreams', 'year': 2018, 'artists': 'Juice WRLD'},
 {'name': 'Staring At The Sun (feat. SZA)',
  'year': 2019,
  'artists': "Post Malone', 'SZA"}]