<a href="https://colab.research.google.com/github/overgithubitttttt/ML-PROJECT/blob/main/Spotify_Recommendation_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Data Preprocessing & Transformation

In [None]:
# To read and handle data files
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# For handling arrays and vectors
import numpy as np

# For string manipulation
import string

# For displaying progress
from tqdm.auto import tqdm
tqdm.pandas()

# Suppress warnings
import warnings
warnings.filterwarnings('ignore')

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!unzip /content/drive/MyDrive/artists.csv.zip

Archive:  /content/drive/MyDrive/artists.csv.zip
  inflating: artists.csv             


### Preprocess & Transform Artists Data

In [None]:
# Read data file and display first 5 records
artists = pd.read_csv('/content/artists.csv')
artists.head()

Unnamed: 0,id,followers,genres,name,popularity
0,0DheY5irMjBUeLybbCUEZ2,0.0,[],Armid & Amir Zare Pashai feat. Sara Rouzbehani,0
1,0DlhY15l3wsrnlfGio2bjU,5.0,[],ปูนา ภาวิณี,0
2,0DmRESX2JknGPQyO15yxg7,0.0,[],Sadaa,0
3,0DmhnbHjm1qw6NCYPeZNgJ,0.0,[],Tra'gruda,0
4,0Dn11fWM7vHQ3rinvWEl4E,2.0,[],Ioannis Panoutsopoulos,0


In [None]:
# Display final 5 records in artists DataFrame
artists.tail()

Unnamed: 0,id,followers,genres,name,popularity
1162090,3cOzi726Iav1toV2LRVEjp,4831.0,['black comedy'],Ali Siddiq,34
1162091,6LogY6VMM3jgAE6fPzXeMl,46.0,[],Rodney Laney,2
1162092,19boQkDEIay9GaVAWkUhTa,257.0,[],Blake Wexler,10
1162093,5nvjpU3Y7L6Hpe54QuvDjy,2357.0,['black comedy'],Donnell Rawlings,15
1162094,2bP2cNhNBdKXHC6AnqgyVp,40.0,['new comedy'],Gabe Kea,8


In [None]:
# Sample record from artists data
pd.DataFrame(artists.iloc[142254][list(artists.columns)]).transpose()

Unnamed: 0,id,followers,genres,name,popularity
142254,0nmQIMXWTXfhgOBdNzhGOs,4606565.0,"['alternative metal', 'nu metal']",Avenged Sevenfold,78


In [None]:
# Function to preprocess and transform artists data
def transform_artists(df):
    """Clean the raw artists table and return it with a fixed column order.

    The "genres" column holds the string form of a list, e.g.
    "['alternative metal', 'nu metal']".  It is lower-cased and every character
    in ``string.punctuation`` is removed, which leaves a space-separated genre
    string such as "alternative metal nu metal" (and "" for "[]").

    Parameters
    ----------
    df : pandas.DataFrame
        Raw artists data; must contain a "genres" column.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns id, name, genres, followers, popularity.
        The input frame is not modified, so the calling cell can be re-run
        and the raw data stays available for inspection.
    """
    print('Transforming artists DataFrame...')

    # Remove punctuations from "genre" field for each record
    print('\nRemoving punctuations from "genre" field...')
    # Build the translation table once and apply it column-wise; the .str
    # accessor passes missing values through instead of raising on them.
    strip_punctuation = str.maketrans('', '', string.punctuation)
    genres_clean = df['genres'].str.lower().str.translate(strip_punctuation)

    # Replace the column on a new frame (input untouched) and reorder columns
    df_transformed = df.assign(genres=genres_clean).reindex(
        columns=['id', 'name', 'genres', 'followers', 'popularity'])

    # Return preprocessed DataFrame
    print("\nartists DataFrame transformed successfully!\n")
    return df_transformed

In [None]:
# Preprocess and transform artists DataFrame and display first 5 records
artists_transformed = transform_artists(artists)
artists_transformed.head()

Transforming artists DataFrame...

Removing punctuations from "genre" field...


  0%|          | 0/1162095 [00:00<?, ?it/s]


artists DataFrame transformed successfully!



Unnamed: 0,id,name,genres,followers,popularity
0,0DheY5irMjBUeLybbCUEZ2,Armid & Amir Zare Pashai feat. Sara Rouzbehani,,0.0,0
1,0DlhY15l3wsrnlfGio2bjU,ปูนา ภาวิณี,,5.0,0
2,0DmRESX2JknGPQyO15yxg7,Sadaa,,0.0,0
3,0DmhnbHjm1qw6NCYPeZNgJ,Tra'gruda,,0.0,0
4,0Dn11fWM7vHQ3rinvWEl4E,Ioannis Panoutsopoulos,,2.0,0


In [None]:
# Display final 5 records in artists DataFrame
artists_transformed.tail()

Unnamed: 0,id,name,genres,followers,popularity
1162090,3cOzi726Iav1toV2LRVEjp,Ali Siddiq,black comedy,4831.0,34
1162091,6LogY6VMM3jgAE6fPzXeMl,Rodney Laney,,46.0,2
1162092,19boQkDEIay9GaVAWkUhTa,Blake Wexler,,257.0,10
1162093,5nvjpU3Y7L6Hpe54QuvDjy,Donnell Rawlings,black comedy,2357.0,15
1162094,2bP2cNhNBdKXHC6AnqgyVp,Gabe Kea,new comedy,40.0,8


In [None]:
# Sample record from transformed artists data
pd.DataFrame(artists_transformed.iloc[142254][list(artists_transformed.columns)]).transpose()

Unnamed: 0,id,name,genres,followers,popularity
142254,0nmQIMXWTXfhgOBdNzhGOs,Avenged Sevenfold,alternative metal nu metal,4606565.0,78


### Preprocess & Transform Tracks Data

In [None]:
!unzip /content/drive/MyDrive/tracks.csv.zip

Archive:  /content/drive/MyDrive/tracks.csv.zip
  inflating: tracks.csv              


In [None]:
# Read data file and display first 5 records
tracks = pd.read_csv('/content/tracks.csv')
tracks.head()

Unnamed: 0,id,name,popularity,duration_ms,explicit,artists,id_artists,release_date,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,35iwgR4jXetI318WEWsa1Q,Carve,6,126903,0,['Uli'],['45tIt06XoI0Iio4LBEVpls'],1922-02-22,0.645,0.445,0,-13.338,1,0.451,0.674,0.744,0.151,0.127,104.851,3
1,021ht4sdgPcrDgSk7JTbKY,Capítulo 2.16 - Banquero Anarquista,0,98200,0,['Fernando Pessoa'],['14jtPCOoNZwquk5wd9DxrY'],1922-06-01,0.695,0.263,0,-22.136,1,0.957,0.797,0.0,0.148,0.655,102.009,1
2,07A5yehtSnoedViJAZkNnc,Vivo para Quererte - Remasterizado,0,181640,0,['Ignacio Corsini'],['5LiOoJbxVSAMkBS2fUm3X2'],1922-03-21,0.434,0.177,1,-21.18,1,0.0512,0.994,0.0218,0.212,0.457,130.418,5
3,08FmqUhxtyLTn6pAh6bk45,El Prisionero - Remasterizado,0,176907,0,['Ignacio Corsini'],['5LiOoJbxVSAMkBS2fUm3X2'],1922-03-21,0.321,0.0946,7,-27.961,1,0.0504,0.995,0.918,0.104,0.397,169.98,3
4,08y9GfoqCWfOGsKdwojr5e,Lady of the Evening,0,163080,0,['Dick Haymes'],['3BiJGZsyX9sJchTqcSA7Su'],1922,0.402,0.158,3,-16.9,0,0.039,0.989,0.13,0.311,0.196,103.22,4


In [None]:
# Display final 5 records in tracks DataFrame
tracks.tail()

Unnamed: 0,id,name,popularity,duration_ms,explicit,artists,id_artists,release_date,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
586667,5rgu12WBIHQtvej2MdHSH0,云与海,50,258267,0,['阿YueYue'],['1QLBXKM5GCpyQQSVMNZqrZ'],2020-09-26,0.56,0.518,0,-7.471,0,0.0292,0.785,0.0,0.0648,0.211,131.896,4
586668,0NuWgxEp51CutD2pJoF4OM,blind,72,153293,0,['ROLE MODEL'],['1dy5WNgIKQU6ezkpZs4y8z'],2020-10-21,0.765,0.663,0,-5.223,1,0.0652,0.141,0.000297,0.0924,0.686,150.091,4
586669,27Y1N4Q4U3EfDU5Ubw8ws2,What They'll Say About Us,70,187601,0,['FINNEAS'],['37M5pPGs6V1fchFJSgCguX'],2020-09-02,0.535,0.314,7,-12.823,0,0.0408,0.895,0.00015,0.0874,0.0663,145.095,4
586670,45XJsGpFTyzbzeWK8VzR8S,A Day At A Time,58,142003,0,"['Gentle Bones', 'Clara Benin']","['4jGPdu95icCKVF31CcFKbS', '5ebPSE9YI5aLeZ1Z2g...",2021-03-05,0.696,0.615,10,-6.212,1,0.0345,0.206,3e-06,0.305,0.438,90.029,4
586671,5Ocn6dZ3BJFPWh4ylwFXtn,Mar de Emociones,38,214360,0,['Afrosound'],['0i4Qda0k4nf7jnNHmSNpYv'],2015-07-01,0.686,0.723,6,-7.067,1,0.0363,0.105,0.0,0.264,0.975,112.204,4


In [None]:
# Sample record from tracks data
pd.DataFrame(tracks.iloc[175912][list(tracks.columns)]).transpose()

Unnamed: 0,id,name,popularity,duration_ms,explicit,artists,id_artists,release_date,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
175912,3jaJeSphs7bS5Gt3i5LJL3,Anastasia,55,367280,0,"['Slash', 'Myles Kennedy', 'The Conspirators']","['4Cqia9vrAbm7ANXbJGXsTE', '2YZOQlBE1v44RxPEAV...",2012-05-11,0.449,0.936,1,-5.447,1,0.0445,0.000595,0.00213,0.118,0.437,100.487,4


In [None]:
# Function to preprocess/transform tracks data
def transform_tracks(df, artists_df=None):
    """Clean the raw tracks table and attach the genres of each track's artists.

    Steps, in order:
      * "artists": drop the surrounding brackets and all apostrophes, then
        remove the standalone word "feat".
      * "id_artists": parse the bracketed string into a list of artist IDs.
      * "release_year": first four characters of "release_date".
      * "loudness": shifted by +60.0.
      * "duration_s": "duration_ms" converted to seconds.
      * "genres": genres of every artist in "id_artists", looked up in
        ``artists_df`` and joined into one punctuation-free string.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw tracks data.
    artists_df : pandas.DataFrame, optional
        Artists data with "id" and "genres" columns.  When omitted, the
        notebook-level ``artists`` frame is used, as before.

    Returns
    -------
    pandas.DataFrame
        A new frame with the cleaned columns in a fixed order.  The input
        frame is not modified, so the calling cell can be re-run.
    """
    print('Transforming tracks DataFrame...')

    if artists_df is None:
        # Backward-compatible default: fall back to the notebook-level frame
        artists_df = artists

    # Built once and reused by every step that strips punctuation
    strip_punctuation = str.maketrans('', '', string.punctuation)

    # Work on a copy: the original mutated and dropped columns on the caller's
    # frame, so a second run of the cell failed or corrupted the data.
    df = df.copy()

    # Remove unwanted characters from "artists" field for each record
    # NOTE(review): this also strips apostrophes that belong to a name
    # (e.g. "Guns N' Roses"); kept as-is to preserve the existing output.
    print('\nRemoving unwanted characters from "artists" field...')
    df['artists'] = df['artists'].str[1:-1].str.replace("'", '', regex=False)

    # Remove "feat" from "artists" field for each record (unwanted)
    # Matched as a whole word so names that merely contain the letters
    # (e.g. "Defeater") are left intact.
    print('\nRemoving word "feat" from "artists" field...')
    df['artists'] = df['artists'].str.replace(r'\bfeat\b', '', regex=True)

    # Remove unwanted characters from "id_artists" field for each record and split to form a list of artist IDs
    print('\nTransforming "id_artists" field into list of artist IDs...')
    df['id_artists'] = df['id_artists'].map(lambda raw: raw[1:-1].split(','))

    # Remove punctuations from artist IDs for each record
    print('\nRemoving punctuations from "id_artists" field...')
    df['id_artists'] = df['id_artists'].map(
        lambda ids: [artist_id.translate(strip_punctuation).strip() for artist_id in ids])

    # Create new column "release_year" to store year of release of song/track
    print('\nCreating new field "release_year"...')
    df['release_year'] = df['release_date'].str[0:4]

    # Shift "loudness" by +60.0
    print('\nTransforming "loudness" field to have only positive values...')
    df['loudness'] = df['loudness'] + 60.0

    # Convert duration from milliseconds to seconds
    print('\nCreating "duration_s" field using "duration_ms" field...')
    df['duration_s'] = df['duration_ms'] * 0.001

    # Create "genres" column using artists data
    print('\nCreating "genres" field using artists data...')
    # Index the artists table once (id -> [(row position, genres), ...]) rather
    # than scanning the whole table for every track.
    genres_by_artist = {}
    for position, (artist_id, artist_genres) in enumerate(zip(artists_df['id'], artists_df['genres'])):
        genres_by_artist.setdefault(artist_id, []).append((position, artist_genres))

    def _track_genres(artist_ids):
        # Collect the matching artists once each, in artists-table order
        # (the order the previous isin()-based filter produced).
        hits = [hit for artist_id in set(artist_ids) for hit in genres_by_artist.get(artist_id, ())]
        hits.sort(key=lambda hit: hit[0])
        return ' '.join(str(artist_genres).translate(strip_punctuation) for _, artist_genres in hits)

    df['genres'] = df['id_artists'].map(_track_genres)

    # Drop "release_date", "duration_ms", "explicit" and "time_signature" columns
    df = df.drop(columns=['release_date', 'duration_ms', 'explicit', 'time_signature'])

    # Reorder columns
    df_transformed = df.reindex(columns=['id', 'name', 'artists', 'id_artists', 'genres', 'release_year', 'duration_s',
                                         'popularity', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
                                         'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo'])

    # Return preprocessed DataFrame
    print("\ntracks DataFrame transformed successfully!\n")
    return df_transformed

In [None]:
# Preprocess and transform tracks DataFrame and display first 5 records
tracks_transformed = transform_tracks(tracks)
tracks_transformed.head()

NameError: ignored

In [None]:
# Display final 5 records in tracks DataFrame
tracks_transformed.tail()

In [None]:
# Sample record from transformed tracks data
pd.DataFrame(tracks_transformed.iloc[175912][list(tracks_transformed.columns)]).transpose()

In [None]:
tracks_transformed.to_csv('tracks_transformed.csv', index=None)

In [None]:
# To read and handle data files
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# For handling arrays and vectors
import numpy as np

# For visualization
import matplotlib.pyplot as plt
%matplotlib inline

# For creating wordcloud
from wordcloud import WordCloud

# For string manipulation
import string

# For creating vectors from text and determining similarity
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# To display DataFrame
from IPython.display import display

# Suppress warnings
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Download data files from Google Drive
!pip install gdown
!gdown --id 11lg5kzdTidiiYjqlfLXQn0r8yi5ae77O
!gdown --id 1DXMxVci-rdvXGgtf8fRXr6wWS-FFxUA4
!gdown --id 1M9oq8IN-Icr5S1-aPenEqcJux41ubERa

In [None]:
# Read data file and display first 5 records
tracks = pd.read_csv('./tracks_transformed.csv', na_filter=False)
tracks.head()

In [None]:
# Load the transformed artists file and discard every row that has a null value
artists = pd.read_csv('./artists_transformed.csv').dropna()

# Make sure the "followers" field is numeric
artists['followers'] = pd.to_numeric(artists['followers'])

# Show the first 5 records
artists.head()

In [None]:
# Read genres data file and display first 5 records
genres = pd.read_csv('./data_by_genres_o.csv', na_filter=False)
genres.head()

## Exploratory Data Analysis

### 1. Summary statistics

In [None]:
# Display summary statistics for tracks data
tracks.describe().transpose()

In [None]:
# Display summary statistics for artists data
artists.describe().transpose()

### 2. Most popular songs/tracks

In [None]:
# Mean popularity per track name, keeping the 10 highest
popular_songs = (
    tracks.groupby('name')['popularity']
    .mean()
    .sort_values(ascending=False)
    .head(10)
)

# Horizontal bar chart with the truncated score printed beside each bar
ax = plt.figure(figsize=(16, 10)).gca()
ax.barh(popular_songs.index, popular_songs)
ax.set_title('Most Popular Songs/Tracks', fontdict=dict(fontsize=15), fontweight='bold')
ax.set_xlabel('Popularity', fontdict=dict(fontsize=15), fontweight='bold')
ax.set_ylabel('Song/Track Name', fontdict=dict(fontsize=15), fontweight='bold')
plt.xticks(fontsize='large', weight='bold')
plt.yticks(fontsize='large', weight='bold')
for position, score in enumerate(popular_songs):
    ax.text(score+0.5, position-0.15, str(int(score)), fontdict=dict(fontsize=15), fontweight='bold')
plt.show()

### 3. Most popular artists

In [None]:
# Mean popularity per artist name, keeping the 10 highest
popular_artists = (
    artists.groupby('name')['popularity']
    .mean()
    .sort_values(ascending=False)
    .head(10)
)

# Horizontal bar chart with the truncated score printed beside each bar
ax = plt.figure(figsize=(16, 10)).gca()
ax.barh(popular_artists.index, popular_artists)
ax.set_title('Most Popular Artists', fontdict=dict(fontsize=15), fontweight='bold')
ax.set_xlabel('Popularity', fontdict=dict(fontsize=15), fontweight='bold')
ax.set_ylabel('Artist Name', fontdict=dict(fontsize=15), fontweight='bold')
plt.xticks(fontsize='large', weight='bold')
plt.yticks(fontsize='large', weight='bold')
for position, score in enumerate(popular_artists):
    ax.text(score+0.5, position-0.15, str(int(score)), fontdict=dict(fontsize=15), fontweight='bold')
plt.show()

### 4. Most followed artists

In [None]:
# Mean follower count per artist name, keeping the 10 highest
popular_artists = (
    artists.groupby('name')['followers']
    .mean()
    .sort_values(ascending=False)
    .head(10)
)

# Horizontal bar chart; each bar is labelled with its follower count in whole millions
ax = plt.figure(figsize=(16, 10)).gca()
ax.barh(popular_artists.index, popular_artists)
ax.set_title('Most Followed Artists', fontdict=dict(fontsize=15), fontweight='bold')
ax.set_xlabel('Number of Followers', fontdict=dict(fontsize=15), fontweight='bold')
ax.set_ylabel('Artist Name', fontdict=dict(fontsize=15), fontweight='bold')
plt.xticks(fontsize='large', weight='bold')
plt.yticks(fontsize='large', weight='bold')
for position, follower_count in enumerate(popular_artists):
    ax.text(follower_count+0.5, position-0.15, str(int(follower_count//1000000))+'M', fontdict=dict(fontsize=15), fontweight='bold')
plt.show()

### 5. Genre WordCloud

In [None]:
# Text from "genre" column in genres data
genre_text = ' '.join(genres['genres'].astype(str))

# Generate wordcloud using text
wc = WordCloud(background_color="white")
wordcloud = wc.generate(genre_text)

# Display wordcloud
# The figure is created first: plt.axis('off') acts on the current axes, so
# calling it before plt.figure() only hid the axes of a throwaway figure and
# the word cloud itself was drawn with its axes visible.
plt.figure(figsize=(16, 10))
plt.imshow(wordcloud)
plt.axis('off')
plt.show()

### 6. Most popular genres

In [None]:
# Mean popularity per genre, keeping the 10 highest
popular_genres = (
    genres.groupby('genres')['popularity']
    .mean()
    .sort_values(ascending=False)
    .head(10)
)

# Horizontal bar chart with the truncated score printed beside each bar
ax = plt.figure(figsize=(16, 10)).gca()
ax.barh(popular_genres.index, popular_genres)
ax.set_title('Most Popular Genres', fontdict=dict(fontsize=15), fontweight='bold')
ax.set_xlabel('Popularity', fontdict=dict(fontsize=15), fontweight='bold')
ax.set_ylabel('Genre', fontdict=dict(fontsize=15), fontweight='bold')
plt.xticks(fontsize='large', weight='bold')
plt.yticks(fontsize='large', weight='bold')
for position, score in enumerate(popular_genres):
    ax.text(score+0.5, position-0.15, str(int(score)), fontdict=dict(fontsize=15), fontweight='bold')
plt.show()

# Modelling

### 1. Song recommender

In [None]:
# Build the song library: the 10,000 most popular tracks, without the
# "id_artists" field, re-indexed from 0
song_library = (
    tracks.sort_values(by=['popularity'], ascending=False)
    .head(10000)
    .drop(columns=['id_artists'])
    .reset_index(drop=True)
)

# Show the first 5 records
song_library.head()

In [None]:
# Create CountVectorizer object to transform genre text into count vectors
song_vectorizer = CountVectorizer()

# Fit the vectorizer on the "genres" field of the song_library DataFrame;
# song_recommender() uses this fitted object to vectorize genres
song_vectorizer.fit(song_library['genres'])

In [None]:
# Function to recommend more songs based on given song name
def song_recommender(song_name):
    """Recommend five tracks from ``song_library`` that resemble ``song_name``.

    The similarity of each track to the query is the mean of two cosine
    similarities: one between their "genres" count vectors (from
    ``song_vectorizer``) and one between their raw numeric feature vectors.
    When several tracks share the query name, the first one in
    ``song_library`` is used as the query.

    Parameters
    ----------
    song_name : str
        Exact track name to look up in ``song_library``.

    Returns
    -------
    pandas.DataFrame or None
        Columns name, artists, release_year for the five most similar tracks
        (ties broken by popularity, then release year), excluding every track
        that carries the query name.  If the name is not in the library a
        message is printed and None is returned.
    """
    # Numeric columns (audio features) in song_library DataFrame
    num_cols = ['release_year', 'duration_s', 'popularity', 'danceability', 'energy', 'key', 'loudness',
                'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']

    # Explicit lookup instead of a bare except, so real errors are not
    # misreported as a missing song
    is_query = (song_library['name'] == str(song_name)).to_numpy()
    if not is_query.any():
        print('{} not found in songs library.'.format(song_name))
        return None
    query_pos = is_query.argmax()  # position of the first matching track

    # Vectorize the whole library once; every row keeps its own vectors, so
    # tracks that share a name are no longer scored with each other's features
    text_matrix = song_vectorizer.transform(song_library['genres'])
    num_matrix = song_library[num_cols].to_numpy(dtype=float)

    # Cosine similarity of the query against every track, averaged over both views
    text_sim = cosine_similarity(text_matrix[query_pos], text_matrix)[0]
    num_sim = cosine_similarity(num_matrix[[query_pos]], num_matrix)[0]
    similarity = (text_sim + num_sim) / 2

    # Rank on a new frame so the shared song_library is left untouched, and
    # exclude the query by name rather than blindly skipping the first rows
    ranked = (
        song_library.loc[~is_query]
        .assign(similarity=similarity[~is_query])
        .sort_values(by=['similarity', 'popularity', 'release_year'], ascending=[False, False, False])
    )

    # Return the 5 songs that are most similar to the given song
    recommended_songs = ranked[['name', 'artists', 'release_year']].head(5)
    return recommended_songs

In [None]:
# More songs like Hail to the King by Avenged Sevenfold
song_recommender('Hail to the King')

In [None]:
# More songs like Eye of the Tiger by Survivor
song_recommender('Eye of the Tiger')

In [None]:
# More songs like Blinding Lights by The Weeknd
song_recommender('Blinding Lights')

### 2. Artists recommender

In [None]:
# Build the artist library: the 10,000 top artists by popularity (ties broken
# by followers), re-indexed from 0
artist_library = (
    artists.sort_values(by=['popularity', 'followers'], ascending=[False, False])
    .head(10000)
    .reset_index(drop=True)
)

# Show the first 5 records
artist_library.head()


In [None]:
# Create CountVectorizer object to transform genre text into count vectors
artist_vectorizer = CountVectorizer()

# Fit the vectorizer on the "genres" field of the artist_library DataFrame;
# artist_recommender() uses this fitted object to vectorize genres
artist_vectorizer.fit(artist_library['genres'])

In [None]:
# Function to recommend more artists based on given artist name
def artist_recommender(artist_name):
    """Recommend five artists from ``artist_library`` that resemble ``artist_name``.

    The similarity of each artist to the query is the mean of two cosine
    similarities: one between their "genres" count vectors (from
    ``artist_vectorizer``) and one between their raw (followers, popularity)
    vectors.  When several artists share the query name, the first one in
    ``artist_library`` is used as the query.

    Parameters
    ----------
    artist_name : str
        Exact artist name to look up in ``artist_library``.

    Returns
    -------
    pandas.DataFrame or None
        Columns name, genres, followers, popularity for the five most similar
        artists, excluding every artist that carries the query name, ordered
        by popularity and then followers.  If the name is not in the library a
        message is printed and None is returned.
    """
    # Numeric columns in artist_library DataFrame
    num_cols = ['followers', 'popularity']

    # Explicit lookup instead of a bare except, so real errors are not
    # misreported as a missing artist
    is_query = (artist_library['name'] == str(artist_name)).to_numpy()
    if not is_query.any():
        print('{} not found in artists library.'.format(artist_name))
        return None
    query_pos = is_query.argmax()  # position of the first matching artist

    # Vectorize the whole library once; every row keeps its own vectors, so
    # artists that share a name are no longer scored with each other's features
    text_matrix = artist_vectorizer.transform(artist_library['genres'])
    num_matrix = artist_library[num_cols].to_numpy(dtype=float)

    # Cosine similarity of the query against every artist, averaged over both views
    text_sim = cosine_similarity(text_matrix[query_pos], text_matrix)[0]
    num_sim = cosine_similarity(num_matrix[[query_pos]], num_matrix)[0]
    similarity = (text_sim + num_sim) / 2

    # Rank on a new frame so the shared artist_library is left untouched, and
    # exclude the query by name rather than blindly skipping the first rows
    ranked = (
        artist_library.loc[~is_query]
        .assign(similarity=similarity[~is_query])
        .sort_values(by=['similarity', 'popularity', 'followers'], ascending=[False, False, False])
    )

    # Take the 5 most similar artists, then present them by popularity and followers
    recommended_artists = ranked[['name', 'genres', 'followers', 'popularity']].head(5)
    return recommended_artists.sort_values(by=['popularity', 'followers'], ascending=[False, False])

In [None]:
# More artists like Def Leppard
artist_recommender('Def Leppard')

In [None]:
# More artists like Opeth
artist_recommender('Opeth')

In [None]:
# More artists like Diljit Dosanjh
artist_recommender('Diljit Dosanjh')

### 3. Complete Spotify recommender

In [None]:
# Function to recommend similar songs and artists based on song name
def spotify_recommender(song_name):
    """Display similar songs and similar artists for the given song name.

    Songs come from ``song_recommender(song_name)``.  Artists come from
    calling ``artist_recommender()`` for every contributing artist of the
    song (the comma-separated "artists" field of the first matching row in
    ``song_library``); the combined result is ordered by popularity and then
    followers, and its first five rows are shown.

    Parameters
    ----------
    song_name : str
        Exact track name to look up in ``song_library``.

    Returns
    -------
    None
        Results are printed and displayed rather than returned.
    """
    # Get DataFrame of recommended songs using song_recommender() function
    recommended_songs = song_recommender(song_name)
    if recommended_songs is None:
        # song_recommender() has already printed its "not found" message;
        # stopping here avoids printing it a second time
        return

    # Get contributing artists for given song (named so the notebook-level
    # `artists` DataFrame is not shadowed)
    contributing_artists = song_library[song_library['name']==str(song_name)]['artists'].values[0].split(',')

    # For each contributing artist, get recommended artists using artist_recommender() function;
    # it returns None for an artist that is not in the artist library
    artist_frames = []
    for artist in contributing_artists:
        artist_matches = artist_recommender(artist.strip())
        if artist_matches is not None:
            artist_frames.append(artist_matches)

    # Combine once (instead of concatenating inside the loop) and sort by
    # "popularity" and "followers" columns
    if artist_frames:
        recommended_artists = pd.concat(artist_frames)
    else:
        recommended_artists = pd.DataFrame(columns=['name', 'genres', 'followers', 'popularity'])
    recommended_artists = recommended_artists.sort_values(by=['popularity', 'followers'], ascending=[False, False])

    # Display recommended songs
    print('More songs you might like:')
    display(recommended_songs)

    # Display recommended artists
    print('\n\nOther artists you might like:')
    display(recommended_artists.head())

In [None]:
# Recommendations for the song "Nero Forte" by Slipknot
spotify_recommender('Nero Forte')

In [None]:
# Recommendations for the song "La Camisa Negra" by Juanes
spotify_recommender('La Camisa Negra')

In [None]:
# Recommendations for the song "Congratulations" by Post Malone
spotify_recommender('Congratulations')