# Vibe Tune - A Music Recommendation System 

### Import the necessary libraries

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as skl
import re
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from gensim.models import Word2Vec
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_extraction.text import TfidfVectorizer
import requests
import pickle
nltk.download('punkt_tab')
nltk.download('stopwords')


[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

### Loading the Datasets

In [4]:
# Reading the spotify cleaned dataset
df = pd.read_csv("C:/Users/Admin/Documents/Projects/Vibe-Tune/Recommendation_Engine/spotify.csv")

In [5]:
# Getting shape of the dataset
df.shape

(900, 23)

In [6]:
# Reading the head of the dataset
df.head()

Unnamed: 0.1,Unnamed: 0,track_id,track_name,track_artist,track_popularity,track_album_release_date,playlist_genre,danceability,energy,key,...,acousticness,liveness,valence,tempo,duration_ms,track_artist_merged,lyrics,artist_name,image_url,spotify_url
0,0,6oJ6le65B3SEqPwMRNXWjY,higher love,Kygo,0.5,2019-06-28,Pop,0.63268,0.667346,0.727273,...,0.015932,0.086004,0.39137,0.290605,228267,kygo,"['bring', 'me', 'higher', 'love,', 'love', '',...",Kygo,https://i.scdn.co/image/ab67616d0000b2737c8977...,https://open.spotify.com/track/6oJ6le65B3SEqPw...
1,1,3yNZ5r3LKfdmjoS3gkhUCT,bad guy (with justin bieber),Billieeilish,0.318182,2019-07-11,Pop,0.602614,0.425904,0.0,...,0.26112,0.10293,0.687634,0.508374,194840,billie eilish,"['yeah,', 'yeah', '', 'oh,', 'ah', '', 'white'...",Billie Eilish,https://i.scdn.co/image/ab67616d0000b273a69b8b...,https://open.spotify.com/track/3yNZ5r3LKfdmjoS...
2,2,0qc4QlcCxVTGyShurEv1UU,post malone (feat. rani),Samfeldt,0.318182,2019-05-24,Pop,0.498039,0.628716,0.636364,...,0.079871,0.090236,0.656505,0.314439,174444,sam feldt,"['one', 'more', 'drink,', 'got', 'one', 'more'...",Sam Feldt,https://i.scdn.co/image/ab67616d0000b27354de16...,https://open.spotify.com/track/0qc4QlcCxVTGySh...
3,3,4PkIDTPGedm0enzdvilLNd,sixteen,Elliegoulding,0.227273,2019-04-12,Pop,0.601307,0.799335,0.727273,...,0.2777,0.041997,0.538429,0.332383,201072,ellie goulding,"['(sixteen)', '', '(sixteen)', '', 'do', 'you'...",Ellie Goulding,https://i.scdn.co/image/ab67616d0000b27394181f...,https://open.spotify.com/track/4PkIDTPGedm0enz...
4,4,5PYQUBXc7NYeI1obMKSJK0,never really over,Katyperry,0.409091,2019-05-31,Pop,0.733333,0.886254,0.727273,...,0.201014,0.313445,0.370975,0.262872,223523,katy perry,"[""i'm"", 'losing', 'my', 'self', 'control', '',...",Katy Perry,https://i.scdn.co/image/ab67616d0000b2739bea3b...,https://open.spotify.com/track/5PYQUBXc7NYeI1o...


In [7]:
# Getting the info of the dataset
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 900 entries, 0 to 899
Data columns (total 23 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Unnamed: 0                900 non-null    int64  
 1   track_id                  900 non-null    object 
 2   track_name                900 non-null    object 
 3   track_artist              900 non-null    object 
 4   track_popularity          900 non-null    float64
 5   track_album_release_date  900 non-null    object 
 6   playlist_genre            900 non-null    object 
 7   danceability              900 non-null    float64
 8   energy                    900 non-null    float64
 9   key                       900 non-null    float64
 10  loudness                  900 non-null    float64
 11  mode                      900 non-null    float64
 12  speechiness               900 non-null    float64
 13  acousticness              900 non-null    float64
 14  liveness  

In [8]:
# Getting Description of the dataset
df.describe()

Unnamed: 0.1,Unnamed: 0,track_popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,liveness,valence,tempo,duration_ms
count,900.0,900.0,900.0,900.0,900.0,900.0,900.0,900.0,900.0,900.0,900.0,900.0,900.0
mean,449.5,0.199545,0.623394,0.640775,0.484444,0.726726,0.574444,0.115008,0.22591,0.16005,0.518995,0.414709,211372.507778
std,259.951919,0.199525,0.183919,0.18113,0.334353,0.111564,0.494702,0.133004,0.238884,0.139023,0.237509,0.208335,45092.774311
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,104591.0
25%,224.75,0.045455,0.504248,0.527578,0.181818,0.681972,0.0,0.023636,0.043808,0.077727,0.33421,0.245323,181819.5
50%,449.5,0.136364,0.652288,0.664127,0.454545,0.743644,1.0,0.055262,0.140391,0.107691,0.515887,0.402658,206233.5
75%,674.25,0.318182,0.749346,0.777873,0.818182,0.798564,1.0,0.158157,0.326666,0.194965,0.705882,0.543679,232736.75
max,899.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,484147.0


In [11]:
# Getting summary of the dataset
def summary_df(df):
    list = []
    for col in df.columns:
        dtype = df[col].dtype
        nunique = df[col].nunique()
        null_count = df[col].isnull().sum()
        list.append([col, dtype, nunique, null_count])
        data = pd.DataFrame(list)
    data.columns = ['Column', 'Dtype', 'NUnique', 'Null_Count']
    return data

summary_df(df)

Unnamed: 0,Column,Dtype,NUnique,Null_Count
0,Unnamed: 0,int64,900,0
1,track_id,object,900,0
2,track_name,object,900,0
3,track_artist,object,468,0
4,track_popularity,float64,23,0
5,track_album_release_date,object,467,0
6,playlist_genre,object,6,0
7,danceability,float64,424,0
8,energy,float64,474,0
9,key,float64,12,0


### Data Preprocessing for Recommendation Engine

In [12]:
# Droping column "Unnamed"
df = df.drop(columns="Unnamed: 0")

In [13]:
df.head()

Unnamed: 0,track_id,track_name,track_artist,track_popularity,track_album_release_date,playlist_genre,danceability,energy,key,loudness,...,acousticness,liveness,valence,tempo,duration_ms,track_artist_merged,lyrics,artist_name,image_url,spotify_url
0,6oJ6le65B3SEqPwMRNXWjY,higher love,Kygo,0.5,2019-06-28,Pop,0.63268,0.667346,0.727273,0.680129,...,0.015932,0.086004,0.39137,0.290605,228267,kygo,"['bring', 'me', 'higher', 'love,', 'love', '',...",Kygo,https://i.scdn.co/image/ab67616d0000b2737c8977...,https://open.spotify.com/track/6oJ6le65B3SEqPw...
1,3yNZ5r3LKfdmjoS3gkhUCT,bad guy (with justin bieber),Billieeilish,0.318182,2019-07-11,Pop,0.602614,0.425904,0.0,0.504094,...,0.26112,0.10293,0.687634,0.508374,194840,billie eilish,"['yeah,', 'yeah', '', 'oh,', 'ah', '', 'white'...",Billie Eilish,https://i.scdn.co/image/ab67616d0000b273a69b8b...,https://open.spotify.com/track/3yNZ5r3LKfdmjoS...
2,0qc4QlcCxVTGyShurEv1UU,post malone (feat. rani),Samfeldt,0.318182,2019-05-24,Pop,0.498039,0.628716,0.636364,0.821136,...,0.079871,0.090236,0.656505,0.314439,174444,sam feldt,"['one', 'more', 'drink,', 'got', 'one', 'more'...",Sam Feldt,https://i.scdn.co/image/ab67616d0000b27354de16...,https://open.spotify.com/track/0qc4QlcCxVTGySh...
3,4PkIDTPGedm0enzdvilLNd,sixteen,Elliegoulding,0.227273,2019-04-12,Pop,0.601307,0.799335,0.727273,0.753612,...,0.2777,0.041997,0.538429,0.332383,201072,ellie goulding,"['(sixteen)', '', '(sixteen)', '', 'do', 'you'...",Ellie Goulding,https://i.scdn.co/image/ab67616d0000b27394181f...,https://open.spotify.com/track/4PkIDTPGedm0enz...
4,5PYQUBXc7NYeI1obMKSJK0,never really over,Katyperry,0.409091,2019-05-31,Pop,0.733333,0.886254,0.727273,0.781608,...,0.201014,0.313445,0.370975,0.262872,223523,katy perry,"[""i'm"", 'losing', 'my', 'self', 'control', '',...",Katy Perry,https://i.scdn.co/image/ab67616d0000b2739bea3b...,https://open.spotify.com/track/5PYQUBXc7NYeI1o...


### Feature Engineering

#### Formating the lyrics variable

In [15]:
# Converting the strings to lower
df['lyrics'] = df['lyrics'].astype(str).str.lower()

In [16]:
# Replacing the new line to space
df['lyrics'] = df['lyrics'].apply(lambda x : x.replace('\n', ' ').split(' '))

#### Derving Lyric Tags from lyrics column

In [17]:
# Getting correct Lyrics in neat format
def lyrics_extraction(lyrics_list):
    lyr = []
    for i in lyrics_list:
        match = re.match(r"(\W+)([a-zA-Z]+)(\W+)", i) 
        if match:
            lyr.append(match.group(2)) 
    lyr = set(lyr)
    tags = ''
    for word in lyr:
        tags += word+' '
    return tags

df['lyric_tags'] = df['lyrics'].apply(lambda x : lyrics_extraction(x))    

In [18]:
df.head()

Unnamed: 0,track_id,track_name,track_artist,track_popularity,track_album_release_date,playlist_genre,danceability,energy,key,loudness,...,liveness,valence,tempo,duration_ms,track_artist_merged,lyrics,artist_name,image_url,spotify_url,lyric_tags
0,6oJ6le65B3SEqPwMRNXWjY,higher love,Kygo,0.5,2019-06-28,Pop,0.63268,0.667346,0.727273,0.680129,...,0.086004,0.39137,0.290605,228267,kygo,"[['bring',, 'me',, 'higher',, 'love,',, 'love'...",Kygo,https://i.scdn.co/image/ab67616d0000b2737c8977...,https://open.spotify.com/track/6oJ6le65B3SEqPw...,are walk for life turning out need things stan...
1,3yNZ5r3LKfdmjoS3gkhUCT,bad guy (with justin bieber),Billieeilish,0.318182,2019-07-11,Pop,0.602614,0.425904,0.0,0.504094,...,0.10293,0.687634,0.508374,194840,billie eilish,"[['yeah,',, 'yeah',, '',, 'oh,',, 'ah',, '',, ...",Billie Eilish,https://i.scdn.co/image/ab67616d0000b273a69b8b...,https://open.spotify.com/track/3yNZ5r3LKfdmjoS...,mad at think of me teeth chest might criminal ...
2,0qc4QlcCxVTGyShurEv1UU,post malone (feat. rani),Samfeldt,0.318182,2019-05-24,Pop,0.498039,0.628716,0.636364,0.821136,...,0.090236,0.656505,0.314439,174444,sam feldt,"[['one',, 'more',, 'drink,',, 'got',, 'one',, ...",Sam Feldt,https://i.scdn.co/image/ab67616d0000b27354de16...,https://open.spotify.com/track/0qc4QlcCxVTGySh...,tell need are safari like things afterparty we...
3,4PkIDTPGedm0enzdvilLNd,sixteen,Elliegoulding,0.227273,2019-04-12,Pop,0.601307,0.799335,0.727273,0.753612,...,0.041997,0.538429,0.332383,201072,ellie goulding,"[['(sixteen)',, '',, '(sixteen)',, '',, 'do',,...",Ellie Goulding,https://i.scdn.co/image/ab67616d0000b27394181f...,https://open.spotify.com/track/4PkIDTPGedm0enz...,msn intent out life we small give of me couldn...
4,5PYQUBXc7NYeI1obMKSJK0,never really over,Katyperry,0.409091,2019-05-31,Pop,0.733333,0.886254,0.727273,0.781608,...,0.313445,0.370975,0.262872,223523,katy perry,"[[""i'm"",, 'losing',, 'my',, 'self',, 'control'...",Katy Perry,https://i.scdn.co/image/ab67616d0000b2739bea3b...,https://open.spotify.com/track/5PYQUBXc7NYeI1o...,through doesn we think of me cross goodbye do ...


### Derving the track mood lables based on its features

In [19]:
def get_mood_label(row):
    energy = row['energy']
    valence = row['valence']
    tempo = row['tempo']
    danceability = row['danceability']
    acousticness = row['acousticness']
    liveness = row['liveness']

    if energy >= 0.5 and valence >= 0.6 and tempo >= 0.5 and danceability >= 0.5:
        return ["Happy","joyful", "upbeat", "bright", "carefree", "cheerful", "bubbly", "playful", "lively"]
    elif energy <= 0.4 and valence <= 0.3 and tempo <= 0.4:
        return ["gloomy", "heartbroken", "tearful", "lonely", "sorrowful", "nostalgic", "melancholy", "mellow", "Sad"]
    elif energy <= 0.5 and 0.3 <= valence <= 0.6 and tempo <= 0.6 and acousticness >= 0.4:
        return ["Chill", "calm", "soothing", "laid-back", "breezy", "ambient", "tranquil", "soft", "relaxing"]
    elif energy >= 0.6 and valence <= 0.4 and tempo >= 0.6:
        return ["intense", "fierce", "raw", "rebellious", "furious", "aggressive", "loud", "Angry"]
    elif 0.4 <= energy <= 0.8 and 0.4 <= valence <= 0.8 and 0.3 <= tempo <= 0.7:
        return  ["passionate", "affectionate", "dreamy", "tender", "loving", "sentimental", "heartfelt", "longing", "Romantic"]
    elif energy <= 0.4 and valence <= 0.2 and acousticness >= 0.5:
        return ["eerie", "haunting", "mysterious", "brooding", "suspenseful", "cinematic", "shadowy", "Dark"]
    elif energy >= 0.6 and valence >= 0.5 and tempo >= 0.6 and danceability >= 0.5:
        return ["Energetic", "pulsing", "bouncing", "fast-paced", "groovy", "rhythmic", "hype", "club", "vibe"]
    else:
        return ['Mixed']

df['mood'] = df.apply(get_mood_label, axis=1)

In [21]:
df.head()

Unnamed: 0,track_id,track_name,track_artist,track_popularity,track_album_release_date,playlist_genre,danceability,energy,key,loudness,...,valence,tempo,duration_ms,track_artist_merged,lyrics,artist_name,image_url,spotify_url,lyric_tags,mood
0,6oJ6le65B3SEqPwMRNXWjY,higher love,Kygo,0.5,2019-06-28,Pop,0.63268,0.667346,0.727273,0.680129,...,0.39137,0.290605,228267,kygo,"[['bring',, 'me',, 'higher',, 'love,',, 'love'...",Kygo,https://i.scdn.co/image/ab67616d0000b2737c8977...,https://open.spotify.com/track/6oJ6le65B3SEqPw...,are walk for life turning out need things stan...,[Mixed]
1,3yNZ5r3LKfdmjoS3gkhUCT,bad guy (with justin bieber),Billieeilish,0.318182,2019-07-11,Pop,0.602614,0.425904,0.0,0.504094,...,0.687634,0.508374,194840,billie eilish,"[['yeah,',, 'yeah',, '',, 'oh,',, 'ah',, '',, ...",Billie Eilish,https://i.scdn.co/image/ab67616d0000b273a69b8b...,https://open.spotify.com/track/3yNZ5r3LKfdmjoS...,mad at think of me teeth chest might criminal ...,"[passionate, affectionate, dreamy, tender, lov..."
2,0qc4QlcCxVTGyShurEv1UU,post malone (feat. rani),Samfeldt,0.318182,2019-05-24,Pop,0.498039,0.628716,0.636364,0.821136,...,0.656505,0.314439,174444,sam feldt,"[['one',, 'more',, 'drink,',, 'got',, 'one',, ...",Sam Feldt,https://i.scdn.co/image/ab67616d0000b27354de16...,https://open.spotify.com/track/0qc4QlcCxVTGySh...,tell need are safari like things afterparty we...,"[passionate, affectionate, dreamy, tender, lov..."
3,4PkIDTPGedm0enzdvilLNd,sixteen,Elliegoulding,0.227273,2019-04-12,Pop,0.601307,0.799335,0.727273,0.753612,...,0.538429,0.332383,201072,ellie goulding,"[['(sixteen)',, '',, '(sixteen)',, '',, 'do',,...",Ellie Goulding,https://i.scdn.co/image/ab67616d0000b27394181f...,https://open.spotify.com/track/4PkIDTPGedm0enz...,msn intent out life we small give of me couldn...,"[passionate, affectionate, dreamy, tender, lov..."
4,5PYQUBXc7NYeI1obMKSJK0,never really over,Katyperry,0.409091,2019-05-31,Pop,0.733333,0.886254,0.727273,0.781608,...,0.370975,0.262872,223523,katy perry,"[[""i'm"",, 'losing',, 'my',, 'self',, 'control'...",Katy Perry,https://i.scdn.co/image/ab67616d0000b2739bea3b...,https://open.spotify.com/track/5PYQUBXc7NYeI1o...,through doesn we think of me cross goodbye do ...,[Mixed]


### Deriving Track Tags by combing lyric tags, artist name, playlist genre

In [22]:
df['track_tags'] = df['lyric_tags'] + ' ' + df['artist_name'] + ' ' + df['playlist_genre']

In [23]:
df.head()

Unnamed: 0,track_id,track_name,track_artist,track_popularity,track_album_release_date,playlist_genre,danceability,energy,key,loudness,...,tempo,duration_ms,track_artist_merged,lyrics,artist_name,image_url,spotify_url,lyric_tags,mood,track_tags
0,6oJ6le65B3SEqPwMRNXWjY,higher love,Kygo,0.5,2019-06-28,Pop,0.63268,0.667346,0.727273,0.680129,...,0.290605,228267,kygo,"[['bring',, 'me',, 'higher',, 'love,',, 'love'...",Kygo,https://i.scdn.co/image/ab67616d0000b2737c8977...,https://open.spotify.com/track/6oJ6le65B3SEqPw...,are walk for life turning out need things stan...,[Mixed],are walk for life turning out need things stan...
1,3yNZ5r3LKfdmjoS3gkhUCT,bad guy (with justin bieber),Billieeilish,0.318182,2019-07-11,Pop,0.602614,0.425904,0.0,0.504094,...,0.508374,194840,billie eilish,"[['yeah,',, 'yeah',, '',, 'oh,',, 'ah',, '',, ...",Billie Eilish,https://i.scdn.co/image/ab67616d0000b273a69b8b...,https://open.spotify.com/track/3yNZ5r3LKfdmjoS...,mad at think of me teeth chest might criminal ...,"[passionate, affectionate, dreamy, tender, lov...",mad at think of me teeth chest might criminal ...
2,0qc4QlcCxVTGyShurEv1UU,post malone (feat. rani),Samfeldt,0.318182,2019-05-24,Pop,0.498039,0.628716,0.636364,0.821136,...,0.314439,174444,sam feldt,"[['one',, 'more',, 'drink,',, 'got',, 'one',, ...",Sam Feldt,https://i.scdn.co/image/ab67616d0000b27354de16...,https://open.spotify.com/track/0qc4QlcCxVTGySh...,tell need are safari like things afterparty we...,"[passionate, affectionate, dreamy, tender, lov...",tell need are safari like things afterparty we...
3,4PkIDTPGedm0enzdvilLNd,sixteen,Elliegoulding,0.227273,2019-04-12,Pop,0.601307,0.799335,0.727273,0.753612,...,0.332383,201072,ellie goulding,"[['(sixteen)',, '',, '(sixteen)',, '',, 'do',,...",Ellie Goulding,https://i.scdn.co/image/ab67616d0000b27394181f...,https://open.spotify.com/track/4PkIDTPGedm0enz...,msn intent out life we small give of me couldn...,"[passionate, affectionate, dreamy, tender, lov...",msn intent out life we small give of me couldn...
4,5PYQUBXc7NYeI1obMKSJK0,never really over,Katyperry,0.409091,2019-05-31,Pop,0.733333,0.886254,0.727273,0.781608,...,0.262872,223523,katy perry,"[[""i'm"",, 'losing',, 'my',, 'self',, 'control'...",Katy Perry,https://i.scdn.co/image/ab67616d0000b2739bea3b...,https://open.spotify.com/track/5PYQUBXc7NYeI1o...,through doesn we think of me cross goodbye do ...,[Mixed],through doesn we think of me cross goodbye do ...


In [24]:
df['track_tags'][0]

'are walk for life turning out need things standing everywhere we bring fear oh world heart just inside think of be without me feeling and what behind your line where that worlds my this our stars so or hanging above thinking on whole alone try fair hidden facing the mine falling keep must higher yearning love is real look see who in a i could time wasted down bad to yeah about it there someone  Kygo Pop'

### Cleaning the tags

In [26]:
# Cleaning the tags
def clean_tags(text):
    tokenize = word_tokenize(text)
    filtered_words = [word for word in tokenize if word not in stopwords.words('english')]
    return ' '.join(filtered_words)
    
df['track_tags'] = df['track_tags'].apply(lambda x : clean_tags(x))

In [27]:
df.head()

Unnamed: 0,track_id,track_name,track_artist,track_popularity,track_album_release_date,playlist_genre,danceability,energy,key,loudness,...,tempo,duration_ms,track_artist_merged,lyrics,artist_name,image_url,spotify_url,lyric_tags,mood,track_tags
0,6oJ6le65B3SEqPwMRNXWjY,higher love,Kygo,0.5,2019-06-28,Pop,0.63268,0.667346,0.727273,0.680129,...,0.290605,228267,kygo,"[['bring',, 'me',, 'higher',, 'love,',, 'love'...",Kygo,https://i.scdn.co/image/ab67616d0000b2737c8977...,https://open.spotify.com/track/6oJ6le65B3SEqPw...,are walk for life turning out need things stan...,[Mixed],walk life turning need things standing everywh...
1,3yNZ5r3LKfdmjoS3gkhUCT,bad guy (with justin bieber),Billieeilish,0.318182,2019-07-11,Pop,0.602614,0.425904,0.0,0.504094,...,0.508374,194840,billie eilish,"[['yeah,',, 'yeah',, '',, 'oh,',, 'ah',, '',, ...",Billie Eilish,https://i.scdn.co/image/ab67616d0000b273a69b8b...,https://open.spotify.com/track/3yNZ5r3LKfdmjoS...,mad at think of me teeth chest might criminal ...,"[passionate, affectionate, dreamy, tender, lov...",mad think teeth chest might criminal snow know...
2,0qc4QlcCxVTGyShurEv1UU,post malone (feat. rani),Samfeldt,0.318182,2019-05-24,Pop,0.498039,0.628716,0.636364,0.821136,...,0.314439,174444,sam feldt,"[['one',, 'more',, 'drink,',, 'got',, 'one',, ...",Sam Feldt,https://i.scdn.co/image/ab67616d0000b27354de16...,https://open.spotify.com/track/0qc4QlcCxVTGySh...,tell need are safari like things afterparty we...,"[passionate, affectionate, dreamy, tender, lov...",tell need safari like things afterparty post s...
3,4PkIDTPGedm0enzdvilLNd,sixteen,Elliegoulding,0.227273,2019-04-12,Pop,0.601307,0.799335,0.727273,0.753612,...,0.332383,201072,ellie goulding,"[['(sixteen)',, '',, '(sixteen)',, '',, 'do',,...",Ellie Goulding,https://i.scdn.co/image/ab67616d0000b27394181f...,https://open.spotify.com/track/4PkIDTPGedm0enz...,msn intent out life we small give of me couldn...,"[passionate, affectionate, dreamy, tender, lov...",msn intent life small give found stupid someti...
4,5PYQUBXc7NYeI1obMKSJK0,never really over,Katyperry,0.409091,2019-05-31,Pop,0.733333,0.886254,0.727273,0.781608,...,0.262872,223523,katy perry,"[[""i'm"",, 'losing',, 'my',, 'self',, 'control'...",Katy Perry,https://i.scdn.co/image/ab67616d0000b2739bea3b...,https://open.spotify.com/track/5PYQUBXc7NYeI1o...,through doesn we think of me cross goodbye do ...,[Mixed],think cross goodbye takes mess could wan na me...


### Tokenizing the Tags

In [29]:
df['tags_tokenized'] = df['track_tags'].apply(lambda x : x.split())

In [30]:
df.head()

Unnamed: 0,track_id,track_name,track_artist,track_popularity,track_album_release_date,playlist_genre,danceability,energy,key,loudness,...,duration_ms,track_artist_merged,lyrics,artist_name,image_url,spotify_url,lyric_tags,mood,track_tags,tags_tokenized
0,6oJ6le65B3SEqPwMRNXWjY,higher love,Kygo,0.5,2019-06-28,Pop,0.63268,0.667346,0.727273,0.680129,...,228267,kygo,"[['bring',, 'me',, 'higher',, 'love,',, 'love'...",Kygo,https://i.scdn.co/image/ab67616d0000b2737c8977...,https://open.spotify.com/track/6oJ6le65B3SEqPw...,are walk for life turning out need things stan...,[Mixed],walk life turning need things standing everywh...,"[walk, life, turning, need, things, standing, ..."
1,3yNZ5r3LKfdmjoS3gkhUCT,bad guy (with justin bieber),Billieeilish,0.318182,2019-07-11,Pop,0.602614,0.425904,0.0,0.504094,...,194840,billie eilish,"[['yeah,',, 'yeah',, '',, 'oh,',, 'ah',, '',, ...",Billie Eilish,https://i.scdn.co/image/ab67616d0000b273a69b8b...,https://open.spotify.com/track/3yNZ5r3LKfdmjoS...,mad at think of me teeth chest might criminal ...,"[passionate, affectionate, dreamy, tender, lov...",mad think teeth chest might criminal snow know...,"[mad, think, teeth, chest, might, criminal, sn..."
2,0qc4QlcCxVTGyShurEv1UU,post malone (feat. rani),Samfeldt,0.318182,2019-05-24,Pop,0.498039,0.628716,0.636364,0.821136,...,174444,sam feldt,"[['one',, 'more',, 'drink,',, 'got',, 'one',, ...",Sam Feldt,https://i.scdn.co/image/ab67616d0000b27354de16...,https://open.spotify.com/track/0qc4QlcCxVTGySh...,tell need are safari like things afterparty we...,"[passionate, affectionate, dreamy, tender, lov...",tell need safari like things afterparty post s...,"[tell, need, safari, like, things, afterparty,..."
3,4PkIDTPGedm0enzdvilLNd,sixteen,Elliegoulding,0.227273,2019-04-12,Pop,0.601307,0.799335,0.727273,0.753612,...,201072,ellie goulding,"[['(sixteen)',, '',, '(sixteen)',, '',, 'do',,...",Ellie Goulding,https://i.scdn.co/image/ab67616d0000b27394181f...,https://open.spotify.com/track/4PkIDTPGedm0enz...,msn intent out life we small give of me couldn...,"[passionate, affectionate, dreamy, tender, lov...",msn intent life small give found stupid someti...,"[msn, intent, life, small, give, found, stupid..."
4,5PYQUBXc7NYeI1obMKSJK0,never really over,Katyperry,0.409091,2019-05-31,Pop,0.733333,0.886254,0.727273,0.781608,...,223523,katy perry,"[[""i'm"",, 'losing',, 'my',, 'self',, 'control'...",Katy Perry,https://i.scdn.co/image/ab67616d0000b2739bea3b...,https://open.spotify.com/track/5PYQUBXc7NYeI1o...,through doesn we think of me cross goodbye do ...,[Mixed],think cross goodbye takes mess could wan na me...,"[think, cross, goodbye, takes, mess, could, wa..."


In [31]:
df['tags_tokenized'] = df['tags_tokenized'] + df['mood']

### Creating corpus for each track

In [57]:
def create_corpus(tags):
    corpus = ""
    for tag in tags:
        corpus += tag + ' '
    return corpus.strip()
        

In [58]:
df['corpus'] = df['tags_tokenized'].apply(lambda x : create_corpus(x))

In [59]:
df['corpus'][0]

'walk life turning need things standing everywhere bring fear oh world heart inside think without feeling behind line worlds stars hanging thinking whole alone try fair hidden facing mine falling keep must higher yearning love real look see could time wasted bad yeah someone Kygo Pop Mixed'

### Vector Embeding the tags using Word2Vec

In [37]:
# Initialing the word2vec model
model = Word2Vec(df['tags_tokenized'], window=5, min_count=1, vector_size=300, workers=4)

In [38]:
# Getting the vectors for tags
def get_vector(tags_tokenkized, model):
    vectors = [model.wv[word] for word in tags_tokenkized if word in model.wv]
    return np.mean(vectors, axis=0) if vectors else np.zeros(300)

df['vectors'] = df['tags_tokenized'].apply(lambda x  : get_vector(x, model))


In [39]:
df.head()

Unnamed: 0,track_id,track_name,track_artist,track_popularity,track_album_release_date,playlist_genre,danceability,energy,key,loudness,...,lyrics,artist_name,image_url,spotify_url,lyric_tags,mood,track_tags,tags_tokenized,corpus,vectors
0,6oJ6le65B3SEqPwMRNXWjY,higher love,Kygo,0.5,2019-06-28,Pop,0.63268,0.667346,0.727273,0.680129,...,"[['bring',, 'me',, 'higher',, 'love,',, 'love'...",Kygo,https://i.scdn.co/image/ab67616d0000b2737c8977...,https://open.spotify.com/track/6oJ6le65B3SEqPw...,are walk for life turning out need things stan...,[Mixed],walk life turning need things standing everywh...,"[walk, life, turning, need, things, standing, ...",walk life turning need things standing everywh...,"[0.039505977, 0.2558645, -0.018304028, 0.02880..."
1,3yNZ5r3LKfdmjoS3gkhUCT,bad guy (with justin bieber),Billieeilish,0.318182,2019-07-11,Pop,0.602614,0.425904,0.0,0.504094,...,"[['yeah,',, 'yeah',, '',, 'oh,',, 'ah',, '',, ...",Billie Eilish,https://i.scdn.co/image/ab67616d0000b273a69b8b...,https://open.spotify.com/track/3yNZ5r3LKfdmjoS...,mad at think of me teeth chest might criminal ...,"[passionate, affectionate, dreamy, tender, lov...",mad think teeth chest might criminal snow know...,"[mad, think, teeth, chest, might, criminal, sn...",mad think teeth chest might criminal snow know...,"[0.048682086, 0.2147804, -0.01664445, 0.003471..."
2,0qc4QlcCxVTGyShurEv1UU,post malone (feat. rani),Samfeldt,0.318182,2019-05-24,Pop,0.498039,0.628716,0.636364,0.821136,...,"[['one',, 'more',, 'drink,',, 'got',, 'one',, ...",Sam Feldt,https://i.scdn.co/image/ab67616d0000b27354de16...,https://open.spotify.com/track/0qc4QlcCxVTGySh...,tell need are safari like things afterparty we...,"[passionate, affectionate, dreamy, tender, lov...",tell need safari like things afterparty post s...,"[tell, need, safari, like, things, afterparty,...",tell need safari like things afterparty post s...,"[0.063285105, 0.24972455, -0.0179446, -0.02168..."
3,4PkIDTPGedm0enzdvilLNd,sixteen,Elliegoulding,0.227273,2019-04-12,Pop,0.601307,0.799335,0.727273,0.753612,...,"[['(sixteen)',, '',, '(sixteen)',, '',, 'do',,...",Ellie Goulding,https://i.scdn.co/image/ab67616d0000b27394181f...,https://open.spotify.com/track/4PkIDTPGedm0enz...,msn intent out life we small give of me couldn...,"[passionate, affectionate, dreamy, tender, lov...",msn intent life small give found stupid someti...,"[msn, intent, life, small, give, found, stupid...",msn intent life small give found stupid someti...,"[0.054347746, 0.23894775, -0.017965272, -0.006..."
4,5PYQUBXc7NYeI1obMKSJK0,never really over,Katyperry,0.409091,2019-05-31,Pop,0.733333,0.886254,0.727273,0.781608,...,"[[""i'm"",, 'losing',, 'my',, 'self',, 'control'...",Katy Perry,https://i.scdn.co/image/ab67616d0000b2739bea3b...,https://open.spotify.com/track/5PYQUBXc7NYeI1o...,through doesn we think of me cross goodbye do ...,[Mixed],think cross goodbye takes mess could wan na me...,"[think, cross, goodbye, takes, mess, could, wa...",think cross goodbye takes mess could wan na me...,"[0.035179295, 0.27692842, -0.024532344, 0.0429..."


### Taking out numerical cols for scaling

In [41]:
num_cols = [col for col in df.columns if df[col].dtype != 'O']
print(num_cols)

['track_popularity', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'liveness', 'valence', 'tempo', 'duration_ms']


In [42]:
# Removing duration_ms as it may mislead scaling of the entire dataframe
num_cols.remove('duration_ms')

### Scaling the dataframe using MinMaxScaler

In [43]:
# initalizing the scaler
scaler = MinMaxScaler()

In [44]:
# Scaling the num cols
df[num_cols] = scaler.fit_transform(df[num_cols])

In [45]:
df.head()

Unnamed: 0,track_id,track_name,track_artist,track_popularity,track_album_release_date,playlist_genre,danceability,energy,key,loudness,...,lyrics,artist_name,image_url,spotify_url,lyric_tags,mood,track_tags,tags_tokenized,corpus,vectors
0,6oJ6le65B3SEqPwMRNXWjY,higher love,Kygo,0.5,2019-06-28,Pop,0.63268,0.667346,0.727273,0.680129,...,"[['bring',, 'me',, 'higher',, 'love,',, 'love'...",Kygo,https://i.scdn.co/image/ab67616d0000b2737c8977...,https://open.spotify.com/track/6oJ6le65B3SEqPw...,are walk for life turning out need things stan...,[Mixed],walk life turning need things standing everywh...,"[walk, life, turning, need, things, standing, ...",walk life turning need things standing everywh...,"[0.039505977, 0.2558645, -0.018304028, 0.02880..."
1,3yNZ5r3LKfdmjoS3gkhUCT,bad guy (with justin bieber),Billieeilish,0.318182,2019-07-11,Pop,0.602614,0.425904,0.0,0.504094,...,"[['yeah,',, 'yeah',, '',, 'oh,',, 'ah',, '',, ...",Billie Eilish,https://i.scdn.co/image/ab67616d0000b273a69b8b...,https://open.spotify.com/track/3yNZ5r3LKfdmjoS...,mad at think of me teeth chest might criminal ...,"[passionate, affectionate, dreamy, tender, lov...",mad think teeth chest might criminal snow know...,"[mad, think, teeth, chest, might, criminal, sn...",mad think teeth chest might criminal snow know...,"[0.048682086, 0.2147804, -0.01664445, 0.003471..."
2,0qc4QlcCxVTGyShurEv1UU,post malone (feat. rani),Samfeldt,0.318182,2019-05-24,Pop,0.498039,0.628716,0.636364,0.821136,...,"[['one',, 'more',, 'drink,',, 'got',, 'one',, ...",Sam Feldt,https://i.scdn.co/image/ab67616d0000b27354de16...,https://open.spotify.com/track/0qc4QlcCxVTGySh...,tell need are safari like things afterparty we...,"[passionate, affectionate, dreamy, tender, lov...",tell need safari like things afterparty post s...,"[tell, need, safari, like, things, afterparty,...",tell need safari like things afterparty post s...,"[0.063285105, 0.24972455, -0.0179446, -0.02168..."
3,4PkIDTPGedm0enzdvilLNd,sixteen,Elliegoulding,0.227273,2019-04-12,Pop,0.601307,0.799335,0.727273,0.753612,...,"[['(sixteen)',, '',, '(sixteen)',, '',, 'do',,...",Ellie Goulding,https://i.scdn.co/image/ab67616d0000b27394181f...,https://open.spotify.com/track/4PkIDTPGedm0enz...,msn intent out life we small give of me couldn...,"[passionate, affectionate, dreamy, tender, lov...",msn intent life small give found stupid someti...,"[msn, intent, life, small, give, found, stupid...",msn intent life small give found stupid someti...,"[0.054347746, 0.23894775, -0.017965272, -0.006..."
4,5PYQUBXc7NYeI1obMKSJK0,never really over,Katyperry,0.409091,2019-05-31,Pop,0.733333,0.886254,0.727273,0.781608,...,"[[""i'm"",, 'losing',, 'my',, 'self',, 'control'...",Katy Perry,https://i.scdn.co/image/ab67616d0000b2739bea3b...,https://open.spotify.com/track/5PYQUBXc7NYeI1o...,through doesn we think of me cross goodbye do ...,[Mixed],think cross goodbye takes mess could wan na me...,"[think, cross, goodbye, takes, mess, could, wa...",think cross goodbye takes mess could wan na me...,"[0.035179295, 0.27692842, -0.024532344, 0.0429..."


### Getting the num col vectors values

In [46]:
num_col_vectors = df[num_cols].values


### Combining tags vector and num col vectors

In [47]:
def combined_vector(row, num_col_vectors):
    vectors = np.array(row['vectors'])
    return np.concatenate([vectors, num_col_vectors])

In [48]:
df['combined_vector'] = [combined_vector(row, num_col_vectors[i]) for i, row in df.iterrows()]

In [67]:
df.head()

Unnamed: 0,track_id,track_name,track_artist,track_popularity,track_album_release_date,playlist_genre,danceability,energy,key,loudness,...,artist_name,image_url,spotify_url,lyric_tags,mood,track_tags,tags_tokenized,corpus,vectors,combined_vector
0,6oJ6le65B3SEqPwMRNXWjY,higher love,Kygo,0.5,2019-06-28,Pop,0.63268,0.667346,0.727273,0.680129,...,Kygo,https://i.scdn.co/image/ab67616d0000b2737c8977...,https://open.spotify.com/track/6oJ6le65B3SEqPw...,are walk for life turning out need things stan...,[Mixed],walk life turning need things standing everywh...,"[walk, life, turning, need, things, standing, ...",walk life turning need things standing everywh...,"[0.039505977, 0.2558645, -0.018304028, 0.02880...","[0.0395059771835804, 0.2558645009994507, -0.01..."
1,3yNZ5r3LKfdmjoS3gkhUCT,bad guy (with justin bieber),Billieeilish,0.318182,2019-07-11,Pop,0.602614,0.425904,0.0,0.504094,...,Billie Eilish,https://i.scdn.co/image/ab67616d0000b273a69b8b...,https://open.spotify.com/track/3yNZ5r3LKfdmjoS...,mad at think of me teeth chest might criminal ...,"[passionate, affectionate, dreamy, tender, lov...",mad think teeth chest might criminal snow know...,"[mad, think, teeth, chest, might, criminal, sn...",mad think teeth chest might criminal snow know...,"[0.048682086, 0.2147804, -0.01664445, 0.003471...","[0.048682086169719696, 0.21478040516376495, -0..."
2,0qc4QlcCxVTGyShurEv1UU,post malone (feat. rani),Samfeldt,0.318182,2019-05-24,Pop,0.498039,0.628716,0.636364,0.821136,...,Sam Feldt,https://i.scdn.co/image/ab67616d0000b27354de16...,https://open.spotify.com/track/0qc4QlcCxVTGySh...,tell need are safari like things afterparty we...,"[passionate, affectionate, dreamy, tender, lov...",tell need safari like things afterparty post s...,"[tell, need, safari, like, things, afterparty,...",tell need safari like things afterparty post s...,"[0.063285105, 0.24972455, -0.0179446, -0.02168...","[0.06328510493040085, 0.24972455203533173, -0...."
3,4PkIDTPGedm0enzdvilLNd,sixteen,Elliegoulding,0.227273,2019-04-12,Pop,0.601307,0.799335,0.727273,0.753612,...,Ellie Goulding,https://i.scdn.co/image/ab67616d0000b27394181f...,https://open.spotify.com/track/4PkIDTPGedm0enz...,msn intent out life we small give of me couldn...,"[passionate, affectionate, dreamy, tender, lov...",msn intent life small give found stupid someti...,"[msn, intent, life, small, give, found, stupid...",msn intent life small give found stupid someti...,"[0.054347746, 0.23894775, -0.017965272, -0.006...","[0.054347746074199677, 0.23894774913787842, -0..."
4,5PYQUBXc7NYeI1obMKSJK0,never really over,Katyperry,0.409091,2019-05-31,Pop,0.733333,0.886254,0.727273,0.781608,...,Katy Perry,https://i.scdn.co/image/ab67616d0000b2739bea3b...,https://open.spotify.com/track/5PYQUBXc7NYeI1o...,through doesn we think of me cross goodbye do ...,[Mixed],think cross goodbye takes mess could wan na me...,"[think, cross, goodbye, takes, mess, could, wa...",think cross goodbye takes mess could wan na me...,"[0.035179295, 0.27692842, -0.024532344, 0.0429...","[0.035179294645786285, 0.2769284248352051, -0...."


In [50]:
df.to_csv("combined_vector.csv", index=False)

### Stacking up Combined Vectors to get a matrix

In [51]:
combined_vector_matrix = np.vstack(df['combined_vector'].values)

In [52]:
combined_vector_matrix.shape

(900, 311)

### Perfoming Cosine Similarity on combined vector matrix

In [53]:
similarity_matrix = cosine_similarity(combined_vector_matrix, combined_vector_matrix)

In [54]:
similarity_matrix.shape

(900, 900)

### Building the Recommendation Engine

In [68]:
def recommend_by_embedding_similarity(input_text, df, top_n=10):
    # Combine tags + lyrics to form the corpus
    total_corpus = df['corpus'].tolist()
    total_corpus.insert(0, input_text)  # Add input text as the first entry

    # TF-IDF Vectorization
    tfidf = TfidfVectorizer()
    tfidf_matrix = tfidf.fit_transform(total_corpus).toarray()

    # Compute cosine similarity: input vs. all others
    input_vector = tfidf_matrix[0].reshape(1, -1)
    corpus_vectors = tfidf_matrix[1:]

    similarities = cosine_similarity(input_vector, corpus_vectors)[0]
    top_indices = similarities.argsort()[-top_n:][::-1]

    # Collect top results
    recommendations = df.iloc[top_indices][['track_name', 'artist_name', 'spotify_url', 'image_url']]
    recommendations['similarity_score'] = similarities[top_indices]
    return recommendations



In [69]:
# Run the recommender
recommendations = recommend_by_embedding_similarity("I feel like dancing", df)

In [70]:
recommendations

Unnamed: 0,track_name,artist_name,spotify_url,image_url,similarity_score
121,cheap thrills,Sia,https://open.spotify.com/track/27SdWb2rFzO6GWi...,https://i.scdn.co/image/ab67616d0000b27349e013...,0.193469
66,dancing with a stranger (with normani),Sam Smith,https://open.spotify.com/track/6Qs4SXO9dwPj5GK...,https://i.scdn.co/image/ab67616d0000b2733b52ec...,0.149983
403,there's no way (feat. julia michaels),Lauv,https://open.spotify.com/track/2hnxrRNzF74mdDz...,https://i.scdn.co/image/ab67616d0000b273b67fed...,0.143241
18,solo (feat. demi lovato),Clean Bandit,https://open.spotify.com/track/6kPJZM97LwdG9QI...,https://i.scdn.co/image/ab67616d0000b27367eda2...,0.141781
154,don't start now,Dua Lipa,https://open.spotify.com/track/6WrI0LAC5M1Rw2M...,https://i.scdn.co/image/ab67616d0000b273c35ea6...,0.132466
298,water fountain,Alec Benjamin,https://open.spotify.com/track/4IhKLu7Vk3j2TLm...,https://i.scdn.co/image/ab67616d0000b273459d67...,0.129259
402,younger,Jonas Blue,https://open.spotify.com/track/4D7ERaKgv8NAeck...,https://i.scdn.co/image/ab67616d0000b273200f52...,0.129194
12,no sleep (feat. bonn),Martin Garrix,https://open.spotify.com/track/1ahVFh0ViDZr8Lv...,https://i.scdn.co/image/ab67616d0000b273873e1c...,0.120478
22,promises (with sam smith),Calvin Harris,https://open.spotify.com/track/5N5k9nd479b1xpD...,https://i.scdn.co/image/ab67616d0000b273ccdcbd...,0.117815
261,2002,Anne-Marie,https://open.spotify.com/track/2BgEsaKNfHUdlh9...,https://i.scdn.co/image/ab67616d0000b27338aae7...,0.115847


In [72]:
df.to_csv("vibetune.csv", index=False)