# Music Recommendation System

## Import Libraries

In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## Load & Read the Data

In [2]:
ds=pd.read_csv('spotify_millsongdata.csv')

In [3]:
ds.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 57650 entries, 0 to 57649
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   artist  57650 non-null  object
 1   song    57650 non-null  object
 2   link    57650 non-null  object
 3   text    57650 non-null  object
dtypes: object(4)
memory usage: 1.8+ MB


In [4]:
ds.shape

(57650, 4)

In [5]:
ds.head(3)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...


In [6]:
ds.tail(3)

Unnamed: 0,artist,song,link,text
57647,Zwan,Come With Me,/z/zwan/come+with+me_20148981.html,all you need \r\nis something i'll believe \...
57648,Zwan,Desire,/z/zwan/desire_20148986.html,northern star \r\nam i frightened \r\nwhere ...
57649,Zwan,Heartsong,/z/zwan/heartsong_20148991.html,come in \r\nmake yourself at home \r\ni'm a ...


In [7]:
ds=ds.drop_duplicates()

In [8]:
ds.shape

(57650, 4)

In [9]:
ds.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [10]:
ds_2 = ds.sample(20000).drop(['link'], axis=1).reset_index(drop=True)

In [11]:
ds

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...
...,...,...,...,...
57645,Ziggy Marley,Good Old Days,/z/ziggy+marley/good+old+days_10198588.html,Irie days come on play \r\nLet the angels fly...
57646,Ziggy Marley,Hand To Mouth,/z/ziggy+marley/hand+to+mouth_20531167.html,Power to the workers \r\nMore power \r\nPowe...
57647,Zwan,Come With Me,/z/zwan/come+with+me_20148981.html,all you need \r\nis something i'll believe \...
57648,Zwan,Desire,/z/zwan/desire_20148986.html,northern star \r\nam i frightened \r\nwhere ...


## Data Preprocessing

In [12]:
ds_2

Unnamed: 0,artist,song,text
0,Erasure,Video Killed The Radio Star,I heard you on the wireless back in '52 \r\nL...
1,Primus,Green Ranger,I took a ride with the Green Ranger \r\nHe st...
2,Red Hot Chili Peppers,Get Up And Jump,"Get up and jump, get up and jump, get up, get ..."
3,Yello,Koladi-Ola,Koladi-Ola \r\nKoladi-Ola \r\n \r\nI'm gonn...
4,Bonnie Raitt,I Can't Make You Love Me,Turn down the lights \r\nTurn down the bed \...
...,...,...,...
19995,Michael Bolton,How Can We Be Lovers If We Can't Be Friends,How can we be lovers if we can't be friends? ...
19996,Paul McCartney,Dance Tonight,Everybody gonna dance tonight \r\nEverybody g...
19997,Celine Dion,I Love You,I must be crazy now \r\nMaybe I dream too muc...
19998,Lionel Richie,Reason To Believe,Everybody's talking loud \r\nTryin' to get me...


In [13]:
ds_2['song']

0                        Video Killed The Radio Star
1                                       Green Ranger
2                                    Get Up And Jump
3                                         Koladi-Ola
4                           I Can't Make You Love Me
                            ...                     
19995    How Can We Be Lovers If We Can't Be Friends
19996                                  Dance Tonight
19997                                     I Love You
19998                              Reason To Believe
19999                           Lord Byron's Luggage
Name: song, Length: 20000, dtype: object

In [14]:
ds_2['text']

0        I heard you on the wireless back in '52  \r\nL...
1        I took a ride with the Green Ranger  \r\nHe st...
2        Get up and jump, get up and jump, get up, get ...
3        Koladi-Ola  \r\nKoladi-Ola  \r\n  \r\nI'm gonn...
4        Turn down the lights  \r\nTurn down the bed  \...
                               ...                        
19995    How can we be lovers if we can't be friends?  ...
19996    Everybody gonna dance tonight  \r\nEverybody g...
19997    I must be crazy now  \r\nMaybe I dream too muc...
19998    Everybody's talking loud  \r\nTryin' to get me...
19999    Lord Byron had a lot of luggage  \r\nHe took i...
Name: text, Length: 20000, dtype: object

In [15]:
import re

In [16]:
def clean_text(text):
    text = text.lower()  
    text = re.sub(r'\s+', ' ', text)  
    text = re.sub(r'\n', ' ', text)  
    return text

In [17]:
ds_2['text'] = ds_2['text'].apply(clean_text)

In [18]:
ds_2['text']

0        i heard you on the wireless back in '52 lying ...
1        i took a ride with the green ranger he stepped...
2        get up and jump, get up and jump, get up, get ...
3        koladi-ola koladi-ola i'm gonna making you wan...
4        turn down the lights turn down the bed turn do...
                               ...                        
19995    how can we be lovers if we can't be friends? h...
19996    everybody gonna dance tonight everybody gonna ...
19997    i must be crazy now maybe i dream too much but...
19998    everybody's talking loud tryin' to get me to j...
19999    lord byron had a lot of luggage he took it whe...
Name: text, Length: 20000, dtype: object

In [19]:
import nltk
from nltk.tokenize import word_tokenize
nltk.download('punkt')
from nltk.stem.porter import PorterStemmer

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\notsu\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [20]:
Steamer = PorterStemmer()
def tokenize(input_text):
    tokens = word_tokenize(input_text)  
    stemmed_tokens = [Steamer.stem(token) for token in tokens]  
    return " ".join(stemmed_tokens)

In [21]:
ds_2['text'] = ds_2['text'].apply(tokenize)

In [22]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [23]:
tf_id=TfidfVectorizer(analyzer='word',stop_words='english')

In [24]:
transformer =tf_id.fit_transform(ds_2['text'])

In [25]:
transformer

<20000x34916 sparse matrix of type '<class 'numpy.float64'>'
	with 1101464 stored elements in Compressed Sparse Row format>

In [26]:
from sklearn.metrics.pairwise import cosine_similarity

In [27]:
similarty = cosine_similarity(transformer)

In [28]:
similarty

array([[1.        , 0.01052689, 0.00552069, ..., 0.01859936, 0.01023498,
        0.00860714],
       [0.01052689, 1.        , 0.00429076, ..., 0.01938396, 0.04122788,
        0.02120552],
       [0.00552069, 0.00429076, 1.        , ..., 0.02434142, 0.0219603 ,
        0.02233728],
       ...,
       [0.01859936, 0.01938396, 0.02434142, ..., 1.        , 0.10769113,
        0.11117112],
       [0.01023498, 0.04122788, 0.0219603 , ..., 0.10769113, 1.        ,
        0.0908156 ],
       [0.00860714, 0.02120552, 0.02233728, ..., 0.11117112, 0.0908156 ,
        1.        ]])

In [29]:
def recommender(text):
    
    text_vector = tf_id.transform([text])
    
    cosine_similarities = cosine_similarity(text_vector, transformer).flatten()
     
    similar_indices = cosine_similarities.argsort()[-11:][::-1]  
    
    song_names = ds_2.iloc[similar_indices[1:]]['song'].tolist()  
    return song_names

In [36]:
input_text = input("Enter Some Text: ")
recommendations = recommender(input_text)
print(recommendations)

Enter Some Text:  Boom


['A Place To Crash', 'Super Bass', 'Endless Love', 'Solsbury Hill', 'Boom Boom', 'Power', 'My Man', 'Hoedown Throwdown', 'Chick-A-Boom', 'Bee Song']


## Files

In [33]:
import pickle

In [34]:
pickle.dump(similarty,open("similarty","wb"))

In [35]:
pickle.dump(ds_2,open("ds_2","wb"))