# music recommendation system

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('songdata.csv')

In [3]:
df.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \nAnd..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \nTouch me gentl..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \nWhy I had t...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [4]:
df = df[['artist','song','text']]

In [5]:
df.shape

(57650, 3)

In [6]:
df.head()

Unnamed: 0,artist,song,text
0,ABBA,Ahe's My Kind Of Girl,"Look at her face, it's a wonderful face \nAnd..."
1,ABBA,"Andante, Andante","Take it easy with me, please \nTouch me gentl..."
2,ABBA,As Good As New,I'll never know why I had to go \nWhy I had t...
3,ABBA,Bang,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,Making somebody happy is a question of give an...


In [13]:
df = df.sample(n=5000,).reset_index(drop=True)

In [14]:
df.head()

Unnamed: 0,artist,song,text
0,Erasure,The Soldier's Return,Slow the years go by \nThey stole your man of...
1,Chuck Berry,Jo Jo Gunne,"It was in ancient history, four thousand B.C. ..."
2,Bing Crosby,It Had To Be You,Why do I do just as you say? \nWhy must I jus...
3,Steely Dan,Let George Do It,What's this fierce new change? \nHas your jug...
4,John Waite,I'm So Lonesome I Could Cry,Hear the lonesome whippoorwill \nHe sounds to...


In [15]:
df['text'][0]

"Slow the years go by  \nThey stole your man off to war  \nWill you see the child  \nAs you sing battle songs?  \n  \nAnd it pains me  \nTo think of the soldier's plight  \nI will pray  \nFor your god to guide you home  \n  \nAnd she cries for the soldier's return  \nDespair at feeling alone for so long  \nBy the light of the candle that burns  \nFor his life for the day of the soldier's return  \n  \nHear the cannon fire  \nLost voices echo in the night  \nSee the spoils of war  \nYoung men are falling, ooh  \n  \nHear the drummer  \nMarching over the hill  \nLove returning  \nAnd the graves of the battle lie still  \n  \nAnd she cries for the soldier's return  \nDespair at feeling alone for so long  \nBy the light of the candle that burns  \nFor his life for the day of the soldier's return  \n  \nHear the drummer  \nMarching over the hill  \nLove returning  \nAnd the graves of the battle lie still  \n  \nAnd she cries for the soldier's return  \nDespair at feeling alone for so long  

In [18]:
df['text'] = df['text'].str.lower().replace(r'\w\s','').replace(r'\n','',regex=True)

In [19]:
df['text']

0       slow the years go by  they stole your man off ...
1       it was in ancient history, four thousand b.c. ...
2       why do i do just as you say?  why must i just ...
3       what's this fierce new change?  has your jugge...
4       hear the lonesome whippoorwill  he sounds too ...
                              ...                        
4995    tonight's the night we're gonna make it happen...
4996    go woman go go woman go  there you stand to te...
4997    she knows about me  i heard about your secret ...
4998    i remember when  i was five and you were ten, ...
4999    i love you honey but i hate your friends  i lo...
Name: text, Length: 5000, dtype: object

In [21]:
import string
punct = string.punctuation

In [22]:
def remove_punc(text):
    return text.translate(str.maketrans('','',punct))

In [24]:
df['text'] = df['text'].apply(remove_punc)

# spell correct

In [25]:
from textblob import TextBlob

In [29]:
from nltk.stem.porter import PorterStemmer

In [30]:
ps = PorterStemmer()

In [31]:
def stem_words(text):
    return " ".join([ps.stem(word) for word in text.split()])

In [32]:
df['text'] = df['text'].apply(stem_words)

In [33]:
df['text']

0       slow the year go by they stole your man off to...
1       it wa in ancient histori four thousand bc back...
2       whi do i do just as you say whi must i just gi...
3       what thi fierc new chang ha your juggernaut re...
4       hear the lonesom whippoorwil he sound too blue...
                              ...                        
4995    tonight the night were gonna make it happen to...
4996    go woman go go woman go there you stand to tel...
4997    she know about me i heard about your secret li...
4998    i rememb when i wa five and you were ten boy y...
4999    i love you honey but i hate your friend i love...
Name: text, Length: 5000, dtype: object

In [34]:
from nltk.tokenize import word_tokenize,sent_tokenize

In [35]:
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [36]:
def tokenization(txt):
    tokens = nltk.word_tokenize(txt)
    return " ".join(tokens)

In [39]:
df['text'].apply(lambda x:tokenization(x))

0       slow the year go by they stole your man off to...
1       it wa in ancient histori four thousand bc back...
2       whi do i do just as you say whi must i just gi...
3       what thi fierc new chang ha your juggernaut re...
4       hear the lonesom whippoorwil he sound too blue...
                              ...                        
4995    tonight the night were gon na make it happen t...
4996    go woman go go woman go there you stand to tel...
4997    she know about me i heard about your secret li...
4998    i rememb when i wa five and you were ten boy y...
4999    i love you honey but i hate your friend i love...
Name: text, Length: 5000, dtype: object

In [38]:
import nltk
from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()

def tokenization(txt):
    tokens = nltk.word_tokenize(txt)
    stemming = [stemmer.stem(w) for w in tokens]
    return " ".join(stemming)


In [40]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [41]:
tfidvector = TfidfVectorizer(analyzer='word',stop_words='english')
matrix = tfidvector.fit_transform(df['text'])
similarity = cosine_similarity(matrix)

In [42]:
similarity[0]

array([1.        , 0.01185734, 0.0114893 , ..., 0.01240307, 0.05243931,
       0.02486694])

In [43]:
def recommendation(song_df):
    idx = df[df['song'] == song_df].index[0]
    distances = sorted(list(enumerate(similarity[idx])),reverse=True,key=lambda x:x[1])
    
    songs = []
    for m_id in distances[1:21]:
        songs.append(df.iloc[m_id[0]].song)
        
    return songs

In [45]:
recommendation('Goodbye Highway')

['Goodbye Girl',
 'Goodbye',
 "It's Just Good-Bye",
 'Never Can Say Goodbye',
 'Friends Never Say Goodbye',
 'Goodbye Girl',
 'Hello Goodbye',
 'Goodbye Again',
 'Another Last Goodbye',
 'Down To Our Last Goodbye',
 'You Are Not Alone',
 "You're Kind",
 'Key To The Highway',
 'Alma Mater',
 "Rockin' Down The Highway",
 'Key To The Highway',
 'Mother',
 "Don't Say Goodbye Again",
 'Highway 51',
 'Say Goodbye']

In [46]:
import pickle
pickle.dump(similarity,open('similarity.pkl','wb'))
pickle.dump(df,open('df.pkl','wb'))