In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [5]:
df=pd.read_csv('songdata.csv')
df.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \nAnd..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \nTouch me gentl..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \nWhy I had t...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [6]:
df.shape

(57650, 4)

In [8]:
df=df.sample(n=500)

In [9]:
df.drop(columns=['link'],axis=1)

Unnamed: 0,artist,song,text
13000,Michael Buble,Stardust,And now the purple dusk of twilight time \nSt...
45439,Neil Young,Human Highway,I come down from the misty mountain \nI got l...
33130,Free,Travellin' In Style,Well the train I ride \nIs leaving the statio...
52507,Styx,The Grove Of Eglantine,Hey you there \nCast an eye this way \nYou w...
34351,Gordon Lightfoot,Stay Loose,If you've got a heartache \nOf the kind that ...
...,...,...,...
3526,Creedence Clearwater Revival,Looking Out My Back Door,Just got home from Illinois locked the front d...
28860,David Allan Coe,Love Is Just A Porpoise (Playing In The Tropic...,Everybody knows that \nI've had my share of f...
37470,Janis Joplin,Ball And Chain ( In Album Monterey Internation...,"We got one more song, uh, it's called Love Is ..."
52972,The Script,Give The Love Around,"To your brother, to your sister, to your misse..."


In [10]:
df=df.drop(columns=['link'],axis=1).reset_index(drop=True)

In [11]:
df['artist']

Unnamed: 0,artist
0,Michael Buble
1,Neil Young
2,Free
3,Styx
4,Gordon Lightfoot
...,...
495,Creedence Clearwater Revival
496,David Allan Coe
497,Janis Joplin
498,The Script


In [12]:
df['song']

Unnamed: 0,song
0,Stardust
1,Human Highway
2,Travellin' In Style
3,The Grove Of Eglantine
4,Stay Loose
...,...
495,Looking Out My Back Door
496,Love Is Just A Porpoise (Playing In The Tropic...
497,Ball And Chain ( In Album Monterey Internation...
498,Give The Love Around


**Data** **Cleaning**

In [13]:
df['song'][0]

'Stardust'

In [14]:
df['text'][0]

"And now the purple dusk of twilight time  \nSteals across the meadows of my heart  \nHigh up in the sky the little stars climb  \nAlways reminding me that we're apart  \n  \nYou wandered down the lane and far away  \nLeaving me a song that would not die  \nLove is now the stardust of yesterday  \nThe music of the years gone by  \n  \nSometimes I wonder why I spend  \nThe lonely nights dreaming of a song  \nThe melody haunts my reverie  \nAnd I am once again with you  \n  \nWhen our love was new  \nAnd each kiss an inspiration  \nBut that was long ago  \nAnd now my consolation  \nIs in the stardust of a song  \n  \nAnd beside a garden wall  \nWhen stars are bright  \nYou were in my arms  \nNightingale tells it's fairy tale  \nOf paradise where roses grew  \n  \nThough I dream in vain  \nIn my heart it always will remain  \nMy stardust melody  \nThe memory of love's refrain  \n  \nWhen our love was new  \nAnd each kiss an inspiration  \nOh, but that was long ago  \nAnd now my consolatio

In [15]:
df['text']=df['text'].str.lower().str.replace(r'[^a-zA-Z0-9]','').replace(r'\n',' ',regex=True)

In [16]:
df['text'][0]

"and now the purple dusk of twilight time   steals across the meadows of my heart   high up in the sky the little stars climb   always reminding me that we're apart      you wandered down the lane and far away   leaving me a song that would not die   love is now the stardust of yesterday   the music of the years gone by      sometimes i wonder why i spend   the lonely nights dreaming of a song   the melody haunts my reverie   and i am once again with you      when our love was new   and each kiss an inspiration   but that was long ago   and now my consolation   is in the stardust of a song      and beside a garden wall   when stars are bright   you were in my arms   nightingale tells it's fairy tale   of paradise where roses grew      though i dream in vain   in my heart it always will remain   my stardust melody   the memory of love's refrain      when our love was new   and each kiss an inspiration   oh, but that was long ago   and now my consolation   is in the stardust of a song   

**Tokenization**

In [17]:
import nltk
from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()

def tokenization(txt):
    tokens = nltk.word_tokenize(txt)
    stemming = [stemmer.stem(w) for w in tokens]
    return " ".join(stemming)


In [18]:
tokenization('this is my project')

'thi is my project'

In [19]:
df['text'] = df['text'].apply(lambda x: tokenization(x))

In [20]:
df['text']

Unnamed: 0,text
0,and now the purpl dusk of twilight time steal ...
1,i come down from the misti mountain i got lost...
2,well the train i ride is leav the station and ...
3,hey you there cast an eye thi way you with tha...
4,if you 've got a heartach of the kind that wil...
...,...
495,just got home from illinoi lock the front door...
496,everybodi know that i 've had my share of fair...
497,"we got one more song , uh , it 's call love is..."
498,"to your brother , to your sister , to your mis..."


In [21]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [22]:
tfidvector = TfidfVectorizer(analyzer='word',stop_words='english')
matrix = tfidvector.fit_transform(df['text'])
similarity = cosine_similarity(matrix)

In [23]:
similarity[0]

array([1.00000000e+00, 0.00000000e+00, 2.07985232e-02, 2.21594525e-02,
       3.44389876e-02, 1.88021246e-02, 2.02312000e-02, 9.25017806e-03,
       2.62236542e-02, 3.71948890e-02, 2.42868507e-03, 1.45165185e-02,
       1.86863671e-02, 4.43483111e-03, 4.25409841e-02, 9.40531088e-02,
       2.25343235e-03, 4.89028290e-02, 2.59391038e-03, 3.55738716e-02,
       4.79368198e-03, 8.64595185e-02, 1.00215905e-02, 1.34727909e-01,
       3.50254214e-02, 4.11132630e-02, 8.82874079e-02, 9.17226413e-04,
       8.69597121e-02, 8.53316230e-03, 3.10316615e-02, 2.47580989e-02,
       7.27961420e-03, 2.22102925e-02, 7.99091207e-02, 1.04619325e-01,
       3.02618486e-02, 4.59088383e-02, 1.44808523e-01, 5.59457300e-02,
       6.57930710e-02, 5.27261675e-02, 7.21210149e-02, 1.62278231e-02,
       1.12285230e-02, 3.58474371e-03, 4.06934702e-02, 4.47493382e-02,
       3.67357228e-02, 5.47731295e-02, 5.45644252e-02, 7.02801869e-03,
       8.68317296e-03, 1.83753042e-02, 2.22423078e-02, 1.51146219e-02,
      

In [26]:
df[df['song']=='Release your Love']

Unnamed: 0,artist,song,text


**Recommendation System**

In [40]:
def recommendation(song_df):
    # Check if the song exists in the DataFrame
    if df[df['song'] == song_df].empty:
        return f"Song {song_df} not found in the dataset."

    idx = df[df['song'] == song_df].index[0]
    distances = sorted(list(enumerate(similarity[idx])),reverse=True,key=lambda x:x[1])

    songs = []
    for m_id in distances[1:21]:#user recommend 20 songs
        songs.append(df.iloc[m_id[0]].song)

    return songs

In [43]:
recommendation('Stardust')

['And I Love Her',
 'Always Now',
 'Rose Rose I Love You',
 'Even Then',
 'But You Know I Love You',
 'Going, Going, Gone',
 'The Arms Of The One Who Loves You',
 'All Over The World',
 'The Seer',
 'Live For The One I Love',
 'Never Get Over You',
 'I Can Almost Hear Her Wings',
 'I Miss You So Much',
 'La Vie En Rose',
 'Just Out Of Reach (Of My Two Empty Arms)',
 'At Last',
 'The Carnival Is Over',
 'Faith',
 'Old Tin Star',
 'I Want To Spend My Lifetime Loving You']

In [44]:
import pickle
pickle.dump(similarity,open('similarity.pkl','wb'))
pickle.dump(df,open('df.pkl','wb'))