#### Importing Libraries 

In [110]:
#Data Manipulation 
#===============================================
import numpy as np 
import pandas as pd 
#Regex 
#===============================================
import re 
#NLTK
#===============================================
import nltk
from nltk.stem.porter import PorterStemmer
#nltk.download('punkt')

#Vectorizer
#===============================================
from sklearn.feature_extraction.text import TfidfVectorizer

#similarity_score
#===============================================
from sklearn.metrics.pairwise import cosine_similarity

#Pickel
#===============================================
import pickle

#### Read Data

In [89]:
df=pd.read_csv('E:/My_Project/CodeAlpha/Task 1/My_Data/spotify_millsongdata.csv')

In [90]:
df.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [91]:
df.tail()

Unnamed: 0,artist,song,link,text
57645,Ziggy Marley,Good Old Days,/z/ziggy+marley/good+old+days_10198588.html,Irie days come on play \r\nLet the angels fly...
57646,Ziggy Marley,Hand To Mouth,/z/ziggy+marley/hand+to+mouth_20531167.html,Power to the workers \r\nMore power \r\nPowe...
57647,Zwan,Come With Me,/z/zwan/come+with+me_20148981.html,all you need \r\nis something i'll believe \...
57648,Zwan,Desire,/z/zwan/desire_20148986.html,northern star \r\nam i frightened \r\nwhere ...
57649,Zwan,Heartsong,/z/zwan/heartsong_20148991.html,come in \r\nmake yourself at home \r\ni'm a ...


In [92]:
df.shape

(57650, 4)

In [93]:
df.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [94]:
df=df.sample(10000).drop(columns=['link'],axis=1).reset_index(drop=True)

In [95]:
df.head()

Unnamed: 0,artist,song,text
0,Snoop Dogg,I'm Threw Witchu,"[Snoop Dogg] \r\nYeah man, this is another 9 ..."
1,Bruno Mars,Madly In Love With You,Madly In Love With You \r\n \r\nSee you down...
2,Chicago,Christmas Time Is Here,Christmas time is here \r\nHappiness and chee...
3,Boney M.,"Young, Free And Single",Radio speaker: (not on album version) \r\nWel...
4,INXS,Doctor,We're all running fast tonight \r\nRocket sho...


In [96]:
df['text'][0]

"[Snoop Dogg]  \r\nYeah man, this is another 9 inch dick classic  \r\nWe want to dedicate this one, to all the fellas  \r\nWho got a hard head bitch, who just won't listen  \r\nWhy'know? The best thing to do, is leave her  \r\nDon't hit her, shake that ho (bitch I'm gone!)  \r\n  \r\n[Chorus: Soopafly]  \r\nBitch I'm gone, I can't take it no mo'  \r\nI can't get through to you  \r\nThere's nothing I can say that you seem to understand  \r\nBitch I'm gone, I'm out oh yeah yeah yeah  \r\nI'm through witchu  \r\nOh I'm through witchu, yeah  \r\n  \r\n[Soopafly]  \r\nI knew that you, was a ho back then  \r\nAnd bitch you're still one now  \r\nYa like to bust, nuts in your face  \r\nBut with that big-ass mouth  \r\nYou need to shut the fuck up, you talk too much  \r\nI told you once before, yeah  \r\nThat pimping don't, like to tell you twice  \r\nSo I'm walking out that door  \r\n  \r\n[Chorus]  \r\n  \r\n[Soopafly]  \r\n(Bitch I'm gone) Bitch I'm through witchu  \r\nI'm through witchu, ca


#### Text Preprocessing 

##### Extract the Lower Case and apply some regex 

In [97]:
df['text']=df['text'].str.lower().replace(r'^\w\s','').replace(r'\n',' ',regex=True)#\w : word \s : space 

In [98]:
df.tail()

Unnamed: 0,artist,song,text
9995,Loretta Lynn,Get What'cha Got And Go,a pretty boy charlie's the name that you've be...
9996,Stevie Wonder,Part-Time Lover,"call up, ring once, hang up the phone \r to l..."
9997,Neil Young,Barstool Blues,if i could hold on \r to just one thought \r...
9998,Vince Gill,Don't Pretend With Me,"they say you'll lie, they say you'll cheat \r..."
9999,Weezer,In The Garage,i've got a dungeon master's guide \r i've got...


#### Steamming and Text Vectorization  

In [99]:
stemmer=PorterStemmer()


In [100]:
def token(txt):
    token=nltk.word_tokenize(txt)
    a=[stemmer.stem(w) for w in token]
    return " ".join(a)

In [101]:
df['text']=df['text'].apply(lambda x:token(x))

In [102]:
tfid=TfidfVectorizer(analyzer='word',stop_words='english')

In [103]:
matrix=tfid.fit_transform(df['text'])

In [104]:
similar=cosine_similarity(matrix)

In [105]:
similar[0]

array([1.        , 0.02933386, 0.0091598 , ..., 0.03304134, 0.03433584,
       0.0660733 ])

In [107]:
df[df['song']=='Part-Time Lover'].index[0]

9996

##### Recommender Function  

```python
distance = sorted(
    list(enumerate(similar[idx])),
    reverse=True,
    key=lambda x: x[1]
)

The line of code calculates the similarity distance between the input song and all other songs in the dataset. 

1. `similar` seems to be a 2D array or a list of lists, where `similar[idx]` retrieves the similarity scores of the input song with all other songs. 

2. The `enumerate` function adds a counter to an iterable and returns it as an enumerate object. In this case, it enumerates over the similarity scores.

3. The `sorted` function then sorts these similarities in descending order. The `reverse=True` parameter indicates that the sorting should be in descending order.

4. The `key=lambda x: x[1]` parameter specifies that the sorting should be based on the second element of each tuple within the list (which represents the similarity score).


In [108]:
def recommender(song_name):
    idx=df[df['song']==song_name].index[0] #### check the id of each song 
    distance=sorted(list(enumerate(similar[idx])) , reverse=True, key=lambda x:x[1]) #### Distance Calculation 
    song=[]
    for s_id in distance[1:5]:     ### see the top 5 
        song.append(df.iloc[s_id[0]].song)
    return song    


In [109]:
recommender('Part-Time Lover')

['Lover Come Back', 'Like Lovers Do', 'No Money Down', 'Lovers On The Sun']

In [111]:
pickle.dump(similar,open("similarity",'wb'))

In [112]:
pickle.dump(df,open("df",'wb'))