In [31]:
import pandas as pd
import nltk

In [32]:
df = pd.read_csv("spotify_millsongdata.csv")

In [33]:
df.head(5)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [34]:
df.tail(5)

Unnamed: 0,artist,song,link,text
57645,Ziggy Marley,Good Old Days,/z/ziggy+marley/good+old+days_10198588.html,Irie days come on play \r\nLet the angels fly...
57646,Ziggy Marley,Hand To Mouth,/z/ziggy+marley/hand+to+mouth_20531167.html,Power to the workers \r\nMore power \r\nPowe...
57647,Zwan,Come With Me,/z/zwan/come+with+me_20148981.html,all you need \r\nis something i'll believe \...
57648,Zwan,Desire,/z/zwan/desire_20148986.html,northern star \r\nam i frightened \r\nwhere ...
57649,Zwan,Heartsong,/z/zwan/heartsong_20148991.html,come in \r\nmake yourself at home \r\ni'm a ...


In [35]:
df.shape

(57650, 4)

In [36]:
df.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [37]:
df =df.sample(5000).drop('link', axis=1).reset_index(drop=True)

In [38]:
df.head(10)

Unnamed: 0,artist,song,text
0,Kenny Rogers,I Don't Wanna Know Why,I don't wanna know why \r\n \r\nI took your ...
1,Spandau Ballet,Code Of Love,He put so much into her life \r\nShe took so ...
2,Backstreet Boys,One Phone Call,Hard to believe it \r\nIt's almost a year sin...
3,Dolly Parton,9 To 5,Tumble out of bed \r\nAnd stumble to the kitc...
4,Radiohead,Talk Show Host,I want to \r\nI want to be someone else or I'...
5,Johnny Cash,Field Of Diamonds,"Field of diamonds in the sky, worlds are whirl..."
6,Ace Of Base,"Hallo, Hallo",Hello hello ahh... \r\nHello hello ahh... \r...
7,Dave Matthews Band,Looking At You,If I had the world to make a day for you \r\n...
8,Deep Purple,MTV,I was driving through the night \r\nInto an e...
9,Europe,Last Look At Eden,I'm calling out tonight. \r\nI wanna feel his...


In [39]:
df['text'][0]

"I don't wanna know why  \r\n  \r\nI took your love as desperate measure  \r\nSeeking shelter from another lonely night  \r\nYou took my love seeking only pleasure  \r\nBut the feeling passes and the passion doesn't last.  \r\n  \r\nAnd I don't wanna know why, I don't want no reasons  \r\nI just feel the seasons, as they touch another year.  \r\nAnd I don't wanna know why, I don't want no reasons  \r\nI just know what is now, and what could never be.  \r\n  \r\nWill the memories fade or will they linger?  \r\nWill we live forever for a moment we can keep?  \r\nNow we stand alone  \r\n(why don't you do what you told me)  \r\nWhere we stood together  \r\n(I only wanted you to hold me)  \r\nHow did we ever let this dream slip through our hand  \r\n(how did we ever let this dream slip through our hand).  \r\n  \r\nAnd I don't wanna know ....\r\n\r\n"

In [40]:
# df = df.sample(5000)

In [41]:
df.shape

(5000, 3)

Text Cleaning/ Text Preprocessing

In [42]:
df['text'] = df['text'].str.lower().replace(r'^\w\s', ' ').replace(r'\n', ' ', regex = True)

In [43]:
df = df.assign(feedback=0)

In [44]:
import nltk
from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()

def tokenization(txt):
    tokens = nltk.word_tokenize(txt)
    stemming = [stemmer.stem(w) for w in tokens]
    return " ".join(stemming)

In [45]:
df['text'] = df['text'].apply(lambda x: tokenization(x))

In [46]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [47]:
tfidvector = TfidfVectorizer(analyzer='word',stop_words='english')
matrix = tfidvector.fit_transform(df['text'])
similarity = cosine_similarity(matrix)

In [48]:
similarity[0]

array([1.        , 0.06530841, 0.07895657, ..., 0.05464158, 0.02799277,
       0.05439745])

In [49]:
df[df['song'] == df.iloc[0]['song']]

Unnamed: 0,artist,song,text,feedback
0,Kenny Rogers,I Don't Wanna Know Why,i do n't wan na know whi i took your love as d...,0


In [50]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import numpy as np
from scipy.sparse import hstack, csr_matrix

In [51]:
df['combined_text'] = df['artist'] + ' ' + df['song'] + ' ' + df['text']
tfid_X = tfidvector.fit_transform(df['combined_text'])

df['feedback'] = np.random.choice([-1, 0, 1], size=len(df))

y = df['feedback']

X_train, X_test, y_train, y_test = train_test_split(tfid_X, y, test_size=0.2, random_state=42)

svc = SVC(probability=True)
svc.fit(X_train, y_train)

train_score = svc.score(X_train, y_train)
test_score = svc.score(X_test, y_test)

In [52]:
tfid_X.shape

(5000, 19298)

In [53]:
print("Training accuracy:", train_score)
print("Testing accuracy:", test_score)

Training accuracy: 0.9675
Testing accuracy: 0.328


In [54]:
ypred = svc.predict(X_test)

cm = confusion_matrix(y_test, ypred)
print(cm)

[[ 95 135 116]
 [ 99 138 102]
 [ 84 136  95]]


In [55]:
cr = classification_report(y_test, ypred)
print(cr)

              precision    recall  f1-score   support

          -1       0.34      0.27      0.30       346
           0       0.34      0.41      0.37       339
           1       0.30      0.30      0.30       315

    accuracy                           0.33      1000
   macro avg       0.33      0.33      0.33      1000
weighted avg       0.33      0.33      0.33      1000



In [56]:
def recommendation(song_df, svc_model):
    idx = df[df['song'] == song_df].index[0]
    distances = sorted(list(enumerate(similarity[idx])),reverse=True,key=lambda x:x[1])
    
    recommended_songs = []
    song_probs = []
    
    for m_id in distances[1:21]:
        artist = df.iloc[m_id[0]]['artist']
        song = df.iloc[m_id[0]]['song']
        text = df.iloc[m_id[0]]['text']
        
        combo = f"{artist} {song} {text}"
        X = tfidvector.fit_transform([combo])

        num_columns_to_add = tfid_X.shape[1] - X.shape[1]

        zeros_matrix = csr_matrix((X.shape[0], num_columns_to_add), dtype=np.float64)

        X = hstack([X, zeros_matrix])
    
        probability = svc_model.predict_proba(X)[0][1]
        song_probs.append(probability)
    
    sorted_indices = sorted(range(len(song_probs)), key=lambda i: song_probs[i], reverse=True)
    sorted_songs = [distances[i+1][0] for i in sorted_indices]
    
    recommended_songs = [df.iloc[i]['song'] for i in sorted_songs]
    
    return recommended_songs


In [57]:
recommendation(df.iloc[0]['song'], svc)

['There Was A Time',
 'Last Look At Eden',
 "Make No Mistakes, She's Mine",
 'Moments',
 "I Don't Want To Know",
 "It's Only Love",
 'St. Louis',
 "Lyin' In Bed",
 'All I Wanna Do Is You',
 'We Will Be Together',
 'The Runaway',
 "Don't Wanna Let You Go",
 'Detroit City',
 'All Night',
 'Open Off My Love',
 'I Just Wanna Love U (Offcial Kanye West Mumtribute Mix)',
 'Farewell',
 "Don't Wanna Lose You",
 'Ooh Baby',
 'Santa Ana Wind']

In [58]:
def update_model(return_info):
    for row in return_info:
        song_name = row[0]
        feedback_value = row[2]
        df.loc[df['song'] == song_name, 'feedback'] = feedback_value

    df['combined_text'] = df['artist'] + ' ' + df['song'] + ' ' + df['text']
    tfid_X = tfidvector.fit_transform(df['combined_text'])

    df['feedback'] = np.random.choice([-1, 0, 1], size=len(df))

    y = df['feedback']

    X_train, X_test, y_train, y_test = train_test_split(tfid_X, y, test_size=0.2, random_state=42)

    svc = SVC(probability=True)
    svc.fit(X_train, y_train)

    return svc

In [59]:
import pickle
pickle.dump(similarity,open('similarity.pkl','wb'))
pickle.dump(df,open('df.pkl','wb'))
pickle.dump(svc,open('svc.pkl','wb'))

In [60]:
df.head()

Unnamed: 0,artist,song,text,feedback,combined_text
0,Kenny Rogers,I Don't Wanna Know Why,i do n't wan na know whi i took your love as d...,-1,Kenny Rogers I Don't Wanna Know Why i do n't w...
1,Spandau Ballet,Code Of Love,he put so much into her life she took so much ...,-1,Spandau Ballet Code Of Love he put so much int...
2,Backstreet Boys,One Phone Call,hard to believ it it 's almost a year sinc i g...,1,Backstreet Boys One Phone Call hard to believ ...
3,Dolly Parton,9 To 5,tumbl out of bed and stumbl to the kitchen pou...,0,Dolly Parton 9 To 5 tumbl out of bed and stumb...
4,Radiohead,Talk Show Host,i want to i want to be someon els or i 'll exp...,0,Radiohead Talk Show Host i want to i want to b...
