In [134]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk

In [135]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

In [136]:
nltk.download("punkt")
nltk.download("stopwords")
nltk.download("punkt_tab")

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\chinm\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\chinm\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\chinm\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [137]:
stop_words = set(stopwords.words("English"))

In [138]:
stop_words

{'a',
 'about',
 'above',
 'after',
 'again',
 'against',
 'ain',
 'all',
 'am',
 'an',
 'and',
 'any',
 'are',
 'aren',
 "aren't",
 'as',
 'at',
 'be',
 'because',
 'been',
 'before',
 'being',
 'below',
 'between',
 'both',
 'but',
 'by',
 'can',
 'couldn',
 "couldn't",
 'd',
 'did',
 'didn',
 "didn't",
 'do',
 'does',
 'doesn',
 "doesn't",
 'doing',
 'don',
 "don't",
 'down',
 'during',
 'each',
 'few',
 'for',
 'from',
 'further',
 'had',
 'hadn',
 "hadn't",
 'has',
 'hasn',
 "hasn't",
 'have',
 'haven',
 "haven't",
 'having',
 'he',
 "he'd",
 "he'll",
 "he's",
 'her',
 'here',
 'hers',
 'herself',
 'him',
 'himself',
 'his',
 'how',
 'i',
 "i'd",
 "i'll",
 "i'm",
 "i've",
 'if',
 'in',
 'into',
 'is',
 'isn',
 "isn't",
 'it',
 "it'd",
 "it'll",
 "it's",
 'its',
 'itself',
 'just',
 'll',
 'm',
 'ma',
 'me',
 'mightn',
 "mightn't",
 'more',
 'most',
 'mustn',
 "mustn't",
 'my',
 'myself',
 'needn',
 "needn't",
 'no',
 'nor',
 'not',
 'now',
 'o',
 'of',
 'off',
 'on',
 'once',
 'on

In [139]:
df = pd.read_csv("Datasets/spotify_millsongdata.csv")

In [140]:
df.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \nAnd..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \nTouch me gentl..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \nWhy I had t...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [141]:
import re

In [142]:
def process(text):
    text = re.sub("[^A-Za-z\s]", "", text)
    tok = word_tokenize(text)
    tokens = [words.lower() for words in tok if words not in stop_words]

    return " ".join(tokens)

  text = re.sub("[^A-Za-z\s]", "", text)


In [143]:
df["text"] = df["text"].apply(process)

In [144]:
df.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,look face wonderful face and means something s...
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,take easy please touch gently like summer even...
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,ill never know i go why i put lousy rotten sho...
3,ABBA,Bang,/a/abba/bang_20598415.html,making somebody happy question give take you l...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,making somebody happy question give take you l...


In [145]:
df_batch_1 = df[:20000]

In [146]:
df_batch_1.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,look face wonderful face and means something s...
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,take easy please touch gently like summer even...
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,ill never know i go why i put lousy rotten sho...
3,ABBA,Bang,/a/abba/bang_20598415.html,making somebody happy question give take you l...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,making somebody happy question give take you l...


In [147]:
tfidf = TfidfVectorizer(max_features=5000, max_df=0.98)
matrix = tfidf.fit_transform(df_batch_1["text"])

In [148]:
matrix.toarray()

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [149]:
cosine_sim = cosine_similarity(matrix)

In [150]:
def recommend_songs(song_name, matrix = matrix, df = df, top_n=5):
    idx_list = df[df["song"].str.lower()==song_name.lower()].index
    idx = idx_list[0]

    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key = lambda x:x[1], reverse=True)
    top_songs =  sim_scores[1:top_n+1]

    indic = [i[0] for i in top_songs]
    return df["song"][indic]

In [151]:
recommend_songs("Why try")

889     All The Small Things (Blink 182 Cover)
5252                             Caribean Blue
8301                  Where Do We Go From Here
9246                              What Is Love
4248                                 Missing U
Name: song, dtype: object

In [152]:
np.save("data.npy", cosine_sim)

## User neural network

In [154]:
rate = pd.DataFrame({
    "song":[],
    "rating":[]
})

In [155]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

In [156]:
from tensorflow.keras.losses import MSE, BinaryCrossentropy
from tensorflow.keras.optimizers import Adam

In [215]:
model = Sequential()
model.add(Dense(5000, input_shape = (5000,), activation="relu"))
model.add(Dense(6000, activation="relu"))
model.add(Dense(5500, activation="relu"))
model.add(Dense(1, activation="linear"))

model.compile(loss = MSE, optimizer=Adam(learning_rate = 0.01))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [217]:
model.summary()

In [209]:
matrix = matrix.toarray()

AttributeError: 'numpy.ndarray' object has no attribute 'toarray'

In [211]:
X = np.array([matrix[3331], matrix[3260]])
y = np.array([3, 5])

In [213]:
model.fit(X, y,  epochs=20)

Epoch 1/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - loss: 16.9579
Epoch 2/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 507ms/step - loss: 1873.4404
Epoch 3/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 490ms/step - loss: 9.4284
Epoch 4/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 502ms/step - loss: 18.4480
Epoch 5/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 507ms/step - loss: 17.3687
Epoch 6/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 519ms/step - loss: 16.6803
Epoch 7/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 489ms/step - loss: 15.5817
Epoch 8/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 513ms/step - loss: 14.0630
Epoch 9/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 505ms/step - loss: 12.1577
Epoch 10/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 531ms/step - loss: 9.9415
Ep

<keras.src.callbacks.history.History at 0x28700796330>

In [203]:
model.predict(matrix[3260].reshape(1,5000))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step


array([[4.5832276]], dtype=float32)

In [199]:
recommend_songs(df["song"][3260])

3847     It's In Everyone Of Us
7943     It's In Everyone Of Us
10967        Not Everyone Knows
13039            If'n I Was God
8219        Perverts In The Sun
Name: song, dtype: object

In [221]:
df_batch_1[df_batch_1["artist"]=="Coldplay"]

Unnamed: 0,artist,song,link,text
3253,Coldplay,Another's Arms,/c/coldplay/anothers+arms_21079526.html,late night watching tv used beside used arms a...
3254,Coldplay,Bigger Stronger,/c/coldplay/bigger+stronger_20032648.html,i want bigger stronger drive faster car to tak...
3255,Coldplay,Daylight,/c/coldplay/daylight_20032625.html,to surprise delight i saw sunrise i saw sunlig...
3256,Coldplay,Everglow,/c/coldplay/everglow_21104546.html,oh say people come they say people go this par...
3257,Coldplay,Every Teardrop Is A Waterfall,/c/coldplay/every+teardrop+is+a+waterfall_2091...,i turn music i got records i shut world outsid...
...,...,...,...,...
3328,Coldplay,Why Does It Always Rain On Me,/c/coldplay/why+does+it+always+rain+on+me_2051...,i cant sleep tonight everybody saying everythi...
3329,Coldplay,X Marks The Spot,/c/coldplay/x+marks+the+spot_21104554.html,so i reach stare darkness stare doom my heart ...
3330,Coldplay,Yellow,/c/coldplay/yellow_20032647.html,look stars look shine and everything yeah yell...
3331,Coldplay,Yes,/c/coldplay/yes_20746054.html,when started high hopes now backs line my back...


In [227]:
np.save("songs.npy", matrix)