# Load DataSet


In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('songdata.csv')


In [3]:
df.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \nAnd..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \nTouch me gentl..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \nWhy I had t...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 57650 entries, 0 to 57649
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   artist  57650 non-null  object
 1   song    57650 non-null  object
 2   link    57650 non-null  object
 3   text    57650 non-null  object
dtypes: object(4)
memory usage: 1.8+ MB


In [5]:
print(df.shape)

(57650, 4)


# Data Cleaning

In [6]:
df = df.sample(n=5000).drop('link',axis=1).reset_index(drop=True)

In [7]:
df['text'][0]

"Ooo, ooo, ooo, ooo-ooo, ooo, ooo, ooo  \n  \nHey all you people, for goodness sake,  \nLet's get together, what does it take,  \nTo make you understand the value of a man?  \nI'm talkin' about your son and neighbor, yes I am. oh ...  \n  \nChorus  \nPeople let's stop the war.  \nPeople let's stop the war.  \nPeople let's stop the war.  \nPeople let's stop the war.  \n  \nIf we had a president, that did just what he said,  \nThe country would be just alright, and no one would be dead,  \nFrom fighting in a war, that causes big men to get rich.  \nThere's money in them war machines, now ain't this a bitch? oh ...  \n  \nChorus  \n  \nI been excited, ain't nobody ready.  \nThey don't know what to get ready for.  \nLet's get ready and stop the war.  \n  \nOoo ... excited, ain't nobody ready.  \nThey don't know what to get ready for.  \nLet's get ready and stop the war.  \n  \nI been excited, ain't nobody ready.  \nThey don't know what to get ready for.  \nLet's get ready and stop the war.

In [8]:
df['text'] = df['text'].str.lower().replace(r'[^\w\s]','').replace(r'\n',' ',regex=True)

In [9]:
df['text'][0]

"ooo, ooo, ooo, ooo-ooo, ooo, ooo, ooo      hey all you people, for goodness sake,   let's get together, what does it take,   to make you understand the value of a man?   i'm talkin' about your son and neighbor, yes i am. oh ...      chorus   people let's stop the war.   people let's stop the war.   people let's stop the war.   people let's stop the war.      if we had a president, that did just what he said,   the country would be just alright, and no one would be dead,   from fighting in a war, that causes big men to get rich.   there's money in them war machines, now ain't this a bitch? oh ...      chorus      i been excited, ain't nobody ready.   they don't know what to get ready for.   let's get ready and stop the war.      ooo ... excited, ain't nobody ready.   they don't know what to get ready for.   let's get ready and stop the war.      i been excited, ain't nobody ready.   they don't know what to get ready for.   let's get ready and stop the war.      ooo ... excited, ain't n

# Tokenization

In [10]:
import nltk
nltk.download('punkt')


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Hasaan\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [11]:
from nltk.stem.porter import PorterStemmer
from nltk.tokenize import word_tokenize

In [12]:
ps = PorterStemmer()

stemming = []
def tokenization(txt):
    tokens = nltk.word_tokenize(txt)
    stemming = [ps.stem(w) for w in tokens]

    return " ".join(stemming)

In [13]:
tokenization("i know sometimes you're scared of the light")

"i know sometim you 're scare of the light"

In [14]:
df['text']=df['text'].apply(lambda x: tokenization(x))

In [15]:
df['text']

0       ooo , ooo , ooo , ooo-ooo , ooo , ooo , ooo he...
1       come on turn it up , mad stalk the night , fee...
2       these social drug these social drug these soci...
3       ( remix ) i love her 'caus she got her own she...
4       one , two steal my heart and hold my tongu i f...
                              ...                        
4995    well , i 'm gon na be a wheel someday i 'm gon...
4996    out of your dream and into hi arm you long to ...
4997    one more sunday in savannah hear the whole cre...
4998    bado , scoobado , scoobado-bado-bado . oh , wo...
4999    i ca n't believ the thing i 've seen i wonder ...
Name: text, Length: 5000, dtype: object

# TF-IDF Vectorization and Similarity Calculation

In [16]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [17]:
tfid = TfidfVectorizer(stop_words = 'english')
matrix = tfid.fit_transform(df['text'])

In [18]:
matrix.shape

(5000, 17334)

In [19]:
similarity = cosine_similarity(matrix)

In [20]:
similarity[0]

array([1.        , 0.        , 0.00650069, ..., 0.0028077 , 0.02904456,
       0.0765012 ])

In [21]:
df['song'][0]

"People, Let's Stop The War"

In [22]:
df[df['song'] == 'Brilliant Mistake']

Unnamed: 0,artist,song,text


# Song Recommendation Function

In [23]:
df.head(300)

Unnamed: 0,artist,song,text
0,Grand Funk Railroad,"People, Let's Stop The War","ooo , ooo , ooo , ooo-ooo , ooo , ooo , ooo he..."
1,Venom,Blood Lust,"come on turn it up , mad stalk the night , fee..."
2,Lauryn Hill,Social Drugs,these social drug these social drug these soci...
3,Ne-Yo,She Got Her Own,( remix ) i love her 'caus she got her own she...
4,Coldplay,Kingdom Come,"one , two steal my heart and hold my tongu i f..."
...,...,...,...
295,Louis Armstrong,I Surrender Dear,"pride , sad , splendid liar , sworn enemi of l..."
296,Usher,Pop Ya Collar,"hello everybodi , huh welcom today to the wond..."
297,Stevie Wonder,As,as around the sun the earth know she 's revolv...
298,Bonnie Raitt,Your Good Thing (Is About To End),i do n't have to beg you to hold me caus someb...


In [25]:
def recommendation(song):
    # Check if the song exists in the DataFrame
    if song in df['song'].values:
        idx = df[df['song'] == song].index[0]
        distances = sorted(list(enumerate(similarity[idx])), reverse=False, key=lambda x: x[1])
        
        songs = []
        for i in distances[1:20]: 
            songs.append(df.iloc[i[0]]['song'])
        
        return songs
    else:
        return "Song not found in the dataset."

user_input = input("Enter a song: ")
recommendations = recommendation(user_input)

if isinstance(recommendations, list):
    print("Recommended songs:")
    for song in recommendations:
        print(song)
else:
    print(recommendations)

Enter a song:  Your Good Thing (Is About To End)


Recommended songs:
Smiling Faces
Breath Of The Black Muse
Pollard
But None Like You
Meadows Of Heaven
Hark The Herald Angels Sing
Crescent Noon
Forever Now
Muze Kisee Se Pyaar Ho Gayaa
Guardians Of The Breath
Oriunde Ai Fi
Oh Draw Me Lord
Bird Of Pray
Another Breakthrough
Fernando (In Spanish)
A Portrait Destroyed By Fire
Baila Me
Shake Your Head (Let's Go To Bed)
Back On Earth
