In [1]:
# Importing the Libraries

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Read the song catalogue from its CSV file (relative path)

SONGS_CSV = "../sample_songs_csv.csv"
data = pd.read_csv(SONGS_CSV)

# Preview the first six rows as the cell output

data.head(n=6)

Unnamed: 0,song_id,title,artist,genre,duration_ms,popularity,release_year
0,s1,Midnight Dreams,Luna Rivera,Pop,210000,85,2023
1,s2,Electric Thunder,The Voltage,Rock,195000,72,2022
2,s3,Sunset Boulevard,Jazz Collective,Jazz,240000,68,2021
3,s4,Digital Hearts,Synth Wave,Electronic,180000,91,2023
4,s5,Country Roads Home,Jake Mitchell,Country,205000,76,2022
5,s6,Ocean Waves,Calm Sounds,Ambient,300000,45,2020


In [3]:
# Combining relevant features into a single text column.
# Missing values are replaced with "" in each source column *before* concatenating:
# in pandas, string + NaN evaluates to NaN, so a single missing field would
# otherwise discard the row's remaining text as well.

text_columns = data[["title", "artist", "genre"]].fillna("").astype(str)
data["features"] = (
    text_columns["title"] + " " + text_columns["artist"] + " " + text_columns["genre"]
)

In [4]:
# Replace any missing entry in the combined text column with an empty string

features_filled = data["features"].fillna(value="")
data["features"] = features_filled

In [5]:
# Turn each song's combined text into a numeric TF-IDF
# (Term Frequency - Inverse Document Frequency) vector, ignoring English stop words

corpus = data["features"]
vectorizer = TfidfVectorizer(stop_words="english")
feature_matrix = vectorizer.fit_transform(corpus)

In [6]:
# Creating a function to show recommended (similar) songs based on cosine similarity

def get_recommendation(title: str, top_n: int = 5):

    """
    Returns the titles of the songs most similar to the given title.

    Similarity is the cosine similarity between the requested song's row of the
    precomputed feature matrix and every row of that matrix. The requested song
    is removed from the candidates explicitly, by row position, so it is never
    recommended to itself even when its self-similarity is not the strict
    maximum (for example when another row ties with it).

    Args:
        title (str): Exact title of the song to find recommendations for.
                     If several rows share the title, the first one is used.
        top_n (int): Maximum number of recommendations to return. Defaults to 5.

    Returns:
        list or str: Up to ``top_n`` song titles ordered from least to most
                     similar (the closest match is last), or an error message
                     string if the title is not in the dataset.
    """

    # Boolean mask of rows whose title matches; missing titles never match.
    title_matches = (data["title"] == title).to_numpy(dtype=bool, na_value=False)

    if not title_matches.any():
        return f"Title '{title}' not found in dataset."

    if top_n <= 0:
        return []

    # Row *position* of the first match. feature_matrix and data.iloc are both
    # positional, so a position (not an index label) keeps them aligned even if
    # the DataFrame index is not the default 0..n-1 range.
    position = int(title_matches.argmax())

    similarity_score = cosine_similarity(feature_matrix[position], feature_matrix)

    # Rank every row by ascending similarity, then drop the queried row itself
    # rather than assuming it always sorts last.
    ranked_positions = similarity_score.argsort()[0]
    ranked_positions = ranked_positions[ranked_positions != position]

    # Getting the top_n most similar rows (the tail of the ascending ranking)
    similar_indices = ranked_positions[-top_n:]

    # Getting the titles of those rows
    similar_songs = data.iloc[similar_indices]["title"].tolist()

    return similar_songs


In [7]:
# Fetch and print the recommendations for a sample song

recommended_titles = get_recommendation("Midnight Dreams")
print(recommended_titles)

['Synthpop Dreams', 'Blues at Midnight', 'Dream Pop Clouds', 'Dancing in the Rain', 'Pop Punk Energy']
