In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pickle

# Load the data
df = pd.read_csv('Hindi_songs.csv')

# Columns whose values are concatenated into the text that TF-IDF vectorizes
FEATURE_COLUMNS = ['singer', 'language', 'danceability', 'energy', 'tempo']


def _duration_to_seconds(value):
    """Convert a colon-separated duration string to a whole number of seconds.

    Each colon-separated field is treated as a base-60 digit, so 'MM:SS'
    gives minutes*60 + seconds and 'HH:MM:SS' gives the full hour-aware total.

    Returns:
        int seconds, or ``pd.NA`` when the value is missing or blank.

    Raises:
        ValueError: if a field is not an integer.
    """
    if pd.isna(value):
        return pd.NA
    text = str(value).strip()
    if not text:
        return pd.NA
    seconds = 0
    for field in text.split(':'):
        seconds = seconds * 60 + int(field)
    return seconds


def _as_text(value):
    """Render a cell as text, mapping missing values to an empty string."""
    return '' if pd.isna(value) else str(value)


# Data preprocessing
# Convert duration to seconds. Missing durations are tolerated here rather
# than crashing on `.split`; the column stays plain int64 when nothing is
# missing and falls back to nullable Int64 only when it has to.
duration_seconds = df['duration'].map(_duration_to_seconds)
df['duration'] = duration_seconds.astype(
    'Int64' if duration_seconds.isna().any() else 'int64'
)

# Handle missing values if any: only text columns are filled with ''.
# Numeric columns keep NaN so they stay numeric instead of silently
# becoming mixed str/float object columns.
for column in df.columns:
    if pd.api.types.is_object_dtype(df[column]) or pd.api.types.is_string_dtype(df[column]):
        df[column] = df[column].fillna('')

# Create a combined feature for recommendation: the feature columns joined
# by single spaces, with missing values rendered as empty strings.
# NOTE(review): danceability/energy/tempo are passed to TF-IDF as text, so
# they are matched as literal tokens rather than compared numerically --
# confirm this is the intended similarity measure.
df['combined_features'] = [
    ' '.join(_as_text(value) for value in row)
    for row in df[FEATURE_COLUMNS].itertuples(index=False, name=None)
]

# Initialize TF-IDF Vectorizer
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['combined_features'])

# Compute cosine similarity matrix (row i / column j compares songs i and j)
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)


In [3]:
def get_recommendations(title, cosine_sim=cosine_sim, df=df, top_n=10):
    """Return the songs most similar to the song named ``title``.

    Args:
        title: Exact value to look up in ``df['song_name']``. When several
            rows share the name, the first one is used.
        cosine_sim: Square similarity matrix whose row/column order matches
            the row order of ``df``.
        df: Song table providing the 'song_name', 'singer', 'duration' and
            'popularity' columns.
        top_n: Maximum number of recommendations to return (default 10).

    Returns:
        A DataFrame with the 'song_name', 'singer', 'duration' and
        'popularity' columns of the most similar songs, best match first.
        The queried song itself is never included.

    Raises:
        IndexError: if no row of ``df`` has ``song_name == title``.
        ValueError: if ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Locate the song by row *position* (not index label) so the lookup into
    # cosine_sim and .iloc stays correct even if df has a non-default index.
    match_positions = (df['song_name'] == title).to_numpy().nonzero()[0]
    if len(match_positions) == 0:
        raise IndexError(f"No song named {title!r} found in 'song_name'")
    query_pos = int(match_positions[0])

    # Drop the query song explicitly instead of assuming it sorts first:
    # tied scores or an all-zero feature row can put another song at rank 0.
    candidates = [
        (pos, score)
        for pos, score in enumerate(cosine_sim[query_pos])
        if pos != query_pos
    ]

    # Stable sort: equally similar songs keep their original table order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    song_positions = [pos for pos, _ in candidates[:top_n]]

    return df[['song_name', 'singer', 'duration', 'popularity']].iloc[song_positions]

In [4]:
# Persist the recommender artifacts with pickle, one file per artifact:
#   music_recommender.pkl -> dict holding the similarity matrix and the dataframe
#   tfidf_vectorizer.pkl  -> the fitted TF-IDF vectorizer
artifacts = (
    ('music_recommender.pkl', {'cosine_sim': cosine_sim, 'df': df}),
    ('tfidf_vectorizer.pkl', tfidf),
)
for filename, payload in artifacts:
    with open(filename, 'wb') as handle:
        pickle.dump(payload, handle)