In [1]:
pip install fuzzywuzzy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting fuzzywuzzy
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
Installing collected packages: fuzzywuzzy
Successfully installed fuzzywuzzy-0.18.0


In [2]:
pip install python-Levenshtein

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting python-Levenshtein
  Downloading python-Levenshtein-0.12.2.tar.gz (50 kB)
[K     |████████████████████████████████| 50 kB 4.1 MB/s 
Building wheels for collected packages: python-Levenshtein
  Building wheel for python-Levenshtein (setup.py) ... [?25l[?25hdone
  Created wheel for python-Levenshtein: filename=python_Levenshtein-0.12.2-cp37-cp37m-linux_x86_64.whl size=149866 sha256=b1ad4fc28c1b0c4381c8341a3f3b1d1e8c3b4044fa22c8ca547d36b9da69f166
  Stored in directory: /root/.cache/pip/wheels/05/5f/ca/7c4367734892581bb5ff896f15027a932c551080b2abd3e00d
Successfully built python-Levenshtein
Installing collected packages: python-Levenshtein
Successfully installed python-Levenshtein-0.12.2


# Recommenders

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.sparse import csr_matrix
from typing import List, Dict
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
from fuzzywuzzy import fuzz

class content_based_recommender:
    """Print recommendations looked up from a precomputed similarity table.

    The table passed to the constructor is indexed by song; each value is a
    sequence whose entries are read positionally as
    ``(similarity score, song title, artist)``.
    """

    def __init__(self, matrix):
        # Kept as-is; it is only ever read via ``matrix[song][:number_songs]``.
        self.matrix_similar = matrix

    def _print_message(self, song, recom_song):
        """Write a numbered report of ``recom_song`` for ``song`` to stdout."""
        total = len(recom_song)

        print(f'The {total} recommended songs for {song} are:')
        for position, entry in enumerate(recom_song, start=1):
            score = round(entry[0], 3)
            print(f"Number {position}:")
            print(f"{entry[1]} by {entry[2]} with {score} similarity score")
            print("--------------------")

    def recommend(self, recommendation):
        """Print the leading recommendations for one request.

        Args:
            recommendation: mapping with the keys ``'song'`` (the key to look
                up in the similarity table) and ``'number_songs'`` (how many
                leading entries to print).

        Raises:
            KeyError: if either key is missing, or the song is not in the table.
        """
        target = recommendation['song']
        limit = recommendation['number_songs']
        # The stored entries are used in their existing order; only the first `limit` are shown.
        top_matches = self.matrix_similar[target][:limit]
        self._print_message(song=target, recom_song=top_matches)

class collaborative_based_recommender:
    """Namespace for the nearest-neighbour song recommender."""

    class Recommender:
        """Recommend songs by nearest-neighbour search over a feature matrix.

        Args:
            metric: distance metric forwarded to ``NearestNeighbors``.
            algorithm: search algorithm forwarded to ``NearestNeighbors``.
            k: default ``n_neighbors`` of the fitted model.
            data: feature matrix the model is fitted on; ``data[i]`` must be
                the row that belongs to the song mapped to index ``i``.
            decode_id_song: mapping from song title to row index in ``data``.
        """

        def __init__(self, metric, algorithm, k, data, decode_id_song):
            self.metric = metric
            self.algorithm = algorithm
            self.k = k
            self.data = data
            self.decode_id_song = decode_id_song
            self.model = self._recommender().fit(data)

        def make_recommendation(self, new_song, n_recommendations):
            """Return a list of recommended song titles for ``new_song``.

            The list is empty when no known title resembles ``new_song``.
            """
            recommended = self._recommend(new_song=new_song, n_recommendations=n_recommendations)
            print("... Done")
            return recommended

        def _recommender(self):
            """Build the (unfitted) nearest-neighbour model from the stored settings."""
            return NearestNeighbors(metric=self.metric, algorithm=self.algorithm, n_neighbors=self.k, n_jobs=-1)

        def _recommend(self, new_song, n_recommendations):
            """Translate the neighbour indices for ``new_song`` into song titles."""
            recommendation_ids = self._get_recommendations(new_song=new_song, n_recommendations=n_recommendations)
            # Reverse mapping: row index -> song title.
            recommendations_map = self._map_indeces_to_song_title(recommendation_ids)
            return [recommendations_map[idx] for idx, _dist in recommendation_ids]

        def _get_recommendations(self, new_song, n_recommendations):
            """Return ``(row index, distance)`` pairs for the neighbours of ``new_song``.

            Returns an empty list when the title cannot be matched, instead of
            indexing the feature matrix with ``None``.
            """
            recom_song_id = self._fuzzy_matching(song=new_song)
            if recom_song_id is None:
                return []
            print(f"Starting the recommendation process for {new_song} ...")
            # One extra neighbour is requested because the closest hit is discarded below.
            distances, indices = self.model.kneighbors(self.data[recom_song_id], n_neighbors=n_recommendations+1)
            # ravel() (unlike squeeze()) still yields a 1-D sequence when only one neighbour comes back.
            pairs = sorted(zip(indices.ravel().tolist(), distances.ravel().tolist()), key=lambda pair: pair[1])
            # Drops the closest pair (position 0) and reverses the rest, so the result
            # runs from the largest distance to the smallest.
            # NOTE(review): position 0 is presumably the query song itself, and the
            # farthest-first order is kept from the original - confirm both are intended.
            return pairs[:0:-1]

        def _map_indeces_to_song_title(self, recommendation_ids):
            """Return the inverse of ``decode_id_song`` (row index -> title).

            ``recommendation_ids`` is accepted for interface compatibility but not used.
            """
            return {song_id: song_title for song_title, song_id in self.decode_id_song.items()}

        def _fuzzy_matching(self, song, min_ratio=60):
            """Return the row index of the known title most similar to ``song``.

            Titles are compared case-insensitively with ``fuzz.ratio``; only
            scores of at least ``min_ratio`` count. Prints a message and
            returns ``None`` when nothing qualifies.
            """
            wanted = song.lower()
            best_idx = None
            best_ratio = None
            for title, idx in self.decode_id_song.items():
                ratio = fuzz.ratio(title.lower(), wanted)
                if ratio < min_ratio:
                    continue
                # ">=" lets the later title win a tie, matching the previous
                # stable-sort-then-reverse selection.
                if best_ratio is None or ratio >= best_ratio:
                    best_idx, best_ratio = idx, ratio
            if best_ratio is None:
                print(f"The recommendation system could not find a match for {song}")
                return None
            return best_idx

# User Space

In [11]:
import warnings

# Silence library warnings so the interactive prompts stay readable.
warnings.filterwarnings("ignore")

# Inputs: per-user listening rows, song metadata (one row kept per song_id), and the lyrics table.
user_data = pd.read_csv(r'/content/user_data.csv')
song_data = pd.read_csv(r'/content/song_data.csv').drop_duplicates(['song_id'])
content_songs = pd.read_csv(r'/content/lyricsfreak.csv')

user_name = input("Enter your name : ")
print("\nHello " + user_name)

print("Press 1 for Content-based Recommendations")
print("Press 2 for Collaborative-based Recommendations")
print("Press 3 for Popularity-based Recommendations")
try:
    recommendation_type = int(input("Enter your choice : "))
except ValueError:
    # Non-numeric input matches none of the menu options, so it is reported
    # as an invalid choice instead of aborting the cell with a traceback.
    recommendation_type = None

if recommendation_type == 1:
    # --- Content-based: TF-IDF over lyrics, cosine similarity between songs ---
    song_n = int(input("Enter the song number : "))
    n = int(input("Enter the number of recommendations needed : "))

    # Work on a bounded sample (the dense similarity matrix is n x n). The cap
    # avoids a ValueError on corpora with fewer than 5000 rows, and the fixed seed
    # makes "song number" refer to the same song on every run.
    sample_size = min(5000, len(content_songs))
    content_songs = (
        content_songs
        .sample(n=sample_size, random_state=42)
        .drop('link', axis=1)
        .reset_index(drop=True)
    )
    # Strip newline characters; explicit regex=False keeps this independent of
    # the pandas version's default for `regex`.
    content_songs['text'] = content_songs['text'].str.replace('\n', '', regex=False)

    tfidf = TfidfVectorizer(analyzer='word', stop_words='english')
    lyrics = tfidf.fit_transform(content_songs['text'])
    cosine = cosine_similarity(lyrics)

    titles = content_songs['song'].tolist()
    artists = content_songs['artist'].tolist()
    similarities = {}
    for i, row in enumerate(cosine):
        # Up to 49 highest-scoring columns, best first; the leading entry is then
        # dropped (presumably the song matching itself).
        similar_indices = row.argsort()[:-50:-1]
        similarities[titles[i]] = [(row[x], titles[x], artists[x]) for x in similar_indices][1:]

    content_recommender = content_based_recommender(similarities)
    recommendation = {
        "song": titles[song_n],
        "number_songs": n,
    }
    content_recommender.recommend(recommendation)

elif recommendation_type == 2:
    # --- Collaborative: nearest neighbours over the song x user listen-count matrix ---
    song_name = input("Enter the song name : ")

    collab_songs = pd.merge(user_data, song_data, on="song_id", how="left")
    # Keep only users with more than 16 rows in the listening data.
    song_user = collab_songs.groupby('user_id')['song_id'].count()
    users_morethan_16 = song_user[song_user > 16].index.to_list()
    songid_morethan_16 = collab_songs[collab_songs['user_id'].isin(users_morethan_16)].reset_index(drop=True)

    # Rows are songs, columns are users, values are listen counts (0 where absent).
    songs_features = songid_morethan_16.pivot(index='song_id', columns='user_id', values='listen_count').fillna(0)
    new_songs_features = csr_matrix(songs_features.values)

    # Title -> row position in the feature matrix, in the matrix's own row order.
    uniq = collab_songs.drop_duplicates(subset=['song_id']).reset_index(drop=True)[['song_id', 'title']]
    mapped_songs = {
        song: i for i, song in enumerate(uniq.set_index('song_id').loc[songs_features.index].title)
    }

    model = collaborative_based_recommender.Recommender(
        metric='cosine', algorithm='brute', k=20, data=new_songs_features, decode_id_song=mapped_songs
    )
    recos = model.make_recommendation(new_song=song_name, n_recommendations=10)
    print(f'The recommendations for "{song_name}" are:')
    for title in recos:
        print(f">> {title}")

elif recommendation_type == 3:
    # --- Popularity: rank songs by how many listening rows mention them ---
    songs_df = pd.merge(user_data, song_data, on='song_id', how="left")
    songs_df['song_name'] = songs_df['title'] + ' - ' + songs_df['artist_name']

    def create_popularity_chart(df, user_id, item_id, n):
        """Return the ``n`` items with the highest row counts.

        Args:
            df: frame containing the ``user_id`` and ``item_id`` columns.
            user_id: name of the column that is counted per item.
            item_id: name of the column identifying an item.
            n: number of top rows to return.

        Returns:
            DataFrame with the columns ``item_id``, ``score`` (count) and
            ``Rank``, sorted by descending score, ties broken by ``item_id``.
        """
        grouped = (
            df.groupby([item_id])
            .agg({user_id: 'count'})
            .reset_index()
            .rename(columns={user_id: 'score'})
        )
        ranked = grouped.sort_values(['score', item_id], ascending=[False, True])
        # method='first' gives tied scores distinct, order-based ranks.
        ranked['Rank'] = ranked['score'].rank(ascending=False, method='first')
        return ranked.head(n)

    song_recommendations = create_popularity_chart(songs_df, 'user_id', 'song_name', 10)
    # Iterate the column values: iterating the DataFrame itself yields column labels.
    for name in song_recommendations['song_name']:
        print(f">> {name}")

else:
    print("\nEnter a Valid Number")

Enter your name : Mahes

Hello Mahes
Press 1 for Content-based Recommendations
Press 2 for Collaborative-based Recommendations
Press 3 for Popularity-based Recommendations
Enter your choice : 2
Enter the song name : I believe in miracles
Starting the recommendation process for I believe in miracles ...
... Done
The recommendations for "I believe in miracles" are:
>> Nine Million Bicycles
>> If You Were A Sailboat
>> Shy Boy
>> I Cried For You
>> Spider's Web
>> Piece By Piece
>> On The Road Again
>> Blues In The Night
>> Blue Shoes
>> Thank You Stars
