<h2>KNN_Collaborate</h2>

In [1]:
from sklearn.neighbors import NearestNeighbors
from fuzzywuzzy import fuzz
import numpy as np

class Collaborate_Recommender:
    def __init__(self, metric, algorithm, k, data, decode_id_song):
        self.metric = metric
        self.algorithm = algorithm
        self.k = k
        self.data = data
        self.decode_id_song = decode_id_song
        self.data = data
        self.model = self._recommender().fit(data)
    
    def make_recommendation(self, new_song1, new_song2, n_recommendations):
        recommended1 = self._recommend(new_song=new_song1, n_recommendations=n_recommendations)
        recommended2 = self._recommend(new_song=new_song2, n_recommendations=n_recommendations)
        
        return recommended1 + recommended2    
    
    def _recommender(self):
        return NearestNeighbors(metric=self.metric, algorithm=self.algorithm, n_neighbors=self.k, n_jobs=-1)
    
    def _recommend(self, new_song, n_recommendations):
        # Get the id of the recommended songs
        recommendations = []
        recommendation_ids = self._get_recommendations(new_song=new_song, n_recommendations=n_recommendations)
        # return the name of the song using a mapping dictionary
        recommendations_map = self._map_indeces_to_song_title(recommendation_ids)
        # Translate this recommendations into the ranking of song titles recommended
        for i, (idx, dist) in enumerate(recommendation_ids):
            recommendations.append(recommendations_map[idx])
        return recommendations
                 
    def _get_recommendations(self, new_song, n_recommendations):
        # Get the id of the song according to the text
        recom_song_id = self._fuzzy_matching(song=new_song)
        # Return the n neighbors for the song id
        distances, indices = self.model.kneighbors(self.data[recom_song_id], n_neighbors=n_recommendations+1)
        return sorted(list(zip(indices.squeeze().tolist(), distances.squeeze().tolist())), key=lambda x: x[1])[:0:-1]
    
    def _map_indeces_to_song_title(self, recommendation_ids):
        # get reverse mapper
        return {song_id: song_title for song_title, song_id in self.decode_id_song.items()}
    
    def _fuzzy_matching(self, song):
        match_tuple = []
        # get match
        for title, idx in self.decode_id_song.items():
            ratio = fuzz.ratio(title.lower(), song.lower())
            if ratio >= 60:
                match_tuple.append((title, idx, ratio))
        # sort
        match_tuple = sorted(match_tuple, key=lambda x: x[2])[::-1]
        if not match_tuple:
            print(f"The recommendation system could not find a match for {song}")
            return
        return match_tuple[0][1]



<h2> Content-based filters (Using lyrics) </h2>

In [57]:
class CB_Lyric_Recommender:
    def __init__(self, matrix):
        self.matrix_similar = matrix

    def _print_message(self, song, recom_song):
        rec_items = len(recom_song)
        res_list = []
        for i in range(4):
            res_list.append({'song' :recom_song[i][1],  'artist': recom_song[i][2]})
#         print(res_list)
        return(res_list)
            
        
    def recommend(self, recommendation):
        # Get song to find recommendations for
        song = recommendation['song']
        # Get number of songs to recommend
        number_songs = recommendation['number_songs']
        # Get the number of songs most similars from matrix similarities
        recom_song = self.matrix_similar[song][:number_songs]
        # print each item
        self._print_message(song=song, recom_song=recom_song)

        
    def recommend2(self, recommendation, cosine_similarities, df):
        # Get song to find recommendations for
        song1 = recommendation['song1']
        # Get song to find recommendations for
        song2 = recommendation['song2']
        # Get number of songs to recommend
        number_songs = recommendation['number_songs']
        # add cos sim
        cossim2 = cosine_similarities[5011] + cosine_similarities[5004]
        cossim2_sort = cossim2.argsort()[:-50:-1] 
        # calc new cos sim
        similarity = [(cossim2[x], df['song'][x], df['artist'][x]) for x in cossim2_sort][2:]
        # Get the number of songs most similars from matrix similarities
        recom_song = similarity[:number_songs]
        # return each item
        return(self._print_message(song=song1, recom_song=recom_song))
        
        
        
        

<h2>Content-based filters (Sigmoid)</h2>

In [61]:
class CB_Feature_Recommender:
    def __init__(self, matrix):
        self.matrix_similar = matrix
        
    def _print_message(self, song, recom_song):
        rec_items = len(recom_song)
        res_list = []
        
        for i in range(4):
            res_list.append({'song':recom_song[i][1], 'artist':recom_song[i][2]})
        
#         print(res_list)
        return res_list
        
        
        
    def recommend(self, recommendation):
        # Get song to find recommendations for
        song = recommendation['song_title']
        
        # Get number of songs to recommend
        number_songs = recommendation['number_songs']
        
        # Get the number of songs most similars from matrix similarities
        recom_song = self.matrix_similar[song][:number_songs]
        
        # print each item
        self._print_message(song=song, recom_song=recom_song)
        
        
    def recommend2(self, recommendation, df, sig_kernel):
        # Get song to find recommendations for
        song1 = recommendation['song1']
        # Get song to find recommendations for
        song2 = recommendation['song2']
        # Get number of songs to recommend
        number_songs = recommendation['number_songs']
        # add cos sim
        sigker2 = sig_kernel[2000] + sig_kernel[54]
        sigker2_sort = sigker2.argsort()[:-50:-1] 
        # calc new cos sim
        similarity = [(sigker2[x], df['song_title'][x], df['artist'][x]) for x in sigker2_sort][2:]
        # Get the number of songs most similars from matrix similarities
        recom_song = similarity[:number_songs]
        # print each item
        result = self._print_message(song=song1, recom_song=recom_song)
        return result

# Main Recommender Program

In [99]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import copy

from scipy.sparse import csr_matrix
from typing import List, Dict
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import sigmoid_kernel
from sklearn.preprocessing import MinMaxScaler

from selenium import webdriver   # 웹 브라우저 자동화
from selenium.webdriver.common.by import By
import time    
from tqdm.notebook import tqdm    # for문을 실행할 때 진행상황을 %게이지로알려줍니다.

class Music_Recommender:
    def __init__(self, df_KNN_count, df_KNN_meta, 
                 df_Lyric, df_Feature):
        '''Preprocessing For KNN Collaborative Filtering'''
        #Read userid-songid-listen_count
        self.song_info = df_KNN_count
        self.song_info.columns = ['user_id', 'song_id', 'listen_count']
        #Read song  metadata
        self.song_actual = df_KNN_meta
        self.song_actual.drop_duplicates(['song_id'], inplace=True)
        #Merge the two dataframes above to create input dataframe for recommender systems
        self.songs = pd.merge(self.song_info, self.song_actual, on="song_id", how="left")
        # Get users which have listen to at least 16 songs
        self.song_user = self.songs.groupby('user_id')['song_id'].count()
        self.song_ten_id = self.song_user[self.song_user > 16].index.to_list()   
        # Filtered the dataset to keep only those users with more than 16 listened
        self.df_song_id_more_ten = self.songs[self.songs['user_id'].isin(self.song_ten_id)].reset_index(drop=True)       
        # convert the dataframe into a pivot table
        self.df_songs_features = self.df_song_id_more_ten.pivot(index='song_id', columns='user_id', values='listen_count').fillna(0)      
        # obtain a sparse matrix
        self.mat_songs_features = csr_matrix(self.df_songs_features.values) 
        self.df_unique_songs = self.songs.drop_duplicates(subset=['song_id']).reset_index(drop=True)[['song_id', 'title']]
        self.decode_id_song = {
            song: i for i, song in 
            enumerate(list(self.df_unique_songs.set_index('song_id').loc[self.df_songs_features.index].title))
        }
        
        '''Preprocessing For KNN Collaborative Filtering'''
        # Read lyric dataset
        self.songs_lyrics = df_Lyric
        # Use sample of dataset for faster model
        self.songs_lyrics = self.songs_lyrics.sample(n=5000).drop('link', axis=1)
        # Replace useless words to blanks
        self.songs_lyrics['text'] = self.songs_lyrics['text'].str.replace(r'\n', '')
        # Read and concat Korean songs
        self.kor_songs = pd.read_csv('melon_top100.csv')
        self.songs_lyrics = pd.concat([self.songs_lyrics, self.kor_songs])
        self.songs_lyrics = self.songs_lyrics.reset_index(drop=True)
        
        '''Preprocessing For KNN Collaborative Filtering'''
        # Read music features dataset
        self.songs_feature = df_Feature
        self.feature_cols=['acousticness', 'danceability', 'duration_ms', 'energy', 'instrumentalness',
                         'key', 'liveness', 'loudness', 'mode', 'speechiness', 'tempo', 'time_signature', 'valence']
        # Scale Features
        self.scaler = MinMaxScaler()
        self.normalized_songs_feature = self.scaler.fit_transform(self.songs_feature[self.feature_cols])
        # Create sigmoid kernel matrix based on given matrix
        self.sig_kernel = sigmoid_kernel(self.normalized_songs_feature)
        
    def KNN_Recommend(self, song1, song2):
        model = Collaborate_Recommender(metric='cosine', algorithm='brute', k=20, data=self.mat_songs_features, decode_id_song=self.decode_id_song)
        result1 = []
        new_recommendations = model.make_recommendation(new_song1=song1, new_song2=song2, n_recommendations=2)
        for i in new_recommendations:
            artist = self.songs[self.songs['title'] == i]
            result1.append({'song': i, 'artist':artist.iloc[0]['artist_name']})
        return result1
    
    def CB_Lyric_Recommend(self, song1, song2):
        tfidf = TfidfVectorizer(analyzer='word', stop_words='english')
        lyrics_matrix = tfidf.fit_transform(self.songs_lyrics['text'])
        cosine_similarities = cosine_similarity(lyrics_matrix)
        similarities = {}
        for i in range(len(cosine_similarities)):
            # Sort each element in cosine_similarities and get the indexes of the songs. 
            similar_indices = cosine_similarities[i].argsort()[:-50:-1] 
            # After that, we'll store in similarities each name of the 50 most similar songs.
            # Except the first one that is the same song.
            similarities[self.songs_lyrics['song'].iloc[i]] = [(cosine_similarities[i][x], self.songs_lyrics['song'][x], self.songs_lyrics['artist'][x]) for x in similar_indices][1:]
            
        model = CB_Lyric_Recommender(similarities)
        recommendation = {
            "song1": self.songs_lyrics['song'].iloc[5011],
            "song2": self.songs_lyrics['song'].iloc[5004],
            "number_songs": 4
        }
        result2 = model.recommend2(recommendation, cosine_similarities, self.songs_lyrics)
        return result2
    
    def CB_Lyric_Recommend(self, song1, song2):
        similarities = {}
        for i in range(len(self.sig_kernel)):
            similar_indices = self.sig_kernel[i].argsort()[:-50:-1]
            similarities[self.songs_feature['song_title'].iloc[i]] = [(self.sig_kernel[i][x], self.songs_feature['song_title'][x], self.songs_feature['artist'][x]) for x in similar_indices][1:]
        
        model = CB_Feature_Recommender(similarities)
        recommendation = {
            "song1": self.songs_feature['song_title'].iloc[2000],
            "song2": self.songs_feature['song_title'].iloc[54],
            "number_songs": 5
        }
        result3 = model.recommend2(recommendation, self.songs_feature, self.sig_kernel)
        return result3
        
    def Crawl_melon(self):
        # 크롬 웹브라우저 실행
        path = "chromedriver.exe"

        driver = webdriver.Chrome(path)
        # 주소는 melon(음원사이트)
        driver.get('http://www.melon.com')
        time.sleep(2)  # 2초 정지

        # 멜론차트 클릭
        more_info_list = driver.find_element(By.CSS_SELECTOR, ".menu_bg.menu01").click()
        time.sleep(2)  # 2초 정지
        
        # 크롤링 코드
        song_list = []    # 전체 크롤링 데이터를 저장할 딕셔너리 생성

        number = 10  # 수집할 글 갯수 정하기

        # 반복문 시작
        more_info_list = driver.find_elements(By.CSS_SELECTOR,".btn.button_icons.type03.song_info")
        for i in range(0,number):

            # 곡정보 더보기 버튼 클릭
            more_info_list[i].click()
            time.sleep(1)
            # 크롤링

            new_song = {}  # 개별 블로그 내용을 담을 딕셔너리 생성
            time.sleep(1)

            # 제목 가져오기
            song = driver.find_element(By.CSS_SELECTOR,".song_name")
            song_name = str(song.text)
            print(i, " : ", song_name)

            # 가수 가져오기
            artist = driver.find_element(By.CSS_SELECTOR,".artist")
            artist_name = str(artist.text)
        #     print(artist_name)

            # 펼치기 버튼 클릭
            driver.find_element(By.CSS_SELECTOR, ".button_more.arrow_d").click()

            # 가사 가져오기
            lyric = driver.find_element(By.CSS_SELECTOR,".lyric.on")
            lyric_text = str(lyric.text)

            #뒤로가기 
            driver.back()
            time.sleep(1)

            #딕셔너리에 저장
            new_song = {'artist' : artist_name, 'song' : song_name, 'text': lyric_text}
            song_list.append(new_song)
        song_list = pd.DataFrame(song_list)
        # 번역이 가능하도록 다음줄 기호를 스페이스바로 바꿈
        song_list['text'] = song_list['text'].str.replace(r'\n', ' ')
        #구글번역 API로 노래 가사 영어로 번역
        import googletrans
        translator = googletrans.Translator()
        for i in range(0,number):
            song_list['text'][i] = translator.translate(song_list['text'][i], dest='en').text
        song_list.to_csv("melon_top100.csv",mode="w",encoding='utf-8', index=False)

    
    def Recommend(self, song1, song2):
        '''
        Recommend Final Playlist
        '''
        res1 = Recommender.KNN_Recommend('I believe in miracles', 'Entre Dos Aguas')
        res2 = Recommender.CB_Lyric_Recommend('I believe in miracles', 'Entre Dos Aguas')
        res3 = Recommender.CB_Lyric_Recommend('I believe in miracles', 'Entre Dos Aguas')
        fin_res = res1 + res2 + res3
        print("<The recommended playlist>")
        i = 1
        for song in fin_res:
            print(str(i) + "- title : " + song['song']+ "  artist : " + song['artist'])
            i = i+1
        
        
        

## Usage

In [100]:
'''First, Import the datasets'''
#Read userid-songid-listen_count
song_info = pd.read_csv('10000.txt',sep='\t',header=None)

#Read song  metadata
song_actual =  pd.read_csv('song_Data_KNN.csv')

#Read lyrics dataset
songs_lyrics = pd.read_csv('songData_lyrics.csv')

#Read features sigmoid dataset
songs_feature = pd.read_csv("songData_sigmoid.csv")

'''Call Main Program'''
#Execute Recommender Program
Recommender = Music_Recommender(song_info, song_actual, songs_lyrics, songs_feature)
Recommender.Recommend('I believe in miracles', 'Entre Dos Aguas')





<The recommended playlist>
1- title : Blue Shoes  artist : Katie Melua
2- title : Thank You Stars  artist : Katie Melua
3- title : Ain't Misbehavin'  artist : Louis Armstrong
4- title : Mars vs. Venus  artist : Usher
5- title : Sippin On Some Syrup  artist : Three 6 Mafia
6- title : Sola (Remix) [feat. Daddy Yankee, Wisin, Farruko, Zion & Lennox]  artist : Anuel Aa
7- title : Fashion Killa  artist : A$AP Rocky
8- title : Independent - House Remix  artist : Hoodfellas
9- title : Sippin On Some Syrup  artist : Three 6 Mafia
10- title : Sola (Remix) [feat. Daddy Yankee, Wisin, Farruko, Zion & Lennox]  artist : Anuel Aa
11- title : Fashion Killa  artist : A$AP Rocky
12- title : Independent - House Remix  artist : Hoodfellas
