### https://www.analyticsindiamag.com/beginners-guide-to-building-a-song-recommender-in-python/ 
### https://github.com/llSourcell/recommender_live/blob/master/Song%20Recommender_Python.ipynb

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import time 
from sklearn.externals import joblib
#import Recommenders as Recommenders
import Evaluation as Evaluation

In [2]:
import numpy as np
import pandas

#Class for Popularity based Recommender System model
class popularity_recommender_py():
    """Popularity based recommender: every user gets the same top items.

    ``create`` counts the training rows (non-null user values) for every
    (item_id, item1_id) pair and keeps the ten pairs with the highest count.
    ``recommend`` returns that table tagged with the requesting user.
    """

    def __init__(self):
        self.train_data = None
        self.user_id = None
        self.item_id = None
        self.item1_id = None
        self.popularity_recommendations = None

    #Create the popularity based recommender system model
    def create(self, train_data, user_id, item_id, item1_id):
        """Build the top-10 popularity table from ``train_data``.

        Parameters:
            train_data -- DataFrame holding the three columns named below
            user_id    -- name of the user column (counted per item pair)
            item_id    -- name of the first grouping column
            item1_id   -- name of the second grouping column

        The result is stored in ``self.popularity_recommendations`` with the
        columns item_id, item1_id, 'score' and 'Rank'.
        """
        self.train_data = train_data
        self.user_id = user_id
        self.item_id = item_id
        self.item1_id = item1_id

        #Get a count of user_ids for each unique song as recommendation score
        train_data_grouped = (
            train_data.groupby([self.item_id, self.item1_id])
            .agg({self.user_id: 'count'})
            .reset_index()
        )
        #Rename the column that was actually counted; a hard-coded 'user_id'
        #leaves no 'score' column when the user column has another name.
        train_data_grouped.rename(columns={self.user_id: 'score'}, inplace=True)

        #Sort the songs based upon recommendation score (highest first), ties
        #broken by the two item columns in ascending order
        train_data_sort = train_data_grouped.sort_values(
            ['score', self.item_id, self.item1_id],
            ascending=[False, True, True],
        )

        #Generate a recommendation rank based upon score; method='first'
        #numbers ties in the sorted row order
        train_data_sort['Rank'] = train_data_sort['score'].rank(ascending=False, method='first')

        #Get the top 10 recommendations
        self.popularity_recommendations = train_data_sort.head(10)

    #Use the popularity based recommender system model to
    #make recommendations
    def recommend(self, user_id):
        """Return the stored top-10 table with a leading 'user_id' column.

        ``user_id`` is only used to fill that column; the recommended items
        are the same for every user. The stored table is not modified.
        """
        #Work on a copy so the per-user column never leaks into the model
        user_recommendations = self.popularity_recommendations.copy()

        #Add user_id column for which the recommendations are being generated
        user_recommendations['user_id'] = user_id

        #Bring user_id column to the front
        cols = user_recommendations.columns.tolist()
        cols = cols[-1:] + cols[:-1]
        return user_recommendations[cols]


In [27]:
class item_similarity_recommender_py():
    """Item similarity recommender based on the Jaccard index of user sets.

    Two items are similar when largely the same users appear with both of
    them in the training data. ``create`` only stores the training frame and
    the column names; the similarity scores are computed on every
    ``recommend`` / ``get_similar_items`` call.

    Column roles, as used by the methods below:
        user_id  -- column identifying the user
        item_id  -- grouping column, only read by the ``*_genre`` methods
        item1_id -- column identifying the item that is scored and recommended
    """

    #Upper bound on the number of rows in a recommendation table
    MAX_RECOMMENDATIONS = 20

    def __init__(self):
        self.train_data = None
        self.user_id = None
        self.item_id = None
        self.item1_id=None
        self.cooccurence_matrix = None
        self.songs_dict = None
        self.rev_songs_dict = None
        self.item_similarity_recommendations = None

    @staticmethod
    def _as_item_list(items):
        """Return ``items`` as a list; a bare string becomes a one-item list.

        Without this a string such as 'Techno' would be iterated character by
        character and never match any item.
        """
        if isinstance(items, str):
            return [items]
        return list(items)

    #Get unique items (songs) corresponding to a given user
    def get_user_items(self, user):
        """Return the list of unique item1_id values in the rows of ``user``."""
        user_data = self.train_data[self.train_data[self.user_id] == user]
        return list(user_data[self.item1_id].unique())

    #Get unique users for a given item (song)
    def get_item_users(self, item):
        """Return the set of unique user_id values in the rows of ``item``."""
        item_data = self.train_data[self.train_data[self.item1_id] == item]
        return set(item_data[self.user_id].unique())

    #Get unique items (songs) in the training data
    def get_all_items_train_data(self):
        """Return every unique item1_id value, in order of first appearance."""
        return list(self.train_data[self.item1_id].unique())

    def get_all_items_genre_train_data(self,genre_type):
        """Return the unique item1_id values whose item_id equals ``genre_type``.

        Raises KeyError (from ``get_group``) when ``genre_type`` does not
        occur in the item_id column.
        """
        genre_rows = self.train_data.groupby(self.item_id).get_group(genre_type)
        return list(genre_rows[self.item1_id].unique())

    def _users_by_item(self):
        """Map every item1_id value to the set of its unique users.

        One grouped pass over the training data replaces a full-frame filter
        per item. sort=False because the key order is irrelevant here.
        """
        unique_users = self.train_data.groupby(self.item1_id, sort=False)[self.user_id].unique()
        return {item: set(users) for item, users in unique_users.items()}

    #Construct cooccurence matrix
    def construct_cooccurence_matrix(self, user_songs, all_songs):
        """Return the Jaccard similarity of each user song to each known song.

        The result is an ``np.matrix`` of shape
        (len(user_songs), len(all_songs)); entry [j, i] is
        |users(j) & users(i)| / |users(j) | users(i)|, or 0 when the two
        songs share no user. Songs absent from the training data have an
        empty user set and therefore an all-zero row/column.
        """
        users_of = self._users_by_item()
        no_users = set()

        #Get users for all songs in user_songs.
        user_songs_users = [users_of.get(song, no_users) for song in user_songs]

        #Initialize the item cooccurence matrix of size
        #len(user_songs) X len(songs)
        cooccurence_matrix = np.matrix(np.zeros(shape=(len(user_songs), len(all_songs))), float)

        #Calculate similarity between user songs and all unique songs
        #in the training data
        for i, song in enumerate(all_songs):
            users_i = users_of.get(song, no_users)
            if not users_i:
                #No users means no intersection: the column stays zero
                continue

            for j, users_j in enumerate(user_songs_users):
                shared = len(users_i & users_j)
                if shared:
                    cooccurence_matrix[j, i] = float(shared) / float(len(users_i | users_j))

        return cooccurence_matrix

    #Use the cooccurence matrix to make top recommendations
    def generate_top_recommendations(self, user, cooccurence_matrix, all_songs, user_songs):
        """Rank ``all_songs`` by their mean similarity to ``user_songs``.

        Returns a DataFrame with the columns 'user_id', 'song', 'score' and
        'rank' holding at most MAX_RECOMMENDATIONS rows; songs already in
        ``user_songs`` are left out. Returns -1 (after printing a notice)
        when no row can be produced, e.g. for an empty ``user_songs``.
        """
        print("Non zero values in cooccurence_matrix :%d" % np.count_nonzero(cooccurence_matrix))

        #A plain 2-D array makes the column sums 1-D for matrix and ndarray input alike
        similarity = np.asarray(cooccurence_matrix, dtype=float)

        rows = []
        #Without any user song the average below would be a division by zero
        if similarity.shape[0] > 0:
            #Calculate the average of the scores in cooccurence matrix over all user songs.
            user_sim_scores = (similarity.sum(axis=0) / float(similarity.shape[0])).tolist()

            #Sort the indices of user_sim_scores based upon their value
            #Also maintain the corresponding score
            sort_index = sorted(((e, i) for i, e in enumerate(user_sim_scores)), reverse=True)

            already_known = set(user_songs)

            #Fill the rows with the top item based recommendations
            for score, song_index in sort_index:
                if len(rows) >= self.MAX_RECOMMENDATIONS:
                    break
                song = all_songs[song_index]
                if np.isnan(score) or song in already_known:
                    continue
                rows.append([user, song, score, len(rows) + 1])

        #Handle the case where there are no recommendations
        if not rows:
            print("The current user has no songs for training the item similarity based recommendation model.")
            return -1

        return pandas.DataFrame(rows, columns=['user_id', 'song', 'score', 'rank'])

    #Create the item similarity based recommender system model
    def create(self, train_data, user_id, item_id,item1_id):
        """Store the training frame and the names of its relevant columns."""
        self.train_data = train_data
        self.user_id = user_id
        self.item_id = item_id
        self.item1_id = item1_id

    def _recommend_from(self, user, user_songs, all_songs):
        """Shared tail of the public entry points: score and rank ``all_songs``."""
        print("no. of unique songs in the training set: %d" % len(all_songs))

        #Construct item cooccurence matrix of size len(user_songs) X len(songs)
        cooccurence_matrix = self.construct_cooccurence_matrix(user_songs, all_songs)

        #Use the cooccurence matrix to make recommendations
        return self.generate_top_recommendations(user, cooccurence_matrix, all_songs, user_songs)

    #Use the item similarity based recommender system model to
    #make recommendations
    def recommend(self, user):
        """Recommend items for ``user`` based on the items in their training rows.

        Returns the table described in ``generate_top_recommendations`` or -1.
        """
        #Get all unique songs for this user
        user_songs = self.get_user_items(user)

        print("No. of unique songs for the user: %d" % len(user_songs))

        #Get all unique items (songs) in the training data
        all_songs = self.get_all_items_train_data()

        return self._recommend_from(user, user_songs, all_songs)

    #Get similar items to given items
    def get_similar_items(self, item_list):
        """Return the items most similar to those in ``item_list``.

        ``item_list`` holds item1_id values; a single string is treated as a
        one-item list. The 'user_id' column of the result is the empty string.
        """
        user_songs = self._as_item_list(item_list)

        #Get all unique items (songs) in the training data
        all_songs = self.get_all_items_train_data()

        return self._recommend_from("", user_songs, all_songs)

    def get_similar_items_genre(self, item_list,genre_type):
        """Like ``get_similar_items`` but only ranks items of one genre.

        Candidates are the item1_id values whose item_id equals
        ``genre_type``; an unknown ``genre_type`` raises KeyError.
        """
        user_songs = self._as_item_list(item_list)

        #Get the unique items (songs) of the requested genre
        all_songs = self.get_all_items_genre_train_data(genre_type)

        return self._recommend_from("", user_songs, all_songs)

In [4]:
#Read userid-songid-listen_count triplets
#This step might take time to download data from external sources
#triplets_file = 'https://static.turi.com/datasets/millionsong/10000.txt'
#songs_metadata_file = 'https://static.turi.com/datasets/millionsong/song_data.csv'
#songs_metadata_file = 'https://static.turi.com/datasets/millionsong/song_data.csv'


#song_df_1 = pandas.read_table(triplets_file,header=None)
#song_df_1.columns = ['user_id', 'song_id', 'listen_count']

#Load the per-user listening records (the file name suggests user id, song id,
#rating and listen count columns -- confirm against the CSV header)
song_df_1 = pandas.read_csv(
    'file:///C:/Users/suhas/OneDrive/Desktop/digitalmarketingandanalytics/Final Project/useridSongIdRatingListenCount.csv'
)

#Load the song metadata table
song_df_2 = pandas.read_csv(
    'file:///C:/Users/suhas/OneDrive/Desktop/digitalmarketingandanalytics/Final Project/song_data.csv'
)

#Attach the metadata to every listening record. Duplicate song_id rows are
#dropped from the metadata first so the left join cannot multiply records.
song_df = song_df_1.merge(
    song_df_2.drop_duplicates(subset=['song_id']),
    how='left',
    on="song_id",
)

  interactivity=interactivity, compiler=compiler, result=result)
  interactivity=interactivity, compiler=compiler, result=result)


In [5]:
#triplets_file = ('file:///C:/Users/suhas/OneDrive/Desktop/digitalmarketingandanalytics/Final Project/MillionSongs.json')
#song_df_1 = pandas.read_table(triplets_file,header=None)
#song_df_1.columns = ['user_id', 'song_id', 'listen_count']
#song_df_1

In [6]:
#song_df.to_csv(r'C:/Users/suhas/OneDrive/Desktop/digitalmarketingandanalytics/Final Project/newusers.csv')

In [7]:
%%capture
song_df.head(10000)

In [8]:
len(song_df)

1045025

In [9]:
%%capture
#Keep the first 10,000 rows as an independent copy: adding a column to the
#bare head() slice is a chained assignment on a slice of the full table.
song_df = song_df.head(10000).copy()

#Merge song title and artist_name columns to make a merged column
song_df['song'] = song_df['title'].map(str) + " - " + song_df['artist_name']
song_df

In [10]:
%%capture
#Keep the first 10,000 rows as an independent copy so the column assignment
#below writes to this frame rather than to a slice of another one.
song_df = song_df.head(10000).copy()

#Append the genre to the merged 'song' label to make the 'songGenre' column
song_df['songGenre'] = song_df['song'].map(str) + " - " + song_df['Genre']
song_df.head(10000)

In [11]:
%%capture
#Number of listening records per songGenre label
song_grouped = (
    song_df.groupby(['songGenre'])
    .agg({'listen_count': 'count'})
    .reset_index()
)

#Share of each label in the total number of records, in percent
grouped_sum = song_grouped['listen_count'].sum()
song_grouped['percentage'] = song_grouped['listen_count'] / grouped_sum * 100

#Most frequent labels first, ties in alphabetical order
song_grouped.sort_values(by=['listen_count', 'songGenre'], ascending=[False, True])

In [12]:
users = song_df['user_id'].unique()

In [13]:
len(users)

458

In [14]:
###Fill in the code here
songs = song_df['song'].unique()
len(songs)

319

In [15]:
%%capture
train_data, test_data = train_test_split(song_df, test_size = 0.20, random_state=0)
print(train_data.head(5))

In [16]:
#pm = Recommenders.popularity_recommender_py()
pm = popularity_recommender_py()
pm.create(train_data, 'user_id', 'songGenre','Genre')

In [17]:

user_id = users[10]
pm.recommend(user_id)

Unnamed: 0,user_id,songGenre,Genre,score,Rank
1858,d6589314c0a9bcbca4fee0c93b14bc402363afea,Te Quiero Pa´Mi - Don Omar - Rock Music,Rock Music,22,1.0
108,d6589314c0a9bcbca4fee0c93b14bc402363afea,All of Me - John Legend - Rock Music,Rock Music,17,2.0
2039,d6589314c0a9bcbca4fee0c93b14bc402363afea,Vacaciones - Wisin - Country Music,Country Music,16,3.0
482,d6589314c0a9bcbca4fee0c93b14bc402363afea,Desde Esa Noche - Thalía - Country Music,Country Music,14,4.0
1052,d6589314c0a9bcbca4fee0c93b14bc402363afea,Llegaste tú (feat. Reykon) - Sofia Reyes - Roc...,Rock Music,14,5.0
1464,d6589314c0a9bcbca4fee0c93b14bc402363afea,Quiero Que Vuelvas - Alejandro Fernandez - Pop...,Pop Music,14,6.0
1902,d6589314c0a9bcbca4fee0c93b14bc402363afea,Thinking Out Loud - Ed Sheeran - Electronic Da...,Electronic Dance Music,14,7.0
17,d6589314c0a9bcbca4fee0c93b14bc402363afea,24K Magic - Bruno Mars - Rock Music,Rock Music,13,8.0
265,d6589314c0a9bcbca4fee0c93b14bc402363afea,By Your Side - Jonas Blue - Electronic Dance M...,Electronic Dance Music,13,9.0
1113,d6589314c0a9bcbca4fee0c93b14bc402363afea,Materialista - Silvestre Dangond - Rock Music,Rock Music,13,10.0


In [18]:
###Fill in the code here
user_id = users[8]
pm.recommend(user_id)

Unnamed: 0,user_id,songGenre,Genre,score,Rank
1858,b64cdd1a0bd907e5e00b39e345194768e330d652,Te Quiero Pa´Mi - Don Omar - Rock Music,Rock Music,22,1.0
108,b64cdd1a0bd907e5e00b39e345194768e330d652,All of Me - John Legend - Rock Music,Rock Music,17,2.0
2039,b64cdd1a0bd907e5e00b39e345194768e330d652,Vacaciones - Wisin - Country Music,Country Music,16,3.0
482,b64cdd1a0bd907e5e00b39e345194768e330d652,Desde Esa Noche - Thalía - Country Music,Country Music,14,4.0
1052,b64cdd1a0bd907e5e00b39e345194768e330d652,Llegaste tú (feat. Reykon) - Sofia Reyes - Roc...,Rock Music,14,5.0
1464,b64cdd1a0bd907e5e00b39e345194768e330d652,Quiero Que Vuelvas - Alejandro Fernandez - Pop...,Pop Music,14,6.0
1902,b64cdd1a0bd907e5e00b39e345194768e330d652,Thinking Out Loud - Ed Sheeran - Electronic Da...,Electronic Dance Music,14,7.0
17,b64cdd1a0bd907e5e00b39e345194768e330d652,24K Magic - Bruno Mars - Rock Music,Rock Music,13,8.0
265,b64cdd1a0bd907e5e00b39e345194768e330d652,By Your Side - Jonas Blue - Electronic Dance M...,Electronic Dance Music,13,9.0
1113,b64cdd1a0bd907e5e00b39e345194768e330d652,Materialista - Silvestre Dangond - Rock Music,Rock Music,13,10.0


In [19]:
x = song_df[['Genre','songGenre']]
x.head()


Unnamed: 0,Genre,songGenre
0,Electro,Que Raro - Feid - Electro
1,Indie Rock,Ya No Me Duele Más - Remix - Silvestre Dangond...
2,Dubstep,Ruleta Rusa - Kevin Roldan - Dubstep
3,Indie Rock,Just Hold On - Steve Aoki - Indie Rock
4,Electro,Tu y Yo (feat. Daddy Yankee) - Tommy Torres - ...


In [20]:
#is_model = Recommenders.item_similarity_recommender_py()
is_model = item_similarity_recommender_py()
is_model.create(train_data, 'user_id','Genre','songGenre')

In [32]:
#Print the songs for the user in training data
#Pick a user and fetch the songs they have in the training data
user_id = users[7]
user_items = is_model.get_user_items(user_id)

print("------------------------------------------------------------------------------------")
print("Training data songs for the user userid: %s:" % user_id)
print("------------------------------------------------------------------------------------")

#Only preview the first three songs; the slice stops there instead of walking
#the whole list with a manual counter.
for user_item in user_items[:3]:
    print(user_item)

print("----------------------------------------------------------------------")
print("Recommendation process going on:")
print("----------------------------------------------------------------------")

#Recommend songs for the user using personalized model
is_model.recommend(user_id)

------------------------------------------------------------------------------------
Training data songs for the user userid: 9bb911319fbc04f01755814cb5edb21df3d1a336:
------------------------------------------------------------------------------------
Si Ella Quisiera - Remix - Justin Quiles - Dubstep
Bad Things (with Camila Cabello) - Machine Gun Kelly - Jazz
Llegaste tú (feat. Reykon) - Sofia Reyes - Rock Music
----------------------------------------------------------------------
Recommendation process going on:
----------------------------------------------------------------------
No. of unique songs for the user: 4
no. of unique songs in the training set: 2161
Non zero values in cooccurence_matrix :416


Unnamed: 0,user_id,song,score,rank
0,9bb911319fbc04f01755814cb5edb21df3d1a336,Privado - Rvssian - Pop Music,0.125,1
1,9bb911319fbc04f01755814cb5edb21df3d1a336,Un Polvo - Maluma - Dubstep,0.125,2
2,9bb911319fbc04f01755814cb5edb21df3d1a336,Caminar de Tu Mano - Río Roma - Dubstep,0.083333,3
3,9bb911319fbc04f01755814cb5edb21df3d1a336,Embriágame (feat. Don Omar) - Remix - Zion & L...,0.083333,4
4,9bb911319fbc04f01755814cb5edb21df3d1a336,Tu No Vive Asi (feat. Mambo Kingz & DJ Luian) ...,0.083333,5
5,9bb911319fbc04f01755814cb5edb21df3d1a336,Otra Vez (feat. J Balvin) - Zion & Lennox - Po...,0.083333,6
6,9bb911319fbc04f01755814cb5edb21df3d1a336,Qué Gano Olvidándote - Reik - Country Music,0.083333,7
7,9bb911319fbc04f01755814cb5edb21df3d1a336,Falsas Mentiras (feat. Ozuna) - Los De La Nazz...,0.083333,8
8,9bb911319fbc04f01755814cb5edb21df3d1a336,Side To Side - Ariana Grande - Country Music,0.08125,9
9,9bb911319fbc04f01755814cb5edb21df3d1a336,El Perdedor - Maluma - Techno,0.064706,10


In [22]:
#Pick the user to inspect
user_id = users[9]
#Unique songs this user has in the training data
user_items = is_model.get_user_items(user_id)

print("------------------------------------------------------------------------------------")
print("Training data songs for the user userid: %s:" % user_id)
print("------------------------------------------------------------------------------------")

#List every training song of the user
for user_item in user_items:
    print(user_item)

print("----------------------------------------------------------------------")
print("Recommendation process going on:")
print("----------------------------------------------------------------------")

#Recommend songs for the user using personalized model
is_model.recommend(user_id)

------------------------------------------------------------------------------------
Training data songs for the user userid: 17aa9f6dbdf753831da8f38c71b66b64373de613:
------------------------------------------------------------------------------------
Sin Contrato - Maluma - Rhythm and Blues
Don't Wanna Know - Maroon 5 - Jazz
Heathens - Twenty One Pilots - Electronic Dance Music
Love Me Now - John Legend - Jazz
Sofia - Alvaro Soler - Techno
Ya Me Enteré - Reik - Rhythm and Blues
Quiero Que Vuelvas - Alejandro Fernandez - Jazz
Soy Peor - Bad Bunny - Dubstep
Bajito - Jencarlos - Electro
De Pies a Cabeza - Maná - Dubstep
----------------------------------------------------------------------
Recommendation process going on:
----------------------------------------------------------------------
No. of unique songs for the user: 10
no. of unique songs in the training set: 2161
Non zero values in cooccurence_matrix :2190


Unnamed: 0,user_id,song,score,rank
0,17aa9f6dbdf753831da8f38c71b66b64373de613,Run Up (feat. PARTYNEXTDOOR & Nicki Minaj) - M...,0.076786,1
1,17aa9f6dbdf753831da8f38c71b66b64373de613,Work from Home - Fifth Harmony - Jazz,0.074286,2
2,17aa9f6dbdf753831da8f38c71b66b64373de613,This Girl (Kungs Vs. Cookin' On 3 Burners) - K...,0.063571,3
3,17aa9f6dbdf753831da8f38c71b66b64373de613,Closer - The Chainsmokers - Country Music,0.063571,4
4,17aa9f6dbdf753831da8f38c71b66b64373de613,Love Me Now - John Legend - Dubstep,0.062778,5
5,17aa9f6dbdf753831da8f38c71b66b64373de613,We Don't Talk Anymore (feat. Selena Gomez) - C...,0.059028,6
6,17aa9f6dbdf753831da8f38c71b66b64373de613,Kill Em With Kindness - Selena Gomez - Jazz,0.057692,7
7,17aa9f6dbdf753831da8f38c71b66b64373de613,Love Me Now - John Legend - Electronic Dance M...,0.057123,8
8,17aa9f6dbdf753831da8f38c71b66b64373de613,Can't Feel My Face - The Weeknd - Dubstep,0.056786,9
9,17aa9f6dbdf753831da8f38c71b66b64373de613,Llegaste tú (feat. Reykon) - Sofia Reyes - Rhy...,0.056313,10


In [34]:
#NOTE(review): get_similar_items expects a list of item names, i.e. values of
#the 'songGenre' column the model was created with. 'Techno' is a genre label,
#not such a value, so no user sets overlap and every score in the output below
#is 0.0 -- pass a list of 'songGenre' values instead.
is_model.get_similar_items('Techno')

no. of unique songs in the training set: 2161
Non zero values in cooccurence_matrix :0


Unnamed: 0,user_id,song,score,rank
0,,Para Enamorarte - CNCO - Electro,0.0,1
1,,"0 Sentimientos (Remix) [feat. Noriel, Darkiel,...",0.0,2
2,,I Got You - Bebe Rexha - Techno,0.0,3
3,,Quiero Que Vuelvas - Alejandro Fernandez - Ele...,0.0,4
4,,Desde Que Estamos Juntos - Melendi - Electroni...,0.0,5
5,,Final Song - MØ - Techno,0.0,6
6,,Que Raro - Feid - Dubstep,0.0,7
7,,La Gozadera - Gente De Zona - Country Music,0.0,8
8,,Hasta la Raíz - Natalia Lafourcade - Rhythm an...,0.0,9
9,,Can't Feel My Face - The Weeknd - Electro,0.0,10


In [35]:
#NOTE(review): the first argument should be a list of item names (values of
#the 'songGenre' column). Here the column name itself is passed, so nothing
#matches and every score in the output below is 0.0.
is_model.get_similar_items_genre('songGenre','Electro')

no. of unique songs in the training set: 222
Non zero values in cooccurence_matrix :0


Unnamed: 0,user_id,song,score,rank
0,,Para Enamorarte - CNCO - Electro,0.0,1
1,,Quiero Que Vuelvas - Alejandro Fernandez - Ele...,0.0,2
2,,Can't Feel My Face - The Weeknd - Electro,0.0,3
3,,Quédate - Manuel Medrano - Electro,0.0,4
4,,How Would You Feel (Paean) - Ed Sheeran - Electro,0.0,5
5,,Tan Fácil - CNCO - Electro,0.0,6
6,,Tu No Vive Asi (feat. Mambo Kingz & DJ Luian) ...,0.0,7
7,,Say You Won't Let Go - James Arthur - Electro,0.0,8
8,,Enamorándonos - Cabas - Electro,0.0,9
9,,I Took A Pill In Ibiza - Seeb Remix - Mike Pos...,0.0,10


In [None]:
songGenre = 'Indie'
###Fill in the code here
is_model.get_similar_items([])

In [None]:
#Time the whole precision/recall evaluation (wall clock, seconds)
start = time.time()

#Share of users to use for the precision recall calculation
#(presumably a fraction, 0.05 = 5% -- confirm against Evaluation.calculate_measures)
user_sample = 0.05

#Instantiate the precision_recall_calculator class with the test/train split
#and the two models to compare
pr = Evaluation.precision_recall_calculator(test_data, train_data, pm, is_model)

#Call method to calculate precision and recall values; the four results are
#unpacked as popularity model (pm_*) and item similarity model (ism_*) lists
(pm_avg_precision_list, pm_avg_recall_list, ism_avg_precision_list, ism_avg_recall_list) = pr.calculate_measures(user_sample)

#Print the elapsed time in seconds
end = time.time()
print(end - start)

In [None]:

import pylab as pl

#Method to generate precision and recall curve
def plot_precision_recall(m1_precision_list, m1_recall_list, m1_label, m2_precision_list, m2_recall_list, m2_label):
    """Draw the precision-recall curves of two models on one figure.

    For each model the recall list is plotted on the x axis against the
    precision list on the y axis, labelled with the given label. Both axes
    are limited to the range 0.0 - 0.20 and the legend is placed below the
    plot. The current figure is cleared first and shown at the end.
    """
    curves = (
        (m1_recall_list, m1_precision_list, m1_label),
        (m2_recall_list, m2_precision_list, m2_label),
    )

    #Start from an empty figure
    pl.clf()
    for recall_values, precision_values, curve_label in curves:
        pl.plot(recall_values, precision_values, label=curve_label)

    #Axis titles and the shared 0 - 0.20 window
    pl.xlabel('Recall')
    pl.ylabel('Precision')
    lower, upper = 0.0, 0.20
    pl.ylim([lower, upper])
    pl.xlim([lower, upper])

    pl.title('Precision-Recall curve')
    #Legend anchored below the axes (loc=9 is 'upper center' of the anchor)
    pl.legend(loc=9, bbox_to_anchor=(0.5, -0.2))
    pl.show()

In [None]:
print("Plotting precision recall curves.")

plot_precision_recall(pm_avg_precision_list, pm_avg_recall_list, "popularity_model",
                      ism_avg_precision_list, ism_avg_recall_list, "item_similarity_model")

In [None]:
import os
import pickle  #pickle.dump below needs this import

#Target file; its parent directory is created on demand
filename = "/tmp/not_exist/filenames.pkl"
os.makedirs(os.path.dirname(filename), exist_ok=True)

#Placeholder payload
data = 'sadasdas'

#Write to the path held in filename instead of repeating the literal
with open(filename, 'wb') as f:
    pickle.dump(data, f)

In [None]:
print("Plotting precision recall curves for a larger subset of data (100,000 rows) (user sample = 0.005).")

#Read the persisted files 
pm_avg_precision_list = joblib.load('pm_avg_precision_list_3.pkl')
pm_avg_recall_list = joblib.load('pm_avg_recall_list_3.pkl')
ism_avg_precision_list = joblib.load('ism_avg_precision_list_3.pkl')
ism_avg_recall_list = joblib.load('ism_avg_recall_list_3.pkl')

print("Plotting precision recall curves.")
plot_precision_recall(pm_avg_precision_list, pm_avg_recall_list, "popularity_model",
                      ism_avg_precision_list, ism_avg_recall_list, "item_similarity_model")

In [None]:

print("Plotting precision recall curves for a larger subset of data (100,000 rows) (user sample = 0.005).")

pm_avg_precision_list = joblib.load('pm_avg_precision_list_2.pkl')
pm_avg_recall_list = joblib.load('pm_avg_recall_list_2.pkl')
ism_avg_precision_list = joblib.load('ism_avg_precision_list_2.pkl')
ism_avg_recall_list = joblib.load('ism_avg_recall_list_2.pkl')

print("Plotting precision recall curves.")
plot_precision_recall(pm_avg_precision_list, pm_avg_recall_list, "popularity_model",
                      ism_avg_precision_list, ism_avg_recall_list, "item_similarity_model")

In [None]:
#Get unique items (songs) in the training data
def get_all_items_train_data(x,item_id,item1_id):
    """Group ``x`` by the ``item_id`` column and return the groupby in a list.

    The return value is a one-element list holding the groupby object, not a
    list of item names. ``item1_id`` is accepted but not used.
    """
    grouped_by_item = x.groupby(item_id)
    return [grouped_by_item]

In [None]:
x=get_all_items_train_data(song_df,'Genre','songGenre')
print(x)

In [None]:
list1= list(x[self.item1_id].unique())
      


In [None]:
list1= list(x['songGenre'].unique())
list1


In [None]:

x=group.get_group("Techno")
x=pd.DataFrame(x)

In [None]:
group = song_df.groupby('Genre')
groups = sorted(group)
df = pd.concat([g for _, g in groups])
df

In [None]:
 list1= list(df[self.item1_id].unique())

In [39]:
#Read user metadata
test =  pandas.read_csv('file:///C:/Users/suhas/OneDrive/Desktop/digitalmarketingandanalytics/Final Project/Salesforce/Book2.csv')

In [40]:
test

Unnamed: 0,Spotify Song: Song Name,Artist Name,Genre,Song Length(Seconds),Frequency,Spotify Song: ID
0,Starboy,The Weeknd,Jazz,374027,8165,a054P00000uaFeA
1,Hasta el Amanecer,Nicky Jam,Jazz,238132,2718,a054P00000uaFZw
2,Faded,Alan Walker,Indie Rock,244088,4369,a054P00000uaFZx
3,CAN'T STOP THE FEELING! (Original Song from Dr...,Justin Timberlake,Country Music,190566,4262,a054P00000uaFet
4,Borro Cassette,Maluma,Rhythm and Blues,221518,8455,a054P00000uaFe5
5,One Dance,Drake,Rock Music,199157,7547,a054P00000uaFXe
6,Gyal You A Party Animal - Remix,Charly Black,Electro,368441,1745,a054P00000uaFej
7,ReggaetÃ³n Lento (Bailemos),CNCO,Electronic Dance Music,247640,5543,a054P00000uaFMx
8,Let Me Love You,DJ Snake,Dubstep,180871,7190,a054P00000uaFYI
9,24K Magic,Bruno Mars,Rhythm and Blues,292571,4503,a054P00000uaFWe


In [1]:
!pip3 install gspread oauth2client df2gspread



In [None]:
!conda install -c conda-forge df2gspread