### Read the dataset 

In [129]:
import json

# read file
with open('./data/mpd.slice.3000-3999.json', 'r') as myfile:
    data=myfile.read()

# parse file
dat = json.loads(data)

# show values
print(dat['info'])

{'generated_on': '2017-12-03 08:41:42.057563', 'slice': '3000-3999', 'version': 'v1'}


In [130]:
class Playlist:
    def __init__(self, name, pid):
        self.name = name
        self.pid = pid
    
    def getName(self):
        return self.name
    
    def getPid(self):
        return self.pid
    
    def getKey(self):
        return self.name + "_" + str(self.pid)
    
    def __key(self):
        return (self.name, self.pid)

    def __hash__(self):
        return hash(self.__key())
    
    def __eq__(self, other):
        if isinstance(other, Playlist):
            return self.__key() == other.__key()
        return False

### (Input) Pick a playlist as the user playlist from the dataset

In [131]:
userPlaylistIndex = 25 # change index to pick a different playlist 

# using a playlist that already exists as the user playlist
#userPlaylist = dat['playlists'][userPlaylistIndex]
userPlaylist = Playlist(dat['playlists'][userPlaylistIndex]['name'], dat['playlists'][userPlaylistIndex]['pid'])

print("User playlist name: %s. Number of songs in playlist: %d" % (userPlaylist.getKey(), dat['playlists'][userPlaylistIndex]['num_tracks']))

User playlist name: country_3025. Number of songs in playlist: 75


In [132]:
from scipy.stats.stats import pearsonr
import operator

def getSongsListFromPlaylistIndex(userPlaylistIndex, data):
    userPlaylist = data['playlists'][userPlaylistIndex]
    # store in a list the number of songs of the user playlist
    userPlaylistTrackIds = []
    for track in userPlaylist['tracks']:
        userPlaylistTrackIds.append(track['track_uri'])
        
    return userPlaylistTrackIds

def getPlaylistKeyFromIndex(userPlaylistIndex, data):
    pList = data['playlists'][userPlaylistIndex]
    
    return Playlist(pList['name'], pList['pid']).getKey()

def getTopPlaylistsWithSongsInCommon(userPlaylistIndex, topN, data):
    
    userPlaylistSongList = getSongsListFromPlaylistIndex(userPlaylistIndex, data)
    userPlaylistKey = getPlaylistKeyFromIndex(userPlaylistIndex, data)
    
    # how many songs a playlist has in common to the user playlist 
    playlistToSongsInCommon = {}

    for playlist in data['playlists']:
        playlistKey = Playlist(playlist['name'], str(playlist['pid']))
        if userPlaylistKey == playlistKey:
            continue #skip the user playlist
        for track in playlist['tracks']:
            if track['track_uri'] in userPlaylistSongList:
                if playlistKey in playlistToSongsInCommon:
                    playlistToSongsInCommon[playlistKey] = playlistToSongsInCommon[playlistKey] + 1
                else:
                     playlistToSongsInCommon[playlistKey] = 1        
            
    sorted_x = sorted(playlistToSongsInCommon.items(), key=operator.itemgetter(1), reverse=True)[:topN]
    
    if len(sorted_x) == 0:
        print("Unable to find playlist with songs in common.")
    elif len(sorted_x) < topN :
        print("Only able to find %d common playlists. Returning %d." % (len(sorted_x), len(sorted_x)))
        
    return sorted_x


### (Recommendation 1) 
### Top playlists based on similar songs in user playlist

In [133]:
print("User playlist name: %s. Number of songs in playlist: %d" % (userPlaylist.getKey(), dat['playlists'][userPlaylistIndex]['num_tracks']))
print()
topN = 10
for pList in getTopPlaylistsWithSongsInCommon(userPlaylistIndex, topN, dat):
    print("Name: %s with %d common songs." % (pList[0].getKey(), pList[1]))

User playlist name: country_3025. Number of songs in playlist: 75

Name: country_3025 with 75 common songs.
Name: Country_3626 with 27 common songs.
Name: country_3672 with 26 common songs.
Name: Summer 17_3134 with 25 common songs.
Name: Country_3342 with 24 common songs.
Name: country _3688 with 24 common songs.
Name: Country _3530 with 20 common songs.
Name: country_3089 with 19 common songs.
Name: Country?_3927 with 19 common songs.
Name: Country_3313 with 18 common songs.


### (Recommendation 2)
### Use the list of songs from the top playlists in "Recommnedation 1" to discover new playlists.  

In [134]:
def getTopPlaylistsFromSongsList(listOfSongs, topN, data):    
    
    # how many songs a playlist has in common to the user playlist 
    playlistToSongsInCommon = {}

    for playlist in data['playlists']:
        playlistKey = Playlist(playlist['name'], str(playlist['pid']))
        for track in playlist['tracks']:
            if track['track_uri'] in listOfSongs:
                if playlistKey in playlistToSongsInCommon:
                    playlistToSongsInCommon[playlistKey] = playlistToSongsInCommon[playlistKey] + 1
                else:
                     playlistToSongsInCommon[playlistKey] = 1        
            
    sorted_x = sorted(playlistToSongsInCommon.items(), key=operator.itemgetter(1), reverse=True)[:topN]
    
    if len(sorted_x) == 0:
        print("Unable to find playlist with songs in common.")
    elif len(sorted_x) < topN :
        print("Only able to find %d common playlists. Returning %d." % (len(sorted_x), len(sorted_x)))
        
    return sorted_x

In [135]:
topN = 3
listOfSongsFromUserPlaylist = getSongsListFromPlaylistIndex(userPlaylistIndex, dat)
listOfSongsFromTopPlaylists = set()
topPlaylists = []
for pList in getTopPlaylistsWithSongsInCommon(userPlaylistIndex, topN, dat):
    topPlaylists.append(pList[0])

for playlist in dat['playlists']:
    playlistKey = Playlist(playlist['name'], str(playlist['pid']))
    if playlistKey not in topPlaylists:
         continue #skip the user playlist
    for track in playlist['tracks']:
          if track['track_uri'] not in listOfSongsFromUserPlaylist:
                listOfSongsFromTopPlaylists.add(track['track_uri'])
    

In [137]:
topN = 10
print("Songs list size %d" % len(listOfSongsFromTopPlaylists))
for pList in getTopPlaylistsFromSongsList(listOfSongsFromTopPlaylists, topN, dat):
    if pList[0] not in topPlaylists and pList != userPlaylist:
        print("Name: %s with %d common songs." % (pList[0].getKey(), pList[1]))

Songs list size 328
Name: Country_3342 with 103 common songs.
Name: Country?_3927 with 97 common songs.
Name: Country_3810 with 89 common songs.
Name: Country _3530 with 88 common songs.
Name: country lovin _3497 with 70 common songs.
Name: country_3248 with 69 common songs.
Name: country_3089 with 68 common songs.
Name: country _3688 with 67 common songs.
