In [1]:
import sys
import time
import json
import os

import recommend as r
from train_updated import train
from api.spotifyAPI import SpotifyAPI
from utils import load_graph
from utils import load_features
from sklearn.neighbors import NearestNeighbors
import pandas as pd
from dgl import load_graphs
import matplotlib.pyplot as plt
import networkx as nx
import dgl

import torch
import numpy as np

Using backend: pytorch


# Loading Data

In [2]:
%%time
# Loading Graph ~ 3min
feat_dir = "../../data/a13group1/460k_songset_features.csv"
scratch_pickle_dir = "../../data/a13group1/"
feat_data, uri_map = load_features(feat_dir, scratch_pickle_dir, False, playlist_num=100000)
graph_dir = ("../../data/a13group1/graph_460k.gpickle")
dgl_G, weights = load_graph(graph_dir, uri_map)

Loading feature data...
Feature data shape: torch.Size([461880, 13])
Loading graph data...
Graph Info:
 Name: G
Type: Graph
Number of nodes: 461880
Number of edges: 106486690
Average degree: 461.1011
CPU times: user 9min 7s, sys: 1min 40s, total: 10min 48s
Wall time: 10min 32s


In [3]:
# Optional (disabled): move the graph, edge weights and features to the GPU.
# Left commented out, so nothing is transferred to 'cuda' by this cell.
#dgl_G_cuda = dgl_G.to('cuda')
#weights = weights.to('cuda')
#feat_data = feat_data.to('cuda')

# Training Model (optional)

In [4]:
# Disabled training cell -- uncomment to retrain instead of loading a saved model.
# NOTE(review): model_cfg is loaded but never passed to train(); the hyper-parameters
# below (feat_dim=13, emb_dim=10) are hard-coded instead.
# NOTE(review): this cell saves '460k_1epoch_model.pt' / '460k_1epoch_pred.pt', while the
# "Loading Pre-Existing Model" cell reads '460k_model.pt' / '460k_pred.pt'.
# %%time
# # Training the Model. GPU ~ 00:00:40. CPU ~ 00:53:00.
# with open('../../config/model-params.json') as fh:
#             model_cfg = json.load(fh)
# model, pred, measures = train(dgl_G, weights.to('cpu'), feat_data, cuda=False, feat_dim=13, emb_dim=10, test_data=False)

# # Put everything on CPU
# model = model.to('cpu')
# pred = pred.to('cpu')

# torch.save(model, '460k_1epoch_model.pt')
# torch.save(pred, '460k_1epoch_pred.pt')

# with open("460k_1epoch_measures.json", "w") as out_measures:
#     json.dump(measures, out_measures)

In [5]:
# Disabled: hand-typed summary of a previous training run. The numbers below are
# literals, not values recomputed from `measures`.
# print('Final Training Classification Report: ')
# print('AUC: 0.8236135679618413')
# print('Final Loss: 0.5051630735397339')
# print('Training Time: ~ 1 Hour (CPU)')
# pd.DataFrame(data={'Measure': [0.0, 1.0, 'accuracy', 'macro avg', 'weighted avg'], 'Precision': [0.49, 0.90, None, 0.69, 0.80], 'Recall': [0.75, 0.74, None, 0.75, 0.75], 'F1-Score': [0.59, 0.82, 0.75, 0.70, 0.76], 'Support': [168110, 518464, 686574, 686574, 686574]})

# Loading Pre-Existing Model

In [3]:
# Load Pre-Existing Model
# map_location='cpu' makes the checkpoints load onto the CPU regardless of the device
# they were saved from; the graph, weights and features in this notebook are never
# moved to the GPU (the .to('cuda') cell is commented out).
# SECURITY: torch.load unpickles arbitrary objects -- only load checkpoints you trust.
# NOTE(review): the training cell saves '460k_1epoch_*.pt'; confirm these file names
# point at the intended checkpoints.
model = torch.load('460k_model.pt', map_location='cpu')
pred = torch.load('460k_pred.pt', map_location='cpu')

# Switch both modules to evaluation mode before inference.
model.eval()
pred.eval()

MLPPredictor(
  (W1): Linear(in_features=20, out_features=10, bias=True)
  (W2): Linear(in_features=10, out_features=1, bias=True)
)

# Create Predictions

In [4]:
# Store a running edge index (0 .. number_of_edges - 1) on the graph under dgl.EID.
num_edges = dgl_G.number_of_edges()
edge_ids = torch.tensor(np.arange(num_edges))
dgl_G.edata[dgl.EID] = edge_ids

In [26]:
# Split all node IDs into contiguous, roughly equal chunks so that inference can be
# run on one node-induced subgraph at a time.
# The node count is read from the graph instead of being hard-coded (it was 461880),
# so the cell keeps working when a different graph is loaded.
NUM_CHUNKS = 500
feats_splitted = np.array_split(np.arange(dgl_G.number_of_nodes()), NUM_CHUNKS)

In [61]:
# Run the model chunk by chunk and stitch the results together.
#
# Changes from the earlier version of this cell:
#   * inference runs under torch.no_grad(), so no autograd graph is kept alive
#     for the outputs;
#   * per-chunk results are collected in lists and concatenated once, instead of
#     re-copying the growing tensors with torch.cat on every iteration.
#
# NOTE(review): `weights` is the full-graph tensor returned by load_graph, while
# temp_graph is a node-induced subgraph -- confirm that `model` expects this (otherwise
# the weights probably need to be indexed by temp_graph.edata[dgl.EID]).
# NOTE(review): each subgraph only contains edges whose two endpoints fall in the same
# chunk, so `preds` presumably does not cover cross-chunk edges -- confirm this is intended.
pred_chunks = []
z_chunks = []
with torch.no_grad():
    for i in feats_splitted:
        temp_graph = dgl.node_subgraph(dgl_G, i)

        # Create Embeddings
        z_i = model(temp_graph, feat_data[i], weights)

        # Create Predictions
        # In the form of the strength of the connection between source, destination
        # for the edges of temp_graph
        pred_i = pred(temp_graph, z_i)

        pred_chunks.append(pred_i)
        z_chunks.append(z_i)

# Fall back to an empty tensor (the previous initial value) if there were no chunks.
preds = torch.cat(pred_chunks, 0) if pred_chunks else torch.tensor([])
z = torch.cat(z_chunks, 0) if z_chunks else torch.tensor([])

In [54]:
# Inspect the size of the concatenated predictions tensor.
preds.shape

torch.Size([425332])

In [62]:
# Inspect the size of the concatenated embeddings tensor.
z.shape

torch.Size([461880, 10])

In [None]:
# Disabled: single-pass inference over the whole graph; the chunked loop above is
# used instead (presumably for memory reasons -- confirm).
# # Create Embeddings
# z = model(dgl_G, feat_data, weights)

# # Create Predictions
# # In the form of the strength of the connection between source, destination from dgl_G.edges()
# preds = pred(dgl_G, z)

In [63]:
# Create Nearest Neighbors
neigh = NearestNeighbors(n_neighbors=25, radius=0.4)
neigh.fit(feat_data)

NearestNeighbors(n_neighbors=25, radius=0.4)

In [64]:
# Get a random playlist
item = r.get_random_playlist()

/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.9000-9999.json


In [65]:
# Display the raw playlist record returned by get_random_playlist.
item

{'name': 'Happy :)',
 'collaborative': 'false',
 'pid': 9360,
 'modified_at': 1500854400,
 'num_tracks': 10,
 'num_albums': 10,
 'num_followers': 2,
 'tracks': [{'pos': 0,
   'artist_name': 'Phoenix',
   'track_uri': 'spotify:track:6ZrlXT6mUMEgomN128iekU',
   'artist_uri': 'spotify:artist:1xU878Z1QtBldR7ru9owdU',
   'track_name': 'Lisztomania',
   'album_uri': 'spotify:album:6YXmQrXOjJoMheJ2IA5NqK',
   'duration_ms': 241640,
   'album_name': 'Wolfgang Amadeus Phoenix'},
  {'pos': 1,
   'artist_name': 'Michael Jackson',
   'track_uri': 'spotify:track:2rN2WzftSKZzDLFApWECWz',
   'artist_uri': 'spotify:artist:3fMbdgg4jU18AjLCKBhRSm',
   'track_name': 'Will You Be There (Theme from "Free Willy")',
   'album_uri': 'spotify:album:48TFZhC2UmuzNL23WshdwW',
   'duration_ms': 352893,
   'album_name': 'FREE WILLY - ORIGINAL MOTION PICTURE SOUNDTRACK'},
  {'pos': 2,
   'artist_name': 'Lake Street Dive',
   'track_uri': 'spotify:track:6FxKh5EdB8S22V4i3BORsr',
   'artist_uri': 'spotify:artist:3nuc29

In [66]:
# Get playlist information
item, original_tracks, seeds = r.get_playlist_info(item)
original_tracks

Playlist ID: 9360
Playlist Length: 10


['Lisztomania---Phoenix',
 'Will You Be There (Theme from "Free Willy")---Michael Jackson',
 'Rabid Animal---Lake Street Dive',
 'Dancing On Quicksand---Bad Suns',
 'The Sweet Escape---Gwen Stefani',
 'Ants Marching---Dave Matthews Band',
 'Rock the Casbah - Remastered---The Clash',
 'Stare Into The Sun---Graffiti6',
 'Feel It Still---Portugal. The Man',
 'anywayican---WALK THE MOON']

In [67]:
# Get recommendations
uri_recs = r.recommend(seeds, dgl_G, z, pred, neigh, feat_data, uri_map)
uri_recs

['4izhmF4sQgGemZpLC68bC5',
 '3zByVQLvdXUaDTubfWkpCk',
 '0WqIKmW4BTrj3eJFmnCKMv',
 '7KXjTSCq5nL1LoYtL7XAwS',
 '6fwdbPMwP1zVStm8FybmkO',
 '03LpkqucyYKcYclDs8HuxO',
 '6fwdbPMwP1zVStm8FybmkO',
 '7990Xs9HQx7FXVIDVPEwj9',
 '0HscBDeP30qAUEJoZzxNrj',
 '3zByVQLvdXUaDTubfWkpCk']

# Decode Results with Spotify API

In [68]:
# Translate to Song Names
client_id = 'ad2536ed7a914d66b89b80fb3a500787'
client_secret = '8c5f45fb008d4bc5bf909ec46d076b65'

spotify = SpotifyAPI(client_id, client_secret)

In [69]:
%%time
# Resolve the recommended track IDs through the Spotify client created above.
# NOTE(review): the meaning of the trailing argument `2` is defined in
# recommend.get_rec_names -- confirm there.
rec_track_names = r.get_rec_names(uri_recs, spotify, 2)

CPU times: user 22.7 ms, sys: 8.56 ms, total: 31.3 ms
Wall time: 2.39 s


In [70]:
# Show the playlist's original tracks again for comparison with the recommendations.
original_tracks

['Lisztomania---Phoenix',
 'Will You Be There (Theme from "Free Willy")---Michael Jackson',
 'Rabid Animal---Lake Street Dive',
 'Dancing On Quicksand---Bad Suns',
 'The Sweet Escape---Gwen Stefani',
 'Ants Marching---Dave Matthews Band',
 'Rock the Casbah - Remastered---The Clash',
 'Stare Into The Sun---Graffiti6',
 'Feel It Still---Portugal. The Man',
 'anywayican---WALK THE MOON']

In [71]:
# Order the recommendations by their second field (largest first); ties keep
# their existing relative order because the sort is stable.
rec_track_names = sorted(rec_track_names, key=lambda entry: entry[1], reverse=True)

In [72]:
# Display the recommendations after sorting by their second field, descending.
rec_track_names

[('Sweet Caroline---Neil Diamond', 2),
 ('X (feat. Future)---21 Savage', 2),
 ('John Cougar, John Deere, John 3:16---Keith Urban', 1),
 ('The Tiki, Tiki, Tiki Room---The Mellomen', 1),
 ('Crazy In Love (feat. Jay-Z)---Beyoncé', 1),
 ('Before You Start Your Day---Twenty One Pilots', 1),
 ('Rock And Roll All Nite---KISS', 1),
 ('HUMBLE.---Kendrick Lamar', 1)]

Note: We keep repeated recommendations because a song that is recommended as the best match more than once across the playlist should be ranked higher.

# Making Batch Recommendations ~ 15 min

In [73]:
# Disabled batch evaluation (the output shown below this cell comes from an earlier run).
# For 200 random playlists it pairs uri_recs[i] with seeds[i] and counts a hit when the
# recommended node is already an out-neighbour of its seed in dgl_G; 'accuracy' is the
# fraction of hits. Results are written to ../analysis/batch_recommendations.json.
# NOTE(review): indexing seeds[i] assumes len(uri_recs) <= len(seeds), and the division
# fails for an empty recommendation list -- confirm both before re-enabling.
# %%time 
# all_accs = []
# for each in range(200):
#     item = r.get_random_playlist()
#     item, original_tracks, seeds = r.get_playlist_info(item)
#     uri_recs = r.recommend(seeds, dgl_G, z, pred, neigh, feat_data, uri_map)
    
#     checks = []
#     for i in range(len(uri_recs)):
#         well = uri_map[uri_recs[i]] in dgl_G.out_edges(uri_map[seeds[i]])[1]
#         checks.append(well)
        
#     acc = np.count_nonzero(checks) / len(checks)
#     all_accs.append({'pid': item['pid'], 'seeds': seeds, 'recs':uri_recs, 'accuracy': acc})
    
# with open("../analysis/batch_recommendations.json", "w") as final:
#     json.dump(all_accs, final)

/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.2000-2999.json
Playlist ID: 2157
Playlist Length: 14
/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.2000-2999.json
Playlist ID: 2599
Playlist Length: 60
/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.9000-9999.json
Playlist ID: 9797
Playlist Length: 74
/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.5000-5999.json
Playlist ID: 5756
Playlist Length: 5
/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.5000-5999.json
Playlist ID: 5134
Playlist Length: 91
/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.7000-7999.json
Playlist ID: 7691
Playlist Length: 45
/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.6000-6999.json
Playlist ID: 6270
Playlist Length: 217
/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.9000-9999.json
Playlist ID: 9585
Playlist Length: 48
/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.1000-1999.json
Playlist ID: 1581
Playlist Length: 37
/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.2000-2999.json
Playlist ID: 2957


/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.7000-7999.json
Playlist ID: 7058
Playlist Length: 11
/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.1000-1999.json
Playlist ID: 1059
Playlist Length: 166
/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.7000-7999.json
Playlist ID: 7879
Playlist Length: 21
/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.4000-4999.json
Playlist ID: 4545
Playlist Length: 212
/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.5000-5999.json
Playlist ID: 5274
Playlist Length: 31
/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.2000-2999.json
Playlist ID: 2588
Playlist Length: 91
/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.8000-8999.json
Playlist ID: 8076
Playlist Length: 15
/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.8000-8999.json
Playlist ID: 8114
Playlist Length: 32
/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.0-999.json
Playlist ID: 765
Playlist Length: 81
/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.9000-9999.json
Playlist ID: 9032
Pla

/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.0-999.json
Playlist ID: 18
Playlist Length: 68
/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.5000-5999.json
Playlist ID: 5526
Playlist Length: 62
/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.3000-3999.json
Playlist ID: 3933
Playlist Length: 56
/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.1000-1999.json
Playlist ID: 1069
Playlist Length: 136
/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.8000-8999.json
Playlist ID: 8099
Playlist Length: 70
/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.4000-4999.json
Playlist ID: 4641
Playlist Length: 59
/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.8000-8999.json
Playlist ID: 8463
Playlist Length: 117
/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.6000-6999.json
Playlist ID: 6321
Playlist Length: 44
/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.4000-4999.json
Playlist ID: 4249
Playlist Length: 107
/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.2000-2999.json
Playlist ID: 2136
Pla

In [74]:
# Disabled: dump the URI map (wrapped in a one-element list) to JSON.
# NOTE(review): the file name says "170k" while the data loaded in this notebook is the
# 460k song set -- confirm the intended name before re-enabling.
# with open("../analysis/uri_map_170k.json", "w") as final:
#     json.dump([uri_map], final)