In [1]:
import sys
import time
import json
import os

import recommend as r
from train_updated import train
from api.spotifyAPI import SpotifyAPI
from utils import load_graph
from utils import load_features
from sklearn.neighbors import NearestNeighbors
import pandas as pd
from dgl import load_graphs
import matplotlib.pyplot as plt
import networkx as nx
import dgl

import torch
import numpy as np

Using backend: pytorch


# Loading Data

In [2]:
%%time
# Loading Graph ~ 3min
feat_dir = "../../data/a13group1/features/merged_features.csv"
double_edge_dir = "../../data/a13group1/double_edges_170k.bin"
feat_data, uri_map = load_features(feat_dir)
dgl_G = nx.read_gpickle("../../data/a13group1/graph_170k.gpickle")
dgl_G, weights = load_graph(dgl_G, uri_map)

CPU times: user 1min 58s, sys: 26 s, total: 2min 24s
Wall time: 2min 19s


In [3]:
#dgl_G_cuda = dgl_G.to('cuda')
#weights = weights.to('cuda')
#feat_data = feat_data.to('cuda')

# Training Model (optional)

In [4]:
# %%time
# # Training the Model. GPU ~ 00:00:40. CPU ~ 00:53:00.
# with open('../../config/model-params.json') as fh:
#             model_cfg = json.load(fh)
# model, pred, losses = train(dgl_G, weights.to('cpu'), feat_data, cuda=False, feat_dim=14, emb_dim=10, test_data=False)

# # Put everything on CPU
# model = model.to('cpu')
# pred = pred.to('cpu')

# torch.save(model, '170k_model.pt')
# torch.save(pred, '170k_pred.pt')

In [5]:
# Summary of the training run. These figures are hardcoded literals
# (presumably copied from an earlier run of the optional training cell).
print(
    'Final Training Classification Report: ',
    'AUC: 0.8236135679618413',
    'Final Loss: 0.5051630735397339',
    'Training Time: ~ 1 Hour (CPU)',
    sep='\n',
)

# Per-class classification report, one list per column.
report_columns = {
    'Measure': [0.0, 1.0, 'accuracy', 'macro avg', 'weighted avg'],
    'Precision': [0.49, 0.90, None, 0.69, 0.80],
    'Recall': [0.75, 0.74, None, 0.75, 0.75],
    'F1-Score': [0.59, 0.82, 0.75, 0.70, 0.76],
    'Support': [168110, 518464, 686574, 686574, 686574],
}
pd.DataFrame(data=report_columns)

Final Training Classification Report: 
AUC: 0.8236135679618413
Final Loss: 0.5051630735397339
Training Time: ~ 1 Hour (CPU)


Unnamed: 0,Measure,Precision,Recall,F1-Score,Support
0,0.0,0.49,0.75,0.59,168110
1,1.0,0.9,0.74,0.82,518464
2,accuracy,,,0.75,686574
3,macro avg,0.69,0.75,0.7,686574
4,weighted avg,0.8,0.75,0.76,686574


# Loading Pre-Existing Model

In [6]:
# Load Pre-Existing Model
# NOTE: torch.load unpickles whole module objects, so only load checkpoint
# files that come from a trusted source.
# map_location='cpu' restores every tensor onto the CPU regardless of the
# device the checkpoint was saved from; the graph, weights and features are
# kept on the CPU in this notebook (the CUDA transfer cell is disabled).
model = torch.load('170k_model.pt', map_location='cpu')
pred = torch.load('170k_pred.pt', map_location='cpu')

# Switch both modules to evaluation mode before running inference.
model.eval()
pred.eval()

MLPPredictor(
  (W1): Linear(in_features=20, out_features=10, bias=True)
  (W2): Linear(in_features=10, out_features=1, bias=True)
)

# Create Predictions

In [7]:
# Store explicit edge IDs (0 .. number_of_edges - 1) under DGL's reserved EID key
num_edges = dgl_G.number_of_edges()
dgl_G.edata[dgl.EID] = torch.tensor(np.arange(num_edges))

In [8]:
# Inference only: run under torch.no_grad() so autograd does not record a
# computation graph for the embeddings and predictions over the full graph.
with torch.no_grad():
    # Create Embeddings
    z = model(dgl_G, feat_data, weights)

    # Create Predictions
    # In the form of the strength of the connection between source, destination from dgl_G.edges()
    preds = pred(dgl_G, z)

In [9]:
# Build a nearest-neighbour index over the feature matrix
# (25 neighbours per query; radius 0.4 applies to radius-based queries)
neigh = NearestNeighbors(radius=0.4, n_neighbors=25)
neigh.fit(feat_data)

NearestNeighbors(n_neighbors=25, radius=0.4)

In [21]:
# Get a random playlist
# NOTE(review): no random seed is set anywhere in the visible notebook, so the
# playlist chosen here presumably differs between runs -- confirm how
# recommend.get_random_playlist samples if reproducibility matters.
item = r.get_random_playlist()

/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.6000-6999.json


In [22]:
# Inspect the playlist record picked above (displays the full record, including its track list)
item

{'name': 'Workout Mix',
 'collaborative': 'false',
 'pid': 6568,
 'modified_at': 1487894400,
 'num_tracks': 66,
 'num_albums': 57,
 'num_followers': 1,
 'tracks': [{'pos': 0,
   'artist_name': 'DJ Snake',
   'track_uri': 'spotify:track:67awxiNHNyjMXhVgsHuIrs',
   'artist_uri': 'spotify:artist:540vIaP2JwjQb9dm3aArA4',
   'track_name': 'Turn Down for What',
   'album_uri': 'spotify:album:3zo0Hxh9rjJsdw2JAKReE3',
   'duration_ms': 213733,
   'album_name': 'Turn Down for What'},
  {'pos': 1,
   'artist_name': 'Calvin Harris',
   'track_uri': 'spotify:track:07nH4ifBxUB4lZcsf44Brn',
   'artist_uri': 'spotify:artist:7CajNmpbOovFoOoasH2HaY',
   'track_name': 'Blame',
   'album_uri': 'spotify:album:48zisMeiXniWLzOQghbPqS',
   'duration_ms': 212960,
   'album_name': 'Motion'},
  {'pos': 2,
   'artist_name': 'Calvin Harris',
   'track_uri': 'spotify:track:6YUTL4dYpB9xZO5qExPf05',
   'artist_uri': 'spotify:artist:7CajNmpbOovFoOoasH2HaY',
   'track_name': 'Summer',
   'album_uri': 'spotify:album:48

In [23]:
# Get playlist information
# Unpacks three values from recommend.get_playlist_info; `item` is rebound to the first one.
# NOTE(review): re-running this cell feeds the rebound `item` back in -- confirm
# that get_playlist_info tolerates its own output as input.
item, original_tracks, seeds = r.get_playlist_info(item)
# Display the playlist's tracks (the recorded output shows 'track name---artist' strings)
original_tracks

Playlist ID: 6568
Playlist Length: 66


['Turn Down for What---DJ Snake',
 'Blame---Calvin Harris',
 'Summer---Calvin Harris',
 'I Need Your Love---Calvin Harris',
 'Under Control---Calvin Harris',
 'Feel So Close - Radio Edit---Calvin Harris',
 'Burn---Ellie Goulding',
 'Anything Could Happen---Ellie Goulding',
 'Wasted---Tiësto',
 'Red Lights---Tiësto',
 "Hold On, We're Going Home---Drake",
 'Shake It Off---Taylor Swift',
 'Glad You Came---The Wanted',
 'Kick Out The Epic Motherf**ker---Dada Life',
 'So Young So High---Dada Life',
 'Born To Rage - USA Version---Dada Life',
 'Alive---Krewella',
 'Under Control---Calvin Harris',
 'Dare You - Radio Edit---Hardwell',
 'Bad (feat. Vassy) - Radio Edit---David Guetta',
 'Latch---Disclosure',
 'Prayer in C - Robin Schulz Radio Edit---Lilly Wood and The Prick',
 'Written In Reverse---Tiësto',
 'Rather Be (feat. Jess Glynne)---Clean Bandit',
 'Hideaway---Kiesza',
 "We Can't Stop---Miley Cyrus",
 'Party In The U.S.A.---Miley Cyrus',
 'i---Kendrick Lamar',
 'Talking Body---Tove Lo',
 

In [24]:
# Get recommendations
# Inputs: the playlist seeds, the graph, the node embeddings z, the edge
# predictor, the fitted nearest-neighbour index, the features and the URI map.
# NOTE(review): presumably one recommended track ID per seed -- confirm against
# recommend.recommend. The same ID can appear several times in the result.
uri_recs = r.recommend(seeds, dgl_G, z, pred, neigh, feat_data, uri_map)
uri_recs

['6p8NuHm8uCGnn2Dtbtf7zE',
 '5HGibWoxnkYSkl6mHmAlOE',
 '4O1CExxinEpKZi3861NlTK',
 '7yyRTcZmCiyzzJlNzGC9Ol',
 '5HGibWoxnkYSkl6mHmAlOE',
 '4O1CExxinEpKZi3861NlTK',
 '4O1CExxinEpKZi3861NlTK',
 '4Km5HrUvYTaSUfiSGPJeQR',
 '4O1CExxinEpKZi3861NlTK',
 '4O1CExxinEpKZi3861NlTK',
 '6p8NuHm8uCGnn2Dtbtf7zE',
 '4O1CExxinEpKZi3861NlTK',
 '4O1CExxinEpKZi3861NlTK',
 '03fT3OHB9KyMtGMt2zwqCT',
 '0LWQWOFoz5GJLqcHk1fRO2',
 '27GmP9AWRs744SzKcpJsTZ',
 '4O1CExxinEpKZi3861NlTK',
 '7BKLCZ1jbUBVqRi2FVlTVw',
 '4O1CExxinEpKZi3861NlTK',
 '4O1CExxinEpKZi3861NlTK',
 '4Km5HrUvYTaSUfiSGPJeQR',
 '34ceTg8ChN5HjrqiIYCn9Q',
 '03fT3OHB9KyMtGMt2zwqCT',
 '4Km5HrUvYTaSUfiSGPJeQR',
 '4O1CExxinEpKZi3861NlTK',
 '4O1CExxinEpKZi3861NlTK',
 '6p8NuHm8uCGnn2Dtbtf7zE',
 '4O1CExxinEpKZi3861NlTK',
 '1zWZvrk13cL8Sl3VLeG57F',
 '4O1CExxinEpKZi3861NlTK',
 '6p8NuHm8uCGnn2Dtbtf7zE',
 '4O1CExxinEpKZi3861NlTK',
 '5S5rw0WLVCAux5B5bWCehK',
 '7BKLCZ1jbUBVqRi2FVlTVw',
 '6PqD1ZZ0dVGjx1a6O4cuY8',
 '6p8NuHm8uCGnn2Dtbtf7zE',
 '5HGibWoxnkYSkl6mHmAlOE',
 

# Decode Results with Spotify API

In [25]:
# Translate to Song Names
# Spotify API credentials are read from the environment so that secrets are
# never stored in the notebook. Set SPOTIFY_CLIENT_ID and SPOTIFY_CLIENT_SECRET
# before starting the kernel.
# NOTE: the key pair that used to be hardcoded in this cell has been exposed
# and should be rotated in the Spotify developer dashboard.
client_id = os.environ.get('SPOTIFY_CLIENT_ID')
client_secret = os.environ.get('SPOTIFY_CLIENT_SECRET')
if not client_id or not client_secret:
    # Fail early with a clear message instead of an opaque authentication error later.
    raise RuntimeError(
        'Set the SPOTIFY_CLIENT_ID and SPOTIFY_CLIENT_SECRET environment '
        'variables before running this cell.'
    )

spotify = SpotifyAPI(client_id, client_secret)

In [26]:
%%time
# Resolve the recommended track IDs through the Spotify client.
# The recorded output further down shows the result as a list of
# ('track name---artist', count) tuples.
# NOTE(review): the meaning of the literal 2 is not visible from this notebook --
# confirm against recommend.get_rec_names.
rec_track_names = r.get_rec_names(uri_recs, spotify, 2)

CPU times: user 51.2 ms, sys: 4.73 ms, total: 56 ms
Wall time: 2.42 s


In [27]:
# Show the original playlist tracks again, for comparison with the recommendations
original_tracks

['Turn Down for What---DJ Snake',
 'Blame---Calvin Harris',
 'Summer---Calvin Harris',
 'I Need Your Love---Calvin Harris',
 'Under Control---Calvin Harris',
 'Feel So Close - Radio Edit---Calvin Harris',
 'Burn---Ellie Goulding',
 'Anything Could Happen---Ellie Goulding',
 'Wasted---Tiësto',
 'Red Lights---Tiësto',
 "Hold On, We're Going Home---Drake",
 'Shake It Off---Taylor Swift',
 'Glad You Came---The Wanted',
 'Kick Out The Epic Motherf**ker---Dada Life',
 'So Young So High---Dada Life',
 'Born To Rage - USA Version---Dada Life',
 'Alive---Krewella',
 'Under Control---Calvin Harris',
 'Dare You - Radio Edit---Hardwell',
 'Bad (feat. Vassy) - Radio Edit---David Guetta',
 'Latch---Disclosure',
 'Prayer in C - Robin Schulz Radio Edit---Lilly Wood and The Prick',
 'Written In Reverse---Tiësto',
 'Rather Be (feat. Jess Glynne)---Clean Bandit',
 'Hideaway---Kiesza',
 "We Can't Stop---Miley Cyrus",
 'Party In The U.S.A.---Miley Cyrus',
 'i---Kendrick Lamar',
 'Talking Body---Tove Lo',
 

In [28]:
# Rank the recommendations in place, largest count (tuple position 1) first
rec_track_names.sort(key=lambda rec: rec[1], reverse=True)

In [29]:
# Display the recommendations, ordered by descending count
rec_track_names

[('Get Me Some Of That---Thomas Rhett', 19),
 ('Slippery (feat. Gucci Mane)---Migos', 18),
 ('Bad and Boujee (feat. Lil Uzi Vert)---Migos', 4),
 ('Play It Again---Luke Bryan', 3),
 ("That's My Kind Of Night---Luke Bryan", 3),
 ('Closer---The Chainsmokers', 2),
 ('Broccoli (feat. Lil Yachty)---Shelley FKA DRAM', 2),
 ("Look At Me Now (feat. Lil' Wayne & Busta Rhymes)---Chris Brown", 1),
 ('One Dance---Drake', 1),
 ('T-Shirt---Thomas Rhett', 1),
 ('Jumpman---Drake', 1),
 ('Miss Independent---Ne-Yo', 1),
 ('Do I Make You Wanna---Billy Currington', 1),
 ('Congratulations---Post Malone', 1),
 ("Why Didn't I Think Of That---Doug Stone", 1),
 ('I Like The Sound Of That---Rascal Flatts', 1),
 ('Caroline---Aminé', 1),
 ('Into Your Arms---Capital Kings', 1),
 ('goosebumps---Travis Scott', 1),
 ('Slide (feat. Frank Ocean & Migos)---Calvin Harris', 1),
 ('I Want U---Alison Wonderland', 1)]

Note: We kept repeats because a song that is recommended as the best match more than once throughout the playlist should be ranked higher.

# Making Batch Recommendations ~ 15 min

In [19]:
# %%time 
# all_accs = []
# for each in range(200):
#     item = r.get_random_playlist()
#     item, original_tracks, seeds = r.get_playlist_info(item)
#     uri_recs = r.recommend(seeds, dgl_G, z, pred, neigh, feat_data, uri_map)
    
#     checks = []
#     for i in range(len(uri_recs)):
#         well = uri_map[uri_recs[i]] in dgl_G.out_edges(uri_map[seeds[i]])[1]
#         checks.append(well)
        
#     acc = np.count_nonzero(checks) / len(checks)
#     all_accs.append({'pid': item['pid'], 'seeds': seeds, 'recs':uri_recs, 'accuracy': acc})
    
# with open("../analysis/batch_recommendations.json", "w") as final:
#     json.dump(all_accs, final)

In [20]:
# with open("../analysis/uri_map_170k.json", "w") as final:
#     json.dump([uri_map], final)