In [2]:
import sys
import time
import json
import os

import recommend as r
from train_updated import train
from api.spotifyAPI import SpotifyAPI
from utils import load_graph
from utils import load_features
from sklearn.neighbors import NearestNeighbors
import pandas as pd
from dgl import load_graphs
import matplotlib.pyplot as plt
import networkx as nx
import dgl

import torch
import numpy as np

Using backend: pytorch


# Loading Data

In [5]:
%%time
# Loading Graph ~ 3min
feat_dir = "../../data/a13group1/460k_songset_features.csv"
scratch_pickle_dir = "../../data/a13group1/"
feat_data, uri_map = load_features(feat_dir, scratch_pickle_dir, False, playlist_num=100000)
graph_dir = ("../../data/a13group1/graph_460k.gpickle")
dgl_G, weights = load_graph(graph_dir, uri_map)

Loading feature data...
Feature data shape: torch.Size([461880, 13])
Loading graph data...
Graph Info:
 Name: G
Type: Graph
Number of nodes: 461880
Number of edges: 106486690
Average degree: 461.1011
CPU times: user 6min 13s, sys: 31.9 s, total: 6min 45s
Wall time: 6min 34s


In [3]:
#dgl_G_cuda = dgl_G.to('cuda')
#weights = weights.to('cuda')
#feat_data = feat_data.to('cuda')

# Training Model (optional)

In [None]:
%%time
# Training the Model. GPU ~ 00:00:40. CPU ~ 00:53:00.
with open('../../config/model-params.json') as fh:
            model_cfg = json.load(fh)
model, pred, measures = train(dgl_G, weights.to('cpu'), feat_data, cuda=False, feat_dim=13, emb_dim=10, test_data=False)

# Put everything on CPU
model = model.to('cpu')
pred = pred.to('cpu')

torch.save(model, '460k_1epoch_model.pt')
torch.save(pred, '460k_1epoch_pred.pt')

with open("460k_1epoch_measures.json", "w") as out_measures:
    json.dump(measures, out_measures)

Train pos edge: 168589834
Validation pos edge: 306960
Cuda enabled: False

Training starts:
-----
In epoch 1 batch 1, loss: 0.6922053098678589
-----
In epoch 1 batch 2, loss: 0.6965621709823608
-----
In epoch 1 batch 3, loss: 0.6925550699234009
-----
In epoch 1 batch 4, loss: 0.6910004019737244
-----
In epoch 1 batch 5, loss: 0.6893577575683594
-----
In epoch 1 batch 6, loss: 0.6879245042800903
-----
In epoch 1 batch 7, loss: 0.6840960383415222
-----
In epoch 1 batch 8, loss: 0.6814653873443604
-----
In epoch 1 batch 9, loss: 0.6781801581382751
-----
In epoch 1 batch 10, loss: 0.67368084192276
-----
In epoch 1 batch 11, loss: 0.667395830154419
-----
In epoch 1 batch 12, loss: 0.6607205271720886
-----
In epoch 1 batch 13, loss: 0.6496045589447021
-----
In epoch 1 batch 14, loss: 0.6448951959609985
-----
In epoch 1 batch 15, loss: 0.6294053792953491
-----
In epoch 1 batch 16, loss: 0.6173455715179443
-----
In epoch 1 batch 17, loss: 0.6101223230361938
-----
In epoch 1 batch 18, loss: 0.5

In [None]:
# Display the evaluation measures returned by train()
measures

In [5]:
# Hard-coded summary of a previous training run (values are literals,
# not recomputed from `measures`).
print(
    'Final Training Classification Report: ',
    'AUC: 0.8236135679618413',
    'Final Loss: 0.5051630735397339',
    'Training Time: ~ 1 Hour (CPU)',
    sep='\n',
)

# Classification report rendered as a table; the 'accuracy' row only has
# an F1-Score and a Support value, hence the None entries.
pd.DataFrame(
    data={
        'Measure': [0.0, 1.0, 'accuracy', 'macro avg', 'weighted avg'],
        'Precision': [0.49, 0.90, None, 0.69, 0.80],
        'Recall': [0.75, 0.74, None, 0.75, 0.75],
        'F1-Score': [0.59, 0.82, 0.75, 0.70, 0.76],
        'Support': [168110, 518464, 686574, 686574, 686574],
    }
)

Final Training Classification Report: 
AUC: 0.8236135679618413
Final Loss: 0.5051630735397339
Training Time: ~ 1 Hour (CPU)


Unnamed: 0,Measure,Precision,Recall,F1-Score,Support
0,0.0,0.49,0.75,0.59,168110
1,1.0,0.9,0.74,0.82,518464
2,accuracy,,,0.75,686574
3,macro avg,0.69,0.75,0.7,686574
4,weighted avg,0.8,0.75,0.76,686574


# Loading Pre-Existing Model

In [9]:
# Load Pre-Existing Model
# Load the pickled embedding model and edge predictor.
# SECURITY: torch.load unpickles arbitrary Python objects; only load
# checkpoint files that come from a trusted source.
# map_location='cpu' lets a checkpoint that was saved from a GPU session
# load on a CPU-only kernel (the rest of this notebook runs on CPU).
# NOTE(review): these are the '170k' checkpoints, while the training cell
# saves '460k_1epoch_*' files -- confirm which checkpoint is intended.
model = torch.load('170k_model.pt', map_location='cpu')
pred = torch.load('170k_pred.pt', map_location='cpu')

# Switch both modules to evaluation mode before inference
model.eval()
pred.eval()

MLPPredictor(
  (W1): Linear(in_features=20, out_features=10, bias=True)
  (W2): Linear(in_features=10, out_features=1, bias=True)
)

# Create Predictions

In [10]:
# Store a sequential integer id (0 .. number_of_edges - 1) on every edge
# under the dgl.EID key.
dgl_G.edata[dgl.EID] = torch.tensor(
    np.arange(0, dgl_G.number_of_edges())
)

In [11]:
# This cell is inference only, so autograd is disabled: building a
# backward graph over the full graph would only waste memory and time.
with torch.no_grad():
    # Create Embeddings
    z = model(dgl_G, feat_data, weights)

    # Create Predictions
    # In the form of the strength of the connection between source, destination from dgl_G.edges()
    preds = pred(dgl_G, z)

In [12]:
# Create Nearest Neighbors
# The index is fitted on the raw feature vectors; queries default to
# 25 neighbours and a radius of 0.4.
neigh = NearestNeighbors(
    n_neighbors=25,
    radius=0.4,
)
neigh.fit(feat_data)

NearestNeighbors(n_neighbors=25, radius=0.4)

In [13]:
# Get a random playlist
# NOTE(review): no random seed is set in the visible part of this notebook,
# so the selected playlist presumably differs between runs -- confirm in
# recommend.get_random_playlist if reproducibility matters.
item = r.get_random_playlist()

/teams/DSC180A_FA21_A00/a13group1/data/mpd.slice.7000-7999.json


In [14]:
# Inspect the raw playlist record returned by r.get_random_playlist()
item

{'name': 'idk',
 'collaborative': 'false',
 'pid': 7009,
 'modified_at': 1492041600,
 'num_tracks': 20,
 'num_albums': 17,
 'num_followers': 1,
 'tracks': [{'pos': 0,
   'artist_name': 'Drake',
   'track_uri': 'spotify:track:4ckuS4Nj4FZ7i3Def3Br8W',
   'artist_uri': 'spotify:artist:3TVXtAsR1Inumwj472S9r4',
   'track_name': 'Sneakin’',
   'album_uri': 'spotify:album:2z3NlPY0n0gHJPCPqrzA6V',
   'duration_ms': 251333,
   'album_name': 'Sneakin’'},
  {'pos': 1,
   'artist_name': 'Nebu Kiniza',
   'track_uri': 'spotify:track:6JjEVlMkfHWMeYavkEYzNO',
   'artist_uri': 'spotify:artist:5lCY3tqdQxbeg5igSlObaT',
   'track_name': 'Gassed Up',
   'album_uri': 'spotify:album:6YCcGxz3l1shgr3F7XbNee',
   'duration_ms': 193911,
   'album_name': 'Gassed Up'},
  {'pos': 2,
   'artist_name': 'Lil Uzi Vert',
   'track_uri': 'spotify:track:2ANLarE8yHVsLWW21nj79M',
   'artist_uri': 'spotify:artist:4O15NlyKLIASxsJ0PrXPfz',
   'track_name': 'You Was Right',
   'album_uri': 'spotify:album:7mgdTKTCdfnLoa1HXHvLYM

In [15]:
# Get playlist information
# NOTE(review): `item` is rebound to the first value returned by
# get_playlist_info, so re-running this cell feeds that returned value back
# in instead of the original record -- confirm that this is safe.
item, original_tracks, seeds = r.get_playlist_info(item)
# Show the playlist's tracks (displayed as 'track name---artist' strings)
original_tracks

Playlist ID: 7009
Playlist Length: 20


['Sneakin’---Drake',
 'Gassed Up---Nebu Kiniza',
 'You Was Right---Lil Uzi Vert',
 'Fire Squad - Live---J. Cole',
 'A Tale of 2 Citiez - Live---J. Cole',
 'Deja Vu---J. Cole',
 '20 Joints---Berner',
 'Moves---Big Sean',
 'Look At Me!---XXXTENTACION',
 'Rent Money---Future',
 'No Heart---21 Savage',
 'Dear Mama---2Pac',
 'Neighbors---J. Cole',
 '4 Your Eyez Only---J. Cole',
 'Miss America---J. Cole',
 'Like Toy Soldiers---Eminem',
 'T-Shirt---Migos',
 'Get Your Walk On---Xzibit',
 'X---Xzibit',
 'I Need A Doctor---Dr. Dre']

In [16]:
# Get recommendations
# r.recommend receives the seeds, the graph, the node embeddings `z`, the
# edge predictor, the fitted nearest-neighbour index, the raw features and
# the URI map.
uri_recs = r.recommend(seeds, dgl_G, z, pred, neigh, feat_data, uri_map)
# Display the recommended track ids (the list may contain repeated ids)
uri_recs

['5CG9Ps5ynNjpKJHmwc95pa',
 '6p8NuHm8uCGnn2Dtbtf7zE',
 '03fT3OHB9KyMtGMt2zwqCT',
 '6u36hRCvYdPyPBB4oAQs7B',
 '4Km5HrUvYTaSUfiSGPJeQR',
 '4O1CExxinEpKZi3861NlTK',
 '1zWZvrk13cL8Sl3VLeG57F',
 '6p8NuHm8uCGnn2Dtbtf7zE',
 '6p8NuHm8uCGnn2Dtbtf7zE',
 '6p8NuHm8uCGnn2Dtbtf7zE',
 '5CG9Ps5ynNjpKJHmwc95pa',
 '6p8NuHm8uCGnn2Dtbtf7zE',
 '6p8NuHm8uCGnn2Dtbtf7zE',
 '6p8NuHm8uCGnn2Dtbtf7zE',
 '6p8NuHm8uCGnn2Dtbtf7zE',
 '1soxUgYIZb1qx1c7o1Lc7z',
 '5HGibWoxnkYSkl6mHmAlOE',
 '6ltPEsP4edATzvinHOzvk2',
 '4Km5HrUvYTaSUfiSGPJeQR',
 '6p8NuHm8uCGnn2Dtbtf7zE']

# Decode Results with Spotify API

In [17]:
# Translate to Song Names
# Credentials are read from the environment so that secrets are never stored
# in the notebook. Set SPOTIFY_CLIENT_ID and SPOTIFY_CLIENT_SECRET before
# starting the kernel; a missing variable raises a KeyError naming it.
# SECURITY: the id/secret pair that used to be hard-coded here has been
# exposed and should be rotated in the Spotify developer dashboard.
client_id = os.environ['SPOTIFY_CLIENT_ID']
client_secret = os.environ['SPOTIFY_CLIENT_SECRET']

spotify = SpotifyAPI(client_id, client_secret)

In [18]:
%%time
# Resolve the recommended track ids to names through the Spotify client.
# NOTE(review): the meaning of the third argument (2) is not visible in this
# notebook -- confirm in recommend.get_rec_names.
rec_track_names = r.get_rec_names(uri_recs, spotify, 2)

CPU times: user 53.9 ms, sys: 15.8 ms, total: 69.7 ms
Wall time: 2.43 s


In [19]:
# Re-display the original playlist tracks for comparison with the recommendations
original_tracks

['Sneakin’---Drake',
 'Gassed Up---Nebu Kiniza',
 'You Was Right---Lil Uzi Vert',
 'Fire Squad - Live---J. Cole',
 'A Tale of 2 Citiez - Live---J. Cole',
 'Deja Vu---J. Cole',
 '20 Joints---Berner',
 'Moves---Big Sean',
 'Look At Me!---XXXTENTACION',
 'Rent Money---Future',
 'No Heart---21 Savage',
 'Dear Mama---2Pac',
 'Neighbors---J. Cole',
 '4 Your Eyez Only---J. Cole',
 'Miss America---J. Cole',
 'Like Toy Soldiers---Eminem',
 'T-Shirt---Migos',
 'Get Your Walk On---Xzibit',
 'X---Xzibit',
 'I Need A Doctor---Dr. Dre']

In [20]:
# Sort the (track, value) pairs in place, largest second element first
# (presumably the number of times each track was recommended).
rec_track_names.sort(
    key=lambda name_and_count: name_and_count[1],
    reverse=True,
)

In [21]:
# Show the ranked recommendations
rec_track_names

[('Slippery (feat. Gucci Mane)---Migos', 9),
 ('Bad and Boujee (feat. Lil Uzi Vert)---Migos', 2),
 ('Somewhere On A Beach---Dierks Bentley', 2),
 ('Play It Again---Luke Bryan', 1),
 ('Drunk On A Plane---Dierks Bentley', 1),
 ('T-Shirt---Thomas Rhett', 1),
 ('Get Me Some Of That---Thomas Rhett', 1),
 ("That's My Kind Of Night---Luke Bryan", 1),
 ('Still D.R.E.---Dr. Dre', 1),
 ('Game Got Switched---Ludacris', 1)]

Note: We keep repeated recommendations because a song that is recommended as the best match more than once across the playlist should be ranked higher.

# Making Batch Recommendations ~ 15 min

In [19]:
# %%time 
# all_accs = []
# for each in range(200):
#     item = r.get_random_playlist()
#     item, original_tracks, seeds = r.get_playlist_info(item)
#     uri_recs = r.recommend(seeds, dgl_G, z, pred, neigh, feat_data, uri_map)
    
#     checks = []
#     for i in range(len(uri_recs)):
#         well = uri_map[uri_recs[i]] in dgl_G.out_edges(uri_map[seeds[i]])[1]
#         checks.append(well)
        
#     acc = np.count_nonzero(checks) / len(checks)
#     all_accs.append({'pid': item['pid'], 'seeds': seeds, 'recs':uri_recs, 'accuracy': acc})
    
# with open("../analysis/batch_recommendations.json", "w") as final:
#     json.dump(all_accs, final)

In [20]:
# with open("../analysis/uri_map_170k.json", "w") as final:
#     json.dump([uri_map], final)