In [1]:
#import os
#from google.colab import drive
#drive.mount("/gdrive/")
#os.chdir("/gdrive/My Drive/FMA_Project/Jonathan")

Drive already mounted at /gdrive/; to attempt to forcibly remount, call drive.mount("/gdrive/", force_remount=True).


# Import Packages and Libraries

In [2]:
from collections import defaultdict, Counter
from utils import f1_m, precision_m, recall_m, init_dict, cosine_similarity
import numpy as np
import os
import sys
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras import optimizers
import time
from tqdm import tqdm

# Load data for recommendations

In [9]:
images = np.load("./test/features.npy")
genres = np.load("./test/classes.npy")
names = np.load("./test/names.npy")
images = images[...,tf.newaxis].astype("float32") #Add axis for grey scale
images = images/255.0 #Scale images between 0 and 1
all_genre_counts = Counter(genres)
print(all_genre_counts)

Counter({'International': 1050, 'Experimental': 860, 'Electronic': 850, 'Pop': 830, 'Hip-Hop': 560, 'Rock': 480, 'Folk': 310, 'Instrumental': 60})


# Evaluation of Recommendation Engine

In [10]:
dependencies = {
    'f1_m': f1_m,
    'precision_m': precision_m,
    'recall_m': recall_m

}

In [11]:
best_models_path = "./best_models"
best_models = {m[:-3]:os.path.join(best_models_path, m) for m in os.listdir(best_models_path)}

In [12]:
for model_name, model_path in best_models.items():
    
    print(f"Currently evaluating the recommender engine using {model_name} model")

    #load model for recommendation
    loaded_model = load_model(model_path, custom_objects=dependencies)
    loaded_model.set_weights(loaded_model.get_weights())
    
    #use the bottleneck layers as it contains all the latent features of each song
    matrix_size = loaded_model.layers[-2].output.shape[1]
    new_model = Model(loaded_model.inputs, loaded_model.layers[-2].output)
    
    #Get the average accuracy of genre recommendations per genre
    test_songs = defaultdict(init_dict)

    #get latent features from the bottleneck layer for each song
    print("Calculating latent features for each song...")
    total = len(images)
    for image, name in tqdm(zip(images, names), total=total, file=sys.stdout):
        img_reshaped = np.expand_dims(image, axis=0)
        pred = new_model.predict(img_reshaped)
        test_songs[name] += pred
        
    test_songs_scaled = {k:v/10 for k,v in test_songs.items()}
    
    top_k = [5, 10] #no of rec
    for k in top_k:
        genre_based_accuracy = defaultdict(int)

        #iterate through each song in the test dataset and the cosine distance from all songs and get k nearest songs
        print("Obtaining recommendation accuracy for each genre...")
        for song, features in tqdm(test_songs_scaled.items(), file=sys.stdout):
            distance_array = list(map(lambda x: cosine_similarity(features, x), test_songs_scaled.values()))
            distance_array_with_songs = list(zip(distance_array, test_songs_scaled.keys()))
            #arranged in descending order
            final_array = sorted(distance_array_with_songs, key=lambda x: x[0], reverse=True)

            #get the genre of the k nearest songs and compare with the genre of the anchor song
            genre_list = list()
            for i in final_array[0:k+1]:
              genre_index = np.where(names==i[1])[0]
              genre_list.append(genres[genre_index][0])

            anchor_genre = [genre_list[0]]*k
            recom_genre = genre_list[1:]

            accuracy = sum([x==y for x,y in zip(anchor_genre,recom_genre)])/k

            #Add the accuracy to the list
            genre_based_accuracy[genre_list[0]] += accuracy

        #scale the list by count of songs in each genre
        genre_based_accuracy_percentage = {k:round(((v/int(all_genre_counts.get(k)/10)))*100,2) for k, v in genre_based_accuracy.items()}

        print(f"For {model_name} the genre based accuracy for top {k} recommendations are")
        print(genre_based_accuracy_percentage)

Currently evaluating the recommender engine using CNN model
Calculating latent features for each song...
100%|██████████| 5000/5000 [02:38<00:00, 31.60it/s]
Obtaining recommendation accuracy for each genre...
100%|██████████| 500/500 [00:05<00:00, 97.20it/s]
For CNN the genre based accuracy for top 5 recommendations are
{'International': 97.52, 'Experimental': 79.07, 'Pop': 77.35, 'Electronic': 90.35, 'Hip-Hop': 92.86, 'Folk': 80.0, 'Rock': 89.58, 'Instrumental': 23.33}
Obtaining recommendation accuracy for each genre...
100%|██████████| 500/500 [00:05<00:00, 94.11it/s]
For CNN the genre based accuracy for top 10 recommendations are
{'International': 96.76, 'Experimental': 78.6, 'Pop': 77.95, 'Electronic': 91.18, 'Hip-Hop': 92.68, 'Folk': 76.77, 'Rock': 88.12, 'Instrumental': 18.33}
Currently evaluating the recommender engine using CNNLSTM model
Calculating latent features for each song...
100%|██████████| 5000/5000 [02:38<00:00, 31.50it/s]
Obtaining recommendation accuracy for each ge

# Music Recommendations Depending on Inputted Anchor Song and number of recommendations (5 or 10)

In [None]:
print(*np.unique(names),sep="\n")
song = input("Kindly select a song from above: ")
#load model for recommendation
loaded_model = load_model(best_models["CNNLSTM"], custom_objects=dependencies)
loaded_model.set_weights(loaded_model.get_weights())

#use the bottleneck layers as it contains all the latent features of each song
matrix_size = loaded_model.layers[-2].output.shape[1]
new_model = Model(loaded_model.inputs, loaded_model.layers[-2].output)

#Get the average accuracy of genre recommendations per genre
test_songs = defaultdict(init_dict)

print("Calculating latent features for each song...")
total = len(images)
for image, name in tqdm(zip(images, names), total=total, file=sys.stdout):
    img_reshaped = np.expand_dims(image, axis=0)
    pred = new_model.predict(img_reshaped)
    test_songs[name] += pred

test_songs_scaled = {k:v/10 for k,v in test_songs.items()}

song_distance_array = list(map(lambda x: cosine_similarity(test_songs_scaled[song], x), test_songs_scaled.values()))
song_distance_array_with_songs = list(zip(song_distance_array, test_songs_scaled.keys()))
#arrange in descending order
probable_songs = sorted(song_distance_array_with_songs, key=lambda x: x[0], reverse=True)

rec_no=int(input("How many recommendations would you like: "))
for song in probable_songs[1:rec_no+1]:
  print(song[1])

"(Me up at does)" by E. E. Cummings
"Ella Fitzgerald" by James A. Emanuel
"Jane Icin (For Jane - In Turkish)" by Charles Bukowski
(a) The Forest
(b) I Know How
02
03
04
05
05 Russian Dance
06
07
09 jungle
5
7th Sea Dub in Twilight
8 Bit Raceway
96
::Flowers
::Intro
???
A Dyslexic Dumb Dead Voice
A Frog's Cheese
A Man Called Dave
A Place to Call Home
A palavra / O primeiro passo
A72
Abre caminhos
Abstract Concepts - What up in the streets
Accordion
Adubando
African Era
After All That's Happened
After the War (ft. Alex, MoShang, HEJ31)
Alarm Clock
All You Need Is Toys
Ambassadors
Ambiguity
And I know that you're happy (ballad of the lonesome spaceboy)
And The Pond Life Flourished
Animateur commercial intime
Apollo Wakes
Around Under
At Teh Disko
Audhumla
Aunt Sqrot & Narcoleptus Git
B.
Bad Boy
Baile me por Tangos
Banish
Barcelona Afrobeat 01
Barcelona Afrobeat 03
Barcelona Afrobeat 04
Barcelona Afrobeat 07
Batuta
Beautiful Android
Belladonna
Beloved Girlfriend
Berries
Big Blue Day
Big Fo

In [None]:
print(*np.unique(names),sep="\n")
song = input("Kindly select a song from above: ")
#load model for recommendation
loaded_model = load_model(best_models["CNNLSTM"], custom_objects=dependencies)
loaded_model.set_weights(loaded_model.get_weights())

#use the bottleneck layers as it contains all the latent features of each song
matrix_size = loaded_model.layers[-2].output.shape[1]
new_model = Model(loaded_model.inputs, loaded_model.layers[-2].output)

#Get the average accuracy of genre recommendations per genre
test_songs = defaultdict(init_dict)

print("Calculating latent features for each song...")
total = len(images)
for image, name in tqdm(zip(images, names), total=total, file=sys.stdout):
    img_reshaped = np.expand_dims(image, axis=0)
    pred = new_model.predict(img_reshaped)
    test_songs[name] += pred

test_songs_scaled = {k:v/10 for k,v in test_songs.items()}

song_distance_array = list(map(lambda x: cosine_similarity(test_songs_scaled[song], x), test_songs_scaled.values()))
song_distance_array_with_songs = list(zip(song_distance_array, test_songs_scaled.keys()))
probable_songs = sorted(song_distance_array_with_songs, key=lambda x: x[0], reverse=True)

rec_no=int(input("How many recommendations would you like: "))
for song in probable_songs[1:rec_no+1]:
  print(song[1])

"(Me up at does)" by E. E. Cummings
"Ella Fitzgerald" by James A. Emanuel
"Jane Icin (For Jane - In Turkish)" by Charles Bukowski
(a) The Forest
(b) I Know How
02
03
04
05
05 Russian Dance
06
07
09 jungle
5
7th Sea Dub in Twilight
8 Bit Raceway
96
::Flowers
::Intro
???
A Dyslexic Dumb Dead Voice
A Frog's Cheese
A Man Called Dave
A Place to Call Home
A palavra / O primeiro passo
A72
Abre caminhos
Abstract Concepts - What up in the streets
Accordion
Adubando
African Era
After All That's Happened
After the War (ft. Alex, MoShang, HEJ31)
Alarm Clock
All You Need Is Toys
Ambassadors
Ambiguity
And I know that you're happy (ballad of the lonesome spaceboy)
And The Pond Life Flourished
Animateur commercial intime
Apollo Wakes
Around Under
At Teh Disko
Audhumla
Aunt Sqrot & Narcoleptus Git
B.
Bad Boy
Baile me por Tangos
Banish
Barcelona Afrobeat 01
Barcelona Afrobeat 03
Barcelona Afrobeat 04
Barcelona Afrobeat 07
Batuta
Beautiful Android
Belladonna
Beloved Girlfriend
Berries
Big Blue Day
Big Fo