In [19]:
import joblib

In [20]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [21]:
# Folder holding the pickled tables, given relative to the current working directory
# (presumably /content in Colab, so it resolves under the mounted Drive — confirm).
folder = "drive/MyDrive/IS3107/data/"
# NOTE(review): joblib.load unpickles these files, and unpickling can execute
# arbitrary code — only load files that come from a trusted source.
# `track` and `audio` are used as pandas DataFrames by preprocess(); `artist` and
# `album` are presumably DataFrames as well — confirm.
artist = joblib.load(folder+'artist_df.pkl')
album = joblib.load(folder+'album_df.pkl')
audio = joblib.load(folder+'audio_df.pkl')
track = joblib.load(folder+'track_df.pkl')

In [140]:
import pandas as pd
import joblib
from sklearn.cluster import KMeans
import statistics
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

# Module-level encoder shared across calls; preprocess() refits it on every call.
le = LabelEncoder()
# Module-level scaler shared across calls; find_recommendation() refits it on every call.
scaler = StandardScaler()

def preprocess(track, audio):
    """Join track metadata with audio features into one cleaned table.

    Neither input frame is modified; all work happens on copies.

    Parameters
    ----------
    track : pandas.DataFrame
        Track table keyed by ``track_id``.
    audio : pandas.DataFrame
        Audio-feature table keyed by ``id``.
        Between them, the two frames must supply every column selected below.

    Returns
    -------
    pandas.DataFrame
        One row per distinct ``track_name`` with no missing values, the
        selected metadata/feature columns, an integer ``artist_id_encoded``
        column, and an ``index`` column holding the row labels from before
        the final ``reset_index()``.

    Notes
    -----
    Refits the module-level ``le`` LabelEncoder on every call.
    """
    # rename() returns a new frame, so the caller's `track` is left untouched.
    tracks = (
        track
        .rename(columns={"track_id": "id"})
        .assign(id=lambda frame: frame['id'].astype(str))
    )
    # assign() builds a new frame instead of writing the cast back into the
    # caller's `audio` DataFrame.
    features = audio.assign(id=audio['id'].astype(str))

    merged_df = pd.merge(tracks, features, on='id', how='left')
    # dropna() runs before the column selection, so a missing value in ANY
    # merged column removes the row. Duplicates are judged on the title alone,
    # so same-named tracks collapse to the first occurrence.
    merged_df = merged_df.dropna().drop_duplicates(subset=['track_name'])
    # .copy() makes the subset an independent frame, so adding a column below
    # cannot trigger SettingWithCopyWarning.
    merged_df = merged_df[['id', 'artist_id', 'track_name', 'popularity', 'album_id',
       'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo','duration_ms',
       'time_signature']].copy()
    merged_df['artist_id_encoded'] = le.fit_transform(merged_df['artist_id'])
    # reset_index() (without drop=True) keeps the old row labels as an 'index' column.
    return merged_df.reset_index()

def find_recommendation(name, input_df, df, artist_weight=100):
    """Return the track names that fall in the same KMeans cluster as the input tracks.

    All rows of ``df`` are clustered on standardized audio features plus a
    weighted artist code. The cluster most frequently assigned to the rows of
    ``input_df`` is selected, and its track names are returned with the
    requested titles removed.

    Parameters
    ----------
    name : str or iterable of str
        Title(s) to exclude from the result. A single string is treated as one
        title, not as a collection of characters/substrings.
    input_df : pandas.DataFrame
        Rows describing the tracks to base the recommendation on; needs the
        audio feature columns and ``artist_id_encoded``.
    df : pandas.DataFrame
        Full candidate table; needs the audio feature columns,
        ``artist_id_encoded`` and ``track_name``. It is not modified.
    artist_weight : int or float, default 100
        Multiplier applied to ``artist_id_encoded`` before clustering.

    Returns
    -------
    list
        Track names from the selected cluster, in ``df`` row order.

    Raises
    ------
    ValueError
        If ``input_df`` has no rows.

    Notes
    -----
    Refits the module-level ``scaler`` on every call.
    """
    feat = ['danceability', 'energy', 'key', 'loudness', 'speechiness','acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']

    if len(input_df) == 0:
        # Fail early with a clear message instead of an opaque error from the scaler.
        raise ValueError("input_df is empty: none of the requested tracks were found")

    # With a bare string, `title not in name` would be a substring test.
    excluded = {name} if isinstance(name, str) else set(name)

    X = pd.DataFrame(scaler.fit_transform(df[feat]), columns=feat)
    query = pd.DataFrame(scaler.transform(input_df[feat]), columns=feat)

    # add weight for same artist
    # to_numpy() assigns by position; plain Series assignment aligns on index
    # labels and would yield NaN whenever df/input_df do not carry a 0..n-1 index.
    # NOTE(review): artist_id_encoded is an integer code, and after weighting it is
    # orders of magnitude larger than the standardized features, so it presumably
    # dominates the distances while numeric closeness of codes has no musical
    # meaning — TODO confirm this is the intended weighting.
    X['artist_id_encoded'] = df['artist_id_encoded'].to_numpy() * artist_weight
    query['artist_id_encoded'] = input_df['artist_id_encoded'].to_numpy() * artist_weight

    # Roughly 15 tracks per cluster; never ask KMeans for zero clusters on small tables.
    n_clusters = max(1, X.shape[0] // 15)
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans.fit(X)
    labels = kmeans.predict(X)

    # The inputs may land in different clusters; use the most common one.
    label = statistics.mode(kmeans.predict(query))

    # Boolean mask on the label array avoids writing a 'labels' column into the caller's df.
    in_cluster = df.loc[labels == label, 'track_name']
    return [title for title in in_cluster if title not in excluded]

def main(track_names):
    """Recommend tracks that cluster together with the given track titles.

    Builds the cleaned table from the module-level ``track`` and ``audio``
    frames, selects the rows whose ``track_name`` exactly matches one of the
    requested titles, and delegates to ``find_recommendation``.

    Parameters
    ----------
    track_names : str or iterable of str
        Title(s) to base the recommendation on. A single string is treated as
        one title.

    Returns
    -------
    list
        Recommended track names, as returned by ``find_recommendation``.

    Raises
    ------
    ValueError
        If none of the requested titles exist in the cleaned table.
    """
    # Wrap a lone title: with a bare string, `x in track_names` is a substring
    # test, so e.g. "HOT" would also select a track titled "HO".
    if isinstance(track_names, str):
        track_names = [track_names]
    else:
        track_names = list(track_names)

    df = preprocess(track, audio)
    # isin() does exact, vectorized membership without adding a scratch column.
    choice = df[df['track_name'].isin(track_names)].reset_index(drop=True)
    if choice.empty:
        raise ValueError(f"None of the requested tracks were found: {track_names!r}")
    return find_recommendation(track_names, choice, df)

In [141]:
# Build the merged, de-duplicated track/audio table; as the cell's last expression it is displayed.
preprocess(track, audio)

Unnamed: 0,index,id,artist_id,track_name,popularity,album_id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,artist_id_encoded
0,0,6AQbmUe0Qwf5PZnt4HmTXv,78rUTD7y6Cy67W1RVzYs7t,Boy's a liar Pt. 2,96,6cVfHBcp3AdpYY0bBglkLN,0.696,0.809000,5.0,-8.254,1.0,0.0500,0.2520,0.000128,0.2480,0.8570,132.962,131013.0,4.0,1094
1,3,0yLdNVWF3Srea0uzk55zFn,5YGY8feqx7naU7z4HrwZM6,Flowers,100,7I0tjwFtxUwBC1vgyeMAax,0.707,0.681000,0.0,-4.325,1.0,0.0668,0.0632,0.000005,0.0322,0.6460,117.999,200455.0,4.0,823
2,5,59uQI0PADDKeE6UZDTJEe8,4oUHIQIBe0LHzYfvXNW4QM,Last Night,88,7fOmdhRrRohTzToL617xkk,0.517,0.675000,6.0,-5.382,1.0,0.0357,0.4590,0.000000,0.1510,0.5180,203.853,163855.0,4.0,731
3,7,7oDd86yk8itslrA9HRP2ki,1Xyo4u8uXC1ZmMpatF05PJ,Die For You - Remix,95,6Exo0MYoL3XammoTDeihFy,0.531,0.525000,1.0,-6.500,0.0,0.0671,0.2320,0.000000,0.4410,0.5020,66.900,232857.0,4.0,230
4,11,0DWdj2oZMBFSzRsi2Cvfzf,790FomKkXshlbRYZFtlgla,TQG,96,4kS7bSuU0Jm9LYMosFU2x5,0.720,0.630000,4.0,-3.547,0.0,0.2770,0.6730,0.000000,0.0936,0.6070,179.974,199440.0,4.0,1095
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5835,8092,78NvEoQpN4pOGLWrmrw9AM,77IKXFvO7SpWrq8hflrUXc,Tempo (Commentary),6,5mMhmWAzd3vTvhzShSoiXx,0.799,0.361000,1.0,-11.569,1.0,0.3970,0.1450,0.000000,0.1080,0.3300,111.104,161838.0,5.0,1090
5836,8093,70UILYcP8Q8kvQhFKvLP5o,5VWWftIdcTzXDWTicX5l0c,Connections,21,1lFzJaaCrSAoFfqvAQWu0e,0.198,0.000956,9.0,-36.318,0.0,0.0486,0.9900,0.905000,0.1010,0.1110,106.713,169673.0,3.0,817
5837,8094,41hD0lo1wIP9GX25JE4Kfu,5VWWftIdcTzXDWTicX5l0c,Serenitivity,23,6Vz2CBGS1UcQ1GDTopeXbF,0.198,0.065700,7.0,-20.771,1.0,0.0375,0.9830,0.963000,0.0684,0.0372,64.911,213788.0,4.0,817
5838,8095,04WLCoFu2UZg8u2dwNE71v,6UnhGhByBdAIpd9ZEtYO1g,ZZAFFRIC RHYTHM$lOWER,48,6taaDXI7sNhT7NMeSI6lvj,0.743,0.223000,2.0,-14.090,0.0,0.1520,0.0861,0.848000,0.0918,0.8890,187.942,30672.0,4.0,981


In [143]:
# List the tracks that share a cluster with the two given titles.
main(["HOT", "Darl+ing"])

Unnamed: 0,danceability,energy,key,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,artist_id_encoded
0,0.543663,0.798181,-0.057637,-0.028681,-0.521494,-0.093794,-0.394143,0.186878,1.548513,0.381429,109400
1,0.606648,0.241661,-1.448540,0.675034,-0.391260,-0.711712,-0.394609,-0.962501,0.697874,-0.112383,82300
2,-0.481281,0.215574,0.220544,0.485717,-0.632349,0.583690,-0.394629,-0.329757,0.181847,2.720993,73100
3,-0.401118,-0.436598,-1.170360,0.285475,-0.388934,-0.159251,-0.394629,1.214821,0.117344,-1.798766,23000
4,0.681085,0.019922,-0.335817,0.814380,1.238221,1.284085,-0.394629,-0.635477,0.540647,1.932932,109500
...,...,...,...,...,...,...,...,...,...,...,...
5835,1.133434,-1.149639,-1.170360,-0.622424,2.168467,-0.443991,-0.394629,-0.558781,-0.576067,-0.339934,109000
5836,-2.307856,-2.715043,1.055086,-5.055166,-0.532347,2.321585,3.037644,-0.596064,-1.458957,-0.484847,81700
5837,-2.307856,-2.433548,0.498725,-2.270575,-0.618395,2.298675,3.257613,-0.769695,-1.756479,-1.864407,81700
5838,0.812782,-1.749637,-0.892179,-1.073955,0.269215,-0.636763,2.821468,-0.645064,1.677519,2.195894,98100




263


['silver light',
 'soft layers',
 'Wonder',
 'Circles',
 '_WORLD',
 'To you',
 'Rock with you',
 'irides',
 'planet radio',
 'disconnect']