In [210]:
import pandas as pd
import numpy as np
import ast
from ast import literal_eval
import arrow
from sklearn.neighbors import NearestNeighbors
import pickle 

In [217]:
df = pd.read_csv("songs_data.csv", sep=',', converters={'Artist Genres': lambda x: x.split(", ")}, keep_default_na=False)
df["Artist Genres"].head()

0        [acid house,ambient house,big beat,hip house]
1                        [dance pop,miami hip hop,pop]
2                                      [dance pop,pop]
3    [album rock,art rock,british invasion,classic ...
4      [album rock,british invasion,classic rock,rock]
Name: Artist Genres, dtype: object

In [226]:
#extract simpler genres
genres = ['pop', 'rock', 'hip hop', "house", "disco", "soul", "r&b"]
song_genres = []
for i in range(0, df.shape[0]):
    row = str(df["Artist Genres"].values[i][0])
    song_genres.append('')
    for g in genres:
        if row.find(g) != -1:
            song_genres[i] = g
            break

df["Genre"] = song_genres
df_all = pd.read_csv("songs_data.csv")
df_all["Genre"] = song_genres
df_all["Index"] = df_all.index

In [230]:
included_variables = ["Popularity", "Danceability","Energy","Loudness","Speechiness","Acousticness","Instrumentalness","Liveness","Tempo","Valence", "Album Release Date"]
indy_included = included_variables + ["Index"]
print(indy_included)

['Popularity', 'Danceability', 'Energy', 'Loudness', 'Speechiness', 'Acousticness', 'Instrumentalness', 'Liveness', 'Tempo', 'Valence', 'Album Release Date', 'Index']


In [227]:
print(df_all["Album Release Date"])
df_all["Album Release Date"] = pd.to_datetime(df_all["Album Release Date"], format="mixed",  errors='coerce').view('int64') 
print(df_all["Album Release Date"])

0       1992-08-03
1       2009-10-23
2       1999-01-12
3       2014-10-20
4       1969-12-05
           ...    
9994    2022-06-17
9995    2005-10-24
9996    2000-08-14
9997    2023-01-06
9998    2023-05-19
Name: Album Release Date, Length: 9999, dtype: object
0        712800000000000000
1       1256256000000000000
2        916099200000000000
3       1413763200000000000
4         -2332800000000000
               ...         
9994    1655424000000000000
9995    1130112000000000000
9996     966211200000000000
9997    1672963200000000000
9998    1684454400000000000
Name: Album Release Date, Length: 9999, dtype: int64


  df_all["Album Release Date"] = pd.to_datetime(df_all["Album Release Date"], format="mixed",  errors='coerce').view('int64')


In [204]:
def clean_and_norm(df_in):
    #normalize the data
    df_ints = pd.DataFrame(df_in, columns = indy_included)
    
    normed=(df_ints-df_ints.mean())/df_ints.std()
    normed=(df_ints-df_ints.min())/(df_ints.max()-df_ints.min())

    #clean data
    for column in indy_included:
        mean = float(normed[column].mean(skipna = True))
        normed[column] = normed[column].replace(np.NaN, mean)
    return normed

In [242]:
def knn_fit(data):
    #clean data
    normed = clean_and_norm(g_data)

    #grab normed data with only included variables
    train_data = pd.DataFrame(normed, columns = included_variables)
    train_data = train_data.iloc[1:,]
    
    knn = NearestNeighbors(metric='cosine', algorithm='auto')
    knn.fit(train_data)

    song_i = int(data["Index"].iloc[0]) 
    test = pd.DataFrame(data, columns = included_variables)
    test = test.iloc[song_i:song_i+1:,]

    distances, indices = knn.kneighbors(test, n_neighbors=3)
    
    print(data["Track Name"].iloc[song_i], "by", data["Artist Name(s)"].iloc[song_i])
    print()
    print("Closest to:")
    
    for ind in indices[0]:
        print(data["Track Name"].iloc[ind + 1], "by", data["Artist Name(s)"].iloc[ind + 1])
    print()

In [243]:
genre_data = []
for genre in genres:
    print(genre)
    g_data = df_all[df_all['Genre'] == genre]
    knn_fit(g_data)
    genre_data.append(normed)
    print("------------------------")

pop
From the Bottom of My Broken Heart by Britney Spears

Closest to:
Takeaway (feat. Lennon Stella) by The Chainsmokers, ILLENIUM, Lennon Stella
Take Yourself Home by Troye Sivan
If I Only Had Time by John Rowles

------------------------
rock
Something About The Way You Look Tonight - Edit Version by Elton John

Closest to:
Space Oddity - 2015 Remaster by David Bowie
Show Me the Way by Brian Cadd
Creep by Radiohead

------------------------
hip hop
It's Like That by Run–D.M.C., Jason Nevins

Closest to:
WITHOUT YOU by The Kid LAROI
WITHOUT YOU by The Kid LAROI
It's You by Ali Gatie

------------------------
house
Justified & Ancient - Stand by the Jams by The KLF

Closest to:
Faded by Alan Walker
Holy Water by Besomorph, Lucifer
Changed the Way You Kiss Me - Radio Edit by Example

------------------------
disco
Born to Be Alive - The Original by Patrick Hernandez

Closest to:
So Sad The Song by Gladys Knight, The Pips
I Love to Love (But My Baby Loves to Dance) by Tina Charles
Theme 