In [43]:
import psycopg2
import numpy as np

import os

# Connection settings are read from the standard libpq environment variables
# (PGUSER, PGPASSWORD, PGHOST, PGPORT, PGDATABASE) so credentials do not have
# to live in the notebook. The literals are fallbacks that reproduce the
# original local-development setup when a variable is not set.
print('connecting to postgres...')
connection = psycopg2.connect(user=os.environ.get("PGUSER", "max"),
                              password=os.environ.get("PGPASSWORD", "password"),
                              host=os.environ.get("PGHOST", "127.0.0.1"),
                              port=os.environ.get("PGPORT", "5432"),
                              database=os.environ.get("PGDATABASE", "final"))
print()
print(connection)
# Single cursor reused by the query cells below.
cursor = connection.cursor()

connecting to postgres...

<connection object at 0x7fa302f1d508; dsn: 'user=max password=xxx dbname=final host=127.0.0.1 port=5432', closed: 0>


In [44]:
# Select every row and column of the features table; the rows themselves are
# fetched from the cursor in the next cell.
q = """
select * from popular_spotify_features;
"""

cursor.execute(q)

In [45]:
# Pull the whole result set of the executed query into memory.
spotify = cursor.fetchall()

In [46]:
# Peek at one arbitrary row to see what the raw fetched data looks like.
print(spotify[500])

('2 Chainz Like Me', '2 chainz like me', Decimal('0.598'), Decimal('0.431'), Decimal('0'), Decimal('-5.689'), Decimal('0'), Decimal('0.0523'), Decimal('0.319'), Decimal('0.437'), Decimal('0.402'), Decimal('0.188'), Decimal('127.204'), 'spotify:track:6ZFmOtCTbxLo46bEJMPRME', 'https://api.spotify.com/v1/audio-analysis/6ZFmOtCTbxLo46bEJMPRME', Decimal('229867'), 4)


So that's pretty messy. Let's load it into a DataFrame.

In [59]:
import pandas as pd
import numpy as np

# Labels for the 17 fields of each fetched row, in positional order.
columns = [
    'lastfm_artist_track', 'spotify_artist_track',
    'danceability', 'energy', 'key', 'loudness', 'mode',
    'speechiness', 'acousticness', 'instrumentalness', 'liveness',
    'valence', 'tempo',
    'uri', 'analysis_url',
    'duration_ms', 'time_signature',
]

# Numeric feature columns: everything except the two name columns and the
# two URL-like columns, kept in the same order as `columns`.
feats = [
    label for label in columns
    if label not in ('lastfm_artist_track', 'spotify_artist_track',
                     'uri', 'analysis_url')
]

df = (
    pd.DataFrame(spotify, columns=columns)
    # SQL Decimal(val) -> float(val) for every numeric feature
    .astype({label: float for label in feats})
    # keep only the first row seen for each Spotify track name
    .drop_duplicates(subset='spotify_artist_track')
    # index by track name while also keeping it as a regular column
    .set_index('spotify_artist_track', drop=False)
)
df.head()

Unnamed: 0_level_0,lastfm_artist_track,spotify_artist_track,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,uri,analysis_url,duration_ms,time_signature
spotify_artist_track,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
!!! am/fm,!!! AM/FM,!!! am/fm,0.796,0.908,4.0,-6.188,0.0,0.0834,0.0387,0.324,0.045,0.676,115.981,spotify:track:0Y9SHiZGeXgShe4SKYl5sw,https://api.spotify.com/v1/audio-analysis/0Y9S...,295173.0,4.0
!!! all my heroes are weirdos,!!! All My Heroes Are Weirdos,!!! all my heroes are weirdos,0.505,0.992,4.0,-5.891,0.0,0.0918,0.00134,0.0537,0.0766,0.609,122.345,spotify:track:2nTla8UUqmBNKfcOvKc4bN,https://api.spotify.com/v1/audio-analysis/2nTl...,184080.0,4.0
!!! all u writers,!!! All U Writers,!!! all u writers,0.874,0.814,11.0,-6.873,0.0,0.045,0.00412,0.464,0.069,0.741,121.976,spotify:track:2pPhbWZBm3Q6b35jV691qG,https://api.spotify.com/v1/audio-analysis/2pPh...,322623.0,4.0
!!! bend over beethoven,!!! Bend Over Beethoven,!!! bend over beethoven,0.63,0.927,9.0,-6.54,1.0,0.0457,0.0031,0.739,0.182,0.611,120.654,spotify:track:5PRM1a5qRJpsFzkz8VyRUY,https://api.spotify.com/v1/audio-analysis/5PRM...,486813.0,4.0
!!! break in case of anything,!!! Break in Case of Anything,!!! break in case of anything,0.708,0.913,4.0,-4.599,0.0,0.104,0.0284,0.817,0.304,0.509,106.987,spotify:track:7owkuXT1aSBjCD3PoyHdDQ,https://api.spotify.com/v1/audio-analysis/7owk...,219840.0,4.0


In [60]:
# (rows, columns) of the de-duplicated frame.
df.shape

(8334, 17)

In [61]:
# Feature matrix: only the numeric columns listed in `feats`.
X = df[feats]
# Track names, in the same row order as X.
names = df.spotify_artist_track

In [62]:
# Re-check: selecting X and names above should not have changed df.
df.shape

(8334, 17)

### Baseline model: nearest neighbours by Euclidean distance

In [63]:
# Start by standardising the features with StandardScaler, so that
# large-valued columns such as duration_ms do not dominate the distances.
from sklearn.preprocessing import StandardScaler

std_scl = StandardScaler()
X_scaled = std_scl.fit_transform(X)

In [66]:
def find_n_nearest(song, n=15):
    """Return the names of the ``n`` tracks closest to ``song``.

    Closeness is the Euclidean distance between rows of the standardised
    feature matrix ``X_scaled``, whose rows are expected to line up
    positionally with the rows of ``df``.

    Parameters
    ----------
    song : str
        Label of the query track in ``df.index``.
    n : int, default 15
        Number of neighbours to return; values below 1 give an empty array.

    Returns
    -------
    numpy.ndarray or str
        Values of ``df['spotify_artist_track']`` for the nearest tracks,
        closest first, never including the query track itself. If ``song``
        is not in the index, the message ``'<song> not in database'`` is
        returned instead.
    """
    try:
        # Assumes df.index is unique (df is de-duplicated on this key), so
        # get_loc yields a single integer row position.
        position = df.index.get_loc(song)
    except KeyError:
        return f'{song} not in database'

    # Row-wise Euclidean distance from every track to the query track.
    distances = np.linalg.norm(X_scaled - X_scaled[position], axis=1)

    # Stable sort keeps the original row order among equally distant tracks.
    order = np.argsort(distances, kind='stable')

    # Drop the query track by position rather than assuming it sorts first:
    # another track with identical features may tie with it at distance 0.
    order = order[order != position]

    return df['spotify_artist_track'].values[order[:max(n, 0)]]
    

In [68]:
# Example query using the default n=15.
find_n_nearest('action bronson muslim wedding')

array(['$uicideboy$ low key', 'alessia cara outlaws',
       'american me columbian neck tie', "aesop rock 1,000 o'clock",
       'angel haze a tribe called red',
       'afi i am trying very hard to be here',
       "a lot like birds ted bundy's thanksgiving dinner",
       '50 cent do you think about me', 'amy winehouse october song',
       "alestorm the sunk'n norwegian",
       "amy winehouse moody's mood for love", 'all time low lullabies',
       'a tribe called quest mobius',
       'alkaline trio message from kathlene',
       'an albatross i will swim into the lazer eye'], dtype=object)

In [53]:
# Display the track-name Series (both its index and its values are track names).
names

spotify_artist_track
!!! am/fm                                                                                        !!! am/fm
!!! all my heroes are weirdos                                                !!! all my heroes are weirdos
!!! all u writers                                                                        !!! all u writers
!!! bend over beethoven                                                            !!! bend over beethoven
!!! break in case of anything                                                !!! break in case of anything
!!! californiyeah                                                                        !!! californiyeah
!!! dear can                                                                                  !!! dear can
!!! even when the water's cold                                              !!! even when the water's cold
!!! except death                                                                          !!! except death
!!! freedom! '15

In [57]:
# Print every track name on its own line.
# NOTE(review): this emits one line per row of df (thousands of lines);
# consider names.head() or names.sample() to keep the output readable.
for name in names:
    print(name)

!!! am/fm
!!! all my heroes are weirdos
!!! all u writers
!!! bend over beethoven
!!! break in case of anything
!!! californiyeah
!!! dear can
!!! even when the water's cold
!!! except death
!!! freedom! '15
!!! get that rhythm right
shinedown get up
jeff beck hammerhead
govi thief of hearts
!!! hello? is this thing on?
!!! infinifold
s3rl intensify
!!! jamie, my intentions are bass
!!! must be the moon
!!! myth takes
!!! one girl / one boy
!!! pardon my freedom
!!! slyd
!!! steady as the sidewalk cracks
!!! take ecstasy with me
!!! the most certain sure
!!! theme from space island
!!! wannagain wannagain
!!! when the going gets tough, the tough get karazzee
!!! yadnus
"blue" gene tyranny next time might be your time
"weird al" yankovic angry white boy polka
1,2,3 work
"weird al" yankovic another one rides the bus
"weird al" yankovic dare to be stupid
"weird al" yankovic eat it
"weird al" yankovic fat
"weird al" yankovic handy
"weird al" yankovic hardware store
"weird al" yankovic like