In [2]:
import torch
import torch.nn as nn
import numpy as np
import pickle
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

from albums.electronic import electronic
from albums.folk import folk
from albums.hip_hop import hip_hop
from albums.jazz import jazz
from albums.pop import pop
from albums.rock import rock
from functions import genres

from env import *

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Spotify Web API client using the client-credentials flow.
# SPOTIPY_CID / SPOTIPY_SECRET are not defined in this notebook; presumably
# they are provided by `from env import *` above -- verify against env.py.
client_credentials_manager = SpotifyClientCredentials(
    client_id=SPOTIPY_CID,
    client_secret=SPOTIPY_SECRET,
)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

import os


In [3]:
# Bounds used to min-max scale Spotify's `tempo` and `loudness` audio features
# into roughly [0, 1]: value -> (value - min) / (max - min). The same constants
# are applied when building the dataset and again inside get_genre(), so they
# must stay in sync with whatever data the saved model was trained on.
# NOTE(review): the numbers look like they were measured from the dataset
# (see the commented-out prints in the data-building cell) -- confirm.
min_tempo = 0
max_tempo = 222.605
min_loudness = -60.0
max_loudness = 2.383

# Every album from all six genre lists, concatenated in genre order.
# Later cells index each entry as album[0] = artist, album[1] = album title,
# album[2] = genre name.
albums = electronic + folk + hip_hop + jazz + pop + rock
len(albums)

668

In [None]:
def load_cache(path):
    """Return the pickled dict stored at `path`, or an empty dict if it is missing.

    Starting from an empty cache lets the notebook run on a fresh checkout;
    the cache files are created later by the cell that pickles the caches.

    SECURITY: pickle.load executes arbitrary code embedded in the file. Only
    load cache files that this notebook wrote itself.
    """
    if not os.path.exists(path):
        return {}
    with open(path, 'rb') as cache_file:
        return pickle.load(cache_file)


# (artist, album) -> raw Spotify album-search response
album_cache = load_cache('.album_cache')
# track id -> Spotify audio-features dict for that track
song_cache = load_cache('.song_cache')

In [None]:
# Build the training table: for every album, look it up on Spotify (using the
# caches where possible), fetch audio features for each of its tracks, write
# one CSV row per track to data.csv and collect the same numbers into X / Y.
feature_rows = []    # one 9-element feature vector per track
label_rows = []      # the matching one-hot genre vector per track
skipped_albums = []  # (artist, album, error) for albums that could not be processed
num_albums = 0       # albums processed without an error

# Explicit UTF-8 so non-ASCII artist/track names round-trip through
# pd.read_csv (which defaults to UTF-8) regardless of the platform locale.
# The `with` block guarantees the file is flushed and closed even on error.
with open("data.csv", 'w', encoding='utf-8') as file:
    file.write('artist,album,song,genre,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,electronic,folk,hip_hop,jazz,pop,rock\n')

    for album in albums:
        # One-hot genre vector, e.g. [0, 0, 0, 0, 0, 1]; the 1 marks the
        # position of this album's genre within `genres`.
        genre_array = np.zeros(len(genres))
        genre_array[genres.index(album[2])] = 1

        cache_key = (album[0], album[1])
        if cache_key in album_cache:
            results = album_cache[cache_key]
        else:
            results = sp.search(q=f'artist:{album[0]} album:{album[1]}', type='album')
            album_cache[cache_key] = results

        try:
            # Take the first search hit; an empty result raises IndexError
            # and the album is recorded as skipped below.
            album_id = results['albums']['items'][0]['id']
            tracks = sp.album_tracks(album_id)['items']

            for track in tracks:
                track_id = track['id']
                if track_id not in song_cache:
                    song_cache[track_id] = sp.audio_features(track_id)[0]
                features = song_cache[track_id]
                if features is None:
                    # No audio features stored for this track: skip only this
                    # track instead of abandoning the rest of the album.
                    continue

                # loudness and tempo are min-max scaled with the notebook-level
                # bounds; the other features are stored unchanged.
                song_data = np.array([
                    features['danceability'],
                    features['energy'],
                    (features['loudness'] - min_loudness) / (max_loudness - min_loudness),
                    features['speechiness'],
                    features['acousticness'],
                    features['instrumentalness'],
                    features['liveness'],
                    features['valence'],
                    (features['tempo'] - min_tempo) / (max_tempo - min_tempo)
                ])

                # Commas are stripped from the free-text fields so the
                # hand-written CSV keeps a fixed number of columns.
                fields = [
                    album[0].replace(',', ''),
                    album[1].replace(',', ''),
                    track['name'].replace(',', ''),
                    f"{album[2]}",
                ]
                fields.extend(f"{val}" for val in song_data)
                fields.extend(f"{val}" for val in genre_array)
                file.write(",".join(fields) + "\n")

                feature_rows.append(song_data)
                label_rows.append(genre_array)

            num_albums += 1

        except Exception as e:
            # Best effort: keep going, but remember what was skipped and why
            # instead of swallowing the error silently.
            skipped_albums.append((album[0], album[1], repr(e)))
        print("Did an album")

# Stack once at the end (appending to an ndarray inside the loop is quadratic).
X = np.array(feature_rows, dtype=float).reshape(-1, 9)
Y = np.array(label_rows, dtype=float).reshape(-1, len(genres))

if skipped_albums:
    print(f"Skipped {len(skipped_albums)} album(s):")
    for skipped_artist, skipped_title, reason in skipped_albums:
        print(f"  {skipped_artist} - {skipped_title}: {reason}")

X.shape, Y.shape, num_albums

In [None]:
# Persist both Spotify lookup caches so later runs can skip the API calls.
for cache_path, cache in (('.album_cache', album_cache), ('.song_cache', song_cache)):
    with open(cache_path, 'wb') as cache_file:
        pickle.dump(cache, cache_file)

In [4]:
# Load the per-track table and append a constant `bias` column of ones.
db = pd.read_csv('data.csv').assign(bias=1)

# Drop every pop track together with the now-unused `pop` one-hot column,
# then renumber the rows from 0.
db = db.loc[db['genre'] != 'pop'].drop(columns=['pop']).reset_index(drop=True)

# From here on `genres` holds the five remaining labels, in the same order as
# the one-hot columns of `db`. Note that this rebinds the `genres` name that
# was imported from `functions` at the top of the notebook.
genres = ['electronic', 'folk', 'hip_hop', 'jazz', 'rock']

db

Unnamed: 0,artist,album,song,genre,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,electronic,folk,hip_hop,jazz,rock,bias
0,Portishead,Dummy,Mysterons,electronic,0.657,0.498,0.776991,0.2620,0.40500,0.000479,0.107,0.460,0.737854,1.0,0.0,0.0,0.0,0.0,1
1,Portishead,Dummy,Sour Times,electronic,0.698,0.633,0.854720,0.0406,0.05080,0.272000,0.264,0.550,0.422654,1.0,0.0,0.0,0.0,0.0,1
2,Portishead,Dummy,Strangers,electronic,0.659,0.617,0.863104,0.0380,0.03550,0.160000,0.185,0.250,0.377700,1.0,0.0,0.0,0.0,0.0,1
3,Portishead,Dummy,It Could Be Sweet,electronic,0.668,0.520,0.783899,0.0444,0.69800,0.016500,0.130,0.556,0.691642,1.0,0.0,0.0,0.0,0.0,1
4,Portishead,Dummy,Wandering Star,electronic,0.639,0.370,0.787346,0.0723,0.50400,0.587000,0.117,0.707,0.360064,1.0,0.0,0.0,0.0,0.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5468,At the Drive-In,Relationship of Command,Quarantined,rock,0.234,0.886,0.890932,0.0972,0.04010,0.022700,0.327,0.404,0.642942,0.0,0.0,0.0,0.0,1.0,1
5469,At the Drive-In,Relationship of Command,Cosmonaut,rock,0.346,0.979,0.924435,0.1170,0.01180,0.003080,0.099,0.345,0.460686,0.0,0.0,0.0,0.0,1.0,1
5470,At the Drive-In,Relationship of Command,Non-Zero Possibility,rock,0.294,0.612,0.856339,0.0355,0.60600,0.113000,0.104,0.187,0.334107,0.0,0.0,0.0,0.0,1.0,1
5471,At the Drive-In,Relationship of Command,Extracurricular,rock,0.305,0.894,0.894314,0.0436,0.00189,0.000000,0.238,0.467,0.437596,0.0,0.0,0.0,0.0,1.0,1


In [5]:
# Fixed seed so the train/test split (and therefore every accuracy reported
# below) is reproducible under Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

# Column layout of `db` (CSV header minus `pop`, plus the appended `bias`):
#   0-3   artist, album, song, genre
#   4-12  the nine audio features
#   13..  one one-hot column per entry of `genres`
#   -1    bias (constant 1)
feature_columns = list(db.columns[4:13]) + [db.columns[-1]]
label_columns = list(db.columns[13:13 + len(genres)])

features_np = db[feature_columns].to_numpy(dtype=np.float32)  # (n_tracks, 10)
labels_np = db[label_columns].to_numpy(dtype=np.float32)      # (n_tracks, len(genres))

# e_m = max(np.linalg.eigvals(np.matmul(np.transpose(X), X)))
# lr = 1/e_m
# print(lr)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

X = torch.from_numpy(features_np)
Y = torch.from_numpy(labels_np)

# train_test_split shuffles by default, so no separate shuffle() is needed.
# NOTE(review): the split is per track, so tracks from one album can land in
# both train and test -- consider a per-album split if that matters.
X_train_np, X_test_np, Y_train_np, Y_test_np = train_test_split(
    features_np, labels_np, test_size=0.33, random_state=SEED
)
X_train, X_test = torch.from_numpy(X_train_np).to(device), torch.from_numpy(X_test_np).to(device)
Y_train, Y_test = torch.from_numpy(Y_train_np).to(device), torch.from_numpy(Y_test_np).to(device)

X_train.shape, X_test.shape, Y_train.shape, Y_test.shape

(torch.Size([3666, 10]),
 torch.Size([1807, 10]),
 torch.Size([3666, 5]),
 torch.Size([1807, 5]))

In [6]:
# Fully connected network: 10 inputs (9 audio features + bias) -> 1024 -> 512
# -> 256 -> 128 -> one output per genre. Every Linear layer, including the
# last one, is followed by a ReLU, so the outputs are non-negative scores.
layer_widths = [10, 1024, 512, 256, 128, len(genres)]

layers = []
for fan_in, fan_out in zip(layer_widths[:-1], layer_widths[1:]):
    layers.append(nn.Linear(fan_in, fan_out))
    layers.append(nn.ReLU())

model = nn.Sequential(*layers).to(device)

# Plain SGD on a mean-squared-error loss against the one-hot genre targets.
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)
loss_fn = torch.nn.MSELoss()

In [12]:
# Train for 50 epochs with one optimizer step per training sample (batch size
# 1, samples visited in stored order). The mean per-sample loss of the epoch
# is printed every fifth epoch.
for epoch in range(1, 51):
    losses = []
    for sample, target in zip(X_train, Y_train):
        optimizer.zero_grad()
        prediction = model(sample)
        loss = loss_fn(prediction, target)
        loss.backward()
        optimizer.step()

        losses.append(loss.item())

    if epoch % 5 == 0:
        print("LOSS", np.mean(losses))

LOSS 0.107344786009873
LOSS 0.10149264546528952
LOSS 0.09869373491390988
LOSS 0.09675134128131145
LOSS 0.09510030804421477
LOSS 0.09363353899065545
LOSS 0.09215135483889585
LOSS 0.09074927533633699
LOSS 0.0894078646035976
LOSS 0.0880627194589888


In [14]:
# Serialize the whole trained network (architecture + weights) to ./model.
with open('model', 'wb') as model_file:
    pickle.dump(model, model_file)

In [15]:
# Restore the pickled network and move it onto the active device.
# pickle.load can execute arbitrary code: only load a ./model file that this
# notebook produced.
with open('model', 'rb') as model_file:
    model = pickle.load(model_file)
model = model.to(device)

In [16]:
# Hold-out accuracy of the neural network (the saved run scored ~66%).
#
# The predicted genre is the index of the largest output. torch.argmax returns
# the first maximal index, which also covers the "all outputs equal" case
# (index 0) and cannot produce several indices on a tie.
with torch.no_grad():  # inference only: no autograd graph needed
    test_outputs = model(X_test)

predicted = test_outputs.argmax(dim=1)
row_index = torch.arange(len(X_test), device=Y_test.device)
# A prediction is correct when the one-hot target has a 1 at that index.
is_correct = Y_test[row_index, predicted] == 1

num_wrong = int((~is_correct).sum())
1 - num_wrong / len(X_test)

0.6624239070282236

In [17]:
def get_genre(name, artist):
  """Look a song up on Spotify and describe the genre the model predicts.

  The first track returned by the Spotify search is used. Its audio features
  are scaled the same way as the training data (loudness and tempo are
  min-max scaled, a constant bias of 1 is appended) and fed to `model`.

  Args:
    name: Song title used in the Spotify search query.
    artist: Artist name used in the Spotify search query.

  Returns:
    A string ending in a newline with the predicted genre and its score. If
    the top score is below 0.6, a second line names the runner-up genre when
    its share of the remaining scores exceeds 0.6.

    The "confidence" values are raw network outputs (ReLU activations trained
    against one-hot targets), not calibrated probabilities.

  Raises:
    IndexError: The Spotify search returned no tracks.
    ValueError: Spotify has no audio features for the track.
  """
  results = sp.search(q="track:" + name + " artist:" + artist, type="track")
  items = results['tracks']['items']
  if not items:
    raise IndexError(f'No Spotify track found for {name!r} by {artist!r}')
  track_id = items[0]['id']

  f = sp.audio_features(track_id)[0]
  if f is None:
    raise ValueError(f'Spotify has no audio features for {name!r} by {artist!r}')

  song = torch.tensor([
      f['danceability'],
      f['energy'],
      (f['loudness'] - min_loudness) / (max_loudness - min_loudness),
      f['speechiness'],
      f['acousticness'],
      f['instrumentalness'],
      f['liveness'],
      f['valence'],
      (f['tempo'] - min_tempo) / (max_tempo - min_tempo),
      1  # bias input
  ], dtype=torch.float32, device=device)

  with torch.no_grad():  # inference only
    t = model(song)

  # argmax returns the first maximal index: one plain int even when several
  # outputs tie, and 0 when all outputs are equal.
  value = int(torch.argmax(t))
  top_score = t[value]

  to_return = (f'{name} by {artist} is {genres[value]} ({int(10000*top_score)/100}% confidence)\n')

  if top_score < .6:
    # Zero out the winner and look at how the rest of the score mass is split.
    remaining = t.clone()
    remaining[value] = 0
    runner_up = int(torch.argmax(remaining))
    secondary_genre = genres[runner_up]

    remaining_total = 0
    for score in remaining:
      remaining_total += score.item()

    # With no remaining score mass there is no meaningful runner-up.
    if remaining_total > 0:
      secondary_confidence = remaining[runner_up] / remaining_total
      if secondary_confidence > .6:
        to_return += f'Secondary genre: {secondary_genre} ({int(10000*secondary_confidence)/100}% confidence)\n'

  return to_return
  

In [130]:
# Sanity check: classify a handful of well-known songs as (title, artist)
# pairs and eyeball the predictions. Commented-out entries are not evaluated.
test_songs = [
    ('Oxford Town', 'Bob Dylan'),
    ('Wesleys Theory', 'Kendrick Lamar'),
    ('Aquemini', 'Outkast'),
    ('Runaway', 'Kanye West'),
    ('Around the World', 'Daft Punk'),
    ('Tekka', 'Sweet Trip'),
    ('Roygbiv', 'Boards of Canada'),
    ('Let Down', 'Radiohead'),
    ('Five Years', 'David Bowie'),
    # ('Blank Space', 'Taylor Swift'),
    # ('Gimmie Love', 'Carly Rae Jepsen'),
    ('Hacker', 'Death Grips'),
    # ('Hyperballad', 'Bjork'),
    ('Made Up Dreams', 'Built to Spill'),
    ('Oh Comely', 'Neutral Milk Hotel'),
    ('A Love Supreme, Pt. IV', 'John Coltrane'),
    ('A Day in the Life', 'Wes Montgomery'),
]

# One Spotify lookup + model prediction per song, printed in list order.
tests = [get_genre(title, performer) for title, performer in test_songs]
for report in tests:
    print(report)

Oxford Town by Bob Dylan is folk (80.56% confidence)

Wesleys Theory by Kendrick Lamar is hip_hop (87.86% confidence)

Aquemini by Outkast is hip_hop (96.71% confidence)

Runaway by Kanye West is hip_hop (68.0% confidence)

Around the World by Daft Punk is electronic (90.12% confidence)

Tekka by Sweet Trip is rock (57.23% confidence)

Roygbiv by Boards of Canada is electronic (78.55% confidence)

Let Down by Radiohead is rock (89.96% confidence)

Five Years by David Bowie is rock (47.77% confidence)
Secondary genre: folk (82.97% confidence)

Hacker by Death Grips is hip_hop (75.24% confidence)

Made Up Dreams by Built to Spill is rock (97.9% confidence)

Oh Comely by Neutral Milk Hotel is folk (59.36% confidence)
Secondary genre: jazz (68.97% confidence)

A Love Supreme, Pt. IV by John Coltrane is folk (62.59% confidence)

A Day in the Life by Wes Montgomery is jazz (55.77% confidence)
Secondary genre: folk (77.03% confidence)



In [19]:
# Ask for a song title and artist, then print the model's genre guess.
print('Enter song:')
song = input()

print('Enter artist:')
artist = input()

# Any failure inside get_genre (no search hit, missing audio features, API
# error, ...) is reported with the same generic message.
try:
    verdict = get_genre(song, artist)
except Exception:
    verdict = 'Could not find the song!'
print(verdict)

Enter song:
basketball shoes
Enter artist:
black country, new road
basketball shoes by black country, new road is rock (46.63% confidence)



In [20]:
# Let's try some other models for comparison: linear regression and k-NN.

# Linear regression first (~62% accuracy in the saved run).
# Closed-form least squares on the one-hot targets:
#   w* = pinv(X^T X) X^T Y
w_star = torch.linalg.pinv(X_train.T @ X_train) @ X_train.T @ Y_train
print(w_star.shape)

# Score every test row at once; the predicted genre is the largest score.
# torch.argmax returns the first maximal index, so ties (including the
# "all scores equal" case, which maps to index 0) yield exactly one index.
guesses = X_test @ w_star
predicted = guesses.argmax(dim=1)
row_index = torch.arange(len(X_test), device=Y_test.device)
is_correct = Y_test[row_index, predicted] == 1

num_wrong = int((~is_correct).sum())
1 - num_wrong / len(X_test)

torch.Size([10, 5])


0.6236856668511345

In [21]:
# k-NN with k = 3 (~60% accuracy in the saved run).
from sklearn.neighbors import KNeighborsClassifier

# The targets are one-hot rows, so the classifier predicts one 0/1 value per
# genre column for each sample.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train.cpu().numpy(), Y_train.cpu().numpy())

# Predict the whole test set in one call instead of one call per row.
knn_output = knn.predict(X_test.cpu().numpy())

# Predicted genre = first column equal to 1. argmax over the boolean mask
# returns that column, or 0 when no column is 1 (same fallback as before).
predicted = (knn_output == 1).argmax(axis=1)
targets = Y_test.cpu().numpy()
is_correct = targets[np.arange(len(targets)), predicted] == 1

num_wrong = int((~is_correct).sum())
1 - num_wrong / len(X_test)

0.60431654676259