In [35]:
import torch
import torch.nn as nn
import numpy as np
import pickle
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

from albums.electronic import electronic
from albums.folk import folk
from albums.hip_hop import hip_hop
from albums.jazz import jazz
from albums.pop import pop
from albums.rock import rock
from functions import genres

from env import *

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

client_credentials_manager = SpotifyClientCredentials(SPOTIPY_CID, SPOTIPY_SECRET)
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)

import os


In [11]:
albums = electronic + folk + hip_hop + jazz + pop + rock
len(albums)

711

In [12]:
with open('.album_cache', 'rb') as f: 
    album_cache = pickle.load(f)
with open('.song_cache', 'rb') as f: 
    song_cache = pickle.load(f)

In [13]:
X = np.array([])
Y = np.array([])

min_tempo = 0
max_tempo = 222.605
min_loudness = -60.0
max_loudness = 2.383

file = open("data.csv", 'w')
file.write('artist,album,song,genre,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,electronic,folk,hip_hop,jazz,pop,rock\n')

num_albums = 0

for album in albums:
    # make a zero array of length genres ex [0, 0, 0, 0, 0, 1]
    # with the 1 indiciating which genre it is

    genre_array = np.zeros(len(genres))
    genre_array[genres.index(album[2])] = 1

    if (album[0], album[1]) in album_cache:
        results = album_cache[(album[0], album[1])]
    else:
        results = sp.search(q=f'artist:{album[0]} album:{album[1]}', type='album')
        album_cache[(album[0], album[1])] = results

    try:
        album_id = results['albums']['items'][0]['id']
        tracks = sp.album_tracks(album_id)['items']

        for track in tracks:
            track_id = track['id']
            if track_id not in song_cache:
                f = sp.audio_features(track_id)[0]
                song_cache[track_id] = f
            else:
                f = song_cache[track_id]
            
            song_data = np.array([
                f['danceability'], 
                f['energy'], 
                (f['loudness'] - min_loudness) / (max_loudness - min_loudness), 
                f['speechiness'], 
                f['acousticness'], 
                f['instrumentalness'], 
                f['liveness'], 
                f['valence'], 
                (f['tempo'] - min_tempo) / (max_tempo - min_tempo)
            ])

            # print(track)
            file.write(f"{album[0].replace(',', '')},{album[1].replace(',', '')},{track['name'].replace(',', '')},{album[2]},")
            for val in song_data:
                file.write(f"{val},")
            s = ""
            for val in genre_array:
                s += (f'{val},')
            file.write(s[:-1])
            file.write('\n')

            X = np.append(X, song_data)
            Y = np.append(Y, genre_array)
        
        num_albums+= 1

    except Exception as e:
        pass
        # print(album)

file.close()
X = np.reshape(X, (-1 , 9))
Y = np.reshape(Y, (-1, 6))

print(f'min_tempo = {min_tempo}')
print(f'max_tempo = {max_tempo}')
print(f'min_loudness = {min_loudness}')
print(f'max_loudness = {max_loudness}')
X.shape, Y.shape, num_albums

min_tempo = 0
max_tempo = 222.605
min_loudness = -60.0
max_loudness = 2.383


((7423, 9), (7423, 6), 584)

In [14]:
with open('.album_cache', 'wb') as f: 
    pickle.dump(album_cache, f)
with open('.song_cache', 'wb') as f: 
    pickle.dump(song_cache, f)

In [33]:
e_m = max(np.linalg.eigvals(np.matmul(np.transpose(X), X)))
lr = 1/e_m
lr

6.913516419191394e-05

In [42]:
X, Y = shuffle(X, Y)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33)

X_train.shape, X_test.shape, Y_train.shape, Y_test.shape

((4973, 9), (2450, 9), (4973, 6), (2450, 6))

In [48]:
model = nn.Sequential(
  nn.Linear(9, 128),
  nn.ReLU(),
  nn.Linear(128, 128),
  nn.ReLU(),
  nn.Linear(128, 6),
  nn.Softmax(-1)
)
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

In [49]:
for epoch in range(1, 21):
  losses = []
  for i, x in enumerate(X_train):
    y=Y_train[i]
    x = torch.tensor(x).reshape(9).float()
    y = torch.tensor(y).reshape(6).float()

    guess = model(x)
    # print(x, y, guess)

    loss = nn.functional.mse_loss(guess, y)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


    losses.append(loss.item())
  if epoch % 5 == 0:
    print("LOSS", np.mean(losses))

LOSS 0.1384357521316067
LOSS 0.1379837873875915
LOSS 0.13757509046784594
LOSS 0.1372051534100411


In [50]:
with open('model', 'wb') as f: 
    pickle.dump(model, f)

In [51]:
with open('model', 'rb') as f: 
    model  = pickle.load(f) 

In [53]:
num_wrong = 0
for i, x in enumerate(X_test):
    y=Y_test[i]
    x = torch.tensor(x).reshape(9).float()
    y = torch.tensor(y).reshape(6).float()

    t = model(x)
    # print(x, y, guess)
    value = (t == max(t)).nonzero(as_tuple=True)[0].detach()
    if (y[value] != 1):
        print(t)
        print(y)
        num_wrong += 1
num_wrong / len(X_test)

tensor([0.1627, 0.1562, 0.1975, 0.1486, 0.1563, 0.1787],
       grad_fn=<SoftmaxBackward0>)
tensor([0., 0., 0., 0., 0., 1.])
tensor([0.1629, 0.1580, 0.1935, 0.1563, 0.1516, 0.1778],
       grad_fn=<SoftmaxBackward0>)
tensor([0., 0., 0., 1., 0., 0.])
tensor([0.1642, 0.1561, 0.1943, 0.1527, 0.1547, 0.1779],
       grad_fn=<SoftmaxBackward0>)
tensor([0., 0., 0., 1., 0., 0.])
tensor([0.1631, 0.1626, 0.1954, 0.1431, 0.1500, 0.1858],
       grad_fn=<SoftmaxBackward0>)
tensor([0., 0., 0., 0., 0., 1.])
tensor([0.1637, 0.1536, 0.1962, 0.1467, 0.1570, 0.1828],
       grad_fn=<SoftmaxBackward0>)
tensor([0., 0., 0., 0., 0., 1.])
tensor([0.1634, 0.1574, 0.1952, 0.1535, 0.1537, 0.1768],
       grad_fn=<SoftmaxBackward0>)
tensor([0., 0., 0., 0., 1., 0.])
tensor([0.1635, 0.1535, 0.1930, 0.1516, 0.1503, 0.1881],
       grad_fn=<SoftmaxBackward0>)
tensor([1., 0., 0., 0., 0., 0.])
tensor([0.1681, 0.1590, 0.1869, 0.1648, 0.1467, 0.1745],
       grad_fn=<SoftmaxBackward0>)
tensor([1., 0., 0., 0., 0., 0.])


0.7465306122448979

In [3]:
def get_genre(name, artist):
  results = sp.search(q="track:" + name + " artist:" + artist, type="track")
  track_id = results['tracks']['items'][0]['id']

  f = sp.audio_features(track_id)[0]
  song = np.array([
      f['danceability'], 
      f['energy'], 
      f['loudness']/loudness_max, 
      f['speechiness'], 
      f['acousticness'], 
      f['instrumentalness'], 
      f['liveness'], 
      f['valence'], 
      f['tempo']/tempo_max
  ])
  song = torch.tensor(song).reshape(9).float()

  t = model(song)
  value = (t == max(t)).nonzero(as_tuple=True)[0].detach()

  to_return = (f'{name} by {artist} is {genres[value]} ({round(int(10000*max(t)))/100}% confidence)\n')
  
  if max(t) < .6:
    t_list = list(t)
    t_list[t_list.index(max(t_list))] = torch.tensor(0)
    secondary_genre = genres[t_list.index(max(t_list))]
    sum = 0
    for val in t_list:
      sum += val.item()
    secondary_confidence = max(t_list) / sum
    if secondary_confidence > .5:
      to_return += f'Secondary genre: {secondary_genre} ({round(int(10000*secondary_confidence))/100}% confidence)\n'

  return to_return
  

In [4]:
with open('get_genre', 'wb') as f: 
    pickle.dump(get_genre, f)

In [7]:
test_songs = [
  ('Pink Moon', 'Nick Drake'),
  # ('Fools Errand', 'Fleet Foxes'),
  # ('Oxford Town', 'Bob Dylan'),
  ('Wesleys Theory', 'Kendrick Lamar'),
  # ('Aquemini', 'Outkast'),
  # ('Runaway', 'Kanye West'),
  # ('Blank Space', "Taylor Swift"),
  ('Gimmie Love', "Carly Rae Jepsen"),
  # ('Shape of You', 'Ed Sheeran'),
  ('Around the World', 'Daft Punk'),
  # ('Tekka', 'Sweet Trip'),
  # ('Roygbiv', 'Boards of Canada'),
  ('Let Down', 'Radiohead'),
  ('Chicago', 'Sufjan Stevens'),
  ('Five Years', 'David Bowie'),
]
tests = [get_genre(x[0], x[1]) for x in test_songs]
for test in tests:
  print(test)

Pink Moon by Nick Drake is folk (63.34% confidence)

Wesleys Theory by Kendrick Lamar is hip-hop (78.98% confidence)

Gimmie Love by Carly Rae Jepsen is pop (80.96% confidence)

Around the World by Daft Punk is electronic (93.15% confidence)

Let Down by Radiohead is rock (94.95% confidence)

Chicago by Sufjan Stevens is pop (43.63% confidence)
Secondary genre: folk (76.55% confidence)

Five Years by David Bowie is pop (45.65% confidence)
Secondary genre: rock (54.0% confidence)



In [8]:
print('Enter song:')
song = input()

print('Enter artist:')
artist = input()

try:
  print(get_genre(song, artist))
except Exception as e:
  print('Could not find the song!')
  # print(e)

Enter song:
Enter artist:
bohemian rhapsody by queen is pop (37.53% confidence)
Secondary genre: rock (52.08% confidence)

