In [1]:
import pandas as pd, torch, torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import numpy as np

## Make dataset

In [2]:
device = torch.device("cuda")
full_dataset = pd.read_csv("https://raw.githubusercontent.com/cristobalvch/Spotify-Machine-Learning/master/data/data_moods.csv")
func_dataset = full_dataset[["danceability", "acousticness", "energy", 
                        "instrumentalness", "liveness", "valence",
                        "loudness", "speechiness", "tempo", "key",
                        "time_signature"]]
func_dataset = func_dataset.to_numpy()
mood_dataset = pd.DataFrame()
mood_dataset["mood"] = full_dataset["mood"]
le = LabelEncoder() 
mood_dataset["label"] = le.fit_transform(mood_dataset["mood"])
data_output = mood_dataset["label"].values

## Model

In [3]:
class Model(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)  
        self.layer2 = nn.Linear(hidden_size, output_size)  

    def forward(self, x):
        x = self.layer1(x)
        x = nn.ReLU()(x)
        x = self.layer2(x)
        x = nn.Softmax(dim=1)(x)
        return x

## Training with StratifiedKFold

In [4]:
skf = StratifiedKFold(n_splits=5, random_state=1, shuffle=True)

for train_index, test_index in skf.split(func_dataset, data_output):
        X_train_skf, X_test_skf = func_dataset[train_index], func_dataset[test_index]
        y_train_skf, y_test_skf = data_output[train_index], data_output[test_index]

        X_train = np.array(X_train_skf)
        X_test = X_test_skf
        y_train = y_train_skf
        y_test = y_test_skf

        # Normalization
        X_train_norm = (X_train - X_train.mean() / X_train.std())
        X_train_norm = torch.tensor(X_train_norm)
        y_train = torch.tensor(y_train)

        # Make the split data into TensorDataset & DataLoader objects
        inputs = X_train_norm.to(device)
        labels = y_train.to(device)

        train_ds = TensorDataset(inputs, labels)

        torch.manual_seed(1)
        batch_size = 2
        train_dl = DataLoader(train_ds, batch_size, shuffle=True)

        # Training setup
        input_size = X_train_norm.shape[1]
        hidden_size = 32
        output_size = 4
        
        model = Model(input_size, hidden_size, output_size)
        model.to(device)

        learning_rate = 0.001

        loss_fn = nn.CrossEntropyLoss()
        
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

        # Training loop
        num_epochs = 33
        loss_hist = [0] * num_epochs
        accuracy_hist = [0] * num_epochs

        for epoch in range(num_epochs):
            for x_batch, y_batch in train_dl:
                pred = model(x_batch.float())
                loss = loss_fn(pred, y_batch.long())
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()
            
                loss_hist[epoch] += loss.item()*y_batch.size(0)
                is_correct = (torch.argmax(pred, dim=1) == y_batch).float()
                accuracy_hist[epoch] += is_correct.sum()

            loss_hist[epoch] /= len(train_dl.dataset)
            accuracy_hist[epoch] /= len(train_dl.dataset)

## Evaluation

In [5]:

model.eval()
X_test_tensor = torch.tensor(X_test).float()
y_test_tensor = torch.tensor(y_test)

with torch.no_grad():
    pred = model(X_test_tensor.to(device))
    predicted_labels = torch.argmax(pred, dim=1)
    is_correct = (predicted_labels == y_test_tensor.to(device)).float()
    accuracy = is_correct.sum() / len(is_correct)
    print('Avg. test accuracy: %.3f' % (accuracy.item() * 100) + "%")

    predicted_labels = predicted_labels.cpu().numpy()
    predicted_moods = pd.Series(predicted_labels).map({0: 'Calm', 1: 'Energetic', 2: 'Happy', 3: 'Sad'})
    test_moods = pd.Series(y_test).map({0: 'Calm', 1: 'Energetic', 2: 'Happy', 3: 'Sad'})
    
    print(classification_report(test_moods, predicted_moods))

Avg. test accuracy: 78.832%
              precision    recall  f1-score   support

        Calm       0.95      1.00      0.97        39
   Energetic       0.60      0.87      0.71        31
       Happy       0.65      0.46      0.54        28
         Sad       0.94      0.74      0.83        39

    accuracy                           0.79       137
   macro avg       0.78      0.77      0.76       137
weighted avg       0.81      0.79      0.78       137



In [6]:
# Save model
torch.save(model, "lyrics_model.pt")

## Plot loss and accuracy history

In [None]:
""" fig = plt.figure(figsize=(12, 5))
fig.suptitle('ReLU activation function', fontsize=20, y=1.05)
fig.set_facecolor('white')

ax = fig.add_subplot(1, 2, 1)
ax.plot(loss_hist, lw=3)
ax.set_title('Training loss', size=15)
ax.set_xlabel('Epoch', size=15)
ax.tick_params(axis='both', which='major', labelsize=15)

ax = fig.add_subplot(1, 2, 2)
tensor_list = [tensor.to('cpu') for tensor in accuracy_hist]
accuracy_hist = torch.stack(tensor_list).cpu().numpy()
ax.plot(accuracy_hist, lw=3)
ax.set_title('Training accuracy', size=15)
ax.set_xlabel('Epoch', size=15)
ax.tick_params(axis='both', which='major', labelsize=15)
plt.tight_layout()
 
plt.show() """

## Test


In [18]:
# Find data for Chop Suey! by System of a Down | Should be Energetic
""" song_to_predict = {'danceability': [0.42], 
'acousticness': [0.000353], 
'energy': [0.929], 
'instrumentalness': [0.000747], 
'liveness': [0.122], 
'valence': [0.3], 
'loudness': [-3.899], 
'speechiness': [0.121], 
'tempo': [127.204], 
'key': [7], 
'time_signature': [4]}
 """
# Clair de Lune - Debussy | Should be Calm
""" song_to_predict = {'danceability': [0.288], 
 'acousticness': [0.994], 
 'energy': [0.00341], 
 'instrumentalness': [0.914], 
 'liveness': [0.0655], 
 'valence': [0.054], 
 'loudness': [-36.278], 
 'speechiness': [0.047], 
 'tempo': [74.662], 
 'key': [1], 
 'time_signature': [4]} """
# Walking on Sunshine - Katrina & The Waves | Should be Happy
""" song_to_predict = {'danceability': [0.596], 
'acousticness': [0.0116], 
'energy': [0.869], 
'instrumentalness': [0.173], 
'liveness': [0.0678], 
'valence': [0.944], 
'loudness': [-11.97], 
'speechiness': [0.037], 
'tempo': [109.902], 
'key': [10], 
'time_signature': [4]} """
# Hurt - Johnny Cash | Should be Sad
song_to_predict = {'danceability': [0.534], 
'acousticness': [0.649], 
'energy': [0.392], 
'instrumentalness': [0.000227], 
'liveness': [0.0948], 
'valence': [0.163], 
'loudness': [-7.636], 
'speechiness': [0.0267], 
'tempo': [90.379], 
'key': [9], 
'time_signature': [4]}

df_song_to_predict = pd.DataFrame(song_to_predict)

# Normalize
df_song_to_predict_norm = (df_song_to_predict - X_train.mean() / X_train.std())
df_song_to_predict_norm = torch.tensor(df_song_to_predict_norm.values)

# Use saved model
model = torch.load('lyrics_model.pt', map_location=torch.device('cpu'))
model.eval()

# Predict
inputs = df_song_to_predict_norm
with torch.no_grad():
  pred = model(inputs.float())
label_id = torch.argmax(pred)

if label_id == 0:
  output_label = "Calm"
if label_id == 1:
  output_label = "Energetic"
if label_id == 2:
  output_label = "Happy"
if label_id == 3:
  output_label = "Sad"

predlist = pred.tolist()[0]
print({'Calm': f'{(predlist[0] * 100):.3f}%', 'Energetic': f'{(predlist[1] * 100):.3f}%', 'Happy': f'{(predlist[2] * 100):.3f}%', 'Sad': f'{(predlist[3] * 100):.3f}%'})
print('--->', output_label)

{'Calm': '0.060%', 'Energetic': '0.117%', 'Happy': '3.426%', 'Sad': '96.396%'}
---> Sad
