In [56]:
from google.colab import userdata

my_secret_key = userdata.get("comet_ml_api_key")

In [57]:
! pip install --upgrade comet_ml --quiet
from comet_ml import start

experiment = start(api_key=my_secret_key, project_name="lstm_mer", workspace="nikzagl")

[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/nikzagl/lstm-mer/b72463418a8c4eb5bd6cf7cc00c5f991

[1;38;5;39mCOMET INFO:[0m Couldn't find a Git repository in '/content/drive/MyDrive/MER Project' nor in any parent directory. Set `COMET_GIT_DIRECTORY` if your Git Repository is elsewhere.


In [58]:
from google.colab import drive

drive.mount("/content/drive/")

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [59]:
%cd "/content/drive/MyDrive/MER Project"

/content/drive/MyDrive/MER Project


In [60]:
from torch import nn
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader

In [61]:
import fnmatch
import os

files = []
for root, dirnames, filenames in os.walk("MP3-Example"):
    for filename in fnmatch.filter(filenames, "*mp3"):
        files.append(os.path.join(root, filename))

In [62]:
import re

track_ids = list()
for file in files:
    track_id = re.split("\.|-", file)[-2]
    track_ids.append(track_id)

In [63]:
import pandas as pd

musicinfo_df = pd.read_csv("Music Info.csv")

In [64]:
track_ids = pd.DataFrame(track_ids).rename({0: "track_id"}, axis=1)
print(track_ids)
track_ids = track_ids.join(musicinfo_df.set_index("track_id"), on="track_id")

                track_id
0     TRAFNQO12903CBB254
1     TRAAEJQ128F92C484E
2     TRACTQD128F14B0F9D
3     TRANLAK128F429F8D0
4     TRAMMYK128E07936F9
...                  ...
1495  TRXJGBY128F930137D
1496  TRSVTIE128F428079E
1497  TRVBLFJ128F426AAB9
1498  TRXMDGW128F426CDB3
1499  TRPYIKK128F932B961

[1500 rows x 1 columns]


In [65]:
def classify_emotions(energy, valence):
    e = energy - 0.5
    v = valence - 0.5
    if v > 0 and e > 0:
        return "Joy"
    elif v < 0 and e > 0:
        return "Anger"
    elif v < 0 and e < 0:
        return "Sad"
    else:
        return "Pleasure"

In [66]:
emotions = track_ids.apply(lambda x: classify_emotions(x.energy, x.valence), axis=1)

In [67]:
from sklearn.preprocessing import LabelEncoder

In [68]:
le = LabelEncoder()

In [69]:
labels = le.fit_transform(emotions)

In [70]:
tensor_df = torch.load("mfcc_tensor.pt")

  tensor_df = torch.load("mfcc_tensor.pt")


In [71]:
from sklearn.model_selection import train_test_split

tensor_df_train, tensor_df_test, labels_train, labels_test = train_test_split(
    tensor_df, labels, random_state=42
)

In [72]:
train_dataset = TensorDataset(tensor_df_train, torch.tensor(labels_train))

In [73]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
import random

hyper_params = {"seed": 42, "batch_size": 16, "num_epochs": 25, "learning_rate": 1e-5}
# Logging hyperparamters
# experiment.log_parameters(hyper_params)
random.seed(hyper_params["seed"])
torch.manual_seed(hyper_params["seed"])

<torch._C.Generator at 0x7f0a7bf20e70>

In [75]:
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import f1_score

# Получение весов классов
class_weights = compute_class_weight(
    "balanced", classes=np.unique(labels_train), y=labels_train
)

In [76]:
class LSTMCNNModel(nn.Module):

    def __init__(self, out_feature=4, n_hidden=256, n_layers=2, drop_prob=0.5):
        super().__init__()
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.conv = nn.Conv2d(1, 1, kernel_size=5)

        self.lstm = nn.LSTM(
            1168 * 16,
            self.n_hidden,
            self.n_layers,
            dropout=self.drop_prob,
            batch_first=True,
        )

        self.dropout = nn.Dropout(drop_prob)

        self.fc = nn.Linear(n_hidden, out_feature)

    def forward(self, x):
        x = x.unsqueeze(dim=1)
        # x.shape (batch, seq_len, n_features)
        x = self.conv(x)
        batch_size, channels, height, width = x.size()
        x = x.view(batch_size, channels, -1)  # (batch_size, channels, height * width)
        l_out, l_hidden = self.lstm(x)
        # out.shape (batch, seq_len, n_hidden*direction)
        out = self.dropout(l_out)

        # out.shape (batch, out_feature)
        out = self.fc(out[:, -1, :])

        # return the final output and the hidden state
        return out

In [77]:
lstm_model = LSTMCNNModel(n_hidden=512, n_layers=2).to(device)

In [78]:
test_dataset = TensorDataset(tensor_df_test, torch.tensor(labels_test))
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [79]:
from torch.utils.data import DataLoader

# Создание датасета и загрузчика данных
train_loader = DataLoader(
    train_dataset, batch_size=hyper_params["batch_size"], shuffle=True
)

# Оптимизатор
optimizer = torch.optim.Adam(lstm_model.parameters(), lr=hyper_params["learning_rate"])
loss_function = torch.nn.CrossEntropyLoss(
    weight=torch.tensor(class_weights).float().to(device)
)
num_epochs = hyper_params["num_epochs"]
lstm_model.train()
step = 0
# Цикл обучения
for epoch in range(num_epochs):
    losses = list()
    lstm_model.train()
    for audio, labels in train_loader:
        optimizer.zero_grad()
        audio = audio.to(device)
        labels = labels.to(device)
        outputs = lstm_model(audio)
        loss = loss_function(outputs, labels)
        print(loss)
        experiment.log_metric(name="loss", value=loss.item(), step=step)
        losses.append(loss.item())
        loss.backward()
        optimizer.step()
        step += 1
    print(f"Epoch: {epoch + 1}, loss: {np.mean(np.array(losses))}")
    labels_pred = list()
    lstm_model.eval()
    with torch.no_grad():
        for audio, labels in test_loader:
            audio = audio.to(device)
            _, outputs = torch.max(lstm_model(audio), 1)
            labels_pred += outputs.cpu().tolist()
    print(labels_test)
    print(labels_pred)
    f1_weighted = f1_score(labels_test, labels_pred, average="weighted")
    experiment.log_metric(name="f1_score_weighted", value=f1_weighted, epoch=epoch)

tensor(1.3785, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(1.3801, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(1.3674, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(1.3557, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(1.3845, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(1.3603, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(1.3800, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(1.3790, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(1.3812, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(1.3925, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(1.3785, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(1.3764, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(1.3499, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(1.3728, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(1.3795, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(1.3816, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(1.3832, device='cuda:0', grad_fn=

In [86]:
test_labels = list()
labels_pred = list()

In [87]:
lstm_model.eval()
with torch.no_grad():
    for audio, labels in test_loader:
        audio = audio.to(device)
        _, outputs = torch.max(lstm_model(audio), 1)
        labels_pred += outputs.cpu().tolist()
    experiment.log_confusion_matrix(labels_test, labels_pred)

In [88]:
from sklearn.metrics import classification_report

In [89]:
print(classification_report(labels_test, labels_pred))

              precision    recall  f1-score   support

           0       0.60      0.68      0.64       131
           1       0.54      0.33      0.41       129
           2       0.33      0.09      0.14        35
           3       0.49      0.85      0.62        80

    accuracy                           0.54       375
   macro avg       0.49      0.49      0.45       375
weighted avg       0.53      0.54      0.51       375



In [None]:
print(labels_pred)

In [55]:
experiment.end()

[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     name                  : controversial_quokka_9505
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/nikzagl/lstm-mer/2c78c98b64ab406abbea8349301b2a22
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     f1_score_weighted : 0.4954244279642465
[1;38;5;39mCOMET INFO:[0m     loss [247]        : (1.224399447441101, 1.4154999256134033)
[1;38;5;39mCOMET INFO:[0m   Others:
[1;38;5;39mCOMET INFO:[0m     notebook_url : https://colab.research.google.com/notebook#fileId=1baYPqUckVYDwzQtWxo6VmVppV-3IbyTR
[1;38;5;39mCOMET 