In [1]:
from torch import nn
import torch

In [2]:
from torch.nn.utils.rnn import pad_sequence

In [3]:
import numpy as np

In [4]:
import pandas as pd

In [5]:
import os

In [6]:
from einops import rearrange

# Defining Dataseat

In [7]:
class MusicDataset(torch.utils.data.Dataset):   
    def __init__(self, np_file_paths, labels, seq_len=10000):
        self.seq_len = seq_len
        self.files = np_file_paths
        self.padder = torch.zeros(96, seq_len)
        self.labels = labels
#         self.labels = []
#         for i in range(len(self.files)):
#             label = np.random.randint(0, 10, size=15)
#             label[label > 8] = 0
#             label[label >= 1] = 1
#             self.labels.append(label)
#         for i in range(len(self.files)):
#             label = np.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
#             self.labels.append(label)
    def __len__(self):
        return len(self.files)

#     def __getitem__(self, index):
#         return (self.X[index:index+self.seq_len], self.y[index+self.seq_len-1])
    def __getitem__(self, index):
        x = np.load(self.files[index])
        x = torch.from_numpy(x).float()
        x = x[:,:self.seq_len]
        x = pad_sequence([x.T, self.padder.T], padding_value=-90, batch_first=True)[0].T
        x = x.unsqueeze(0)
#         input,label_ids,label
        item = {"input": x, "label_ids":[index], "labels": torch.tensor(self.labels[index])}
        return item
#         return x

In [8]:
from datasets import load_dataset

In [9]:
from datasets import disable_caching
disable_caching()

In [10]:
test_files = ['00/1164200.mp3',
 '00/12100.mp3',
 '00/1295900.mp3',
 '00/985000.mp3',
 '00/1398500.mp3',
 '00/1210700.mp3',
 '00/818600.mp3',
 '00/1339600.mp3',
 '00/506100.mp3',
 '00/390000.mp3',
 '00/16000.mp3',
 '00/1052800.mp3',
 '00/699100.mp3']

In [11]:
good_files = ['01/16101.mp3',
 '01/1052801.mp3',
 '01/12101.mp3',
 '01/1121101.mp3',
 '01/986601.mp3',
 '01/1125001.mp3',
 '01/1086601.mp3',
 '01/1219101.mp3',
 '01/759301.mp3',
 '01/1018801.mp3',
 '01/824301.mp3',
 '01/1167301.mp3',
 '01/1380601.mp3',
 '01/661601.mp3',
 '01/1398501.mp3',
 '01/390001.mp3',
 '01/80501.mp3',
 '01/1125401.mp3',
 '01/399201.mp3',
 '01/1210701.mp3',
 '01/554901.mp3',
 '01/292501.mp3',
 '01/842401.mp3',
 '01/1157701.mp3',
 '01/1245901.mp3',
 '01/1062501.mp3',
 '01/1189901.mp3',
 '01/1398801.mp3',
 '01/1357701.mp3',
 '01/1164201.mp3',
 '01/1396501.mp3',
 '01/1304001.mp3',
 '01/913701.mp3',
 '01/718301.mp3',
 '01/1381001.mp3',
 '01/1264201.mp3',
 '01/361701.mp3',
 '01/1420701.mp3',
 '01/1406401.mp3',
 '01/708401.mp3',
 '01/1009701.mp3',
 '01/846501.mp3',]

In [12]:
def files_to_labels(files):
    col_names = ['TRACK_ID',
     'ARTIST_ID',
     'ALBUM_ID',
     'PATH',
     'DURATION',
     'TAGS',
     'TAGS2',
     'TAGS3',
     'TAGS4',
     'TAGS5',
     'TAGS6',
     'TAGS7',
     'TAGS8',
     'TAGS9']
    MOODPATH = "/mnt/c/Users/aag12/Downloads/autotagging_moodtheme.tsv.txt"
    df = pd.read_csv(MOODPATH, sep='\t', names=col_names)
    df = df[df["PATH"].isin(files)]
    inds = {'fast': 0,
     'sexy': 1,
     'mellow': 2,
     'heavy': 3,
     'horror': 4,
     'travel': 5,
     'holiday': 6,
     'groovy': 7,
     'funny': 8,
     'retro': 9,
     'hopeful': 10,
     'powerful': 11,
     'cool': 12,
     'nature': 13,
     'game': 14}

    final_labels = []
    for i in range(len(df)):
        curr = np.zeros(len(inds))
        moods = list(df.iloc[i])[5:]
        for theme in moods:
            if type(theme) == str and "mood" in theme:
                check = theme.split("---")[-1]
                if check in inds:
                    curr[inds[check]] = 1
            else:
                pass

        final_labels.append(curr)
    return final_labels

In [13]:
train_labels = files_to_labels(good_files)

In [14]:
test_labels = files_to_labels(test_files)

In [15]:
train_np_files = [("/mnt/c/Users/aag12/Documents/subset_moodtheme/" + g).replace(".mp3", ".npy") for g in good_files]
test_np_files = [("/mnt/c/Users/aag12/Documents/subset_moodtheme/" + g).replace(".mp3", ".npy") for g in test_files]

In [16]:
import time

In [17]:
# SEQ_LEN = 10000
SEQ_LEN = 1000

In [18]:
train_dataset = MusicDataset(train_np_files, train_labels, seq_len=SEQ_LEN)
test_dataset = MusicDataset(test_np_files, test_labels, seq_len=SEQ_LEN)

# MODEL TESTING
run dataset stuff first

In [19]:
inp = train_dataset[0]['input']
# inp = inp.unsqueeze(0)
inp.shape

torch.Size([1, 96, 1000])

### Structure we want

* downsample with CNN
* feed into transformer
* feed into linear layer
* profit

In [20]:
inp.shape

torch.Size([1, 96, 1000])

In [21]:
conv = nn.Conv2d(1, 1, kernel_size=5, stride=3,padding=2)
cout = conv(inp)
cout.shape

torch.Size([1, 32, 334])

In [22]:
# reshape so its function of 16
cout = cout[:,:,:320]
cout.shape

torch.Size([1, 32, 320])

In [23]:
patch_size = 16

In [24]:
# put into patches
patched = rearrange(cout, 'b (h s1) (w s2) -> b (h w) (s1 s2)', s1=patch_size, s2=patch_size)
patched.shape

torch.Size([1, 40, 256])

In [25]:
class Patcher(torch.nn.Module):
    def forward(self, x):
        x = x[:,:,:320]
        return rearrange(x, 'b (h s1) (w s2) -> b (h w) (s1 s2)', s1=patch_size, s2=patch_size)

Copying from:  https://n8henrie.com/2021/08/writing-a-transformer-classifier-in-pytorch/

In [26]:
encoder_layer = nn.TransformerEncoderLayer(d_model=256, nhead=1)
transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=1)

In [27]:
transed = transformer_encoder(patched)
transed.shape

torch.Size([1, 40, 256])

In [28]:
layer_norm = nn.LayerNorm(256)
layered = layer_norm(transed)
layered.shape

torch.Size([1, 40, 256])

In [29]:
first = layered[:,0]

In [30]:
class BertPooler(nn.Module):
    def __init__(self, hidden_size):
        super().__init__()
        self.dense = nn.Linear(hidden_size, hidden_size)
        self.activation = nn.Tanh()

    def forward(self, hidden_states):
        # We "pool" the model by simply taking the hidden state corresponding
        # to the first token.
        first_token_tensor = hidden_states[:, 0]
        pooled_output = self.dense(first_token_tensor)
        pooled_output = self.activation(pooled_output)
        return pooled_output

In [31]:
bp = BertPooler(256)
bp(layered).shape

torch.Size([1, 256])

In [32]:
lined = nn.Linear(256, 15)

In [33]:
lined(first).shape

torch.Size([1, 15])

In [34]:
drop = nn.Dropout(0.1)

In [35]:
conv(inp).shape

torch.Size([1, 32, 334])

In [36]:
class Flatten(torch.nn.Module):
    def forward(self, x):
        return x.flatten()

In [37]:
Flatten()(conv(inp)).shape

torch.Size([10688])

In [38]:
mod = nn.Sequential(
    conv,
    Flatten()
#     Patcher(),
#     transformer_encoder,
#     layer_norm,
#     bp,
#     drop,
#     lined
)

In [39]:
mod(inp).shape

torch.Size([10688])

In [40]:
model = mod

In [42]:
sum(p.numel() for p in model.parameters())

26

In [43]:
# heads = [nn.Sequential(nn.Linear(256, 2), nn.Sigmoid()) for i in range(15)]
heads = [nn.Sequential(nn.Linear(10688, 2), nn.Sigmoid()) for i in range(15)]

In [44]:
heads[0](mod(inp))

tensor([6.0540e-06, 9.9960e-01], grad_fn=<SigmoidBackward0>)

## Try Training

In [45]:
from torch.utils.data import DataLoader
# from transformers import AdamW
import torch.optim as optim

In [55]:
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# device = torch.device('cpu')
device = torch.device('cuda')
# optimizer = optim.AdamW(model.parameters(), lr=1e-3)
optimizer = optim.AdamW(model.parameters(), lr=1e-2)
criterion = nn.CrossEntropyLoss()
# losses = [nn.CrossEntropyLoss() for i in range(15)]

In [56]:
device

device(type='cuda')

In [57]:
model = model.to(device)

In [58]:
heads = [h.to(device) for h in heads]

In [59]:
for h in heads:
    h.train()

In [60]:
optimizer.zero_grad()

In [61]:
inputs = train_dataset[0]['input'].to(device)

In [62]:
labels =  train_dataset[0]['labels'].to(device)

In [None]:
model(inputs)

In [None]:
model = model.to(device)
heads = [h.to(device) for h in heads]
model.train()
for h in heads:
    h.train()
for epoch in range(1):
    total_loss = 0
#     for batch in train_loader:
    
    for batch in train_dataset:
        optimizer.zero_grad()
        inputs = batch['input'].to(device)
#         input_ids = batch['input_ids'].to(device)
#         attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model(inputs)
        per_head_outputs = [heads[i](outputs) for i in range(15)]
        these_losses = []
        for i in range(15):
            target = torch.tensor([0, 0])
            target[int(labels[i])] = 1
            target = target.to(device)
#             cur_loss = losses[i](per_head_outputs[i], target.float())
            cur_loss = criterion(per_head_outputs[i], target.float())
            these_losses.append(cur_loss)
#         loss = outputs[0]
#         loss = criterion(outputs, labels)
        loss = sum(these_losses)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        break
    print(f"Epoch: {epoch}; Loss: {total_loss}")

model.eval()
for h in heads:
    h.eval()

# Defining Dataseat

In [7]:
class MusicDataset(torch.utils.data.Dataset):   
    def __init__(self, np_file_paths, labels, seq_len=10000):
        self.seq_len = seq_len
        self.files = np_file_paths
        self.padder = torch.zeros(96, seq_len)
        self.labels = labels
#         self.labels = []
#         for i in range(len(self.files)):
#             label = np.random.randint(0, 10, size=15)
#             label[label > 8] = 0
#             label[label >= 1] = 1
#             self.labels.append(label)
#         for i in range(len(self.files)):
#             label = np.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
#             self.labels.append(label)
    def __len__(self):
        return len(self.files)

#     def __getitem__(self, index):
#         return (self.X[index:index+self.seq_len], self.y[index+self.seq_len-1])
    def __getitem__(self, index):
        x = np.load(self.files[index])
        x = torch.from_numpy(x).float()
        x = x[:,:self.seq_len]
        x = pad_sequence([x.T, self.padder.T], padding_value=-90, batch_first=True)[0].T
        x = x.unsqueeze(0)
#         input,label_ids,label
        item = {"input": x, "label_ids":[index], "labels": torch.tensor(self.labels[index])}
        return item
#         return x

In [8]:
from datasets import load_dataset

In [9]:
from datasets import disable_caching
disable_caching()

In [10]:
test_files = ['00/1164200.mp3',
 '00/12100.mp3',
 '00/1295900.mp3',
 '00/985000.mp3',
 '00/1398500.mp3',
 '00/1210700.mp3',
 '00/818600.mp3',
 '00/1339600.mp3',
 '00/506100.mp3',
 '00/390000.mp3',
 '00/16000.mp3',
 '00/1052800.mp3',
 '00/699100.mp3']

In [11]:
good_files = ['01/16101.mp3',
 '01/1052801.mp3',
 '01/12101.mp3',
 '01/1121101.mp3',
 '01/986601.mp3',
 '01/1125001.mp3',
 '01/1086601.mp3',
 '01/1219101.mp3',
 '01/759301.mp3',
 '01/1018801.mp3',
 '01/824301.mp3',
 '01/1167301.mp3',
 '01/1380601.mp3',
 '01/661601.mp3',
 '01/1398501.mp3',
 '01/390001.mp3',
 '01/80501.mp3',
 '01/1125401.mp3',
 '01/399201.mp3',
 '01/1210701.mp3',
 '01/554901.mp3',
 '01/292501.mp3',
 '01/842401.mp3',
 '01/1157701.mp3',
 '01/1245901.mp3',
 '01/1062501.mp3',
 '01/1189901.mp3',
 '01/1398801.mp3',
 '01/1357701.mp3',
 '01/1164201.mp3',
 '01/1396501.mp3',
 '01/1304001.mp3',
 '01/913701.mp3',
 '01/718301.mp3',
 '01/1381001.mp3',
 '01/1264201.mp3',
 '01/361701.mp3',
 '01/1420701.mp3',
 '01/1406401.mp3',
 '01/708401.mp3',
 '01/1009701.mp3',
 '01/846501.mp3',]

In [12]:
def files_to_labels(files):
    col_names = ['TRACK_ID',
     'ARTIST_ID',
     'ALBUM_ID',
     'PATH',
     'DURATION',
     'TAGS',
     'TAGS2',
     'TAGS3',
     'TAGS4',
     'TAGS5',
     'TAGS6',
     'TAGS7',
     'TAGS8',
     'TAGS9']
    MOODPATH = "/mnt/c/Users/aag12/Downloads/autotagging_moodtheme.tsv.txt"
    df = pd.read_csv(MOODPATH, sep='\t', names=col_names)
    df = df[df["PATH"].isin(files)]
    inds = {'fast': 0,
     'sexy': 1,
     'mellow': 2,
     'heavy': 3,
     'horror': 4,
     'travel': 5,
     'holiday': 6,
     'groovy': 7,
     'funny': 8,
     'retro': 9,
     'hopeful': 10,
     'powerful': 11,
     'cool': 12,
     'nature': 13,
     'game': 14}

    final_labels = []
    for i in range(len(df)):
        curr = np.zeros(len(inds))
        moods = list(df.iloc[i])[5:]
        for theme in moods:
            if type(theme) == str and "mood" in theme:
                check = theme.split("---")[-1]
                if check in inds:
                    curr[inds[check]] = 1
            else:
                pass

        final_labels.append(curr)
    return final_labels

In [13]:
train_labels = files_to_labels(good_files)

In [14]:
test_labels = files_to_labels(test_files)

In [15]:
train_np_files = [("/mnt/c/Users/aag12/Documents/subset_moodtheme/" + g).replace(".mp3", ".npy") for g in good_files]
test_np_files = [("/mnt/c/Users/aag12/Documents/subset_moodtheme/" + g).replace(".mp3", ".npy") for g in test_files]

In [16]:
import time

In [17]:
# SEQ_LEN = 10000
SEQ_LEN = 1000

In [18]:
train_dataset = MusicDataset(train_np_files, train_labels, seq_len=SEQ_LEN)
test_dataset = MusicDataset(test_np_files, test_labels, seq_len=SEQ_LEN)