In [1]:
import os, sys, time, pickle
from pathlib import Path
from collections import Counter
from IPython.display import Audio

PROJECT_DIR = Path(sys.path[0])/".."
DATA_DIR = PROJECT_DIR/"data"
SRC_DIR = PROJECT_DIR/"src"
DEPS_DIR = PROJECT_DIR/"deps"

sys.path.append(str(SRC_DIR))
sys.path.append(str(DEPS_DIR))

%pylab inline
%config InlineBackend.figure_format='retina'

%load_ext autoreload 
%autoreload 2

from tqdm import tqdm

import torch
from torch.utils.data import Dataset, DataLoader
from fairseq_wav2vec import Wav2Vec2Config, Wav2Vec2Model
import pandas as pd

from mugen_train import (
    musicDataset, IDX_to_GENRE, GENRE_TO_IDX, 
    get_music_data_loaders, GenreClassifier, get_batch_genre_one_hot,
    MusicCNN,
    get_num_params)

%pylab is deprecated, use %matplotlib inline and import the required libraries.
Populating the interactive namespace from numpy and matplotlib


In [2]:
feature_size = 128
n_genres = 50

top50_genre_samples_df = pd.read_csv("/n1Tb/sc_mp3_top50_genre_samples.tsv_gz", compression='gzip', sep='\t')

train_loader, valid_loader = get_music_data_loaders(top50_genre_samples_df, cut=8e4)

In [3]:
model = Wav2Vec2Model(Wav2Vec2Config)
model = model.to("cuda")

t_specs = {
#     "d_model": 128, 
    "dim_feedforward": 64, 
    "num_decoder_layers": 0,
    "num_encoder_layers": 0,
    "nhead": 1,
}
genre_classifier = GenreClassifier(n_genres, feature_size, t_specs=t_specs, use_transformer=False).to("cuda")
# genre_loss_model = GenreLoss(n_genres, feature_size, genre_classifier)

print(f"wav2vec-model n_params: {get_num_params(model):,d}")
print(f"genre_classifier n_params: {get_num_params(genre_classifier):,d}")

# optimizer = torch.optim.Adam(model.parameters(), lr=0.05, betas=(0.9,0.98), eps=1e-06, weight_decay=0.01, fused=True)
optimizer = torch.optim.Adam(genre_classifier.parameters(), lr=0.005, betas=(0.9,0.98), eps=1e-06, weight_decay=0.01, fused=True)

wav2vec-model n_params: 95,356,288
genre_classifier n_params: 6,450


In [6]:
debug = False
data_loader = train_loader

model.train()
running_loss = 0.0
losses = []
for i, (batch, genres) in tqdm(enumerate(data_loader, 0), total=len(data_loader), disable=~debug):
    optimizer.zero_grad()
    outputs = model(batch)
    
    x = outputs["x"]
    num_inf = torch.sum(torch.isinf(x))
    
    if num_inf:
        print(f"num_inf in output of model {num_inf}")
        
        ignore_samples = torch.where(x.isinf())[1].unique()
        print(f"Num of ignored samples: {len(ignore_samples)}")
        filter_samples = [i not in ignore_samples for i in range(x.shape[1])]
        
        x = x[:, filter_samples, :]
        
        genres = [genre for i, genre in enumerate(genres) if i not in ignore_samples]

    # discard where there is inf values 
        
    genre_props = genre_classifier(x)
    min_max_prop = genre_props.min().to("cpu").item(), genre_props.max().to("cpu").item() 
    if np.isnan(min_max_prop[0]) or np.isnan(min_max_prop[1]):
        print("np.isnan(min_max_prop[0]) or np.isnan(min_max_prop[1])")
        break
    genre_loss = torch.nn.BCELoss()(genre_props, get_batch_genre_one_hot(genres, n_genres, "cuda"))

    wav2vec_features_pen = outputs["features_pen"]
    
    loss = genre_loss #+ 0.1 * wav2vec_features_pen
    loss_item = loss.to("cpu").item() 
    if debug:
        print("min_max_prop", min_max_prop)
        print("losses", loss_item, genre_loss.to("cpu").item(), wav2vec_features_pen.to("cpu").item())    

    loss.backward()
    optimizer.step()
    running_loss += loss_item
    losses.append(loss_item)

    out_is_inf = torch.isinf(x).any().item()
    if out_is_inf:
        inf_values = torch.isinf(x).sum() 
        print(f"{inf_values} inf values in output")
        break    

    for name, param in genre_classifier.named_parameters():
        has_nan = torch.isnan(param).any()
        has_inf = torch.isinf(param).any()

        if has_nan or has_inf:
            print(f"genre_classifier {name} has_nan {has_nan} has_inf {has_inf}.")
            break        
        
    for name, param in model.named_parameters():
        has_nan = torch.isnan(param).any()
        has_inf = torch.isinf(param).any()

        if has_nan or has_inf:
            print(f"Parameter {name} has_nan {has_nan} has_inf {has_inf}.")
            break

    if has_nan or has_inf:
        print("has_nan or has_inf")
        break
    
# return running_loss / len(data_loader)

num_inf in output of model 4
Num of ignored samples: 1
4 inf values in output


In [9]:
len(genre_props)

12

In [7]:
genres

[10, 37, 11, 29, 21, 5, 22, 6, 10, 36, 47, 39]

In [8]:
len(genres)

12

In [31]:
 
ignore_samples = torch.where(outputs["x"].isinf())[1].unique()

In [37]:
for x in ignore_samples.to("cpu"):
    print(x.item())

9


In [6]:
ignore_samples

NameError: name 'ignore_samples' is not defined

In [45]:
filter_samples

[True, True, True, True, True, True, True, True, True, False, True, True]

In [27]:
outputs["x"].shape

torch.Size([101, 12, 94])

In [32]:
ignore_samples

tensor([9], device='cuda:0')

In [None]:
data_loader = train_loader

model.train()
running_loss = 0.0
losses = []
for i, (batch, genres) in tqdm(enumerate(data_loader, 0), total=len(data_loader), disable=True):
    pass

In [4]:
for i, (batch, genres) in tqdm(enumerate(data_loader, 0), total=len(data_loader), disable=True):
    pass


NameError: name 'data_loader' is not defined

In [70]:
# Audio(batch[12].to("cpu").detach().numpy(), rate=44100, autoplay=True)

In [12]:
# from matplotlib import colors
# cmap = colors.ListedColormap(['red', 'blue', 'green', 'purple',])
# bounds = [-2,0,2]
# norm = colors.BoundaryNorm(bounds, cmap.N)

# fig, ax = plt.subplots()
# ax.imshow(outputs["x"][:, 5, :].to("cpu").detach().numpy(), cmap=cmap, norm=norm);


In [None]:
genre_props

In [21]:
for name, param in genre_classifier.named_parameters():
    print(param)
    min_max = (torch.min(param).to("cpu").detach().item(), torch.max(param).to("cpu").detach().item())
    print(f"min: {min_max[0]:,.3f} max: {min_max[1]:,.3f}")


Parameter containing:
tensor([[ 1.5653e-02,  1.0166e-02,  1.3567e-02,  ..., -1.4283e-03,
         -3.4696e-05, -1.8214e-03],
        [ 1.4348e-02,  1.4168e-02,  1.3006e-02,  ..., -2.0981e-04,
          1.5393e-03, -1.8361e-03],
        [ 7.3778e-03,  1.2378e-02,  8.1439e-03,  ...,  6.5706e-04,
         -1.8284e-03,  7.1113e-04],
        ...,
        [ 1.4650e-02,  1.1347e-02,  1.1165e-02,  ...,  2.6091e-04,
         -2.0520e-04,  6.1813e-05],
        [ 1.1922e-02,  1.2531e-02,  1.1692e-02,  ...,  9.5847e-04,
         -1.7996e-04, -9.5097e-05],
        [ 1.3669e-02,  1.0285e-02,  1.3568e-02,  ..., -2.1436e-04,
          3.3145e-04,  6.9002e-04]], device='cuda:0', requires_grad=True)
min: -0.002 max: 0.022
Parameter containing:
tensor([-0.2848, -0.3124, -0.2516, -0.2560, -0.3290, -0.3366, -0.3669, -0.2745,
        -0.3922, -0.3738, -0.3439, -0.3426, -0.3462, -0.3399, -0.4030, -0.2628,
        -0.2674, -0.3926, -0.3747, -0.2488, -0.3014, -0.3583, -0.2426, -0.2944,
        -0.2869, -0.3600

In [83]:
music_cnn = MusicCNN(50)

In [17]:
param.size()

torch.Size([384])

In [None]:
for name, param in genre_classifier.named_parameters():

In [7]:
from ignite.handlers import FastaiLRFinder

trainer = ...
model = ...
optimizer = ...

lr_finder = FastaiLRFinder()
to_save = {"model": model, "optimizer": optimizer}

with lr_finder.attach(trainer, to_save=to_save) as trainer_with_lr_finder:
    trainer_with_lr_finder.run(dataloader)

# Get lr_finder results
lr_finder.get_results()

# Plot lr_finder results (requires matplotlib)
lr_finder.plot()

# get lr_finder suggestion for lr
lr_finder.lr_suggestion()


TypeError: Object <class 'ellipsis'> should have `state_dict` method

### Test 2 -- with small update

In [4]:
train_loses = []
val_loses = []
for epoch in range(3):
    train_loss = train(model, optimizer, train_loader)
    val_loss = validation(model, valid_loader)
    print(f"{epoch:03d} train_loss: {train_loss:0.4f} val_loss: {val_loss:0.4f}")
    train_loses.append(train_loss)
    val_loses.append(val_loss)

100%|████████████████████████████████████████████████████████| 8334/8334 [54:23<00:00,  2.55it/s]
100%|████████████████████████████████████████████████████████████| 17/17 [00:04<00:00,  4.05it/s]


000 train_loss: 0.0000 val_loss: 0.0000


100%|████████████████████████████████████████████████████████| 8334/8334 [54:39<00:00,  2.54it/s]
100%|████████████████████████████████████████████████████████████| 17/17 [00:04<00:00,  4.10it/s]


001 train_loss: 0.0000 val_loss: 0.0000


100%|████████████████████████████████████████████████████████| 8334/8334 [54:43<00:00,  2.54it/s]
100%|████████████████████████████████████████████████████████████| 17/17 [00:04<00:00,  4.10it/s]

002 train_loss: 0.0000 val_loss: 0.0000





In [None]:
plt.plot(train_loses, label="train_loses");
plt.plot(val_loses, label="val_loses");
plt.legend();

### Test 1 -- diverging

In [5]:
train_loses = []
val_loses = []
for epoch in range(5):
    train_loss = train(model, optimizer, train_loader)
    val_loss = validation(model, valid_loader)
    print(f"{epoch:03d} train_loss: {train_loss:0.4f} val_loss: {val_loss:0.4f}")
    train_loses.append(train_loss)
    val_loses.append(val_loss)

100%|████████████████████████████████████████████████████████| 8334/8334 [54:19<00:00,  2.56it/s]
100%|████████████████████████████████████████████████████████████| 17/17 [00:04<00:00,  4.10it/s]


000 train_loss: 0.0000 val_loss: 0.0000


100%|████████████████████████████████████████████████████████| 8334/8334 [54:35<00:00,  2.54it/s]
100%|████████████████████████████████████████████████████████████| 17/17 [00:04<00:00,  4.11it/s]


001 train_loss: 0.0000 val_loss: 0.0000


100%|████████████████████████████████████████████████████████| 8334/8334 [54:34<00:00,  2.55it/s]
100%|████████████████████████████████████████████████████████████| 17/17 [00:04<00:00,  4.11it/s]


002 train_loss: 0.0000 val_loss: 0.0000


100%|████████████████████████████████████████████████████████| 8334/8334 [54:29<00:00,  2.55it/s]
100%|████████████████████████████████████████████████████████████| 17/17 [00:04<00:00,  4.09it/s]


003 train_loss: 0.0000 val_loss: 0.0000


100%|████████████████████████████████████████████████████████| 8334/8334 [54:29<00:00,  2.55it/s]
100%|████████████████████████████████████████████████████████████| 17/17 [00:04<00:00,  4.05it/s]

004 train_loss: 0.0000 val_loss: 0.0000





In [None]:
%debug

#### Scratchpad

In [49]:
for epoch in range(10):
    train_loss = train(model, optimizer, criterion, train_loader)
    print(f"Epoch {epoch+1}, train loss: {train_loss:.3f}")


In [66]:
for i, batch in enumerate(top50_dataloader):
    break

In [18]:
out = model.forward(sample_bs)

In [160]:
sample_bs.shape

torch.Size([32, 120000])

In [144]:

d.shape

torch.Size([2, 273024])

In [4]:
model.eval()
running_loss = 0.0
for i, (batch, genres) in tqdm(enumerate(valid_loader, 0), total=len(valid_loader)):
    outputs = model(batch)
    break

  0%|                                                                     | 0/17 [00:02<?, ?it/s]


In [18]:
outputs['x'].shape

torch.Size([101, 12, 120])

In [10]:
batch

<function __main__.music_collate_fn_wrap.<locals>.music_collate_fn(x)>