# Import Dependencies 

In [21]:
import re

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.nn.utils import clip_grad_norm_
from torch.nn.utils.clip_grad import clip_grad_norm
from torch.utils.data import TensorDataset, DataLoader
from transformers import AdamW
from transformers import BertTokenizer
from transformers import get_linear_schedule_with_warmup

# Data Processing

In [22]:
# Load the playlist table; the first CSV column is used as the row index.
df = pd.read_csv(
    '../data/playlist_features_filtered.csv',
    sep=',',
    index_col=0,
)

In [23]:
# Tokenize every playlist name with the pretrained uncased BERT vocabulary.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Every name is padded or truncated to exactly 300 token ids, so all rows share one length.
# NOTE(review): 300 looks generous for playlist titles and drives the per-step cost;
# confirm against the real token-length distribution before changing it.
encoded_playlist_names = tokenizer(
    text=df["name"].tolist(),
    add_special_tokens=True,
    max_length=300,
    padding='max_length',
    truncation='longest_first',
    return_attention_mask=True,
)

# Keep the two fields the model consumes: the token ids and the padding mask.
attention_masks = encoded_playlist_names['attention_mask']
input_ids = encoded_playlist_names['input_ids']

In [24]:
# Regression targets: the thirteen audio-feature columns of the playlist table.
labels = df[
    [
        "acousticness", "danceability", "duration_ms", "energy",
        "instrumentalness", "key", "liveness", "loudness", "mode",
        "speechiness", "tempo", "time_signature", "valence",
    ]
].to_numpy()

# Convert the tokenizer output to numpy arrays so it can be split and indexed.
# `names` is a second array built from the same token ids.
names = np.array(input_ids)
input_ids = np.array(input_ids)
attention_masks = np.array(attention_masks)

print(labels.shape)

(7588, 13)


In [25]:
# Hold out 10% of the playlists for testing.
test_size = 0.1
seed = 42

# Split token ids, attention masks and labels in ONE call so the three arrays are
# guaranteed to share the same row permutation. Splitting the masks in a separate
# call only stays aligned while test_size and random_state are kept identical by hand.
# train_test_split returns a (train, test) pair per input array, in input order.
(train_inputs, test_inputs,
 train_masks, test_masks,
 train_labels, test_labels) = train_test_split(
    input_ids, attention_masks, labels,
    test_size=test_size,
    random_state=seed,
)

In [26]:
# Standardize the targets. The scaler is fitted on the training labels only and then
# applied to both splits, so no test-set statistics leak into training.
score_scaler = StandardScaler().fit(train_labels)

train_labels, test_labels = (
    score_scaler.transform(train_labels),
    score_scaler.transform(test_labels),
)

In [27]:
import torch
from torch.utils.data import TensorDataset, DataLoader

# Number of samples per batch; passed to create_dataloaders for both the train and test loaders.
batch_size = 13

def create_dataloaders(inputs, masks, labels, batch_size, shuffle=True):
    """Bundle token ids, attention masks and labels into a batched DataLoader.

    Args:
        inputs: Array-like of token ids, one row per sample.
        masks: Array-like of attention masks, one row per sample.
        labels: Array-like of targets, one row per sample.
        batch_size: Number of samples per batch.
        shuffle: Whether to reshuffle the samples on every pass. Defaults to
            True (the previous hard-coded behaviour); pass False for an
            evaluation loader so batches keep the order of the input rows.

    Returns:
        A DataLoader yielding ``(inputs, masks, labels)`` tensor triples.
    """
    # torch.tensor copies the data, so the loader does not alias the caller's arrays.
    input_tensor = torch.tensor(inputs)
    mask_tensor = torch.tensor(masks)
    labels_tensor = torch.tensor(labels)

    dataset = TensorDataset(input_tensor, mask_tensor, labels_tensor)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)


# Build one loader per split; both use the same batch size.
train_dataloader = create_dataloaders(
    inputs=train_inputs,
    masks=train_masks,
    labels=train_labels,
    batch_size=batch_size,
)

test_dataloader = create_dataloaders(
    inputs=test_inputs,
    masks=test_masks,
    labels=test_labels,
    batch_size=batch_size,
)

# Training

In [28]:
# Build the regression model from the local BertRegressor module.
# NOTE(review): drop_rate is presumably the dropout probability; confirm in BertRegressor.
from BertRegressor import BertRegressor

model = BertRegressor(drop_rate=0.2)

In [29]:
# Pick the GPU when one is available, otherwise fall back to the CPU, and move
# the model there before training.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

print(f"device: {device.type}")

device: cuda


In [30]:
# Optimisation setup: optimizer, epoch budget, learning-rate schedule and loss.

# Use PyTorch's AdamW. The AdamW class shipped with transformers is deprecated in
# favour of torch.optim.AdamW and is no longer available in recent releases.
# weight_decay is set to 0.0 explicitly: the transformers class defaulted to no
# decay, whereas torch.optim.AdamW defaults to 0.01.
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5, eps=1e-8, weight_decay=0.0)

# Number of full passes over the training set.
epochs = 5

# One scheduler step is taken per batch, so the schedule spans every batch of every epoch.
total_steps = len(train_dataloader) * epochs

# Linear decay of the learning rate to zero over total_steps, with no warm-up phase.
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps,
)

# Mean squared error between predictions and the standardized targets.
loss_fn = nn.MSELoss()



In [31]:
#training loop
from tqdm import tqdm

def train(model, optimizer, scheduler, loss_function, epochs,
          train_dataloader, device, clip_value = 2):
    """Run the training loop and return the trained model.

    Args:
        model: Module called as ``model(batch_inputs, batch_masks)``.
        optimizer: Optimizer stepped once per batch.
        scheduler: Learning-rate scheduler stepped once per batch, after the optimizer.
        loss_function: Callable taking ``(predictions, targets)`` and returning a scalar loss.
        epochs: Number of passes over ``train_dataloader``.
        train_dataloader: Iterable of ``(inputs, masks, labels)`` tensor triples.
        device: Device every batch tensor is moved to before the forward pass.
        clip_value: Maximum total gradient norm; larger gradients are rescaled to it.

    Returns:
        The same ``model`` object, updated in place.
    """
    for _ in tqdm(range(epochs)):
        # Re-enter training mode each epoch in case the caller switched the model to eval.
        model.train()

        for batch in tqdm(train_dataloader):
            batch_inputs, batch_masks, batch_labels = tuple(t.to(device) for t in batch)

            model.zero_grad()
            outputs = model(batch_inputs, batch_masks)
            # Labels are cast to float32 to match the model output dtype.
            # Both tensors are squeezed the same way, so their shapes stay aligned.
            loss = loss_function(outputs.squeeze(), batch_labels.squeeze().float())
            loss.backward()
            # In-place clipping; the non-underscore clip_grad_norm is deprecated and
            # emits a warning on every training run.
            clip_grad_norm_(model.parameters(), clip_value)
            optimizer.step()
            scheduler.step()

    return model

In [32]:
# Fine-tune the model for the configured number of epochs, clipping gradients to norm 2.
model = train(
    model=model,
    optimizer=optimizer,
    scheduler=scheduler,
    loss_function=loss_fn,
    epochs=epochs,
    train_dataloader=train_dataloader,
    device=device,
    clip_value=2,
)

  clip_grad_norm(model.parameters(), clip_value)
100%|██████████| 526/526 [03:08<00:00,  2.79it/s]
100%|██████████| 526/526 [03:09<00:00,  2.78it/s]
100%|██████████| 526/526 [03:09<00:00,  2.78it/s]
100%|██████████| 526/526 [03:09<00:00,  2.78it/s]
100%|██████████| 526/526 [03:09<00:00,  2.78it/s]
100%|██████████| 5/5 [15:45<00:00, 189.06s/it]


In [33]:
# Persist only the learned weights (the state dict), not the whole module object.
torch.save(
    obj=model.state_dict(),
    f='bert_model.pt',
)