In [1]:
! nvidia-smi

Mon Oct  3 17:31:41 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.43.04    Driver Version: 515.43.04    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:03:00.0 Off |                  N/A |
| 31%   44C    P0   109W / 350W |      0MiB / 24576MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA GeForce ...  Off  | 00000000:04:00.0 Off |                  N/A |
| 30%   41C    P0   113W / 350W |      0MiB / 24576MiB |      0%      Defaul

In [2]:
import os, random

# Index of the single GPU this process is allowed to see.
cuda_device = 3
# The environment variable only accepts text, so the index is formatted as a
# string before being exported.
os.environ["CUDA_VISIBLE_DEVICES"] = f"{cuda_device}"

In [3]:
# "Basic" py library
import pandas as pd
import numpy as np

# visualisation
from matplotlib import pyplot as plt # basic visualisation in py
import seaborn as sns # great to interact with dataframes
import plotly.express as px # powerfull for interactive figures
from tqdm import tqdm  # generats progress bar to controll steps

import torch # Pytorch, Meta's library for ML
import torch.nn as nn # torch module for neural networks 

import transformers # HuggingFace library to use pretrained models
from transformers import BertTokenizerFast, BertForSequenceClassification
from transformers import Trainer, TrainingArguments

from datasets import load_metric
metric = load_metric('accuracy')

  metric = load_metric('accuracy')


In [4]:
def set_seed(seed: int):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random``, ``numpy`` and ``torch`` (CPU and all CUDA
    devices). TensorFlow is seeded as well, but only when it is installed.

    Args:
        seed (int): The seed to set.
    """
    # Local import: only needed to probe for an optional TensorFlow install.
    import importlib.util

    random.seed(seed)
    np.random.seed(seed)

    # torch is imported unconditionally by this notebook, so no availability
    # check is needed before seeding it.
    torch.manual_seed(seed)
    # Safe to call even when CUDA is not available.
    torch.cuda.manual_seed_all(seed)

    # Probe for TensorFlow without importing it unless it is really there.
    if importlib.util.find_spec("tensorflow") is not None:
        import tensorflow as tf

        tf.random.set_seed(seed)

def compute_metrics(eval_pred):
    """Compute classification accuracy for a ``Trainer`` evaluation step.

    Args:
        eval_pred: pair ``(predictions, labels)``. ``predictions`` holds one
            row of class scores per example; ``labels`` holds the reference
            class index of each example.

    Returns:
        dict: ``{"accuracy": float}``, the fraction of examples whose
        highest-scoring class equals the reference label.
    """
    predictions, labels = eval_pred
    # Highest-scoring class per example.
    predicted_classes = np.argmax(predictions, axis=1)
    # Accuracy is computed directly with numpy so this function does not rely
    # on a globally loaded metric object.
    matches = predicted_classes == np.asarray(labels)
    return {"accuracy": float(np.mean(matches))}

In [5]:
# Plot appearance: "talk" plotting context on a white-grid style
sns.set(context="talk")
sns.set_style(style="whitegrid")

# Set the random seed
seed = 31
set_seed(seed)

# Table of Contents

Click on a title to jump to the corresponding section.

---------
[Loading Data](#intro)

[Tuning classification layer](#cls)

[Frozen + Logistic Regression](#sigmoid)

[Frozen + FFN](#ffnn)

## Loading data<a id='intro'></a>

In [6]:
# Read the dream-report table from its CSV file into a DataFrame
dream_records = pd.read_csv(
    filepath_or_buffer="Reports_DreamerEmotions_PCACho_tsneCho_KMCluster2_KMCluster6_2WSA_6WSA.csv",
)

# Maps each emotion code to its full emotion name; "Missing" maps to itself.
Coding_emotions = dict(
    AN="Anger",
    AP="Apprehension",
    SD="Sadness",
    CO="Confusion",
    HA="Happiness",
    Missing="Missing",
)

In [33]:
# Scratch inspection cell: displays the current contents of `pstv_rprts`.
# NOTE(review): in the visible cells `pstv_rprts` is only assigned inside the
# collection/emotion loop of a later cell, so this cell presumably relies on
# that loop having been run beforehand - confirm, or move it below the loop.
# idx_end = int(len(pstv_rprts)*.7)
# len(pstv_rprts), idx_end, len(pstv_rprts[:idx_end]), len(pstv_rprts[idx_end:])
pstv_rprts

["I was, living here in Cleveland in a fraternity house, but it was located in a downtown section. My fraternity brothers were fellows I used to know back home except one who I remember as being a fellow I know from Cleveland. I was upstairs cleaning up around. Then I came downstairs and found a lot of colored people there. Right away I noticed they had lipstick on, and this struck me as being peculiar. I couldn't help staring; once when I noticed that I was watching them, they attempted to hide their faces, some did at least. There were a bunch of dogs with them also. The dogs had fleas or something and were spreading an epidemic of disease in the neighborhood. I realized that I had to get serum to stop the epidemic. I didn't know where to get the serum. I had no transportation and went back to the fraternity house, but no one had any transportation. One fellow lent me his bicycle, so I started. I went about one block and saw a buddy of mine driving a bus, so I laid my bicycle down an

In [35]:
# Build balanced positive/negative train/test pairs for every
# (collection, emotion) combination.
#
# NOTE: `train_ratio` is defined in the hyper-parameter cell of the
# "Supervised Learning" section; that cell must have been run first.
# NOTE(review): every iteration overwrites pst_Trn / ngt_Trn / pst_tst /
# ngt_tst, so only the last combination survives the loop - presumably the
# splits are meant to be consumed or accumulated per combination; confirm.
for cllctn in tqdm(set(dream_records["collection"])):
    in_collection = dream_records["collection"].isin([cllctn])

    for emtn in set(dream_records[dream_records["# Emotions"].isin([1])]["Emotions"]):
        has_emotion = dream_records["Emotions"].isin([emtn])

        # Reports with this specific collection-emotion combination
        pstv_rprts = dream_records[in_collection & has_emotion]["report"].to_list()

        # Reports from the same collection but with a different emotion.
        # Never ask for more rows than the pool holds, otherwise `sample`
        # raises.
        ngtv_pool = dream_records[in_collection & ~has_emotion]
        n_ngtv = min(len(pstv_rprts), len(ngtv_pool))
        ngtv_rprts = ngtv_pool.sample(n_ngtv)["report"].to_list()

        random.shuffle(pstv_rprts)
        random.shuffle(ngtv_rprts)

        # The split index is computed from the lists of THIS combination, not
        # from whatever `pstv_rprts` held before the loop started.
        idx_end = int(len(pstv_rprts) * train_ratio)
        ngt_idx_end = int(len(ngtv_rprts) * train_ratio)

        # Pair every report with its label directly, so the number of labels
        # always matches the number of reports in the split.
        pst_Trn = [(rprt, 1) for rprt in pstv_rprts[:idx_end]]
        ngt_Trn = [(rprt, 0) for rprt in ngtv_rprts[:ngt_idx_end]]

        pst_tst = [(rprt, 1) for rprt in pstv_rprts[idx_end:]]
        ngt_tst = [(rprt, 0) for rprt in ngtv_rprts[ngt_idx_end:]]
        
#         X_train, X_test = pstv_rprts[:idx_end], pstv_rprts[idx_end:]

100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 6/6 [00:00<00:00, 176.11it/s]


### Supervised Learning<a id='train'></a>
#### Frozen Model + Classification Layer<a id='cls'></a>

In [34]:
# Pretrained checkpoint to load
model_name = "bert-large-cased"
# Maximum sequence length passed to the tokenizer
max_length = 512
# Device the model is moved to
device = "cuda"

# Training settings
epochs = 5
batch_size = 16
# Fraction used to compute the train/test split index
train_ratio = 0.7

# When True, every parameter outside the classifier is frozen
froze_model_layer = True

In [None]:
# Tokenize the training texts first, then the test texts, with identical
# settings: truncate to `max_length` and pad within each call.
# NOTE(review): `X_train`, `X_test`, `y_train`, `y_test` and `HF_Dataset` are
# not defined in the visible cells - confirm where they come from.
train_encodings, test_encodings = (
    tokenizer(list(texts), truncation=True, padding=True, max_length=max_length)
    for texts in (X_train, X_test)
)

# Wrap the tokenized texts and their labels into dataset objects
train_dataset = HF_Dataset(train_encodings, y_train)
test_dataset = HF_Dataset(test_encodings, y_test)

In [12]:
# Load the tokenizer that belongs to the checkpoint. No `do_lower_case`
# override is passed, so the checkpoint's own casing setting applies: forcing
# lower-casing on a cased checkpoint such as "bert-large-cased" would feed the
# model text that no longer matches its case-sensitive vocabulary.
tokenizer       = BertTokenizerFast.from_pretrained(model_name)
tokenizer_vocab = list(tokenizer.get_vocab().keys())

# Sequence classifier with a 2-label head, moved to the configured device
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2).to(device)

# Freeze every weight except those of the classifier, so that only parameters
# whose name contains "classifier" keep receiving gradients
if froze_model_layer:
    for name, param in model.named_parameters():
        if "classifier" not in name:
            param.requires_grad = False

Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

In [None]:
training_args = TrainingArguments(
    # --- where outputs and logs are written ---
    output_dir='./results',
    logging_dir='./logs',

    # --- optimisation schedule ---
    num_train_epochs=epochs,                 # total number of training epochs
    warmup_steps=100,                        # warmup steps of the learning-rate scheduler
    weight_decay=0.01,                       # strength of weight decay

    # --- batching (per device) ---
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,

    # --- logging / evaluation ---
    logging_steps=400,                       # log every `logging_steps` steps
    evaluation_strategy="no",                # no evaluation while training
    load_best_model_at_end=False,            # keep the final weights rather than reloading the best
)

trainer = Trainer(
    model=model,                          # model to be trained
    args=training_args,                   # training arguments defined above
    train_dataset=train_dataset,          # training split
    eval_dataset=test_dataset,            # split used by `trainer.evaluate()`
    compute_metrics=compute_metrics,      # callback computing the reported metrics
)

In [None]:
# Evaluate the model on the evaluation dataset before training (sanity check)
trainer.evaluate()

In [None]:
# Run the training loop configured by `training_args` on `train_dataset`
trainer.train()

#### Frozen Model + Logistic regression (sigmoid layer)<a id='sigmoid'></a>
We will use the embeddings extracted in the previous notebook as "frozen" input vectors – i.e. computational encodings of each report.

Then, a logistic regression model is trained for each label to predict the extent to which that label is present in a given encoding. 

In [None]:
# Load the saved encodings array from its .npy file
T_encoding = np.load('BERT-Large-Cased_dream_records.npy')

In [7]:
class LogisticRegression(torch.nn.Module):
    """Trainable logistic regression: one linear layer followed by a sigmoid.

    Initialisation args:
        input_dim: int, number of features of each input vector.
        output_dim: int, number of sigmoid outputs produced per input.

    Forward args:
        x: tensor whose last dimension has size ``input_dim``.

    Output:
        Tensor of sigmoid activations whose last dimension has size
        ``output_dim``; it carries gradients for the backward pass.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.linear = torch.nn.Linear(input_dim, output_dim)

    def forward(self, x):
        # Linear projection squashed element-wise by the sigmoid.
        return self.linear(x).sigmoid()


In [41]:
# Training instances (the encodings) and their labels, as float tensors.
# NOTE(review): `angr` and `debug` are not defined in the visible cells; the
# labels are sliced with `[:debug]` while the encodings are not, and `zip`
# stops at the shorter of the two - confirm this truncation is intended.
X_train = torch.Tensor(T_encoding)
y_train = torch.Tensor(angr[:debug])

# One (encoding, label) pair per training example
train_dataset = [pair for pair in zip(X_train, y_train)]

In [59]:
# Shuffled mini-batches of 64 (encoding, label) pairs
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)

In [31]:
# Size of one encoding vector, i.e. the model's input dimension
embedding_dim = T_encoding.shape[1]

# In PyTorch we need a model, a loss function to score its outputs, and an
# optimizer to update its parameters.
logti_angr = LogisticRegression(input_dim=embedding_dim, output_dim=1)  # one output per feature/emotion
criterion = nn.MSELoss()  # loss function, in this case mean squared error
optimizer = torch.optim.SGD(logti_angr.parameters(), lr=1e-3)

In [67]:
# Move the logistic-regression model to the "cuda" device
logti_angr = logti_angr.to("cuda")

In [49]:
# in pyTorch, we compute the loss at each step, with 1 or more batches (i.e., inputs)

In [69]:
epochs = 5

# Send every batch to whatever device the model currently lives on, instead of
# assuming a hard-coded one.
model_device = next(logti_angr.parameters()).device

for epoch in tqdm(range(epochs)):
    for seq, score in train_loader:
        seq = seq.to(model_device)
        score = score.to(model_device)

        optimizer.zero_grad()
        outputs = logti_angr(seq)
        # The model was built with a single output unit, so `outputs` carries a
        # trailing dimension of size 1. Give the target the same shape so the
        # loss compares element by element rather than broadcasting a
        # mismatched pair of shapes.
        loss = criterion(outputs, score.reshape(outputs.shape))
        loss.backward()
        optimizer.step()

    # TODO: evaluate accuracy on a held-out loader here once one exists.
    # Reports the loss of the last batch of the epoch.
    print("Iteration: {} Loss: {:.4f}".format(epoch, loss.item()))

100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 5/5 [00:00<00:00, 68.94it/s]

Iteration: 0 Loss: 0.0233
Iteration: 1 Loss: 0.0187
Iteration: 2 Loss: 0.0160
Iteration: 3 Loss: 0.0151
Iteration: 4 Loss: 0.0161





#### Frozen Model + Feed-Forward Neural Network (FFNN)<a id='ffnn'></a>
We will use the embeddings extracted in the previous notebook as "frozen" input vectors – i.e. computational encodings of each report.

Then, a one-hidden-layer feed forward architecture will classify the input, and predict the score of the given label, in terms of "output cell".

In [None]:
class FF_Network(nn.Module):
    """A trainable one-hidden-layer feed-forward network built with PyTorch.

    Initialisation args:
        input_dim: int, number of features of each input vector.
        hidden_dimensions: int, number of units in the hidden layer.
        output_classes: int, number of output units (one per class).

    Forward args:
        x: tensor of shape (batch_size, input_dim); the softmax is taken
            over dimension 1.

    Output:
        Tensor of shape (batch_size, output_classes) whose rows are softmax
        distributions; it carries gradients for the backward pass.
    """

    def __init__(self, input_dim, hidden_dimensions, output_classes):
        super().__init__()

        # Linear maps: input -> hidden, then hidden -> output units
        self.hidden = nn.Linear(input_dim, hidden_dimensions)
        self.output = nn.Linear(hidden_dimensions, output_classes)

        # Non-linearities: sigmoid after the hidden layer, softmax on the output
        self.sigmoid = nn.Sigmoid()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        # hidden projection -> sigmoid -> output projection -> softmax
        hidden_activation = self.sigmoid(self.hidden(x))
        return self.softmax(self.output(hidden_activation))