In [46]:
#IMPORT STATEMENTS
import numpy as np
import pandas as pd
import tensorflow as tf
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
!pip install datasets transformers loralib
import loralib as lora
from datasets import load_dataset
from transformers import TFAutoModel, AutoTokenizer
from torch.utils.data import Dataset, DataLoader



In [47]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForMaskedLM

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = TFAutoModel.from_pretrained("bert-base-uncased")
dataset = load_dataset("sst2")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT

In [48]:
#VARS
seed_value = 45

# 1. Set the seed for NumPy
np.random.seed(seed_value)

# 2. Set the seed for Python's built-in random module
import random
random.seed(seed_value)
torch.manual_seed(seed_value)
# 3. Set the seed for TensorFlow
tf.random.set_seed(seed_value)
rank = 128
BATCH_SIZE = 512
feature_size = 66  # BERT embedding size
output_size = 128
num_epoch = 3

In [49]:
#FUNCTIONS

def tokenization(example, max_length=66):
    return tokenizer(
        example["sentence"],
        truncation=True,
        padding='max_length',  # or 'do_not_pad'
        return_tensors='pt',
        max_length=max_length
    )

def order(inp):
    '''
    This function will group all the inputs of BERT
    into a single dictionary and then output it with
    labels.
    '''
    data = list(inp.values())
    return {
        'input_ids': data[1],
        'attention_mask': data[2],
        'token_type_ids': data[3]
    }, data[0]

In [50]:
tokenized_train = dataset['train'].map(tokenization, batched = True)
tokenized_train.set_format('tf',
                            columns=['input_ids', 'attention_mask', 'token_type_ids', 'label'])

tokenized_train = tf.data.Dataset.from_tensor_slices(tokenized_train[:])
tokenized_train = tokenized_train.batch(BATCH_SIZE).shuffle(1000)
tokenized_train = tokenized_train.map(order, num_parallel_calls=tf.data.AUTOTUNE)
# # model(tokenized_train)
# exe = tokenizer(dataset['train']['sentence'],padding=True,truncation=True,max_length = 512,return_tensors='pt')
# # exe['']

In [51]:
A = tf.Variable(tf.random.normal(shape=(feature_size, rank), stddev=0.01))
B = tf.Variable(tf.random.normal(shape=(rank, output_size), stddev=0.01))

# Define the LoRA model
class LoRAModel(nn.Module):
    def __init__(self, bert_model,rank):
        super(LoRAModel, self).__init__()
        # # Define your layers here
        # self.B = nn.Parameter(torch.zeros((66,rank)))
        # self.A = nn.Parameter(torch.zeros((rank,128)))
        # nn.init.normal_(self.A, mean=0, std=1)
        # self.B = nn.Parameter(torch.randn((66, rank)) * 0.01)
        # self.A = nn.Parameter(torch.randn((rank, 128)) * 0.01)
        self.lora_layer = lora.Linear(66, 728, r=rank)
        # self.layer1 = nn.Linear(in_features=64, out_features=rank)
        # self.layer2 = nn.Linear(in_features=rank, out_features=128)
        self.layer1 = nn.Linear(728,2)
        self.bert = bert_model
        self.bert.bert._trainable = False
        self.const = 1
        # self.batch_norm = nn.BatchNorm1d(output_size)

    def forward(self, x):
        # Forward pass
        # AB = torch.matmul(self.B, self.A)
        # print(AB)
        # x1 = torch.matmul(torch.from_numpy((x['input_ids']).numpy()).float(),AB)
        x1 = self.lora_layer(torch.from_numpy((x['input_ids']).numpy()).float())
        mean_x1 = x1.mean(dim=1, keepdim=True)
        std_x1 = x1.std(dim=1, keepdim=True)
        x1 = (x1 - mean_x1) / (std_x1 + 1e-8)  # Adding a small epsilon for numerical stability
        model_soln = torch.from_numpy(self.bert(x)[1].numpy())
        mean_model = model_soln.mean(dim=1, keepdim=True)
        std_model = model_soln.std(dim=1, keepdim=True)
        model_soln = (model_soln - mean_model)/(std_model + 1e-8)
        # print(x1)
        # x2 = self.layer2(x1)
        x_final = model_soln + x1*self.const
        # x_final = self.batch_norm(x_final)
        x3 = self.layer1(x_final)
        # print(x1)
        x3 = torch.exp(x3)
        my_sum = torch.sum(x3,dim=1, keepdim=True)
        return x3/my_sum

In [52]:
model.bert._trainable = False
exe = tokenizer(['Hello world', 'Hi how are you'], padding=True, truncation=True,max_length = 66,
                  return_tensors='tf')
LORA_model = LoRAModel(model,rank)
safe = LORA_model.bert(exe) #for future assert
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(LORA_model.parameters(), lr=2e-2)


In [53]:
LORA_model.train()
for epoch in range(num_epoch):
    itr = 0
    for batch in tokenized_train:
      sz = np.shape(batch[1])[0]
      label_arr = np.zeros((sz, 2))
      label_arr[np.arange(sz), batch[1]] = 1
      label_arr = torch.from_numpy(label_arr)
      optimizer.zero_grad()
      outputs = LORA_model.forward(batch[0])
      loss = criterion(outputs, label_arr)
      # print(outputs)
      loss.backward()
      # torch.nn.utils.clip_grad_norm_(LORA_model.A, max_norm=1.0)
      # torch.nn.utils.clip_grad_norm_(LORA_model.B, max_norm=1.0)
      optimizer.step()
      # for name, param in LORA_model.named_parameters():
      #   if torch.isnan(param).any():
      #     print(f'NaN found in parameter: {name}')
      itr += 1
      if(itr % 10 == 0):
        print(f'Epoch [{epoch + 1}/{num_epoch}], Loss: {loss.item()}')

RuntimeError: ignored

In [None]:
tokenized_test = dataset['validation'].map(tokenization, batched = True)
tokenized_test.set_format('tf',
                            columns=['input_ids', 'attention_mask', 'token_type_ids', 'label'])

tokenized_test = tf.data.Dataset.from_tensor_slices(tokenized_test[:])
tokenized_test = tokenized_test.batch(BATCH_SIZE).shuffle(1000)
tokenized_test = tokenized_test.map(order, num_parallel_calls=tf.data.AUTOTUNE)
LORA_model.eval()  # Set the model to evaluation mode
total_correct = 0
total_samples = 0

with torch.no_grad():  # Disable gradient computation during testing
    for batch in tokenized_test:
        soln = batch[1].numpy()
        outputs = LORA_model(batch[0])
        ans = np.zeros_like(soln)
        for i in range(np.shape(soln)[0]):
          if(outputs[i][0] < outputs[i][1]):
            ans[i] = 1
        for i in range(np.shape(soln)[0]):
          if(ans[i] == soln[i]):
            total_correct += 1
          total_samples += 1

accuracy = total_correct / total_samples
print(f'Test Accuracy: {accuracy * 100:.2f}%')


In [None]:
trainable_params_count = 0

# Assuming model.x() returns a list of trainable weights
for weight_tensor in model.weights:
    trainable_params_count += tf.reduce_prod(tf.shape(weight_tensor)).numpy()

print(f"Number of trainable parameters: {trainable_params_count}")
num_params = trainable_params_count + 728 * 2

print(f"Number of trainable parameters in BERT model for classification: {num_params}")

LORA_params = 66 * rank + rank * 728 + 728 * 2

print(f"Number of trainable parameters in LoRA implemented model for classification: {LORA_params}")

percent = (LORA_params/num_params) * 100

print(f"Percentage: {percent:.2f}")


In [None]:
# exe = tokenizer(["Hello World I hope you are good"],padding=True,truncation=True,max_length = 512,return_tensors='tf')
# LORA_model = LoRAModel(model,rank)
# safe = LORA_model.bert(exe) #for future assert
# dir(safe)