In [1]:
!pip install transformers --quiet
!pip install accelerate --quiet
!pip install torch --quiet

[0m

In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
model_path = "inception-mbzuai/jais-13b"

device = "cuda:0" if torch.cuda.is_available() else "cpu"
print("Available device: ", device)

Available device:  cuda:0


In [2]:
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", trust_remote_code=True, offload_folder="offload")

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

In [3]:
for param in model.parameters():
    param.requires_grad = False

In [4]:
def get_response(text,tokenizer=tokenizer,model=model):
    input_ids = tokenizer(text, return_tensors="pt").input_ids
    inputs = input_ids.to(device)
    input_len = inputs.shape[-1]

    generate_ids = model.generate(
        inputs,
        top_p=0.9,
        temperature=0.3,
        max_length=200-input_len,
        min_length=input_len + 4,
        repetition_penalty=1.2,
        do_sample=True,
    )
    response = tokenizer.batch_decode(
        generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
    )[0]
    return response

## Basic Model inference

In [5]:
import time
# English prediction
text= "عاصمة دولة الإمارات العربية المتحدة ه"
start = time.time()
arabic_response = get_response(text)
print(arabic_response)
print("Arabic Response inference time: ", round(time.time()-start,4), " Seconds\n")

print("="*30)

# Arabic prediction
start = time.time()
text = "The capital of UAE is "
English_response = get_response(text)
print(English_response)
print("English Response inference time: ", round(time.time()-start,4), " Seconds")

عاصمة دولة الإمارات العربية المتحدة هيمدينة أبوظبي, وهي أكبر مدينة في البلاد. تقع على جزيرة أبو ظبي وتحيط بها المياه الزرقاء للخليج العربي. المدينة هي موطن للعديد من المعالم السياحية الشهيرة مثل مسجد الشيخ زايد الكبير ومتحف اللوفر وقصر الوطن وغيرها الكثير.
Arabic Response inference time:  4.8393  Seconds

The capital of UAE is  Abu Dhabi. The population was estimated at 8,634,000 in 2015 and the country's total area is 83,600 km2 (32,100 sq mi).

Abu Dhabi has a temperate climate with hot summers from April to October; winters are cool but dry between November and March. Annual rainfall averages around 100 mm (4 inches) per year.

The economy of Abu Dhabi depends on oil production which accounts for about 90% of its gross domestic product (GDP), as well as tourism and real estate development. Its main exports include petroleum products such as crude oil, natural gas condensates, liquefied natural gas, refined petroleum products including gasoline, diesel fuel, kerosene, jet fuels, lubr

# RBF

In [6]:
from rbf_layer import RBFLayer

## Model Arch

In [7]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"Trainable model parameters: {trainable_model_params}\nAll model parameters: {all_model_params}\nPercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"

print(print_number_of_trainable_model_parameters(model))

Trainable model parameters: 0
All model parameters: 13020811960
Percentage of trainable model parameters: 0.00%


# Layer replacement

In [8]:
for k, m in model.named_modules():
  if type(m).__name__ == 'Linear':
    print(k,m)
    print(k,m.parameters)
  else:
    pass

lm_head Linear(in_features=5120, out_features=84992, bias=False)
lm_head <bound method Module.parameters of Linear(in_features=5120, out_features=84992, bias=False)>


In [9]:
for param in model.parameters():
    param.requires_grad = False

In [10]:
def l_norm(x, p=2):
    return torch.norm(x, p=p, dim=-1)


# Gaussian RBF
def rbf_gaussian(x):
    return (-x.pow(2)).exp()
    
for name, layer in model.named_modules():
    if isinstance(layer, torch.nn.Linear):
      model._modules[name] = RBFLayer(in_features_dim = 5120, 
                                      out_features_dim = 84992, 
                                      num_kernels = 5, 
                                      radial_function=rbf_gaussian,
                                     norm_function=l_norm)

In [11]:
for k, m in model.named_modules():
  if type(m).__name__ == 'Linear':
    print(k,m)
    print(k,m.parameters)
  else:
    pass

In [12]:
print(print_number_of_trainable_model_parameters(model))

Trainable model parameters: 450565
All model parameters: 13021262525
Percentage of trainable model parameters: 0.00%


In [13]:
model.save_pretrained("updated_layer_model")

In [14]:
tokenizer = AutoTokenizer.from_pretrained(model_path)
new_model = AutoModelForCausalLM.from_pretrained("updated_layer_model", 
                                                 device_map="auto", 
                                                 trust_remote_code=True, 
                                                 offload_folder="offload")

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

Some weights of the model checkpoint at updated_layer_model were not used when initializing JAISLMHeadModel: ['lm_head.weights', 'lm_head.kernels_centers', 'lm_head.log_shapes']
- This IS expected if you are initializing JAISLMHeadModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing JAISLMHeadModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [17]:
for param in new_model.parameters():
    param.requires_grad = False

print(print_number_of_trainable_model_parameters(new_model))

Trainable model parameters: 0
All model parameters: 13020811960
Percentage of trainable model parameters: 0.00%


In [18]:
def get_response(text,tokenizer=tokenizer,model=model):
    input_ids = tokenizer(text, return_tensors="pt").input_ids
    inputs = input_ids.to(device)
    input_len = inputs.shape[-1]

    generate_ids = new_model.generate(
        inputs,
        top_p=0.9,
        temperature=0.3,
        max_length=200-input_len,
        min_length=input_len + 4,
        repetition_penalty=1.2,
        do_sample=True,
    )
    response = tokenizer.batch_decode(
        generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
    )[0]
    return response

In [21]:
del model

In [22]:
import time
# English prediction
text= "عاصمة دولة الإمارات العربية المتحدة ه"
start = time.time()
arabic_response = get_response(text)
print(arabic_response)
print("Arabic Response inference time: ", round(time.time()-start,4), " Seconds\n")

print("="*30)

# Arabic prediction
start = time.time()
text = "The capital of UAE is "
English_response = get_response(text)
print(English_response)
print("English Response inference time: ", round(time.time()-start,4), " Seconds")

عاصمة دولة الإمارات العربية المتحدة هوجمت من قبل الإرهابيين.
Arabic Response inference time:  3.7902  Seconds

The capital of UAE is  Abu Dhabi.

History 

Abu Dhabi was founded in the 18th century by Al-Nahyan tribe, a branch of Bani Yas tribe which originated from Oman and settled on the coasts of what would become known as United Arab Emirates (UAE). The city's name derives from its location at the foot of Mount Dhafra, meaning "the abode". In 1761, Sheikh Zayed bin Khalifa Al Nahyan established his residence here after he moved away from Dubai to escape an attack by pirates who had been harassing him for years. He then began to build up the town with the help of other members of the ruling family. By 1820, it became one of the most important ports in the region due to its strategic position between India and Africa; this attracted traders from all over the world including Britain, France, Germany, Portugal
English Response inference time:  101.2503  Seconds


# Complete Methodolgy



1.   Read Model
2.   Freeze All layers
3.   Put my Layer (RBF)
4.   Fine tune only that layer on summarization
5.   Fine tune the original
6.   Compare accuracy (ROUGE) and training time

