In [1]:
!pip install transformers --quiet
!pip install accelerate --quiet
!pip install torch --quiet

[0m

In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
model_path = "inception-mbzuai/jais-13b"

device = "cuda:0" if torch.cuda.is_available() else "cpu"
print("Available device: ", device)

Available device:  cuda:0


In [2]:
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", trust_remote_code=True, offload_folder="offload")

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

In [3]:
def get_response(text,tokenizer=tokenizer,model=model):
    input_ids = tokenizer(text, return_tensors="pt").input_ids
    inputs = input_ids.to(device)
    input_len = inputs.shape[-1]

    generate_ids = model.generate(
        inputs,
        top_p=0.9,
        temperature=0.3,
        max_length=200-input_len,
        min_length=input_len + 4,
        repetition_penalty=1.2,
        do_sample=True,
    )
    response = tokenizer.batch_decode(
        generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
    )[0]
    return response

## Basic Model inference

In [6]:
import time
# English prediction
text= "عاصمة دولة الإمارات العربية المتحدة ه"
start = time.time()
arabic_response = get_response(text)
print(arabic_response)
print("Arabic Response inference time: ", round(time.time()-start,4), " Seconds\n")

print("="*30)

# Arabic prediction
start = time.time()
text = "The capital of UAE is "
English_response = get_response(text)
print(English_response)
print("English Response inference time: ", round(time.time()-start,4), " Seconds")

عاصمة دولة الإمارات العربية المتحدة هية مدينة أبو ظبي, وهي أكبر مدن الدولة من حيث المساحة وعدد السكان. تقع على جزيرة أبوظبي التي تحوي مقر رئاسة الدولة ورئاسة مجلس الوزراء والسفارات المعتمدة لدى دولة الإمارات. كما أنها عاصمة المنطقة الشرقية لإمارة أبوظبي والتي تشمل مدنا مثل العين والمنطقة الغربية (ليوا) وجزيرة دلما وجزر القرم الشرقي.
Arabic Response inference time:  5.4609  Seconds

The capital of UAE is  Abu Dhabi.
Abu Dhabi has a population of 1,738,000 (2016).
It was the largest city in the United Arab Emirates and its most populous metropolitan area until Dubai surpassed it on 31 December 2008 with 2,912,869 residents according to the latest census data from 2005. The two cities are now tied at 3 million each as per the 2010 Census results released by Statistics Centre Abu Dhabi.

Dubai's rapid growth over recent years means that it is expected to overtake Abu Dhabi again soon; however, this will not happen for several decades due to the fact that the majority of people living withi

## Model Arch

In [7]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"Trainable model parameters: {trainable_model_params}\nAll model parameters: {all_model_params}\nPercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"

print(print_number_of_trainable_model_parameters(model))

Trainable model parameters: 13020811960
All model parameters: 13020811960
Percentage of trainable model parameters: 100.00%


# Layer replacement

In [8]:
for k, m in model.named_modules():
  if type(m).__name__ == 'Linear':
    print(k,m)
    print(k,m.parameters)
  else:
    pass

lm_head Linear(in_features=5120, out_features=84992, bias=False)
lm_head <bound method Module.parameters of Linear(in_features=5120, out_features=84992, bias=False)>


In [9]:
for param in model.parameters():
    param.requires_grad = False

In [10]:
for name, layer in model.named_modules():
    if isinstance(layer, torch.nn.Linear):
      model._modules[name] = torch.nn.Linear(5120, 84992, bias = False)

In [11]:
for k, m in model.named_modules():
  if type(m).__name__ == 'Linear':
    print(k,m)
    print(k,m.parameters)
  else:
    pass

lm_head Linear(in_features=5120, out_features=84992, bias=False)
lm_head <bound method Module.parameters of Linear(in_features=5120, out_features=84992, bias=False)>


In [12]:
print(print_number_of_trainable_model_parameters(model))

Trainable model parameters: 435159040
All model parameters: 13455971000
Percentage of trainable model parameters: 3.23%


In [13]:
model.save_pretrained("updated_layer_model")

In [14]:
tokenizer = AutoTokenizer.from_pretrained(model_path)
new_model = AutoModelForCausalLM.from_pretrained("updated_layer_model", 
                                                 device_map="auto", 
                                                 trust_remote_code=True, 
                                                 offload_folder="offload")

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

In [15]:
def get_response(text,tokenizer=tokenizer,model=model):
    input_ids = tokenizer(text, return_tensors="pt").input_ids
    inputs = input_ids.to(device)
    input_len = inputs.shape[-1]

    generate_ids = new_model.generate(
        inputs,
        top_p=0.9,
        temperature=0.3,
        max_length=200-input_len,
        min_length=input_len + 4,
        repetition_penalty=1.2,
        do_sample=True,
    )
    response = tokenizer.batch_decode(
        generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
    )[0]
    return response

In [16]:
import time
# English prediction
text= "عاصمة دولة الإمارات العربية المتحدة ه"
start = time.time()
arabic_response = get_response(text)
print(arabic_response)
print("Arabic Response inference time: ", round(time.time()-start,4), " Seconds\n")

print("="*30)

# Arabic prediction
start = time.time()
text = "The capital of UAE is "
English_response = get_response(text)
print(English_response)
print("English Response inference time: ", round(time.time()-start,4), " Seconds")

عاصمة دولة الإمارات العربية المتحدة ه هي أبو ظبي.
Arabic Response inference time:  2.7155  Seconds

The capital of UAE is  Abu Dhabi, which has a population of 1.5 million people and the second largest city in the country after Dubai with 2.2 million residents (the total number of Emirati citizens living in both cities was estimated at around 3.3 million).

Abu Dhabi's economy relies heavily on oil exports; however, it also features an extensive free trade zone that allows for foreign companies to operate without having to pay taxes or import duties until they export their goods from the region. The government plans to diversify its economy away from petroleum by investing more than $200 billion over the next five years into various industries such as tourism, real estate development, finance, aerospace manufacturing, and telecommunications. In addition, the emirate hosts several international conferences each year including the Formula One Grand Prix held annually since 2009, the Worl

# Complete Methodolgy



1.   Read Model
2.   Freeze All layers
3.   Put my Layer (RBF)
4.   Fine tune only that layer on summarization
5.   Fine tune the original
6.   Compare accuracy (ROUGE) and training time

