In [None]:
!pip install transformers --quiet
!pip install accelerate --quiet
!pip install torch --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m59.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m31.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m105.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m80.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m295.0/295.0 kB[0m [31m32.2 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: Operation cancelled by user[0m[31m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m258.1/258.1 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Checkpoint identifier handed to both from_pretrained calls below.
model_path = "inception-mbzuai/jais-13b"

# Input tensors are moved to this device before generation.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_path)

# NOTE(review): trust_remote_code=True lets the checkpoint supply its own
# modelling code; only use it with a source you trust.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    trust_remote_code=True,
    offload_folder="offload",
)

In [None]:
def get_response(text, tokenizer=tokenizer, model=model):
    """Sample a continuation of ``text`` from ``model`` and return it as a string.

    Args:
        text: Prompt to continue.
        tokenizer: Tokenizer used to encode the prompt and decode the result;
            defaults to the notebook-level ``tokenizer``.
        model: Model whose ``generate`` method is called; defaults to the
            notebook-level ``model``.

    Returns:
        The first decoded sequence returned by ``generate``, with special
        tokens skipped.
    """
    input_ids = tokenizer(text, return_tensors="pt").input_ids
    inputs = input_ids.to(device)
    input_len = inputs.shape[-1]

    # Both `min_length` and `max_length` are totals that include the prompt
    # tokens. The cap is therefore the 200-token budget itself, not
    # `200 - input_len` (which subtracts the prompt twice and drops below
    # `min_length` for long prompts). Keep the cap at or above the floor so
    # the two constraints can never contradict each other.
    total_token_budget = 200
    min_total_len = input_len + 4
    max_total_len = max(total_token_budget, min_total_len)

    generate_ids = model.generate(
        inputs,
        top_p=0.9,
        temperature=0.3,
        max_length=max_total_len,
        min_length=min_total_len,
        repetition_penalty=1.2,
        do_sample=True,
    )
    response = tokenizer.batch_decode(
        generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
    )[0]
    return response

## Basic Model inference

In [None]:
import time
# Arabic prediction
text = "عاصمة دولة الإمارات العربية المتحدة ه"
# perf_counter is a monotonic clock intended for measuring elapsed time.
start = time.perf_counter()
arabic_response = get_response(text)
print(arabic_response)
print("Arabic Response inference time: ", round(time.perf_counter()-start,4), " Seconds")
print("="*30)

# English prediction
text = "The capital of UAE is "
start = time.perf_counter()
English_response = get_response(text)
print(English_response)
print("English Response inference time: ", round(time.perf_counter()-start,4), " Seconds")

عاصمة دولة الإمارات العربية المتحدة هيمدينة أبوظبي, وهي مدينة عصرية تتميز بناطحات السحاب الحديثة. أما دبي فهي مركز الأعمال في البلاد, كما أنها تضم أعلى مبنى في العالم وهو برج خليفة الذي يبلغ ارتفاعه 828 مترا.
Arabic Response inference time:  398.2313  Seconds
The capital of UAE is  Abu Dhabi.

Geography and climate 

Abu Dhabi has a hot desert climate (Köppen: BWh) with long, very dry summers and short winters that are warm to moderately cool; the average annual temperature in Abu Dhabi city is. The hottest month on record was June 2018 when the mean maximum daily air temperature reached, while the coldest month was January 2019 at. Temperatures above  occur for an average of 15 days per year, while temperatures below freezing only happen once every three years or so. Annual rainfall averages around  but can be as low as zero during droughts such as the one which occurred between 2007–2009. Most precipitation occurs from October through March, averaging about  annually. In summer, dayt

## Model Architecture

In [None]:
def print_number_of_trainable_model_parameters(model):
    """Build a report of how many of ``model``'s parameters are trainable.

    Despite its name, this function does not print anything; it returns the
    report so the caller decides how to display it.

    Args:
        model: Object exposing ``named_parameters()`` whose items provide
            ``numel()`` and ``requires_grad``.

    Returns:
        A three-line string with the trainable parameter count, the total
        parameter count, and the trainable share as a percentage with two
        decimals. A model without parameters reports ``0.00%``.
    """
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        param_count = param.numel()
        all_model_params += param_count
        if param.requires_grad:
            trainable_model_params += param_count

    # A parameterless model would otherwise divide by zero.
    if all_model_params:
        trainable_percentage = 100 * trainable_model_params / all_model_params
    else:
        trainable_percentage = 0.0

    return (
        f"Trainable model parameters: {trainable_model_params}\n"
        f"All model parameters: {all_model_params}\n"
        f"Percentage of trainable model parameters: {trainable_percentage:.2f}%"
    )

# Show the trainable / total parameter report for the model loaded above.
print(print_number_of_trainable_model_parameters(model))

Trainable model parameters: 13020811960
All model parameters: 13020811960
Percentage of trainable model parameters: 100.00%


# Layer replacement

In [None]:
# List every torch.nn.Linear submodule with its parameter names and shapes.
for k, m in model.named_modules():
  if isinstance(m, torch.nn.Linear):
    print(k, m)
    # named_parameters() must be called; printing `m.parameters` only shows
    # the bound method object.
    print(k, [(p_name, tuple(p.shape)) for p_name, p in m.named_parameters()])

lm_head Linear(in_features=5120, out_features=84992, bias=False)
lm_head <bound method Module.parameters of Linear(in_features=5120, out_features=84992, bias=False)>


In [None]:
# List every torch.nn.Linear submodule with its parameter names and shapes.
for k, m in model.named_modules():
  if isinstance(m, torch.nn.Linear):
    print(k, m)
    # named_parameters() must be called; printing `m.parameters` only shows
    # the bound method object.
    print(k, [(p_name, tuple(p.shape)) for p_name, p in m.named_parameters()])

lm_head Linear(in_features=5120, out_features=84992, bias=False)
lm_head <bound method Module.parameters of Linear(in_features=5120, out_features=84992, bias=False)>


In [None]:
# Replace every torch.nn.Linear submodule with a freshly initialised Linear of
# the same shape.

# Snapshot the matches first so no module is swapped while named_modules() is
# still walking the module tree.
linear_layers = [
    (name, layer)
    for name, layer in model.named_modules()
    if isinstance(layer, torch.nn.Linear)
]

for name, layer in linear_layers:
    # named_modules() yields dotted paths for nested modules; the new layer has
    # to be attached to its direct parent, not stored under the dotted key on
    # the root module.
    parent_name, _, child_name = name.rpartition(".")
    parent = model.get_submodule(parent_name) if parent_name else model

    # Take the dimensions from the layer being replaced instead of hard-coding them.
    # NOTE(review): the new layer is created on PyTorch's default device, while
    # the model was loaded with device_map="auto" - confirm its placement
    # before running a forward pass.
    replacement = torch.nn.Linear(
        layer.in_features,
        layer.out_features,
        bias=layer.bias is not None,
        dtype=layer.weight.dtype,
    )
    setattr(parent, child_name, replacement)

In [None]:
# Mark every parameter of the model as trainable.
for weight in model.parameters():
    weight.requires_grad_(True)

In [None]:
# Show the parameter report again, after the Linear layers were replaced and
# requires_grad was set to True on every parameter.
print(print_number_of_trainable_model_parameters(model))

Trainable model parameters: 13456055992
All model parameters: 13456055992
Percentage of trainable model parameters: 100.00%


# Complete Methodology



1.   Load the model
2.   Freeze all layers
3.   Insert my custom layer (RBF)
4.   Fine-tune only that layer on summarization
5.   Fine-tune the original model
6.   Compare accuracy (ROUGE) and training time

