# IA3

## I. Presentation

Infused Adapter by Inhibiting and Amplifying Inner Activations

In [5]:
from IPython.display import Image
Image(url='https://pbs.twimg.com/media/FShJxGtXEAAE09o.jpg:large', width=400)

## II. Example

In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # or "0,1" for multiple GPUs
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
# prepare task, we use the same example as butfit

import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, DataCollatorForSeq2Seq, TrainingArguments, Trainer

ckp_data = "yahma/alpaca-cleaned"
ckp = "bigscience/bloomz-1b1"

# load dataset
data = load_dataset(ckp_data, split="train[:1000]")

# load tokenizer
tokenizer = AutoTokenizer.from_pretrained(ckp)

# process data
def process(sample):

    MAX_LEN = 256

    human = tokenizer("Human: " + "\n".join([sample["instruction"], sample["input"]]).strip() + "\n\nAssistant: ")
    ml = tokenizer(sample["output"] + tokenizer.eos_token)

    input_ids = human["input_ids"] + ml["input_ids"]
    attention_mask = human["attention_mask"] + ml["attention_mask"]
    labels = [-100] * len(human["input_ids"]) + ml["input_ids"]

    if len(input_ids) > MAX_LEN:

        input_ids = input_ids[:MAX_LEN]
        attention_mask = attention_mask[:MAX_LEN]
        labels = labels[:MAX_LEN]

    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels
    }

# tokenize dataset
tokenized_data = data.map(process, remove_columns=data.column_names)

# load model
model = AutoModelForCausalLM.from_pretrained(ckp, low_cpu_mem_usage=True)

# send to device
if torch.cuda.is_available():
    model = model.to("cuda:0")

2024-06-25 16:45:59.165432: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-25 16:45:59.165490: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-25 16:45:59.167726: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-06-25 16:45:59.179693: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## III. IA3

In [3]:
from peft import IA3Config, TaskType, get_peft_model

config = IA3Config(task_type=TaskType.CAUSAL_LM)
config

IA3Config(peft_type=<PeftType.IA3: 'IA3'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type=<TaskType.CAUSAL_LM: 'CAUSAL_LM'>, inference_mode=False, target_modules=None, feedforward_modules=None, fan_in_fan_out=False, modules_to_save=None, init_ia3_weights=True)

In [4]:
peft_model = get_peft_model(model, config)
peft_model

PeftModelForCausalLM(
  (base_model): IA3Model(
    (model): BloomForCausalLM(
      (transformer): BloomModel(
        (word_embeddings): Embedding(250880, 1536)
        (word_embeddings_layernorm): LayerNorm((1536,), eps=1e-05, elementwise_affine=True)
        (h): ModuleList(
          (0-23): 24 x BloomBlock(
            (input_layernorm): LayerNorm((1536,), eps=1e-05, elementwise_affine=True)
            (self_attention): BloomAttention(
              (query_key_value): Linear(
                (base_layer): Linear(in_features=1536, out_features=4608, bias=True)
                (ia3_l): ParameterDict(  (default): Parameter containing: [torch.cuda.FloatTensor of size 4608x1 (cuda:0)])
              )
              (dense): Linear(in_features=1536, out_features=1536, bias=True)
              (attention_dropout): Dropout(p=0.0, inplace=False)
            )
            (post_attention_layernorm): LayerNorm((1536,), eps=1e-05, elementwise_affine=True)
            (mlp): BloomMLP(
      

In [5]:
peft_model.print_trainable_parameters()

trainable params: 258,048 || all params: 1,065,572,352 || trainable%: 0.0242


In [6]:
# in the article, they use lr=3e-3
# with this lr, the training converges more effectively

# define training arguments
args = TrainingArguments(
    output_dir="../tmp/checkpoint",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    logging_steps=50,
    num_train_epochs=3,
    learning_rate=3e-3
)

# define trainer
trainer = Trainer(
    model=peft_model,
    args=args,
    train_dataset=tokenized_data,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True)
)

# train
trainer.train()

Step,Training Loss
50,2.2402
100,1.9145
150,1.8771
200,1.9345
250,1.7417
300,1.7664
350,1.747


TrainOutput(global_step=375, training_loss=1.874089599609375, metrics={'train_runtime': 344.1672, 'train_samples_per_second': 8.717, 'train_steps_per_second': 1.09, 'total_flos': 1670689992499200.0, 'train_loss': 1.874089599609375, 'epoch': 3.0})

In [12]:
def generate(_model, _tokenizer, instruction, input=None):

    prompt = "human: {}\n{}".format(instruction, input).strip() + "\n\nAssistant: "
    inputs = _tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].to(_model.device)

    generation_output = _model.generate(
        input_ids=input_ids,
        output_scores=True,
        max_new_tokens=256, 
        do_sample=True
    )
    for seq in generation_output:
        output = tokenizer.decode(seq, skip_special_tokens=True)
        print(output)

generate(peft_model, tokenizer, "List five steps for comparing two products.")

human: List five steps for comparing two products.
None

Assistant: 1. Know the product characteristics and benefits. 
2. Know the characteristics and benefits of the opposite (i.e. contrast).
3. Identify comparable (same source, product, and process) characteristics and benefits. 4. Compare the benefits. 5. Evaluate the pros and cons of the comparison.
