In [24]:
!pip install -U peft bitsandbytes transformers accelerate



In [25]:
!pip install -U trl



trl -> transformer reinforcement learning

In [26]:
!pip install PyMuPDF



Non-Instruction Fine tuning Dataset

In [27]:
from datasets import load_dataset

ds = load_dataset("wikitext", "wikitext-103-v1", split="train")

In [28]:
ds

Dataset({
    features: ['text'],
    num_rows: 1801350
})

model selection

In [29]:
model_name = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"

In [30]:
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling

In [31]:
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [32]:
if tokenizer.pad_token is None:
  tokenizer.pad_token = tokenizer.eos_token

In [33]:
def tokenizer_func(examples):
  tokens = tokenizer(examples['text'], truncation=True, padding='max_length', max_length=512)
  tokens['labels'] = tokens['input_ids'].copy()
  return tokens

In [34]:
ds = ds.filter(lambda x: len(x["text"].strip()) > 0)

In [35]:
ds = ds.select(range(1500))

In [36]:
tokenized = ds.map(tokenizer_func, batched=True, remove_columns=ds.column_names)

In [37]:
training_args = TrainingArguments(
    output_dir = "./llama-wiki-domain",
    overwrite_output_dir = True,
    num_train_epochs = 2,
    per_device_train_batch_size = 1,
    save_steps = 100,
    save_total_limit = 2,
    logging_steps = 50,
    learning_rate = 2e-5,
    fp16 = False,
    report_to='none'
)

In [38]:
help(TrainingArguments)

Help on class TrainingArguments in module transformers.training_args:

class TrainingArguments(builtins.object)
 |
 |  TrainingArguments is the subset of the arguments we use in our example scripts **which relate to the training loop
 |  itself**.
 |
 |  Using [`HfArgumentParser`] we can turn this class into
 |  [argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the
 |  command line.
 |
 |  Parameters:
 |      output_dir (`str`, *optional*, defaults to `"trainer_output"`):
 |          The output directory where the model predictions and checkpoints will be written.
 |      overwrite_output_dir (`bool`, *optional*, defaults to `False`):
 |          If `True`, overwrite the content of the output directory. Use this to continue training if `output_dir`
 |          points to a checkpoint directory.
 |      do_train (`bool`, *optional*, defaults to `False`):
 |          Whether to run training or not. This argument is not directly used 

In [39]:
model = AutoModelForCausalLM.from_pretrained(model_name)

In [40]:
from peft import LoraConfig, get_peft_model, TaskType

In [41]:
lora_config = LoraConfig(
    task_type = TaskType.CAUSAL_LM,
    r = 8,
    lora_alpha = 16,
    target_modules = ['q_proj', 'v_proj'],
    lora_dropout = 0.05,
    bias='none'
)

In [42]:
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 2048)
    (layers): ModuleList(
      (0-21): 22 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=256, bias=False)
          (v_proj): Linear(in_features=2048, out_features=256, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=5632, bias=False)
          (up_proj): Linear(in_features=2048, out_features=5632, bias=False)
          (down_proj): Linear(in_features=5632, out_features=2048, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((2048,), eps=1e-05)
    (rot

In [43]:
q_lora_model = get_peft_model(model, lora_config)

In [44]:
q_lora_model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 2048)
        (layers): ModuleList(
          (0-21): 22 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=2048, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): Linear(in_feat

In [45]:
trainer = Trainer(
    model = q_lora_model,
    args = training_args,
    train_dataset = tokenized
)

In [46]:
trainer.train()

Step,Training Loss
50,8.8433
100,6.7184
150,2.2668
200,0.7382
250,0.5885
300,0.674
350,0.6716
400,0.6702
450,0.7781
500,0.6082


TrainOutput(global_step=3000, training_loss=0.8592377134958903, metrics={'train_runtime': 2066.3795, 'train_samples_per_second': 1.452, 'train_steps_per_second': 1.452, 'total_flos': 9544447033344000.0, 'train_loss': 0.8592377134958903, 'epoch': 2.0})

In [49]:
model_path = "/content/llama-wiki-domain/checkpoint-3000"

In [50]:
trained_model = AutoModelForCausalLM.from_pretrained(model_path, device_map='auto')

In [63]:
prompt = "In mathematics, a prime number is"

In [64]:
inputs = tokenizer(prompt, return_tensors='pt').to("cuda")

In [65]:
outputs = trained_model.generate(
    **inputs,
    max_new_tokens = 100,
    temperature = 0.8,
    top_p = 0.9,
    do_sample = True,
    repetition_penalty = 1.1
)

In [66]:
print("\n Model Output \n")
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


 Model Output 

In mathematics, a prime number is a positive integer which has no factors other than itself and 1. Prime numbers have the property that their factors are not divisible by each other, except for themselves and 1. Mathematical examples of primes include 2 (monochord), 3 (tripod), 5 (cannonball), 7 (safety pin), 11 (golden section), 13 (pentagon) and 19 (diamond). These primes
