In [None]:
# upload JSONL dataset
from google.colab import files
uploaded = files.upload()

Saving finetune_data_cleaned.jsonl to finetune_data_cleaned.jsonl


In [None]:
# hugging face token
!pip install -q huggingface_hub
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
!pip install -q transformers accelerate peft datasets bitsandbytes

In [None]:
# load TinyLlama-1.1B-Chat-v1.0

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
from peft import LoraConfig, get_peft_model, TaskType

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Load model in 4-bit
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)


In [None]:
# set up LoRA config
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM,
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()


trainable params: 2,252,800 || all params: 1,102,301,184 || trainable%: 0.2044




In [None]:
# load dataset
import pandas as pd
from datasets import Dataset
import os

# Load the data using pandas
df = pd.read_json(os.path.abspath("finetune_data_cleaned.jsonl"), lines=True)

# Convert the pandas DataFrame to a Dataset object
dataset = Dataset.from_pandas(df)

# No test split needed for fine-tuning, access the dataset directly

In [None]:
# tokenize
def format(example):
    return f"### Question: {example['prompt']}\n### Answer: {example['response']}"

def tokenize(example):
    prompt = format(example)
    tokens = tokenizer(prompt, padding="max_length", truncation=True, max_length=512)
    tokens["labels"] = tokens["input_ids"].copy()
    return tokens

tokenized_dataset = dataset.map(tokenize)

Map:   0%|          | 0/248 [00:00<?, ? examples/s]

In [None]:
# train!
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir="./skinc-finetuned-tinyllama",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    logging_steps=10,
    learning_rate=2e-4,
    fp16=True,
    save_strategy="epoch",
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer
)

trainer.train()


  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
10,6.8713
20,0.6248
30,0.3315
40,0.2994


TrainOutput(global_step=48, training_loss=1.7406415790319443, metrics={'train_runtime': 166.1404, 'train_samples_per_second': 4.478, 'train_steps_per_second': 0.289, 'total_flos': 2369597328064512.0, 'train_loss': 1.7406415790319443, 'epoch': 3.0})

In [None]:
# save model
model.save_pretrained("skinc-finetuned-tinyllama")
tokenizer.save_pretrained("skinc-finetuned-tinyllama")

('skinc-finetuned-tinyllama/tokenizer_config.json',
 'skinc-finetuned-tinyllama/special_tokens_map.json',
 'skinc-finetuned-tinyllama/chat_template.jinja',
 'skinc-finetuned-tinyllama/tokenizer.model',
 'skinc-finetuned-tinyllama/added_tokens.json',
 'skinc-finetuned-tinyllama/tokenizer.json')

In [None]:
# load the fine-tuned model and tokenizer
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

model_dir = "skinc-finetuned-tinyllama"

model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
model = PeftModel.from_pretrained(model, model_dir)
tokenizer = AutoTokenizer.from_pretrained(model_dir)

# set device to GPU if available
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)



PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 2048)
        (layers): ModuleList(
          (0-21): 22 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=2048, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): Linear(in_fe

In [None]:
# test the model
prompt = "### Question: What is Vitamin C good for?\n### Answer:"
input_ids = tokenizer(prompt, return_tensors="pt").to(device)

# Use the custom stopping criteria
output = model.generate(**input_ids, max_new_tokens=200, stopping_criteria=stop_criteria) # Increased max_new_tokens to potentially get a second marker

# Decode the entire output
decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)

# Find the index of the first '### Answer:' marker
answer_start = decoded_output.find("### Answer:")

if answer_start != -1:
    # Find the index of the next '### Question' marker after the first answer
    question_stop = decoded_output.find("### Question", answer_start + len("### Answer:"))
    if question_stop != -1:
        # Print the text between the markers
        print(decoded_output[answer_start + len("### Answer:"):question_stop].strip())
    else:
        # If no subsequent '### Question' is found, print from the answer start to the end
        print(decoded_output[answer_start + len("### Answer:"):].strip())
else:
    # If no '### Answer:' is found, print the whole decoded output
    print(decoded_output.strip())

NameError: name 'tokenizer' is not defined

In [None]:
# push to hugging face hub
repo_id = "stutipandey/lora_finetuned_tinyllama_skinc" # Replace with your desired repository ID

# Push the model and tokenizer to the Hub
model.push_to_hub(repo_id)
tokenizer.push_to_hub(repo_id)

Uploading...:   0%|          | 0.00/9.02M [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

Uploading...:   0%|          | 0.00/500k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/stutipandey/lora_finetuned_tinyllama_skinc/commit/06783c90595d7c2810c3e904b6184f0391442263', commit_message='Upload tokenizer', commit_description='', oid='06783c90595d7c2810c3e904b6184f0391442263', pr_url=None, repo_url=RepoUrl('https://huggingface.co/stutipandey/lora_finetuned_tinyllama_skinc', endpoint='https://huggingface.co', repo_type='model', repo_id='stutipandey/lora_finetuned_tinyllama_skinc'), pr_revision=None, pr_num=None)

In [None]:
# Zip the fine-tuned model folder for easier download - ULTIMATELY UNECESSARY, used HF Inference API instead
folder_to_zip = "./fine_tuned_model_for_download" # Make sure this matches the directory where you saved the model
zip_file_name = "fine_tuned_llm.zip" # Specify the name for your zip file

!zip -r {zip_file_name} {folder_to_zip}

print(f"Folder '{folder_to_zip}' has been zipped into '{zip_file_name}'.")
print(f"You can now download '{zip_file_name}' from the file explorer on the left sidebar.")

  adding: fine_tuned_model_for_download/ (stored 0%)
  adding: fine_tuned_model_for_download/special_tokens_map.json (deflated 79%)
  adding: fine_tuned_model_for_download/README.md (deflated 66%)
  adding: fine_tuned_model_for_download/tokenizer.model (deflated 55%)
  adding: fine_tuned_model_for_download/tokenizer_config.json (deflated 68%)
  adding: fine_tuned_model_for_download/chat_template.jinja (deflated 60%)
  adding: fine_tuned_model_for_download/adapter_config.json (deflated 55%)
  adding: fine_tuned_model_for_download/tokenizer.json (deflated 85%)
  adding: fine_tuned_model_for_download/adapter_model.safetensors (deflated 41%)
Folder './fine_tuned_model_for_download' has been zipped into 'fine_tuned_llm.zip'.
You can now download 'fine_tuned_llm.zip' from the file explorer on the left sidebar.
