In [None]:
pip install transformers peft datasets torch bitsandbytes tf-keras

Collecting datasets
  Downloading datasets-3.4.1-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.4-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x8

In [None]:
import os
import torch
import json
import logging
import torch.nn as nn
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, TrainingArguments, Trainer, BitsAndBytesConfig, AutoModelForVision2Seq, Blip2Processor
from peft import LoraConfig, get_peft_model
from datasets import load_dataset
import gc

In [None]:
assert torch.cuda.is_available(), "CUDA is not available. Please enable GPU in Colab."
os.environ["WANDB_DISABLED"] = "true"

In [None]:
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)

In [None]:
class JSONFormatter(logging.Formatter):
    def format(self, record):
        log_entry = {
            "level": record.levelname,
            "message": record.getMessage(),
            "timestamp": self.formatTime(record, self.datefmt)
        }
        return json.dumps(log_entry)

In [None]:
file_handler = logging.FileHandler("training_log.json", mode="w")
file_handler.setFormatter(JSONFormatter())

In [None]:
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))

In [None]:
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.addHandler(file_handler)
logger.addHandler(console_handler)

In [None]:
file_handler.flush()

In [None]:
model_name = "convergence-ai/proxy-lite-3b"
device = "cuda"

In [None]:
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    bnb_8bit_compute_dtype=torch.float16
)

In [None]:
#model = Qwen2VLForConditionalGeneration.from_pretrained(
#    model_name,
#    quantization_config=bnb_config,
#    device_map={"": "cuda"}
#)

model = AutoModelForVision2Seq.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto", trust_remote_code=True)

processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b", trust_remote_code=True)

logger.info("Proxy-lite-3b Model Loaded Successfully!")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.27k [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/65.4k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.51G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/126 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.50, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


preprocessor_config.json:   0%|          | 0.00/432 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/882 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.56M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/23.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/548 [00:00<?, ?B/s]

processor_config.json:   0%|          | 0.00/68.0 [00:00<?, ?B/s]

2025-03-26 14:48:45,389 - INFO - Proxy-lite-3b Model Loaded Successfully!


In [None]:
lora_config = LoraConfig(
    r=64,
    lora_alpha=128,
    lora_dropout=0.1,
    bias="none",
    target_modules=["q_proj", "v_proj"],
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)

In [None]:
model = model.to(device)

In [None]:
dataset = load_dataset("FinLang/investopedia-instruction-tuning-dataset")

README.md:   0%|          | 0.00/4.92k [00:00<?, ?B/s]

train.csv:   0%|          | 0.00/277M [00:00<?, ?B/s]

test.csv:   0%|          | 0.00/30.7M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/206461 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/22940 [00:00<?, ? examples/s]

In [None]:
dataset["train"] = dataset["train"].select(range(500))
dataset["test"] = dataset["test"].select(range(100))

In [None]:
logger.info(f"Dataset column names: {dataset['train'].column_names}")

2025-03-26 14:49:12,314 - INFO - Dataset column names: ['Topic', 'Title', 'Context', 'Question-Answer', 'Question', 'Answer', 'bge-large-en-v1.5-correlation']


In [None]:
possible_text_columns = [col for col in dataset["train"].column_names if "text" in col.lower() or "content" in col.lower()]
if not possible_text_columns:
    raise ValueError("No suitable text column found in dataset!")
text_column = possible_text_columns[0]
logger.info(f"Using text column: {text_column}")

2025-03-26 14:49:12,322 - INFO - Using text column: Context


In [None]:
def tokenize_function(examples):
    inputs = processor(text=examples[text_column], truncation=True, padding="max_length", max_length=512, return_tensors="pt")
    return {
        "input_ids": inputs["input_ids"].squeeze(0),
        "attention_mask": inputs["attention_mask"].squeeze(0),
    }

dataset = dataset.map(tokenize_function, batched=True)

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [None]:
dataset = dataset["train"].train_test_split(test_size=0.1)

dataset = dataset.map(lambda x: {"input_ids": x["input_ids"], "attention_mask": x["attention_mask"]}, batched=True)

dataset = dataset.remove_columns([text_column])

dataset.set_format("torch")

Map:   0%|          | 0/450 [00:00<?, ? examples/s]

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

In [None]:
class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        labels = inputs["input_ids"]
        outputs = model(**inputs)
        logits = outputs.logits

        shift_logits = logits[..., :-1, :].contiguous()
        shift_labels = labels[..., 1:].contiguous()

        loss_fct = nn.CrossEntropyLoss()
        loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))

        return (loss, outputs) if return_outputs else loss

In [None]:
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    num_train_epochs=5,
    weight_decay=0.01,
    logging_dir="./logs",
    push_to_hub=False,
    gradient_accumulation_steps=128,
    fp16=True,
    logging_steps=10,
)

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


In [None]:
trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
)

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [None]:
trainer.train()

Epoch,Training Loss,Validation Loss
1,No log,No log
2,No log,No log
3,5.963600,No log


TrainOutput(global_step=15, training_loss=5.58084233601888, metrics={'train_runtime': 837.458, 'train_samples_per_second': 2.687, 'train_steps_per_second': 0.018, 'total_flos': 1.957265801006285e+16, 'train_loss': 5.58084233601888, 'epoch': 3.8533333333333335})

In [None]:
model.save_pretrained("./fine_tuned_proxy_lite")
processor.save_pretrained("./fine_tuned_proxy_lite")
logger.info("Fine-tuning completed and model saved!")

2025-03-26 15:13:09,736 - INFO - Fine-tuning completed and model saved!


In [None]:
# Evaluate Model
eval_results = trainer.evaluate()
logger.info(f"Evaluation Results: {eval_results}")

2025-03-26 15:13:19,334 - INFO - Evaluation Results: {'eval_runtime': 9.5776, 'eval_samples_per_second': 5.221, 'eval_steps_per_second': 5.221, 'epoch': 3.8533333333333335}


In [None]:
# Check Training Logs
logger.info(f"Training Log History: {trainer.state.log_history}")

2025-03-26 15:13:19,341 - INFO - Training Log History: [{'eval_runtime': 9.6337, 'eval_samples_per_second': 5.19, 'eval_steps_per_second': 5.19, 'epoch': 1.0, 'step': 4}, {'eval_runtime': 9.6553, 'eval_samples_per_second': 5.179, 'eval_steps_per_second': 5.179, 'epoch': 2.0, 'step': 8}, {'loss': 5.9636, 'grad_norm': 9.858227729797363, 'learning_rate': 1.6666666666666667e-05, 'epoch': 2.568888888888889, 'step': 10}, {'eval_runtime': 9.6645, 'eval_samples_per_second': 5.174, 'eval_steps_per_second': 5.174, 'epoch': 3.0, 'step': 12}, {'eval_runtime': 9.6346, 'eval_samples_per_second': 5.19, 'eval_steps_per_second': 5.19, 'epoch': 3.8533333333333335, 'step': 15}, {'train_runtime': 837.458, 'train_samples_per_second': 2.687, 'train_steps_per_second': 0.018, 'total_flos': 1.957265801006285e+16, 'train_loss': 5.58084233601888, 'epoch': 3.8533333333333335, 'step': 15}, {'eval_runtime': 9.5776, 'eval_samples_per_second': 5.221, 'eval_steps_per_second': 5.221, 'epoch': 3.8533333333333335, 'step'

In [None]:
def generate_response(prompt):
    inputs = processor(text=prompt, images=None, truncation=True, padding="max_length", max_length=512, return_tensors="pt").to("cuda")

    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=50)

    return processor.batch_decode(output, skip_special_tokens=True)[0]

logger.info("Before fine-tuning: " + generate_response("Explain stock market liquidity."))


Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.
2025-03-26 15:13:23,121 - INFO - Before fine-tuning: Explain stock market liquidity. around next is on on on on on on on on on on on on on on on on on on on on on on on on on on on on on on on on on on on on on on on on on on on on on on


In [None]:
del model, trainer, dataset
gc.collect()
torch.cuda.empty_cache()

In [None]:
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    bnb_8bit_compute_dtype=torch.float16
)

# Reload the model with quantization
#model = Qwen2VLForConditionalGeneration.from_pretrained(
#    "./fine_tuned_ui_tars",
#    quantization_config=bnb_config,
#    device_map="auto"  # Distributes across available GPUs
#)

#processor = AutoProcessor.from_pretrained("./fine_tuned_ui_tars")

model = AutoModelForVision2Seq.from_pretrained("fine_tuned_ui_tars", torch_dtype=torch.float16, device_map="auto", trust_remote_code=True)

processor = Blip2Processor.from_pretrained("fine_tuned_ui_tars", trust_remote_code=True)

OSError: fine_tuned_ui_tars is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`

In [None]:
final_response = generate_response("Explain stock market liquidity.")
logger.info(f"After reloading fine-tuned model: {final_response}")

NameError: name 'model' is not defined