In [None]:
!pip install transformers peft torch ijson gdown
!pip install -U datasets
import json
import os
import random
from decimal import Decimal

import gdown
import ijson
import torch
from datasets import IterableDataset
from peft import LoraConfig, get_peft_model
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
    default_data_collator,
)
from transformers.trainer_callback import EarlyStoppingCallback

# Download dataset
# The Drive file id is the path segment between "/d/" and the next "/" of the share link.
drive_link = "https://drive.google.com/file/d/1AYCccocBJsar7M_DGc-pljNM21wbU9mg/view?usp=sharing"
file_id = drive_link.split('/d/')[1].split('/')[0]
download_url = f"https://drive.google.com/uc?id={file_id}"
local_dataset_path = "/content/synthetic_data.json"

try:
    downloaded_path = gdown.download(download_url, local_dataset_path, quiet=False)
    # Depending on the gdown version, a failed download (quota, permissions) may be
    # reported by returning None instead of raising. Fail here rather than later,
    # when the streaming generator first tries to open a missing file.
    if downloaded_path is None or not os.path.isfile(local_dataset_path):
        raise RuntimeError(f"gdown did not produce a file at {local_dataset_path}")
except Exception as e:
    print(f"Error downloading dataset: {e}")
    raise

# JSON streaming generator
def json_array_generator(file_path, fraction=0.12, train_split=0.833, seed=42):
    """Stream a random subsample of a top-level JSON array, tagging each item with a split.

    The generator is re-executed every time the resulting dataset is iterated
    (once for the train stream, once for the validation stream, and again on
    every epoch / evaluation). A private, seeded random generator makes every
    pass select the same items and assign them the same split, so the train and
    validation streams stay disjoint and stable.

    Args:
        file_path: Path to a JSON file whose root value is an array.
        fraction: Probability of keeping each array element.
        train_split: Probability that a kept element is tagged "train";
            otherwise it is tagged "validation".
        seed: Seed for the private random generator. Pass None to get a
            different (non-reproducible) sample on every pass.

    Yields:
        dict: ``{"item": <array element>, "split": "train" | "validation"}``.
    """
    rng = random.Random(seed)
    # ijson parses bytes; opening in binary mode avoids the text-mode fallback.
    with open(file_path, 'rb') as f:
        for item in ijson.items(f, 'item'):
            if rng.random() >= fraction:
                continue
            split = "train" if rng.random() < train_split else "validation"
            yield {"item": item, "split": split}

# Wrap the sampling generator in an IterableDataset fed from the downloaded file.
dataset = IterableDataset.from_generator(
    json_array_generator, gen_kwargs=dict(file_path=local_dataset_path)
)

# Base checkpoint shared by the tokenizer and the causal-LM weights.
model_name = "HuggingFaceTB/SmolLM2-135M-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# Padding reuses the end-of-sequence token.
tokenizer.pad_token = tokenizer.eos_token

# Half-precision weights; device_map="auto" leaves device placement to the loader.
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.float16, device_map="auto"
)
model.train()

# Custom JSON encoder for Decimal
class DecimalEncoder(json.JSONEncoder):
    """JSON encoder that serialises ``Decimal`` values as floats."""

    def default(self, obj):
        """Return ``float(obj)`` for a Decimal; defer to the base encoder otherwise."""
        if not isinstance(obj, Decimal):
            return super().default(obj)
        return float(obj)

# Format example
def format_example(example):
    """Turn one streamed record into a chat-formatted training text.

    Args:
        example: Dict with an "item" dict (the raw record) and a "split" label.

    Returns:
        dict: ``{"text": <chat-template string>, "split": <unchanged split>}``.
        The user turn carries the record as JSON and the assistant turn carries
        the action as "vx, vy, vz, yaw" with two decimals each.
    """
    item = example["item"]
    # Records without an action fall back to an all-zero action.
    action = item.get("Generated Action", {"vx": 0, "vy": 0, "vz": 0, "yaw": 0})
    # Keep the target out of the prompt: serialising the whole record would put
    # "Generated Action" into the user turn and leak the answer to the model.
    prompt_data = {key: value for key, value in item.items() if key != "Generated Action"}
    json_str = json.dumps(prompt_data, cls=DecimalEncoder)
    prompt = f"Based on the following data:\n{json_str}\nGenerate the drone action in the format: vx, vy, vz, yaw"
    response = f"{action['vx']:.2f}, {action['vy']:.2f}, {action['vz']:.2f}, {action['yaw']:.2f}"
    messages = [
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": response}
    ]
    return {
        "text": tokenizer.apply_chat_template(messages, tokenize=False),
        "split": example["split"]
    }

# Tokenize function
def tokenize_function(examples):
    """Tokenize a batch of texts to fixed length and attach causal-LM labels.

    Args:
        examples: Batch dict whose "text" entry holds the strings to encode.

    Returns:
        The tokenizer output (sequences padded / truncated to 512 tokens, as
        PyTorch tensors) with an extra "labels" entry: a copy of "input_ids"
        where every position with attention_mask == 0 is set to -100 so that
        padding is ignored by the loss.
    """
    encoded = tokenizer(
        examples["text"],
        max_length=512,
        truncation=True,
        padding="max_length",
        return_tensors="pt",
    )
    label_ids = encoded["input_ids"].clone()
    pad_positions = encoded["attention_mask"] == 0
    label_ids[pad_positions] = -100
    encoded["labels"] = label_ids
    return encoded

# Build the chat-formatted text for every streamed record.
formatted_dataset = dataset.map(format_example, batched=False)

# Route records by their split tag, then keep only the text column.
train_dataset = formatted_dataset.filter(lambda row: row["split"] == "train")
train_dataset = train_dataset.select_columns(['text'])
val_dataset = formatted_dataset.filter(lambda row: row["split"] == "validation")
val_dataset = val_dataset.select_columns(['text'])

# Tokenize both streams in batches; the raw text is dropped once encoded.
tokenized_train = train_dataset.map(
    tokenize_function, batched=True, remove_columns=["text"]
)
tokenized_val = val_dataset.map(
    tokenize_function, batched=True, remove_columns=["text"]
)

# Data collator
# tokenize_function already emits fixed-length examples with `labels` whose padding
# positions are set to -100. DataCollatorForLanguageModeling(mlm=False) would discard
# those labels and rebuild them from input_ids, masking every pad_token_id position.
# Because pad_token is set to eos_token in this notebook, that would also hide the
# genuine end-of-sequence tokens from the loss. default_data_collator only stacks the
# features into a batch, so the precomputed labels are used as-is.
data_collator = default_data_collator

# LoRA adapter settings: rank-16 adapters on the q_proj / v_proj modules.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the base model with the adapters configured above.
model = get_peft_model(model, lora_config)

# Training arguments
# Resume whenever the output directory already holds any "checkpoint-<step>" folder.
# Probing one fixed step (checkpoint-200) stops working as soon as save_total_limit
# rotates that folder away, after which training would restart from step 0.
_train_output_dir = "/content/drive/MyDrive/finetuned_tinyllama"
_has_checkpoint = os.path.isdir(_train_output_dir) and any(
    entry.startswith("checkpoint-")
    and entry[len("checkpoint-"):].isdigit()
    and os.path.isdir(os.path.join(_train_output_dir, entry))
    for entry in os.listdir(_train_output_dir)
)

training_args = TrainingArguments(
    output_dir=_train_output_dir,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=4,
    max_steps=2000,  # Reduced from 4000 based on loss trends
    learning_rate=1e-4,
    fp16=True,
    logging_steps=50,
    save_steps=200,
    save_total_limit=2,
    eval_strategy="steps",
    eval_steps=50,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    warmup_steps=100,
    lr_scheduler_type="cosine",
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={'use_reentrant': False},
    max_grad_norm=0.5,
    dataloader_num_workers=1,
    remove_unused_columns=False,
    # Only stored here; it is forwarded explicitly to trainer.train() below.
    resume_from_checkpoint=_has_checkpoint,
    label_smoothing_factor=0.1,
    label_names=["labels"],  # Fix for the warning
)

# Assemble the Trainer; early stopping uses a patience of 3.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    data_collator=data_collator,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
)

# Run training, honouring the resume flag stored on the arguments. A ValueError
# triggers a second attempt that starts from scratch.
try:
    trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
except ValueError as err:
    print(f"No valid checkpoint found or other error: {err}. Starting training from scratch.")
    trainer.train()

# Persist the adapters and the tokenizer side by side.
model.save_pretrained("/content/drive/MyDrive/lora_adapters")
tokenizer.save_pretrained("/content/drive/MyDrive/lora_adapters")

: 

Resume From any Given Checkpoint

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
from datasets import IterableDataset
import os

# Define paths
output_dir = "/content/drive/MyDrive/finetuned_tinyllama"
checkpoint_dir = output_dir

# Load model and tokenizer
# The checkpoints under output_dir are written by the training cell above, which
# fine-tunes SmolLM2-135M-Instruct. Resuming requires the same base model; a different
# architecture cannot load the saved adapter / optimizer state.
model_name = "HuggingFaceTB/SmolLM2-135M-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # Same padding setup as the training cell
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16
).to("cuda" if torch.cuda.is_available() else "cpu")

# Configure LoRA
# The adapter shape (rank and target modules) must match the training cell (r=16),
# otherwise the checkpoint's adapter weights do not fit the freshly built adapters.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)

# Define a simple streaming dataset (replace with your actual dataset)
def dummy_dataset_generator():
    """Yield 1000 placeholder records of the form ``{"text": "Sample text <i>"}``."""
    yield from ({"text": f"Sample text {idx}"} for idx in range(1000))

# Wrap the placeholder generator in an IterableDataset for the Trainer below.
dataset = IterableDataset.from_generator(dummy_dataset_generator)

# Tokenization function
def tokenize_function(examples):
    """Tokenize a batch of texts to fixed length and attach causal-LM labels.

    Args:
        examples: Batch dict whose "text" entry holds the strings to encode.

    Returns:
        The tokenizer output (sequences padded / truncated to 512 tokens, as
        PyTorch tensors) with an extra "labels" entry: a copy of "input_ids"
        in which padding positions are replaced by -100.
    """
    tokenized_inputs = tokenizer(
        examples["text"],
        padding="max_length",
        truncation=True,
        max_length=512,
        return_tensors="pt"
    )
    tokenized_inputs["labels"] = tokenized_inputs["input_ids"].clone()
    # Mask padding in the labels, as the training cell's tokenizer function does.
    # Every example is padded to 512 tokens, so without this the loss would be
    # computed mostly on padding.
    tokenized_inputs["labels"][tokenized_inputs["attention_mask"] == 0] = -100
    return tokenized_inputs

# Encode the stream in batches, then drop the raw text column.
tokenized_dataset = dataset.map(tokenize_function, batched=True).remove_columns(["text"])

# Training configuration for the resumed run; checkpoints go to checkpoint_dir.
training_args = TrainingArguments(
    output_dir=checkpoint_dir,
    max_steps=10000,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=100,
    save_steps=1000,
    save_total_limit=2,
    dataloader_num_workers=1,
    remove_unused_columns=False,
)

# Trainer over the tokenized stream (no evaluation set in this cell).
trainer = Trainer(model=model, args=training_args, train_dataset=tokenized_dataset)

# Checkpoint folder to continue from; point this at the checkpoint you want to resume.
checkpoint_to_resume = "/content/drive/MyDrive/finetuned_tinyllama/checkpoint-2000"

# Resume only when the value is a string naming an existing directory;
# anything else falls back to a fresh run.
if not (isinstance(checkpoint_to_resume, str) and os.path.isdir(checkpoint_to_resume)):
    print(f"Checkpoint {checkpoint_to_resume} not found or invalid, starting from scratch.")
    trainer.train()
else:
    print(f"Resuming training from {checkpoint_to_resume}")
    trainer.train(resume_from_checkpoint=checkpoint_to_resume)

# Persist the adapters and the tokenizer side by side.
model.save_pretrained("/content/drive/MyDrive/lora_adapters")
tokenizer.save_pretrained("/content/drive/MyDrive/lora_adapters")

Merge the LoRA Adapters with the Base Model

In [2]:
from peft import PeftModel
from transformers import AutoModelForCausalLM

# Base checkpoint the adapters were trained on.
base_model = AutoModelForCausalLM.from_pretrained(
    "HuggingFaceTB/SmolLM2-135M-Instruct"
)

# Attach the saved LoRA adapters to the base model.
model = PeftModel.from_pretrained(
    base_model,
    "/content/drive/MyDrive/lora_adapters",
)

# Merge the adapters into the base model and unload the adapter wrapper.
merged_model = model.merge_and_unload()

# Write the merged model to Drive.
merged_model.save_pretrained(
    "/content/drive/MyDrive/merged_model"
)

In [None]:
Convert into GGUF for Ollama

In [None]:
# Install dependencies
!apt-get update
!apt-get install -y build-essential cmake
!pip install torch torchvision transformers peft

In [None]:
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Install dependencies
!apt-get update
!apt-get install -y build-essential cmake
!pip install torch torchvision transformers peft

# Clone and build llama.cpp
!git clone https://github.com/ggerganov/llama.cpp.git /content/llama.cpp
!mkdir -p /content/llama.cpp/build
!cd /content/llama.cpp/build && cmake .. && cmake --build . --config Release

# Define paths
lora_path = "/content/lora_adapters"  # Upload your LoRA files here
output_dir = "/content/ollama_models"
gguf_output = os.path.join(output_dir, "model-q4_0.gguf")
merged_model_path = os.path.join(output_dir, "merged_model")
llama_cpp_path = "/content/llama.cpp"
quantize_bin = os.path.join(llama_cpp_path, "build/bin/llama-quantize")



In [None]:
# Check that the adapter folder and its expected files are present
# (upload files to /content/lora_adapters first).
if not os.path.exists(lora_path):
    raise FileNotFoundError(f"LoRA adapters folder not found at {lora_path}. Please upload files.")

required_files = ["adapter_config.json", "adapter_model.safetensors"]
if any(not os.path.exists(os.path.join(lora_path, fname)) for fname in required_files):
    raise FileNotFoundError(f"Missing required files in {lora_path}: {required_files}")

# Make sure the destination folder for the converted models exists.
os.makedirs(output_dir, exist_ok=True)

# Base checkpoint and tokenizer the adapters are merged into.
print("Loading base model and tokenizer...")
model_name = "HuggingFaceTB/SmolLM2-135M-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
base_model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.float16, device_map="auto"
)
print("Base model and tokenizer loaded.")

# Attach the uploaded adapters, then merge them and unload the adapter wrapper.
print("Loading and merging LoRA adapters...")
model = PeftModel.from_pretrained(base_model, lora_path)
merged_model = model.merge_and_unload()
print("LoRA adapters merged.")

# Store the merged model and the tokenizer in the same folder.
print("Saving merged model...")
for _artifact in (merged_model, tokenizer):
    _artifact.save_pretrained(merged_model_path)
print(f"Merged model saved to {merged_model_path}.")

# Convert to GGUF (f16)
print("Converting to GGUF (f16)...")
!python /content/llama.cpp/convert_hf_to_gguf.py {merged_model_path} --outfile /content/ollama_models/model-f16.gguf --outtype f16

# Quantize to Q4_0
print("Quantizing to Q4_0...")
!/content/llama.cpp/build/bin/llama-quantize /content/ollama_models/model-f16.gguf /content/ollama_models/model-q4_0.gguf Q4_0

# Verify GGUF file
if not os.path.exists(gguf_output):
    raise FileNotFoundError(f"GGUF file not created at {gguf_output}")
print(f"GGUF file created at {gguf_output}.")

{
  "input": {
    "sensor_data": {
      "timestamp": "2025-06-21T13:45:00Z",
      "sensors": {
        "Left": 1253,
        "Right": 1473,
        "Front": 718,
        "Back": 440,
        "Up": 1315,
        "Bottom": 1402,
        "AccelX": -1.87,
        "AccelY": -0.1,
        "AccelZ": 10.05,
        "GyroX": 0.94,
        "GyroY": -0.81,
        "GyroZ": -0.27
      }
    },
    "yolo_data": {
      "frame_number": 0,
      "timestamp": "2025-06-21T13:45:00Z",
      "boxes_3d": [
        {
          "class_name": "sofa",
          "depth_value": 1.2,
          "dimensions": [1.05, 1.04, 0.74]
        }
      ]
    },
    "vlm_data": {
      "description": "Someone is lying on the couch."
    }
  },
  "output": {
    "vx": 0.0,
    "vy": -0.7,
    "vz": 0.2,
    "yaw": -90.0
  }
}

In [None]:
from google.colab import drive
import os
import shutil

# Mount Google Drive
print("Mounting Google Drive...")
drive.mount('/content/drive')
print("Google Drive mounted.")

# Define paths
output_dir = "/content/ollama_models"
drive_output_dir = "/content/drive/My Drive/ollama_models" # You can change the destination path in your Drive

# Check if the output directory exists before moving
if os.path.exists(output_dir):
    print(f"Moving {output_dir} to {drive_output_dir}...")
    if os.path.isdir(drive_output_dir):
        # shutil.move would place the folder *inside* an existing destination
        # directory (.../ollama_models/ollama_models) when the cell is re-run.
        # Merge into the existing folder instead, then remove the local copy.
        shutil.copytree(output_dir, drive_output_dir, dirs_exist_ok=True)
        shutil.rmtree(output_dir)
    else:
        shutil.move(output_dir, drive_output_dir)
    print("Folder moved successfully.")
else:
    print(f"Folder not found at {output_dir}. Skipping move.")