In [None]:
from huggingface_hub import login
login(token="PUT_NEW_HF_TOKEN_FROM_HUGGING_API_TOKEN")

!pip install -q -U transformers peft accelerate bitsandbytes datasets sentencepiece safetensors pyarrow
!pip install -q -U git+https://github.com/huggingface/trl.git

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [None]:
import json
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer
import os

print("Libraries imported successfully!")

Libraries imported successfully!


In [None]:
# Run configuration: every path and identifier used by the cells below.
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # non-AWQ TinyLLaMA
DATA_PATH = "next.json"                            # raw JSON file read by the data-preparation cell
# The token is read from the environment instead of being written into the notebook;
# it is None when the HF_TOKEN variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")
OUTPUT_DIR = "/content/tinyllama_nextscene_qlora"  # passed to TrainingArguments as output_dir
ADAPTER_DIR = "/content/tinyllama_adapter"         # where the LoRA adapter and tokenizer are saved

# Create both directories up front; exist_ok keeps the cell re-runnable.
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(ADAPTER_DIR, exist_ok=True)


In [None]:
# Convert the raw JSON records into instruction/input/output rows, write them as
# JSON Lines, and load that file as the training dataset.
out_path = "/content/next_scene_dataset_instr.jsonl"
records = []

with open(DATA_PATH, "r", encoding="utf-8") as f:
    data = json.load(f)

for rec in data:
    # Keys are matched case-insensitively by substring: the first key containing
    # "description", and the first key containing both "next" and "prediction".
    desc_key = next((k for k in rec if "description" in k.lower()), None)
    next_key = next((k for k in rec if "next" in k.lower() and "prediction" in k.lower()), None)
    if not desc_key or not next_key:
        continue
    description, prediction = rec[desc_key], rec[next_key]
    # A null or non-text value cannot be stripped; skip the record instead of
    # aborting the whole conversion with an AttributeError.
    if not isinstance(description, str) or not isinstance(prediction, str):
        continue
    records.append({
        "instruction": "Predict the next probable scene description given the current scene.",
        "input": description.strip(),
        "output": prediction.strip()
    })

# Fail here with a clear message rather than later while loading an empty file.
if not records:
    raise ValueError(
        f"No usable records found in {DATA_PATH!r}: each record needs a "
        "'description' key and a 'next ... prediction' key with text values."
    )

with open(out_path, "w", encoding="utf-8") as f:
    for r in records:
        f.write(json.dumps(r, ensure_ascii=False) + "\n")

dataset = load_dataset("json", data_files=out_path)["train"]

# Tokenizer for MODEL_NAME; if it defines no pad token, reuse the EOS token for padding.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token
# Pad on the right-hand side of each sequence.
tokenizer.padding_side = "right"

def format_for_training(example):
    """Render one record as a single training prompt.

    Reads the ``instruction``, ``input`` and ``output`` fields of ``example``
    and returns ``{"text": ...}`` where the three labelled sections are
    separated by newlines.
    """
    instruction = example["instruction"]
    scene = example["input"]
    target = example["output"]
    prompt = f"Instruction: {instruction}\nInput: {scene}\nOutput: {target}"
    return {"text": prompt}

# Add a "text" column holding the formatted prompt for every record.
dataset = dataset.map(format_for_training)


Generating train split: 0 examples [00:00, ? examples/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Map:   0%|          | 0/323 [00:00<?, ? examples/s]

In [None]:
# 4-bit NF4 quantization with double quantization; compute runs in float16.
bnb_cfg = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    quantization_config=bnb_cfg,
    # `torch_dtype` is deprecated in the installed transformers release
    # (it emits "Use `dtype` instead"), so pass `dtype`.
    dtype=torch.float16
)

# Prepare the quantized model for training before the adapters are attached.
model = prepare_model_for_kbit_training(model)

# LoRA adapters on the four attention projections: rank 16, alpha 32, dropout 0.05.
lora_cfg = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj","k_proj","v_proj","o_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_cfg)


`torch_dtype` is deprecated! Use `dtype` instead!


In [None]:
from transformers import AutoTokenizer

# Hard cap on tokens per example, counting the EOS token appended below.
MAX_SEQ_LEN = 1024

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token

def tokenize_fn(example):
    """Tokenize the ``text`` field without padding and terminate it with EOS.

    Accepts either a batch (``example["text"]`` is a list of strings, as used
    with ``batched=True``) or a single record (``example["text"]`` is a string).
    Returns a dict with ``input_ids`` and ``attention_mask`` of matching shape.

    Sequences are left at their natural length rather than padded to the
    maximum, so short examples do not become full-length rows of pad tokens.
    NOTE(review): this relies on the trainer's collator padding each batch;
    confirm against the installed TRL version.
    """
    texts = example["text"]
    is_single = isinstance(texts, str)
    encoded = tokenizer(
        [texts] if is_single else texts,
        truncation=True,
        max_length=MAX_SEQ_LEN - 1,  # leave one position for the EOS token
    )

    eos_id = tokenizer.eos_token_id
    input_ids, attention_mask = [], []
    for ids, mask in zip(encoded["input_ids"], encoded["attention_mask"]):
        # Append EOS so the end of the answer is an explicit training target;
        # skip it if the tokenizer already terminated the sequence.
        if eos_id is not None and (not ids or ids[-1] != eos_id):
            ids, mask = ids + [eos_id], mask + [1]
        input_ids.append(ids)
        attention_mask.append(mask)

    if is_single:
        return {"input_ids": input_ids[0], "attention_mask": attention_mask[0]}
    return {"input_ids": input_ids, "attention_mask": attention_mask}

dataset = dataset.map(tokenize_fn, batched=True)
# Expose only the tokenized columns, as torch tensors.
dataset.set_format(type="torch", columns=["input_ids", "attention_mask"])


Map:   0%|          | 0/323 [00:00<?, ? examples/s]

In [None]:
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,            # where checkpoints are saved
    num_train_epochs=3,               # total epochs
    per_device_train_batch_size=2,    # batch size per GPU
    gradient_accumulation_steps=4,    # effective batch size = 2*4=8
    learning_rate=5e-5,
    fp16=True,                        # mixed precision for speed
    logging_steps=10,                 # log the training loss every 10 steps
    save_strategy="epoch",            # write a checkpoint at the end of each epoch
    optim="paged_adamw_8bit",
    report_to="none",                 # no external experiment tracker
    max_grad_norm=0.3                 # gradient clipping threshold
)
# `model` was already wrapped with the LoRA adapters by get_peft_model(), so
# `peft_config` is deliberately not passed again: giving the trainer both a
# PEFT-wrapped model and a peft_config asks it to apply the adapter a second time.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    args=training_args,
)





Truncating train dataset:   0%|          | 0/323 [00:00<?, ? examples/s]

In [None]:
# Run the fine-tuning loop configured on `trainer`.
trainer.train()

# Save LoRA adapters
trainer.model.save_pretrained(ADAPTER_DIR)
# Store the tokenizer files in the same directory as the adapter.
tokenizer.save_pretrained(ADAPTER_DIR)


The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 2}.
  return fn(*args, **kwargs)


Step,Training Loss
10,2.2012
20,0.39
30,0.2416
40,0.2169
50,0.2052
60,0.192
70,0.1758
80,0.1622
90,0.1568
100,0.1488


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


('/content/tinyllama_adapter/tokenizer_config.json',
 '/content/tinyllama_adapter/special_tokens_map.json',
 '/content/tinyllama_adapter/chat_template.jinja',
 '/content/tinyllama_adapter/tokenizer.model',
 '/content/tinyllama_adapter/added_tokens.json',
 '/content/tinyllama_adapter/tokenizer.json')

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Repeated here so the inference cells can be read on their own.
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # base model
ADAPTER_DIR = "/content/tinyllama_adapter"        # path to saved LoRA adapters

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load base model (this replaces the training-time `model` variable)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    # `torch_dtype` is deprecated in the installed transformers release
    # (it emits "Use `dtype` instead"), so pass `dtype`.
    dtype="auto",
)


`torch_dtype` is deprecated! Use `dtype` instead!


In [None]:
# Load LoRA weights on top of the base model
# Wrap the base model with the adapter weights stored in ADAPTER_DIR.
model = PeftModel.from_pretrained(model, ADAPTER_DIR)
# Switch to evaluation mode; as the cell's last expression, the returned module is also displayed.
model.eval()


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 2048)
        (layers): ModuleList(
          (0-21): 22 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=2048, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lora.Linear(

In [None]:
def predict_next_scene(description, max_tokens=120):
    """Generate a next-scene prediction with the fine-tuned model.

    Args:
        description: Text of the current scene; inserted into the same
            Instruction/Input/Output prompt layout used for training.
        max_tokens: Maximum number of new tokens to generate.

    Returns:
        The generated continuation only (prompt tokens removed), decoded
        without special tokens and stripped of surrounding whitespace.

    Uses the notebook-level ``tokenizer`` and ``model``. Sampling is enabled,
    so repeated calls can return different text.
    """
    prompt = f"Instruction: Predict the next probable scene description given the current scene.\nInput: {description}\nOutput:"

    # Keep the whole encoding so the attention mask travels with the ids.
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_len = encoded["input_ids"].shape[-1]

    # Pass the attention mask explicitly rather than leaving generate() to infer it.
    outputs = model.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=0.5,
        top_p=0.9,
        eos_token_id=tokenizer.eos_token_id
    )

    # Drop the prompt tokens so only the newly generated text is decoded.
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    return response.strip()


In [None]:
# Sample scene description used for a quick check of predict_next_scene().
desc = "A worker on the floor, tangled in an orange extension cord."
print("Next scene prediction:")
print(predict_next_scene(desc))


Next scene prediction:
The worker tries to untangle themselves, but their hands are entangled in the cord, and their body is hanging over the edge of the ladder.


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Checkpoint id for the comparison model; no adapter is attached to it in this cell.
BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Kept under *_base names so the `tokenizer` and `model` variables stay untouched.
tokenizer_base = AutoTokenizer.from_pretrained(BASE_MODEL)
model_base = AutoModelForCausalLM.from_pretrained(BASE_MODEL, device_map="auto")

def predict_next_scene_base(description):
    """Generate a next-scene prediction with the base model.

    Builds the same Instruction/Input/Output prompt as the fine-tuned helper,
    samples up to 120 new tokens from ``model_base``, and returns only the
    generated continuation, decoded without special tokens and stripped.

    Args:
        description: Text of the current scene.

    Returns:
        The generated text as a string.
    """
    prompt = f"Instruction: Predict the next probable scene description given the current scene.\nInput: {description}\nOutput:"
    # Keep the whole encoding so the attention mask is passed to generate()
    # explicitly instead of being inferred.
    encoded = tokenizer_base(prompt, return_tensors="pt").to(model_base.device)
    prompt_len = encoded["input_ids"].shape[-1]
    outputs = model_base.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_new_tokens=120,
        do_sample=True,
        temperature=0.5,
        top_p=0.9,
    )
    # Slice off the prompt tokens before decoding.
    response = tokenizer_base.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    return response.strip()

# Compare
# `desc2` must exist before it is used here; otherwise this cell raises
# NameError when the notebook is run top to bottom on a fresh kernel.
desc2 = "A man wearing a helmet and jeans is falling forward off an electric scooter onto a sidewalk, catching himself with his hands."

print("Next scene prediction (Base TinyLLaMA):")
print(predict_next_scene_base(desc))
print(predict_next_scene_base(desc2))



Next scene prediction (Base TinyLLaMA):
A man is falling from a flagpole.
Explanation: The man in the black vest is falling from a flagpole, which is the next probable scene description. The second man is higher on the structure against a cloudy sky, which is the third probable scene description.
Example 2: Predict the next probable scene description given the current scene.
Input: A man is sitting on a bench with his back to a wall.
Output: A man is sitting on a bench with his back to a wall.
Explanation: The man in the ben
"The man falls forward off the scooter onto the sidewalk, catching himself with his hands."

Example 2:
Input: A man is sitting on a park bench, reading a book.
Output: "The man is sitting on the park bench, reading a book."

Example 3:
Input: A man is walking down a street, holding a bag of groceries.
Output: "The man is walking down the street, holding a bag of groceries."

Example 4:
Input: A woman is


In [None]:
# Two sample scene descriptions; `desc` replaces the value assigned in the earlier demo cell.
desc2 ="A man wearing a helmet and jeans is falling forward off an electric scooter onto a sidewalk, catching himself with his hands."
desc = "A man in a black vest and pants is falling head-first from a tall, flag-decorated metal platform. A second man is visible higher on the structure against a cloudy sky.."
print("Next scene prediction:")
# Both predictions come from predict_next_scene(), i.e. the adapter-loaded `model`.
print(predict_next_scene(desc))
print(predict_next_scene(desc2))



🪄 Next scene prediction:
The man lands hard on the ground, his head hitting the concrete below. He struggles to get up, his body twisting in pain.
The man lands hard on his back, his head hitting the pavement with a loud thud. He looks up to see the scooter spinning out of control, coming to a stop just inches away from him.
