## 1. Setup & Installation

In [1]:
!pip install -q transformers datasets peft accelerate bitsandbytes huggingface_hub rouge-score

In [2]:
import os
import logging
import torch
from pathlib import Path

import pandas as pd
from datasets import Dataset, DatasetDict
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    Seq2SeqTrainingArguments,
    Seq2SeqTrainer,
    DataCollatorForSeq2Seq,
    BitsAndBytesConfig
)
from peft import LoraConfig, get_peft_model, PeftModel, prepare_model_for_kbit_training
from huggingface_hub import login, HfApi

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

print(f"GPU Available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

2026-01-20 03:34:42.660026: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1768880082.681294     235 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1768880082.687797     235 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1768880082.704702     235 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1768880082.704722     235 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1768880082.704724     235 computation_placer.cc:177] computation placer alr

GPU Available: True
GPU: Tesla T4
Memory: 15.83 GB


## 2. Configuration

In [3]:
from kaggle_secrets import UserSecretsClient
HF_TOKEN = UserSecretsClient().get_secret("HF_TOKEN")
HF_USERNAME = "raflisbk"  
HF_REPO_NAME = "t5-posting-time-summarizer"
  
STAGE1_MODEL_PATH = f"{HF_USERNAME}/{HF_REPO_NAME}"
STAGE1_SUBFOLDER = "stage1"

df = pd.read_csv('/kaggle/input/stage-2-narrative/stage2_training_narrative.csv')

MAX_INPUT_LENGTH = 256  
MAX_TARGET_LENGTH = 1024 

LORA_R = 64
LORA_ALPHA = 128
LORA_DROPOUT = 0.05
LORA_TARGET_MODULES = ["q", "v", "k", "o"]

BATCH_SIZE = 4
GRADIENT_ACCUMULATION_STEPS = 4
LEARNING_RATE = 0.0001   
NUM_EPOCHS = 10
WARMUP_RATIO = 0.15

OUTPUT_DIR = "./outputs_stage2b"
MERGED_OUTPUT_DIR = "../models/stage2_mergedb"

In [4]:
if HF_TOKEN:
    login(token=HF_TOKEN)
    print("Logged in to HuggingFace Hub")
else:
    print("Warning: HF_TOKEN not set. Set it with: os.environ['HF_TOKEN'] = 'your_token'")

Logged in to HuggingFace Hub


## 3. Load Data

In [5]:
print(f"Loaded {len(df)} samples")

print("\n" + "="*60)
print("SAMPLE INPUT:")
print(df.iloc[0]['input_text'])
print("\nSAMPLE TARGET:")
print(df.iloc[0]['target_text'])
print("="*60)

Loaded 321 samples

SAMPLE INPUT:
Day: Sunday, Time: 15:00 - 18:00, Score: 82
Hourly: 01(67), 01(39), 02(39), 02(47), 03(75), 03(25), 04(34), 04(42), 05(53), 05(48), 06(66), 06(52), 07(75), 07(47), 08(72), 08(67), 09(72), 09(80), 10(70), 10(98), 11(88), 11(75), 12(77), 12(67), 13(81), 13(71), 14(83), 14(72), 15(86), 15(93), 16(81), 16(81), 17(71), 17(81), 17(80), 18(74), 18(83), 18(69), 19(69), 19(80), 19(80), 20(76), 20(73), 20(81), 21(62), 21(72), 21(73), 22(64), 22(67), 22(68), 23(62), 23(51), 23(60)
Daily Avg: 68.3, Peak: 15(93)

SAMPLE TARGET:
Sunday from 3 PM to 6 PM is a particularly strong engagement window, likely catching users as they wind down their weekend afternoons.

Insight:
- The highest hourly engagement occurs at 3 PM, peaking at 93.
- Engagement during this period is 20% higher than the daily average, signalling increased user attention.
- Even the lowest hourly scores within this window comfortably exceed the daily average of 68.3.


In [6]:
from sklearn.model_selection import train_test_split

train_df, val_df = train_test_split(df, test_size=0.1, random_state=42)
print(f"Train: {len(train_df)}, Val: {len(val_df)}")

train_dataset = Dataset.from_pandas(train_df.reset_index(drop=True))
val_dataset = Dataset.from_pandas(val_df.reset_index(drop=True))

dataset = DatasetDict({
    "train": train_dataset,
    "validation": val_dataset
})

print(dataset)

Train: 288, Val: 33
DatasetDict({
    train: Dataset({
        features: ['input_text', 'target_text'],
        num_rows: 288
    })
    validation: Dataset({
        features: ['input_text', 'target_text'],
        num_rows: 33
    })
})


## 4. Load Stage 1 Model & Tokenizer

In [7]:
tokenizer = AutoTokenizer.from_pretrained(
    STAGE1_MODEL_PATH,
    subfolder=STAGE1_SUBFOLDER
)
print(f"Tokenizer loaded: {tokenizer.__class__.__name__}")

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float32,
    bnb_4bit_use_double_quant=True
)

model = AutoModelForSeq2SeqLM.from_pretrained(
    STAGE1_MODEL_PATH,
    subfolder=STAGE1_SUBFOLDER,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
print(f"Model loaded from: {STAGE1_MODEL_PATH}/{STAGE1_SUBFOLDER}")

Tokenizer loaded: T5TokenizerFast


adapter_config.json:   0%|          | 0.00/882 [00:00<?, ?B/s]



adapter_model.safetensors:   0%|          | 0.00/151M [00:00<?, ?B/s]

Model loaded from: raflisbk/t5-posting-time-summarizer/stage1


In [8]:
model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    target_modules=LORA_TARGET_MODULES,
    bias="none",
    task_type="SEQ_2_SEQ_LM"
)

model = get_peft_model(model, lora_config)

model.print_trainable_parameters()



trainable params: 37,748,736 || all params: 820,898,816 || trainable%: 4.5985


## 5. Tokenization

In [9]:
def preprocess_function(examples):
    inputs = [f"summarize: {text}" for text in examples["input_text"]]
    targets = examples["target_text"]
    
    model_inputs = tokenizer(
        inputs,
        max_length=MAX_INPUT_LENGTH,
        padding="max_length",
        truncation=True
    )
    
    labels = tokenizer(
        targets,
        max_length=MAX_TARGET_LENGTH,
        padding="max_length",
        truncation=True
    )
    
    labels["input_ids"] = [
        [(label if label != tokenizer.pad_token_id else -100) for label in labels_seq]
        for labels_seq in labels["input_ids"]
    ]
    
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

tokenized_dataset = dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=dataset["train"].column_names
)

print(f"Tokenized dataset: {tokenized_dataset}")

Map:   0%|          | 0/288 [00:00<?, ? examples/s]

Map:   0%|          | 0/33 [00:00<?, ? examples/s]

Tokenized dataset: DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 288
    })
    validation: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 33
    })
})


## 6. Training Setup

In [10]:
data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    model=model,
    padding=True
)

training_args = Seq2SeqTrainingArguments(
    output_dir=OUTPUT_DIR,
    
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    
    learning_rate=LEARNING_RATE,
    lr_scheduler_type="cosine",
    warmup_ratio=WARMUP_RATIO,
    
    num_train_epochs=NUM_EPOCHS,
    
    fp16=False,  
    bf16=False,
    
    optim="paged_adamw_8bit",
    weight_decay=0.01,
    max_grad_norm=1.0,
    
    eval_strategy="epoch",
    save_strategy="epoch",

    logging_steps=10,
    report_to=[],

    predict_with_generate=True,
    generation_max_length=MAX_TARGET_LENGTH,

    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    push_to_hub=True,
    hub_model_id = STAGE1_MODEL_PATH,
    hub_token=HF_TOKEN
)

print("Training arguments configured!")
print(f"Effective batch size: {BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS}")
print(f"Learning rate: {LEARNING_RATE}")
print(f"Epochs: {NUM_EPOCHS}")

Training arguments configured!
Effective batch size: 16
Learning rate: 0.0001
Epochs: 10


In [11]:
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    tokenizer=tokenizer,
    data_collator=data_collator,
)

print("Trainer initialized!")

  trainer = Seq2SeqTrainer(


Trainer initialized!


## 7. Train!

In [12]:
trainer.train()

  return fn(*args, **kwargs)
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Epoch,Training Loss,Validation Loss
1,3.2863,2.759873
2,2.7887,2.089126
3,2.166,1.719419
4,1.8545,1.558257
5,1.7343,1.482579
6,1.6687,1.430487
7,1.6589,1.4013
8,1.587,1.386422
9,1.5626,1.381265
10,1.5749,1.380317


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


TrainOutput(global_step=180, training_loss=1.975002776251899, metrics={'train_runtime': 5011.9157, 'train_samples_per_second': 0.575, 'train_steps_per_second': 0.036, 'total_flos': 3485854177689600.0, 'train_loss': 1.975002776251899, 'epoch': 10.0})

## 8. Evaluation

In [17]:
eval_results = trainer.evaluate()
print(f"\nEvaluation Results:")
for key, value in eval_results.items():
    print(f"  {key}: {value:.4f}")


Evaluation Results:
  eval_loss: 1.3803
  eval_runtime: 18.4974
  eval_samples_per_second: 1.7840
  eval_steps_per_second: 0.4870
  epoch: 10.0000


In [18]:
test_input = val_df.iloc[0]['input_text']
expected_output = val_df.iloc[0]['target_text']

print("INPUT:")
print(test_input)

inputs = tokenizer(f"summarize: {test_input}", return_tensors="pt", max_length=MAX_INPUT_LENGTH, truncation=True)
inputs = {k: v.to(model.device) for k, v in inputs.items()}

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_length=MAX_TARGET_LENGTH,
        num_beams=4,
        early_stopping=True,
        do_sample=False
    )

generated = tokenizer.decode(outputs[0], skip_special_tokens=True)

print("\nEXPECTED:")
print(expected_output)

print("\nGENERATED:")
print(generated)

INPUT:
Day: Saturday, Time: 09:00 - 12:00, Score: 79
Hourly: 01(30), 01(37), 02(28), 02(32), 03(30), 03(35), 04(37), 04(33), 05(44), 05(50), 06(48), 06(59), 07(65), 07(69), 08(84), 08(89), 09(97), 09(80), 10(84), 10(71), 11(77), 11(66), 12(73), 12(57), 13(77), 13(63), 14(74), 14(65), 15(59), 15(71), 16(63), 16(58), 17(59), 17(53), 18(50), 18(55), 19(54), 19(46), 20(50), 20(43), 21(50), 21(44), 22(43), 22(38), 23(34), 23(38)
Daily Avg: 55.7, Peak: 09(97)

EXPECTED:
Saturday's 09:00-12:00 slot is ideal for engagement, capturing users during their active weekend morning with a strong 79/100 score.

Insight:
- The peak engagement hits 97 at 9 AM, soaring 74% above the daily average of 55.7.
- This morning window outperforms the afternoon (1-4 PM) by 12%, showing higher user receptivity.
- Sustained high scores from 8-10 AM indicate a prime window for priority content.

GENERATED:
Saturday's 09:00-12:00 slot delivers a strong 79/100 engagement score, capitalizing on peak morning activity wh

## 9. Save & Merge Model

In [19]:
trainer.save_model(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
print(f"LoRA adapter saved to: {OUTPUT_DIR}")

Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

No files have been modified since last commit. Skipping to prevent empty commit.


LoRA adapter saved to: ./outputs_stage2b


## 10. Upload to HuggingFace Hub

In [22]:
print("Uploading LoRA adapter to HuggingFace Hub...")

from huggingface_hub import HfApi

# Upload LoRA adapter langsung (tanpa merge)
api = HfApi(token=HF_TOKEN)

print(f"Uploading adapter from {OUTPUT_DIR} to {STAGE1_MODEL_PATH}/stage2...")
api.upload_folder(
    folder_path=OUTPUT_DIR,
    repo_id=STAGE1_MODEL_PATH,
    path_in_repo="stage2b",
    token=HF_TOKEN
)

print(f" Stage 2 LoRA adapter uploaded to: {STAGE1_MODEL_PATH}/stage2b")
print("\nTo use this model for inference:")
print("1. Load base model from stage1")
print("2. Load LoRA adapter from stage2")
print("3. Merge at inference time (in float16, not 4bit)")

Uploading LoRA adapter to HuggingFace Hub...
Uploading adapter from ./outputs_stage2b to raflisbk/t5-posting-time-summarizer/stage2...


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

 Stage 2 LoRA adapter uploaded to: raflisbk/t5-posting-time-summarizer/stage2b

To use this model for inference:
1. Load base model from stage1
2. Load LoRA adapter from stage2
3. Merge at inference time (in float16, not 4bit)
