In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import torch
torch.cuda.empty_cache()
torch.cuda.memory_summary()




In [3]:
!nvidia-smi


Sun Mar  9 13:28:40 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.35.03              Driver Version: 560.35.03      CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   40C    P8             10W /   70W |       3MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  Tesla T4                       Off |   00

In [4]:
import os
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, Trainer, TrainingArguments
from datasets import load_dataset

# ✅ Prevent CUDA memory fragmentation
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

def fine_tune_flan_t5():
    dataset = load_dataset("squad_v2")

    # ✅ Remove examples with empty answers
    def filter_empty_answers(example):
        return bool(example["answers"]["text"])  

    dataset["train"] = dataset["train"].filter(filter_empty_answers)
    dataset["validation"] = dataset["validation"].filter(filter_empty_answers)

    print(f"Train set size after filtering: {len(dataset['train'])}")
    print(f"Validation set size after filtering: {len(dataset['validation'])}")

    # ✅ Use a subset for memory efficiency
    dataset["train"] = dataset["train"].shuffle(seed=42).select(range(2000))  
    dataset["validation"] = dataset["validation"].shuffle(seed=42).select(range(500))

    print("\nExample data after filtering:", dataset["train"][0])

    test_inputs = [
        (dataset["train"][0]["context"], dataset["train"][0]["question"]),
        (dataset["train"][1]["context"], dataset["train"][1]["question"])
    ]

    # ✅ Use FLAN-T5 Large (Memory Optimized)
    model_name = "google/flan-t5-small"  
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    model = AutoModelForSeq2SeqLM.from_pretrained(
        model_name,
        device_map="auto",  # ✅ Automatically selects GPU
        torch_dtype=torch.bfloat16,  # ✅ Use BF16 instead of FP16 (Better for T5 models)
    )

    tokenizer.pad_token = tokenizer.eos_token  # ✅ Ensure padding token is set

    def preprocess_function(examples):
        """ ✅ Format input as structured prompt for FLAN-T5 """
        inputs = [
            f"Answer the question based on the provided context:\n\nContext: {c}\n\nQuestion: {q}\n\nAnswer:"
            for c, q in zip(examples["context"], examples["question"])
        ]

        # ✅ Ensure answers are non-empty, otherwise use "No Answer"
        outputs = [a["text"][0] if len(a["text"]) > 0 else "No Answer" for a in examples["answers"]]

        tokenized_inputs = tokenizer(inputs, padding="max_length", truncation=True, max_length=512)
        tokenized_outputs = tokenizer(outputs, padding="max_length", truncation=True, max_length=128)

        tokenized_inputs["labels"] = tokenized_outputs["input_ids"]

        return tokenized_inputs

    # ✅ Apply preprocessing
    tokenized_datasets = dataset.map(preprocess_function, batched=True, remove_columns=dataset["train"].column_names)

    # ✅ Memory-Optimized Training Arguments
    training_args = TrainingArguments(
        output_dir="./flan_t5_results",
        evaluation_strategy="epoch",
        save_strategy="no",  
        learning_rate=3e-5,
        per_device_train_batch_size=2,  # ✅ Lower batch size to avoid OOM
        per_device_eval_batch_size=2,
        num_train_epochs=5,  
        weight_decay=0.01,
        logging_dir="./logs",
        logging_steps=10,
        log_level="info",
        report_to="none",
        gradient_accumulation_steps=4,  # ✅ Helps with batch size reduction
        gradient_checkpointing=True,  # ✅ Reduce memory by recomputing activations
        bf16=True,  # ✅ BF16 is better for T5 models
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_datasets["train"],
        eval_dataset=tokenized_datasets["validation"],
        tokenizer=tokenizer,
    )

    print("\n🚀 Starting Training...")
    with torch.autocast("cuda", dtype=torch.bfloat16):  # ✅ Enable mixed precision training
        trainer.train()

    eval_results = trainer.evaluate()
    print(f"\n🔥 Final Evaluation Loss: {eval_results['eval_loss']:.4f}")

    trainer.save_model("./fine_tuned_flan_t5")
    print("\n✅ Fine-tuned model saved!")

    return model, tokenizer, test_inputs

def test_flan_t5_model(model, tokenizer, test_inputs):
    print("\n🔍 Running test queries from the training set...\n")

    for context, query in test_inputs:
        prompt = f"Answer the question based on the provided context:\n\nContext: {context}\n\nQuestion: {query}\n\nAnswer:"
        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to("cuda")

        output = model.generate(
            **inputs, 
            max_new_tokens=256,  
            temperature=0.7,  
            num_beams=3,  
            repetition_penalty=1.2,  
            do_sample=True
        )
        print(f"❓ Question: {query}")
        print(f"📖 Context: {context[:200]}...")  
        print(f"💡 Answer: {tokenizer.decode(output[0], skip_special_tokens=True)}\n")

if __name__ == "__main__":
    model, tokenizer, test_inputs = fine_tune_flan_t5()
    test_flan_t5_model(model, tokenizer, test_inputs)


README.md:   0%|          | 0.00/8.92k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/16.4M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/1.35M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/130319 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/11873 [00:00<?, ? examples/s]

Filter:   0%|          | 0/130319 [00:00<?, ? examples/s]

Filter:   0%|          | 0/11873 [00:00<?, ? examples/s]

Train set size after filtering: 86821
Validation set size after filtering: 5928

Example data after filtering: {'id': '572817584b864d1900164463', 'title': 'London', 'context': 'Outward urban expansion is now prevented by the Metropolitan Green Belt, although the built-up area extends beyond the boundary in places, resulting in a separately defined Greater London Urban Area. Beyond this is the vast London commuter belt. Greater London is split for some purposes into Inner London and Outer London. The city is split by the River Thames into North and South, with an informal central London area in its interior. The coordinates of the nominal centre of London, traditionally considered to be the original Eleanor Cross at Charing Cross near the junction of Trafalgar Square and Whitehall, are approximately 51°30′26″N 00°07′39″W\ufeff / \ufeff51.50722°N 0.12750°W\ufeff / 51.50722; -0.12750.', 'question': 'Greater London is divided into what two groups of boroughs?', 'answers': {'text': ['Inner 

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

  trainer = Trainer(
You have loaded a model on multiple GPUs. `is_model_parallel` attribute will be force-set to `True` to avoid any unexpected behavior such as device placement mismatching.
Using auto half precision backend
***** Running training *****
  Num examples = 2,000
  Num Epochs = 5
  Instantaneous batch size per device = 2
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 4
  Total optimization steps = 1,250
  Number of trainable parameters = 76,961,152



🚀 Starting Training...


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Epoch,Training Loss,Validation Loss
1,0.1662,0.042428
2,0.0453,0.02763
3,0.038,0.026731
4,0.0452,0.026593
5,0.0444,0.026514



***** Running Evaluation *****
  Num examples = 500
  Batch size = 2
Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.

***** Running Evaluation *****
  Num examples = 500
  Batch size = 2

***** Running Evaluation *****
  Num examples = 500
  Batch size = 2

***** Running Evaluation *****
  Num examples = 500
  Batch size = 2

***** Running Evaluation *****
  Num examples = 500
  Batch size = 2


Training completed. Do not forget to share your model on huggingface.co/models =)



***** Running Evaluation *****
  Num examples = 500
  Batch size = 2


Saving model checkpoint to ./fine_tuned_flan_t5
Configuration saved in ./fine_tuned_flan_t5/config.json
Configuration saved in ./fine_tuned_flan_t5/generation_config.json



🔥 Final Evaluation Loss: 0.0265


Model weights saved in ./fine_tuned_flan_t5/model.safetensors
tokenizer config file saved in ./fine_tuned_flan_t5/tokenizer_config.json
Special tokens file saved in ./fine_tuned_flan_t5/special_tokens_map.json
Copy vocab file to ./fine_tuned_flan_t5/spiece.model



✅ Fine-tuned model saved!

🔍 Running test queries from the training set...

❓ Question: Greater London is divided into what two groups of boroughs?
📖 Context: Outward urban expansion is now prevented by the Metropolitan Green Belt, although the built-up area extends beyond the boundary in places, resulting in a separately defined Greater London Urban Area. ...
💡 Answer: Inner London and Outer London

❓ Question: Where is the Opera House located?
📖 Context: The German Renaissance has bequeathed the city some noteworthy buildings (especially the current Chambre de commerce et d'industrie, former town hall, on Place Gutenberg), as did the French Baroque an...
💡 Answer: Place Broglie

