In [1]:
!pip install transformers datasets accelerate

from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TrainingArguments, Trainer
import torch




In [2]:
# Replace 'your_dataset.json' with your actual data file path in Colab
dataset = load_dataset("json", data_files="student_qa_dataset.json")

model_name = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Fix padding token if not defined
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForSeq2SeqLM.from_pretrained(model_name)


Generating train split: 0 examples [00:00, ? examples/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [3]:
max_input_length = 512
max_target_length = 128

def preprocess_function(examples):
    inputs = [f"question: {q} context: {c}" for q, c in zip(examples["question"], examples["context"])]
    model_inputs = tokenizer(inputs, max_length=max_input_length, truncation=True, padding="max_length")

    labels_text = []
    for answers in examples["answers"]:
        if answers["text"]:
            labels_text.append(answers["text"][0])
        else:
            labels_text.append("")

    labels = tokenizer(text_target=labels_text, max_length=max_target_length, truncation=True, padding="max_length")
    model_inputs["labels"] = labels["input_ids"]

    return model_inputs

tokenized_datasets = dataset["train"].map(preprocess_function, batched=True, remove_columns=dataset["train"].column_names)

small_train_dataset = tokenized_datasets.shuffle(seed=42).select(range(min(2000, len(tokenized_datasets))))
small_eval_dataset = tokenized_datasets.shuffle(seed=42).select(range(min(500, len(tokenized_datasets))))

print("Dataset tokenized and preprocessed.")


Map:   0%|          | 0/5 [00:00<?, ? examples/s]

Dataset tokenized and preprocessed.


In [8]:
training_args = TrainingArguments(
    output_dir="./flan_t5_finetuned",
    eval_strategy="epoch",
    learning_rate=3e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    save_total_limit=2,
    save_strategy="epoch",
    logging_dir='./logs',
    logging_steps=10,
    report_to="none",
    push_to_hub=True,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    tokenizer=tokenizer,
)

trainer.train()


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,48.567207
2,No log,47.341225
3,No log,46.773949


TrainOutput(global_step=3, training_loss=43.46631876627604, metrics={'train_runtime': 31.8384, 'train_samples_per_second': 0.471, 'train_steps_per_second': 0.094, 'total_flos': 2788357570560.0, 'train_loss': 43.46631876627604, 'epoch': 3.0})

In [11]:
from transformers import pipeline

# Load fine-tuned model and tokenizer
fine_tuned_model_name = "./flan_t5_finetuned"
tokenizer = AutoTokenizer.from_pretrained(fine_tuned_model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(fine_tuned_model_name)

chatbot = pipeline("text2text-generation", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)

question = "What was Naidu's GPA ?"
context = "Naidu is a senior Cloud Computing student at BITS Pilani. He has a GPA of 8.4 and is part of the AI research group."

input_text = f"question: {question} context: {context}"

outputs = chatbot(input_text, max_new_tokens=50, do_sample=True, num_beams=5, early_stopping=True, no_repeat_ngram_size=3, temperature=0.7)

print(f"Question: {question}")
print(f"Answer: {outputs[0]['generated_text']}")


Device set to use cuda:0


Question: What was Naidu's GPA ?
Answer: 8.4


In [12]:
from huggingface_hub import notebook_login, whoami, HfApi

print("\n--- Hugging Face Login ---")
try:
    # A login prompt will appear if not already logged in. Use a 'write' token.
    notebook_login()
    user_info = whoami()
    username = user_info['name']
    print(f"✅ Successfully logged in as: {username}")

    # Define your repository name using your username
    repo_name = f"{username}/flan_t5_finetuned_education"

    # Create a model repository on the Hub
    api = HfApi()
    api.create_repo(repo_id=repo_name, exist_ok=True, repo_type="model")

    # Push model and tokenizer to Hugging Face Hub
    print(f"\n--- Pushing Model to Hub ({repo_name}) ---")
    model.push_to_hub(repo_name)
    tokenizer.push_to_hub(repo_name)

    print(f"\n✅ Model uploaded to: https://huggingface.co/{repo_name}")

    # Test loading and using the model directly from the Hub
    print("\n--- Testing Model from Hugging Face Hub ---")
    classifier_hub = pipeline("sentiment-analysis", model=repo_name)
    result_hub = classifier_hub(["good!", "bad."])
    print(result_hub)

except Exception as e:
    print(f"\n--- Hugging Face Push Failed ---")
    print(f"Error: {e}. Ensure you are logged in and the 'save_path' model exists.")


--- Hugging Face Login ---


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

✅ Successfully logged in as: naidu9678

--- Pushing Model to Hub (naidu9678/flan_t5_finetuned_education) ---


Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...k2bxyl1/model.safetensors:   5%|5         | 16.8MB /  308MB            

README.md: 0.00B [00:00, ?B/s]

Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  .../tmpk65l2zcs/spiece.model: 100%|##########|  792kB /  792kB            


✅ Model uploaded to: https://huggingface.co/naidu9678/flan_t5_finetuned_education

--- Testing Model from Hugging Face Hub ---


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at naidu9678/flan_t5_finetuned_education and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0


[{'label': 'LABEL_0', 'score': 0.7698573470115662}, {'label': 'LABEL_0', 'score': 0.7856703400611877}]
