In [1]:
#pip install transformers datasets


In [2]:
#!pip install evaluate
#!pip install rouge_score

In [None]:
# Install necessary libraries
#!pip install transformers datasets

# Import the required libraries
from datasets import load_dataset
from evaluate import load
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments

# Load the first 500 rows of the English ('en') configuration's train split.
# `trust_remote_code=True` was removed: it lets the dataset repository run
# arbitrary Python on this machine. NOTE(review): this dataset presumably
# ships plain data files and loads without it; re-enable only after
# auditing the repository's loading script.
dataset = load_dataset('FreedomIntelligence/medical-o1-reasoning-SFT', 'en', split="train[:500]")

# Load tokenizer and model from one checkpoint name so the two cannot drift apart
MODEL_CHECKPOINT = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_CHECKPOINT)

# Preprocess function to format the data
def preprocess_function(examples):
    """Tokenize a batch of examples into encoder inputs and decoder labels.

    Args:
        examples: Batch mapping that provides the 'Question', 'Complex_CoT'
            and 'Response' columns as parallel lists of strings.

    Returns:
        The tokenizer output for the combined question/reasoning prompts,
        with an added 'labels' entry holding the tokenized responses. Padded
        label positions are set to -100 so the training loss skips them.
    """
    # Combine the Question and Complex_CoT as the input
    inputs = [f"Question: {q} \nComplex_CoT: {cot}" for q, cot in zip(examples['Question'], examples['Complex_CoT'])]
    targets = examples['Response']

    # Tokenize the inputs and targets (both truncated to 512 tokens and
    # padded to the longest sequence of the batch being processed)
    model_inputs = tokenizer(inputs, truncation=True, padding=True, max_length=512)
    labels = tokenizer(text_target=targets, truncation=True, padding=True, max_length=512)

    # Padding in the labels must not contribute to the loss: swap every pad
    # token id for -100, the index the cross-entropy loss ignores.
    pad_token_id = tokenizer.pad_token_id
    model_inputs['labels'] = [
        [token_id if token_id != pad_token_id else -100 for token_id in label_ids]
        for label_ids in labels['input_ids']
    ]
    return model_inputs




  from .autonotebook import tqdm as notebook_tqdm
E0000 00:00:1741505170.423782     197 common_lib.cc:612] Could not set metric server port: INVALID_ARGUMENT: Could not find SliceBuilder port 8471 in any of the 0 ports provided in `tpu_process_addresses`="local"
=== Source Location Trace: ===
learning/45eac/tfrc/runtime/common_lib.cc:230


In [None]:
# Tokenize every example, one batch at a time
dataset = dataset.map(preprocess_function, batched=True)

# Shuffle once with a fixed seed, then cut at the 90% mark:
# rows before the cut are for training, the remaining 10% for evaluation
shuffled_dataset = dataset.shuffle(seed=42)
split_index = int(0.9 * len(dataset))
train_dataset = shuffled_dataset.select(range(split_index))
eval_dataset = shuffled_dataset.select(range(split_index, len(dataset)))

# ROUGE scorer used by the evaluation callback
rouge_metric = load("rouge")

# Define evaluation function
def compute_metrics(pred):
    """Compute ROUGE over the whole evaluation set.

    Args:
        pred: Evaluation output exposing `predictions` and `label_ids`.
            `predictions` may be token ids, raw logits with a trailing
            vocabulary axis, or a tuple whose first element is one of those.

    Returns:
        The result of `rouge_metric.compute` for all decoded
        prediction/reference pairs.
    """
    import numpy as np  # local import keeps this function self-contained

    predictions = pred.predictions
    # Some models return a tuple of outputs; the first entry is taken as the
    # token-level scores.
    if isinstance(predictions, tuple):
        predictions = predictions[0]
    predictions = np.asarray(predictions)
    # A 3-D array is (examples, positions, vocabulary) logits: pick the most
    # likely token at each position to obtain ids the tokenizer can decode.
    # NOTE(review): with a plain Trainer these are teacher-forced predictions,
    # not free-running generations - confirm this is the intended metric.
    if predictions.ndim == 3:
        predictions = predictions.argmax(axis=-1)

    label_ids = np.asarray(pred.label_ids)

    # Negative ids (e.g. the -100 ignore index) cannot be decoded; map them
    # to the pad token, which skip_special_tokens then strips.
    pad_token_id = tokenizer.pad_token_id
    predictions = np.where(predictions < 0, pad_token_id, predictions)
    label_ids = np.where(label_ids < 0, pad_token_id, label_ids)

    # Decode every example rather than only the first one
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(label_ids, skip_special_tokens=True)

    return rouge_metric.compute(predictions=decoded_preds, references=decoded_labels)

# Define the training arguments

# Newer transformers releases call this argument `eval_strategy`; older ones
# only know `evaluation_strategy`. Use whichever the installed version has.
_eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in getattr(TrainingArguments, "__dataclass_fields__", {})
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./results",          # output directory
    num_train_epochs=3,              # number of training epochs
    per_device_train_batch_size=8,   # batch size for training
    per_device_eval_batch_size=8,    # batch size for evaluation
    # 450 training rows at batch size 8 for 3 epochs is only ~171 optimizer
    # steps on one device, so a fixed 500-step warmup would never finish.
    # Warm up over the first 10% of the run instead.
    warmup_ratio=0.1,
    weight_decay=0.01,               # strength of weight decay
    logging_dir="./logs",            # directory for storing logs
    **{_eval_strategy_key: "epoch"}, # evaluate every epoch
)


In [None]:
# Collect everything the Trainer needs, then build it in one call
trainer_kwargs = dict(
    model=model,                      # network being fine-tuned
    args=training_args,               # run configuration
    train_dataset=train_dataset,      # examples used for optimization
    eval_dataset=eval_dataset,        # held-out examples used for scoring
    compute_metrics=compute_metrics,  # ROUGE callback run at evaluation time
)
trainer = Trainer(**trainer_kwargs)



In [None]:
# Start fine-tuning on train_dataset; training_args requests an evaluation
# pass on eval_dataset at the end of every epoch.
trainer.train()



In [None]:
# Run one final evaluation with the trained model on the eval_dataset given
# to the Trainer, and print whatever trainer.evaluate() returns.
results = trainer.evaluate()
print(results)



In [None]:
# Persist the model first and then the tokenizer, both into the same
# directory, so the pair can be reloaded together later.
for artifact in (model, tokenizer):
    artifact.save_pretrained("fine_tuned_flan_t5")