In [None]:
!pip install datasets



In [None]:
# For Google Colab - Upload JSON files for train and test separately
from google.colab import files
from datasets import Dataset
from transformers import AutoTokenizer, TrainingArguments, Trainer, AutoModelForSeq2SeqLM
import pandas as pd

# Step 1: Upload files
def _upload_single_file(prompt):
    """Prompt for a Colab upload and return ``(uploaded, filename)``.

    ``uploaded`` is the mapping returned by ``files.upload()`` and ``filename``
    is its first key.

    Raises:
        ValueError: if nothing was uploaded (e.g. the dialog was dismissed),
            instead of failing later with a bare IndexError.
    """
    print(prompt)
    uploaded = files.upload()
    if not uploaded:
        raise ValueError("No file was uploaded; re-run this cell and choose a file.")
    return uploaded, next(iter(uploaded))


uploaded_train, train_file = _upload_single_file("Please upload your TRAIN JSON file:")
uploaded_test, test_file = _upload_single_file("Please upload your TEST JSON file:")

# Read the uploaded JSON Lines files (one JSON object per line) into pandas DataFrames
train_df = pd.read_json(train_file, lines=True)
test_df = pd.read_json(test_file, lines=True)

# Fail fast if the columns consumed by the tokenization step are missing
for split_name, frame in (("train", train_df), ("test", test_df)):
    missing = {"input", "output"} - set(frame.columns)
    if missing:
        raise ValueError(f"{split_name} file is missing required column(s): {sorted(missing)}")

# Convert DataFrames to HuggingFace Datasets
dataset_train = Dataset.from_pandas(train_df)
dataset_test = Dataset.from_pandas(test_df)

# Carve a validation set out of the uploaded test file: half becomes validation,
# the other half stays as the held-out test set (fixed seed for a repeatable split).
split = dataset_test.train_test_split(test_size=0.5, seed=42)
dataset_validation = split['train']
dataset_test = split['test']

# Final combined dataset dict
dataset = {
    'train': dataset_train,
    'validation': dataset_validation,
    'test': dataset_test
}
print(dataset)

# Step 3: Tokenization
# One shared tokenizer: preprocess_function encodes with it and the evaluation
# loop decodes with it.
base_checkpoint = 't5-small'
tokenizer = AutoTokenizer.from_pretrained(base_checkpoint)

def preprocess_function(example, max_length=64):
    """Tokenize a batch of ``input``/``output`` text pairs for seq2seq training.

    Args:
        example: Mapping with ``'input'`` and ``'output'`` entries (a batch of
            strings when called through ``Dataset.map(..., batched=True)``).
        max_length: Fixed token length that both inputs and targets are
            truncated and padded to.

    Returns:
        The tokenizer output for the inputs, with an added ``"labels"`` entry
        holding the token ids of the targets.
    """
    # truncation=True makes max_length a hard cap. With truncation disabled,
    # max_length only drives padding, so an over-long example keeps its full
    # length and rows no longer share one fixed size.
    model_inputs = tokenizer(example['input'], truncation=True, max_length=max_length, padding='max_length')
    labels = tokenizer(example['output'], truncation=True, max_length=max_length, padding='max_length')
    # NOTE(review): labels keep their padding token ids, so padded positions are
    # presumably treated as real targets by the loss. Masking them (commonly with
    # -100) would also require the evaluation cell to stop decoding the `labels`
    # column directly -- confirm and change both together.
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Run the same batched preprocessing over each split, in train, validation, test order.
dataset_train, dataset_validation, dataset_test = (
    dataset[split_key].map(preprocess_function, batched=True)
    for split_key in ('train', 'validation', 'test')
)

# Step 4: Define TrainingArguments
training_args = TrainingArguments(
    # Name the checkpoint directory explicitly: it documents where checkpoints
    # are written and keeps the cell working on transformers releases where
    # output_dir has no default (NOTE(review): confirm against the pinned version).
    output_dir="t5_small_checkpoints",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=20,
    report_to=[],  # This explicitly disables logging to wandb/huggingface
)

# Step 5: Load model and train
import inspect

model = AutoModelForSeq2SeqLM.from_pretrained('t5-small')

# The Trainer call below previously triggered a warning in this notebook's output.
# Newer transformers releases take the tokenizer as `processing_class` and
# deprecate the `tokenizer` keyword; older releases only know `tokenizer`.
# Pick whichever keyword the installed Trainer accepts.
_trainer_params = inspect.signature(Trainer.__init__).parameters
_tokenizer_kwarg = "processing_class" if "processing_class" in _trainer_params else "tokenizer"

# NOTE(review): eval_dataset is supplied, but the captured training log only shows
# training loss -- presumably no evaluation schedule is configured; confirm.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset_train,
    eval_dataset=dataset_validation,
    **{_tokenizer_kwarg: tokenizer},
)

trainer.train()


Please upload your TRAIN JSON file:


Saving train_data.txt to train_data.txt
Please upload your TEST JSON file:


Saving test_data.txt to test_data.txt
{'train': Dataset({
    features: ['input', 'output'],
    num_rows: 1416
}), 'validation': Dataset({
    features: ['input', 'output'],
    num_rows: 177
}), 'test': Dataset({
    features: ['input', 'output'],
    num_rows: 177
})}


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

Map:   0%|          | 0/1416 [00:00<?, ? examples/s]

Map:   0%|          | 0/177 [00:00<?, ? examples/s]

Map:   0%|          | 0/177 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

  trainer = Trainer(


Step,Training Loss
500,0.8232
1000,0.0457
1500,0.0294
2000,0.0223
2500,0.0201
3000,0.0177
3500,0.0173


TrainOutput(global_step=3540, training_loss=0.13799395588158214, metrics={'train_runtime': 395.5057, 'train_samples_per_second': 71.605, 'train_steps_per_second': 8.951, 'total_flos': 479109977210880.0, 'train_loss': 0.13799395588158214, 'epoch': 20.0})

In [None]:
!pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.5-py3-none-any.whl.metadata (9.5 kB)
Downloading evaluate-0.4.5-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.5


In [None]:
!pip install rouge-score


Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24934 sha256=2c2b1b09272c6220be7765095f87e667f6091aac0f58c00205c55a878f1b04ca
  Stored in directory: /root/.cache/pip/wheels/1e/19/43/8a442dc83660ca25e163e1bd1f89919284ab0d0c1475475148
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2


In [None]:
import os
import torch
from evaluate import load
import editdistance
import pandas as pd
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_score import rouge_scorer

def get_model_size(path):
    """Return the combined size, in MB (1024 * 1024 bytes), of all files under ``path``.

    The directory tree rooted at ``path`` is walked recursively and the size of
    every file found is added up.
    """
    byte_count = sum(
        os.path.getsize(os.path.join(folder, file_name))
        for folder, _subdirs, file_names in os.walk(path)
        for file_name in file_names
    )
    return byte_count / (1024 * 1024)

# Generate a prediction for every test example and collect it with its reference
predictions = []
references = []

model.eval()
for example in dataset_test:
    # Rows were padded to a fixed length during preprocessing, so hand generate()
    # the attention mask explicitly rather than relying on it to work out which
    # positions are padding.
    input_ids = torch.tensor(example['input_ids']).unsqueeze(0).to(model.device)
    attention_mask = torch.tensor(example['attention_mask']).unsqueeze(0).to(model.device)

    with torch.no_grad():
        output_ids = model.generate(input_ids=input_ids, attention_mask=attention_mask, max_length=128)

    pred = tokenizer.decode(output_ids[0], skip_special_tokens=True).strip()
    # The reference is decoded from the tokenized labels, so it goes through the
    # same tokenizer round trip as the prediction.
    label = tokenizer.decode(example['labels'], skip_special_tokens=True).strip()

    predictions.append(pred)
    references.append(label)

# Evaluation metric 1: Exact Match
# Load the metric, score all pairs, then pull out the single "exact_match" value.
exact_match = load("exact_match")
em_scores = exact_match.compute(predictions=predictions, references=references)
em_result = em_scores["exact_match"]

# Evaluation metric 2: Levenshtein Similarity
def compute_levenshtein_similarity(predictions, references):
    """Mean normalized Levenshtein similarity over prediction/reference pairs.

    Each pair is stripped of surrounding whitespace and scored as
    ``1 - edit_distance / max(len(pred), len(ref))``; two empty strings score 1.0.

    Args:
        predictions: Iterable of predicted strings.
        references: Iterable of reference strings, paired positionally.

    Returns:
        The average similarity in [0, 1], or 0.0 when there are no pairs
        (previously this raised ZeroDivisionError).
    """
    similarities = []
    for pred, ref in zip(predictions, references):
        pred = pred.strip()
        ref = ref.strip()
        max_len = max(len(pred), len(ref))
        if max_len == 0:
            # Both strings are empty: identical by definition.
            similarities.append(1.0)
            continue
        similarities.append(1.0 - editdistance.eval(pred, ref) / max_len)

    if not similarities:
        return 0.0
    return sum(similarities) / len(similarities)

# Average character-level similarity between generated and reference strings.
ls_result = compute_levenshtein_similarity(predictions=predictions, references=references)

# Evaluation metric 3: BLEU Score
def compute_bleu_score(predictions, references, max_order=4):
    """Mean sentence-level BLEU over whitespace-tokenized prediction/reference pairs.

    The n-gram order is capped at the length of the shorter sequence. With a
    fixed order of four, a pair shorter than four tokens has no higher-order
    n-grams to match, so even an identical one-token prediction is scored far
    below 1.0 once smoothing fills in the missing orders.

    Args:
        predictions: Iterable of predicted strings.
        references: Iterable of reference strings, paired positionally.
        max_order: Largest n-gram order to use (uniform weights across orders).

    Returns:
        The average BLEU score, or 0.0 when there are no pairs (previously this
        raised ZeroDivisionError). A pair with an empty prediction or an empty
        reference contributes 0.0.
    """
    smoothing = SmoothingFunction().method1
    bleu_scores = []

    for pred, ref in zip(predictions, references):
        pred_tokens = pred.strip().split()
        ref_tokens = ref.strip().split()

        if not pred_tokens or not ref_tokens:
            bleu_scores.append(0.0)
            continue

        # Use only the n-gram orders that both sequences can actually contain.
        order = min(max_order, len(pred_tokens), len(ref_tokens))
        weights = tuple(1.0 / order for _ in range(order))
        bleu_scores.append(
            sentence_bleu([ref_tokens], pred_tokens, weights=weights, smoothing_function=smoothing)
        )

    if not bleu_scores:
        return 0.0
    return sum(bleu_scores) / len(bleu_scores)

# Average sentence-level BLEU across the test predictions.
bleu_result = compute_bleu_score(predictions=predictions, references=references)

# Evaluation metric 4: ROUGE Scores
def compute_rouge_scores(predictions, references):
    """Mean ROUGE-1 and ROUGE-L F-measures over prediction/reference pairs.

    Args:
        predictions: Iterable of predicted strings.
        references: Iterable of reference strings, paired positionally.

    Returns:
        A dict with keys ``'rouge1'`` and ``'rougeL'`` holding the average
        F-measure of each metric. Both are 0.0 when there are no pairs
        (previously this raised ZeroDivisionError).
    """
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
    rouge1_scores = []
    rougeL_scores = []

    for pred, ref in zip(predictions, references):
        # The scorer is called with the reference first, then the prediction.
        scores = scorer.score(ref.strip(), pred.strip())
        rouge1_scores.append(scores['rouge1'].fmeasure)
        rougeL_scores.append(scores['rougeL'].fmeasure)

    if not rouge1_scores:
        return {'rouge1': 0.0, 'rougeL': 0.0}

    pair_count = len(rouge1_scores)
    return {
        'rouge1': sum(rouge1_scores) / pair_count,
        'rougeL': sum(rougeL_scores) / pair_count
    }

rouge_results = compute_rouge_scores(predictions, references)

# Collect every metric into a single-row summary table
results = pd.DataFrame([{
    "Model": "t5-small-finetuned",
    "Exact Match": em_result,
    "Levenshtein Similarity": ls_result,
    "BLEU Score": bleu_result,
    "ROUGE-1": rouge_results['rouge1'],
    "ROUGE-L": rouge_results['rougeL']
}])
# Leave the frame as the cell's last expression so the notebook renders it as a
# table; print() falls back to plain text, which wraps wide rows across lines.
results

Downloading builder script: 0.00B [00:00, ?B/s]

                Model  Exact Match  Levenshtein Similarity  BLEU Score  \
0  t5-small-finetuned     0.932203                0.992464      0.3751   

    ROUGE-1   ROUGE-L  
0  0.980957  0.980957  


In [None]:
# Pair each prediction with its reference, one row per test example.
df = pd.DataFrame({
    'predictions': predictions,
    'references': references,
})

In [None]:
# Display predictions next to their references for a quick visual comparison.
df

Unnamed: 0,predictions,references
0,"AddMarker('Sydney Opera House', [-33.8568, 151...","AddMarker('Sydney Opera House', [-33.8568, 151..."
1,"AddVector('polyline', 'polylines_monorails_Tok...","AddVector('polyline', 'polylines_monorails_Tok..."
2,ZoomIn(6),ZoomIn(6)
3,ZoomOut(2),ZoomOut(2)
4,"MoveToExtent(40.7128, -74.0060, 34.0522, -118....","MoveToExtent(40.7128, -74.0060, 34.0522, -118...."
...,...,...
172,"Cartography('fill', 'lightgray', [2])","Cartography('fill', 'lightgray', [2])"
173,"MoveToExtent(-8.5684, -87.5863, 29.2017, -31.4...","MoveToExtent(-8.5684, -87.5863, 29.2017, -31.4..."
174,"AddMarker('Park', [-83.4835, 41.8363])","AddMarker('Park', [-83.4835, 41.8363])"
175,"AddVector('polyline', 'polylines_canals_Venice...","AddVector('polyline', 'polylines_canals_Venice..."


In [None]:
!python -m transformers.onnx --model=t5_finetuned onnx/ --opset 17

2025-07-21 10:46:24.020719: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753094784.045670   10453 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753094784.052942   10453 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-07-21 10:46:24.077113: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88

In [None]:
# Write the fine-tuned model and its tokenizer into the same local directory.
checkpoint_dir = "t5_finetuned"
model.save_pretrained(checkpoint_dir)
tokenizer.save_pretrained(checkpoint_dir)

('t5_finetuned/tokenizer_config.json',
 't5_finetuned/special_tokens_map.json',
 't5_finetuned/spiece.model',
 't5_finetuned/added_tokens.json',
 't5_finetuned/tokenizer.json')

In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers.onnx import export
from transformers.onnx.features import FeaturesManager
from pathlib import Path

# Reload the tokenizer and fine-tuned model from the saved checkpoint directory
model_path = "t5_finetuned"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSeq2SeqLM.from_pretrained(model_path)

# Export feature and destination file for the ONNX graph
feature = "seq2seq-lm"
onnx_path = Path("t5_finetuned.onnx")

# Look up the ONNX config constructor for this model/feature pair, then build
# the config from the model's own configuration
model_kind, model_onnx_config = FeaturesManager.check_supported_model_or_raise(
    model,
    feature=feature,
)
onnx_config = model_onnx_config(model.config)

# Export with a higher opset (>=14 to support `aten::triu`). The call stays the
# last expression so the returned input/output name lists are shown by the cell.
export(
    output=onnx_path,
    opset=17,
    config=onnx_config,
    model=model,
    preprocessor=tokenizer,
)


  if sequence_length != 1:


(['input_ids',
  'attention_mask',
  'decoder_input_ids',
  'decoder_attention_mask'],
 ['logits'])

In [None]:
from google.colab import files
# Hand the exported ONNX file to the browser as a download.
onnx_artifact = "t5_finetuned.onnx"
files.download(onnx_artifact)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
from google.colab import files
# Hand the saved tokenizer definition to the browser as a download.
tokenizer_artifact = "t5_finetuned/tokenizer.json"
files.download(tokenizer_artifact)