<a href="https://colab.research.google.com/github/kxaercastro/Aether_Final_Project/blob/main/Aether_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# ============================================================
# 1. Install Dependencies
# ============================================================
!pip install datasets transformers
!pip install evaluate
!pip install rouge_score

from transformers import (
    Trainer,
    TrainingArguments,
    AutoModelForSeq2SeqLM,
    DataCollatorForSeq2Seq
)

from datasets import load_dataset
from transformers import (
    AutoTokenizer, AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq,
    TrainingArguments, Trainer
)

import evaluate
import torch
import numpy as np
import pandas as pd
import nltk
nltk.download("punkt")
import time
from scipy.stats import uniform
from transformers import T5ForConditionalGeneration, T5Tokenizer, TrainingArguments, Trainer, DataCollatorForSeq2Seq
import os
from random import choice
from evaluate import load as load_metric
from datasets import load_dataset

Collecting evaluate
  Downloading evaluate-0.4.6-py3-none-any.whl.metadata (9.5 kB)
Downloading evaluate-0.4.6-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.6
Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24934 sha256=f5649e2778b21f6277d5365c8ecfcdb8d8e90a31ed957f2957127ea4eb389fa2
  Stored in directory: /root/.cache/pip/wheels/85/9d/af/01feefbe7d55ef5468796f0c68225b6788e85d9d0a281e7a70
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [6]:
# ============================================================
# 2. Load Dataset (BEA-19 Subset)
# ============================================================
dataset = load_dataset("juancavallotti/bea-19-corruption")

# ✅ Use only small subset to avoid high memory use
train_data = dataset["train"].select(range(1000))
eval_data = dataset["train"].select(range(200, 250))

print(f"Train size: {len(train_data)} | Eval size: {len(eval_data)}")

Repo card metadata block was not found. Setting CardData to empty.


Train size: 1000 | Eval size: 50


In [7]:
# ============================================================
# 3. Load (T5)
# ============================================================
model_name = "vennify/t5-base-grammar-correction"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)


In [None]:
def tokenize_function(examples):
    tokenized_inputs = tokenizer(examples['sentence'], truncation=True, padding='max_length', max_length=256)
    tokenized_labels = tokenizer(examples['broken'], truncation=True, padding='max_length', max_length=256)
    tokenized_inputs['labels'] = tokenized_labels['input_ids']
    return tokenized_inputs

In [13]:
tokenized_train_data = train_data.map(tokenize_function, batched=True)
tokenized_eval_data = eval_data.map(tokenize_function, batched=True)

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

In [8]:
# ============================================================
# 5. Evaluation Metrics (BLEU + ROUGE)
# ============================================================
bleu = evaluate.load("bleu")
rouge = evaluate.load("rouge")

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.where(predictions != -100, predictions, tokenizer.pad_token_id)
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    bleu_score = bleu.compute(predictions=decoded_preds, references=[[l] for l in decoded_labels])["score"]
    rouge_score = rouge.compute(predictions=decoded_preds, references=decoded_labels)

    return {
        "bleu": round(bleu_score, 2),
        "rouge1": round(rouge_score["rouge1"], 2),
        "rougeL": round(rouge_score["rougeL"], 2)
    }

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

In [9]:
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    device = torch.device("cpu")
    print("GPU not available, using CPU.")

Using GPU: Tesla T4


In [10]:
log_columns = [
    'learning_rate', 'batch_size', 'num_train_epochs',
    'gradient_accumulation_steps',
    'max_grad_norm',
    'adam_epsilon',
    'warmup_steps',
    'weight_decay',
    'label_smoothing',

    # METRICS
    'eval_loss', 'bleu', 'rouge', 'time_taken'
]

experiment_log = pd.DataFrame(columns=log_columns)


In [11]:
param_dist = {
    'learning_rate': uniform(loc=1e-5, scale=1e-4 - 1e-5),      # Learning rate between 1e-5 and 1e-4
    'batch_size': [1, 2, 4, 8],                                 # Batch sizes to try
    'num_train_epochs': [2, 3, 4],                              # Number of epochs
    'gradient_accumulation_steps': [1, 2, 4, 8],                # Accumulate gradients
    'max_grad_norm': uniform(loc=0.5, scale=1.5),               # Gradient clipping between 0.5 and 2.0
    'adam_epsilon': uniform(loc=1e-8, scale=1e-7 - 1e-8),       # Epsilon between 1e-8 and 1e-7
    'warmup_steps': [0, 300, 600],                              # Warmup steps
    'weight_decay': uniform(loc=0.0, scale=0.1),                # Weight decay between 0.0 and 0.1
    'label_smoothing': uniform(loc=0.0, scale=0.1),             # Label smoothing between 0.0 and 0.1
}


In [14]:
training_args = TrainingArguments(
    output_dir='./results',
    eval_strategy="epoch",
    logging_dir='./logs',
    logging_steps=500,
    num_train_epochs=1,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    report_to="none",
)

# Trainer Setup
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_data,
    eval_dataset=tokenized_eval_data,
    tokenizer=tokenizer,
    data_collator=DataCollatorForSeq2Seq(
        tokenizer=tokenizer,
        model=model,
        padding=True,
        return_tensors="pt"
    )
)

  trainer = Trainer(


In [15]:
best_score = np.inf
best_params = {}

In [16]:
model_save_path = './best_model'
os.makedirs(model_save_path, exist_ok=True)

In [None]:
for _ in range(10):  # 10 trials for random search
    # Randomly sample hyperparameters
    current_params = {
        'learning_rate': choice(param_dist['learning_rate'].rvs(size=1)),
        'batch_size': choice(param_dist['batch_size']),
        'num_train_epochs': choice(param_dist['num_train_epochs']),
        'gradient_accumulation_steps': choice(param_dist['gradient_accumulation_steps']),
        'max_grad_norm': choice(param_dist['max_grad_norm'].rvs(size=1)),
        'adam_epsilon': choice(param_dist['adam_epsilon'].rvs(size=1)),
        'warmup_steps': choice(param_dist['warmup_steps']),
        'weight_decay': choice(param_dist['weight_decay'].rvs(size=1)),
        'label_smoothing': choice(param_dist['label_smoothing'].rvs(size=1)),
    }

    # Training Arguments
    training_args = TrainingArguments(
        output_dir='./results',
        eval_strategy="epoch",
        logging_dir='./logs',
        logging_steps=500,

        num_train_epochs=current_params['num_train_epochs'],
        per_device_train_batch_size=current_params['batch_size'],
        per_device_eval_batch_size=current_params['batch_size'],
        gradient_accumulation_steps=current_params['gradient_accumulation_steps'],

        max_grad_norm=current_params['max_grad_norm'],
        adam_epsilon=current_params['adam_epsilon'],
        warmup_steps=current_params['warmup_steps'],
        weight_decay=current_params['weight_decay'],
        label_smoothing_factor=current_params['label_smoothing'],

        report_to="none",
    )

    # Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train_data,
        eval_dataset=tokenized_eval_data,
        data_collator=DataCollatorForSeq2Seq(tokenizer, model=model)
    )

    # Measure time for training and evaluation
    start_time = time.time()
    trainer.train()
    eval_results = trainer.evaluate()

    # Get predictions for metrics
    predictions_output = trainer.predict(tokenized_eval_data)
    predictions = predictions_output.predictions[0] if isinstance(predictions_output.predictions, tuple) else predictions_output.predictions
    predicted_token_ids = np.argmax(predictions, axis=-1)
    decoded_preds = tokenizer.batch_decode(predicted_token_ids, skip_special_tokens=True)

    labels = eval_data['broken']
    references_for_metrics = [[label] for label in labels]

    bleu_result = bleu.compute(predictions=decoded_preds, references=references_for_metrics)
    bleu_score = bleu_result['bleu'] if bleu_result and 'bleu' in bleu_result else 0.0

    rouge_result = rouge.compute(predictions=decoded_preds, references=references_for_metrics)
    rouge_score = rouge_result

    eval_loss = eval_results['eval_loss']
    time_taken = time.time() - start_time

    new_row_df = pd.DataFrame([
        {
            'learning_rate': current_params['learning_rate'],
            'batch_size': current_params['batch_size'],
            'num_train_epochs': current_params['num_train_epochs'],
            'gradient_accumulation_steps': current_params['gradient_accumulation_steps'],
            'max_grad_norm': current_params['max_grad_norm'],
            'adam_epsilon': current_params['adam_epsilon'],
            'warmup_steps': current_params['warmup_steps'],
            'weight_decay': current_params['weight_decay'],
            'label_smoothing': current_params['label_smoothing'],

            'eval_loss': eval_loss,
            'bleu': bleu_score,
            'rouge': rouge_score,
            'time_taken': time_taken
        }
    ])

    experiment_log = pd.concat([experiment_log, new_row_df], ignore_index=True)

    if eval_loss < best_score:
        best_score = eval_loss
        trainer.save_model(model_save_path)


Epoch,Training Loss,Validation Loss
1,No log,0.312978
2,0.858300,0.295219
3,0.858300,0.287362
4,0.314400,0.286188


  experiment_log = pd.concat([experiment_log, new_row_df], ignore_index=True)


Epoch,Training Loss,Validation Loss
1,No log,0.275579
2,0.296100,0.27215
3,0.296100,0.268592
4,0.285900,0.26847


Epoch,Training Loss,Validation Loss
1,No log,0.26465
2,0.275000,0.263263
3,0.275000,0.260381
4,0.272700,0.260292


Epoch,Training Loss,Validation Loss
1,No log,0.261308
2,0.264400,0.258689
3,0.264400,0.256638
4,0.265100,0.256578


Epoch,Training Loss,Validation Loss
1,No log,0.257478
2,0.259600,0.256111
3,0.259600,0.254673
4,0.260900,0.254373


Epoch,Training Loss,Validation Loss
1,No log,0.255704
2,0.257300,0.255213
3,0.257300,0.254111
4,0.258600,0.253266


Epoch,Training Loss,Validation Loss
1,No log,0.254534
2,0.255900,0.253575
3,0.255900,0.253013
4,0.257200,0.252276


Epoch,Training Loss,Validation Loss
1,No log,0.253347
2,0.255000,0.252524
3,0.255000,0.25188
4,0.256200,0.251672


Epoch,Training Loss,Validation Loss
1,No log,0.253672
2,0.254400,0.252879
3,0.254400,0.251495
4,0.255500,0.251234


Epoch,Training Loss,Validation Loss
1,No log,0.252719
2,0.254100,0.253383
3,0.254100,0.251541
4,0.255100,0.251007


In [None]:
# Save the log to Excel
excel_file_path = 'aether_hyperparameter_tuning_log.xlsx'
experiment_log.to_excel(excel_file_path, index=False)

In [None]:
if not experiment_log.empty:
    best_run = experiment_log.loc[experiment_log['eval_loss'].idxmin()]
    print(f"Best Hyperparameters: {best_run}")
else:
    print("Experiment log is empty. Please run the hyperparameter tuning loop first to populate the log.")

Best Hyperparameters: learning_rate                                                           0.000044
batch_size                                                                     1
num_train_epochs                                                               4
gradient_accumulation_steps                                                    4
max_grad_norm                                                           1.926071
adam_epsilon                                                                 0.0
warmup_steps                                                                   0
weight_decay                                                            0.059866
label_smoothing                                                         0.015602
eval_loss                                                               0.251007
bleu                                                                    0.981132
rouge                          {'rouge1': 0.9886173474091828, 'rouge2': 0.976...
time_t

In [None]:
from google.colab import files
files.download(excel_file_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
data = {
    'sentence': [
        "I like play soccer.",
        "She are going to the store.",
        "He dont know the answer.",
        "It raining outside.",
        "They was happy about the news.",
        "She can sings very well.",
        "The dog chased it tail.",
        "We was waiting for the bus.",
        "My mother is a doctor she works hard.",
        "I did not done my homework."
    ],
    'broken': [
        "I like to play soccer.",
        "She is going to the store.",
        "He doesn't know the answer.",
        "It is raining outside.",
        "They were happy about the news.",
        "She can sing very well.",
        "The dog chased its tail.",
        "We were waiting for the bus.",
        "My mother is a doctor; she works hard.",
        "I did not do my homework."
    ]
}

In [None]:
from datasets import Dataset

custom_dataset = Dataset.from_dict(data)

model_save_path = './best_model'
model = T5ForConditionalGeneration.from_pretrained(model_save_path)
tokenizer = T5Tokenizer.from_pretrained(model_save_path)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

def tokenize_function(examples):
    tokenized_inputs = tokenizer(examples['sentence'], truncation=True, padding='max_length', max_length=256)
    tokenized_labels = tokenizer(examples['broken'], truncation=True, padding='max_length', max_length=256)
    tokenized_inputs['labels'] = tokenized_labels['input_ids']
    return tokenized_inputs

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [None]:
custom_tokenized = custom_dataset.map(tokenize_function, batched=True)

inputs = custom_tokenized['sentence']
predictions = []

for sentence in inputs:
    input_ids = tokenizer.encode(sentence, return_tensors="pt").to(device)

    output = model.generate(input_ids, max_length=256, num_beams=5, early_stopping=True)

    prediction = tokenizer.decode(output[0], skip_special_tokens=True)
    predictions.append(prediction)

gold_standard = custom_tokenized['broken']

# Use the 'bleu' and 'rouge' objects loaded from 'evaluate' in cell RbviGiLWEtd4
# The variables 'bleu_score' and 'rouge_score' from the tuning loop are floats/dicts, not the metric objects.
test_bleu_result = bleu.compute(predictions=predictions, references=[[g] for g in gold_standard])
test_bleu = test_bleu_result['bleu']

test_rouge_result = rouge.compute(predictions=predictions, references=[[g] for g in gold_standard])
test_rouge = test_rouge_result

print(f"Test BLEU: {test_bleu}")
print(f"Test ROUGE: {test_rouge}")
print("-" * 50)
print("-" * 50)

for i, sentence in enumerate(inputs):
    print(f"Input Sentence (Broken): {sentence}")
    print(f"Model Output: {predictions[i]}")
    print(f"Gold Standard (Corrected): {gold_standard[i]}")
    print("-" * 50)

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Test BLEU: 0.5859493876324035
Test ROUGE: {'rouge1': np.float64(0.8590476190476191), 'rouge2': np.float64(0.6287301587301587), 'rougeL': np.float64(0.8606637806637807), 'rougeLsum': np.float64(0.8601587301587301)}
--------------------------------------------------
--------------------------------------------------
Input Sentence (Broken): I like play soccer.
Model Output: I like play soccer.
Gold Standard (Corrected): I like to play soccer.
--------------------------------------------------
Input Sentence (Broken): She are going to the store.
Model Output: She are going to the store.
Gold Standard (Corrected): She is going to the store.
--------------------------------------------------
Input Sentence (Broken): He dont know the answer.
Model Output: He dont know the answer.
Gold Standard (Corrected): He doesn't know the answer.
--------------------------------------------------
Input Sentence (Broken): It raining outside.
Model Output: It raining outside.
Gold Standard (Corrected): It 

# Task
Retrain the model using the identified best hyperparameters, then re-evaluate its performance on the custom dataset by re-calculating BLEU and ROUGE scores to confirm improved output quality against the gold standard.

## Retrain Model with Best Hyperparameters

### Subtask:
Reload the original model and tokenizer, then configure a new `TrainingArguments` instance using the `best_run` hyperparameters identified during the random search. Create a new `Trainer` instance with this configuration and train the model on the full training dataset. Finally, save this newly trained model to the `best_model` path.


**Reasoning**:
The subtask requires retraining the model with the best hyperparameters identified during the random search. This involves reloading the original model and tokenizer, configuring new training arguments based on the best run, initializing a new trainer, training the model, and then saving the retrained model.



In [17]:
print("Reloading original model and tokenizer...")
model_name = "vennify/t5-base-grammar-correction"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

print("Configuring TrainingArguments with best hyperparameters...")
# Create new TrainingArguments with best_run hyperparameters
training_args_best = TrainingArguments(
    output_dir='./results_best_run',
    eval_strategy="no", # No evaluation during this final training run
    logging_dir='./logs_best_run',
    logging_steps=500,
    report_to="none",
    save_strategy="no", # Only save at the end explicitly

    num_train_epochs=best_run['num_train_epochs'],
    per_device_train_batch_size=best_run['batch_size'],
    gradient_accumulation_steps=best_run['gradient_accumulation_steps'],
    learning_rate=best_run['learning_rate'],
    max_grad_norm=best_run['max_grad_norm'],
    adam_epsilon=best_run['adam_epsilon'],
    warmup_steps=best_run['warmup_steps'],
    weight_decay=best_run['weight_decay'],
    label_smoothing_factor=best_run['label_smoothing'],
)

print("Initializing Trainer for final training...")
# Initialize a new Trainer with the reloaded model and best training arguments
trainer_best = Trainer(
    model=model,
    args=training_args_best,
    train_dataset=tokenized_train_data, # Use the full tokenized_train_data
    tokenizer=tokenizer,
    data_collator=DataCollatorForSeq2Seq(tokenizer, model=model),
)

print("Starting final model training with best hyperparameters...")
trainer_best.train()

print(f"Saving the retrained model to {model_save_path}...")
# Save the retrained model
trainer_best.save_model(model_save_path)

print("Model retraining and saving complete.")

Reloading original model and tokenizer...
Configuring TrainingArguments with best hyperparameters...
Initializing Trainer for final training...


  trainer_best = Trainer(


Starting final model training with best hyperparameters...


Step,Training Loss
500,2.6702


Saving the retrained model to ./best_model...
Model retraining and saving complete.


# Task
Re-evaluate the performance of the retrained model on the custom dataset by generating predictions, calculating the BLEU and ROUGE scores, and printing a comparison of input, model output, and gold standard sentences.

## Re-evaluate Model Performance

### Subtask:
Load the retrained model, generate predictions on the custom dataset, calculate BLEU and ROUGE scores, and print a comparison of input, model output, and gold standard sentences.


**Reasoning**:
The model has been retrained with the best hyperparameters and saved. Now, I need to reload this retrained model and its tokenizer, then re-evaluate its performance on the custom dataset by generating predictions and calculating BLEU and ROUGE scores, as per the subtask instructions.



**Reasoning**:
The previous cell failed because `custom_dataset` was not defined. I need to re-initialize `custom_dataset` and `data` (which `custom_dataset` depends on) before using it. I will include the code from previous cells that defines `data` and `custom_dataset` to ensure they are available.



In [24]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

model_save_path = './best_model' # This is the path where your retrained model is saved

tokenizer = T5Tokenizer.from_pretrained(model_save_path)
model = T5ForConditionalGeneration.from_pretrained(model_save_path)

print("Model and tokenizer loaded from local path successfully.")

Model and tokenizer loaded from local path successfully.


In [19]:
from datasets import Dataset

# Re-define 'data' and 'custom_dataset' as they might have been lost in the kernel state
data = {
    'sentence': [
        "I like play soccer.",
        "She are going to the store.",
        "He dont know the answer.",
        "It raining outside.",
        "They was happy about the news.",
        "She can sings very well.",
        "The dog chased it tail.",
        "We was waiting for the bus.",
        "My mother is a doctor she works hard.",
        "I did not done my homework."
    ],
    'broken': [
        "I like to play soccer.",
        "She is going to the store.",
        "He doesn't know the answer.",
        "It is raining outside.",
        "They were happy about the news.",
        "She can sing very well.",
        "The dog chased its tail.",
        "We were waiting for the bus.",
        "My mother is a doctor; she works hard.",
        "I did not do my homework."
    ]
}
custom_dataset = Dataset.from_dict(data)


print(f"Loading retrained model and tokenizer from {model_save_path}...")
model = T5ForConditionalGeneration.from_pretrained(model_save_path)
tokenizer = T5Tokenizer.from_pretrained(model_save_path)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

print("Generating predictions on the custom dataset...")
custom_tokenized = custom_dataset.map(tokenize_function, batched=True) # Ensure custom_dataset is tokenized with the correct tokenizer

inputs = custom_tokenized['sentence']
predictions = []

for sentence in inputs:
    input_ids = tokenizer.encode(sentence, return_tensors="pt").to(device)
    output = model.generate(input_ids, max_length=256, num_beams=5, early_stopping=True)
    prediction = tokenizer.decode(output[0], skip_special_tokens=True)
    predictions.append(prediction)

gold_standard = custom_tokenized['broken']

print("Calculating BLEU and ROUGE scores...")
test_bleu_result = bleu.compute(predictions=predictions, references=[[g] for g in gold_standard])
test_bleu = test_bleu_result['bleu']

test_rouge_result = rouge.compute(predictions=predictions, references=[[g] for g in gold_standard])
test_rouge = test_rouge_result

print(f"Test BLEU: {test_bleu}")
print(f"Test ROUGE: {test_rouge}")
print("-" * 50)
print("-" * 50)

print("Comparison of Input, Model Output, and Gold Standard:")
for i, sentence in enumerate(inputs):
    print(f"Input Sentence (Broken): {sentence}")
    print(f"Model Output: {predictions[i]}")
    print(f"Gold Standard (Corrected): {gold_standard[i]}")
    print("-" * 50)

Loading retrained model and tokenizer from ./best_model...
Generating predictions on the custom dataset...


Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Calculating BLEU and ROUGE scores...
Test BLEU: 0.8517477347783053
Test ROUGE: {'rouge1': np.float64(0.9371428571428572), 'rouge2': np.float64(0.85), 'rougeL': np.float64(0.9371428571428572), 'rougeLsum': np.float64(0.9371428571428572)}
--------------------------------------------------
--------------------------------------------------
Comparison of Input, Model Output, and Gold Standard:
Input Sentence (Broken): I like play soccer.
Model Output: I like to play soccer.
Gold Standard (Corrected): I like to play soccer.
--------------------------------------------------
Input Sentence (Broken): She are going to the store.
Model Output: She is going to the store.
Gold Standard (Corrected): She is going to the store.
--------------------------------------------------
Input Sentence (Broken): He dont know the answer.
Model Output: He doesn't know the answer.
Gold Standard (Corrected): He doesn't know the answer.
--------------------------------------------------
Input Sentence (Broken): It

In [25]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Path to your retrained best model
model_path = './best_model'

tokenizer = T5Tokenizer.from_pretrained(model_path)
model = T5ForConditionalGeneration.from_pretrained(model_path)

print("Model and tokenizer loaded successfully from local path.")

Model and tokenizer loaded successfully from local path.


In [26]:
import shutil
from google.colab import files
import os

# Define the path to your best model directory
model_directory = './best_model'

# Define the name for the zip file
zip_filename = 'best_model.zip'

# Create a zip archive of the model directory
shutil.make_archive(os.path.splitext(zip_filename)[0], 'zip', model_directory)

print(f"'{model_directory}' has been zipped to '{zip_filename}'.")

# Download the zip file
files.download(zip_filename)

print("Download initiated. Please check your browser's downloads.")

'./best_model' has been zipped to 'best_model.zip'.


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Download initiated. Please check your browser's downloads.
