In [None]:
!pip install transformers datasets torch

In [2]:
from datasets import load_dataset
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch
import time

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [15]:
dataset = load_dataset('wmt/wmt19', 'de-en', split='validation')

In [6]:
print(dataset['translation'][0]['en'])

Munich 1856: Four maps that will change your view of the city


In [7]:
def translate_batch(input_ids, model):
    with torch.no_grad():
        outputs = model.generate(input_ids)
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)

In [12]:
def calculate_inference_time(model):
    batch_size = 16
    total_inference_time = 0

    start_time = time.time()

    translations = []

    for i in range(0, len(dataset['translation']), batch_size):
        batch_input_ids = inputs.input_ids[i:i + batch_size]
        translations.extend(translate_batch(batch_input_ids, model))

    end_time = time.time()

    total_inference_time = end_time - start_time

    return total_inference_time, translations

In [13]:
large_model_name = 't5-large'
tokenizer = T5Tokenizer.from_pretrained(large_model_name)
large_model = T5ForConditionalGeneration.from_pretrained(large_model_name)
large_model = large_model.to(device)

inputs = tokenizer(["translate English to German: " + entry['en'] for entry in dataset['translation']],
                   return_tensors="pt", padding=True, truncation=True)
inputs = inputs.to(device)

total_inference_time_large, translations_large = calculate_inference_time(large_model)
print(f"Total inference time for large model: {total_inference_time_large} seconds")

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Total inference time for large model: 205.51281762123108 seconds


In [14]:
for i, (original, translated) in enumerate(zip([entry['en'] for entry in dataset['translation'][:10]], translations_large[:10])):
    print(f"Original: {original} => Translated: {translated}")

Original: Munich 1856: Four maps that will change your view of the city => Translated: München 1856: Vier Karten, die Ihre Sicht auf die Stadt verändern
Original: A mental asylum, where today young people are said to meet. => Translated: Ein psychisches Asyl, wo sich heute junge Menschen treffen sollen.
Original: A crypt chapel, where they are now digging tunnels for the S-Bahn. => Translated: Eine Krypta, wo jetzt Tunnel für die S-Bahn gegraben
Original: Allotment holders cultivate the soil of former farmers. => Translated: Die Besitzer von Feldern bewirtschaften den Boden ehemaliger Bauern.
Original: The oldest official map of Munich brings captivating stories to light. => Translated: Die älteste offizielle Karte von München erfährt spannende Geschichten.
Original: It is annoying when geographical maps are not up-to-date. => Translated: Es ist ärgerlich, wenn die geographischen Karten nicht auf dem neuesten Stand
Original: Anyone who has ever got worked up because the car's sat-nav i

In [16]:
small_model_name = 't5-small'
tokenizer = T5Tokenizer.from_pretrained(small_model_name)
small_model = T5ForConditionalGeneration.from_pretrained(small_model_name)
small_model = small_model.to(device)

inputs = tokenizer(["translate English to German: " + entry['en'] for entry in dataset['translation']],
                   return_tensors="pt", padding=True, truncation=True)
inputs = inputs.to(device)

total_inference_time_small, translations_small = calculate_inference_time(small_model)
print(f"Total inference time for small model: {total_inference_time_small} seconds")

Total inference time for small model: 49.66063165664673 seconds


In [17]:
for i, (original, translated) in enumerate(zip([entry['en'] for entry in dataset['translation'][:10]], translations_small[:10])):
    print(f"Original: {original} => Translated: {translated}")

Original: Munich 1856: Four maps that will change your view of the city => Translated: München 1856: Vier Karten, die Ihren Blick auf die Stadt verändern werden
Original: A mental asylum, where today young people are said to meet. => Translated: Ein geistiges Asyl, wo sich heute junge Menschen treffen sollen.
Original: A crypt chapel, where they are now digging tunnels for the S-Bahn. => Translated: Eine Kryptkapelle, wo sie nun Tunnel für die S-Bahn gra
Original: Allotment holders cultivate the soil of former farmers. => Translated: Die Besitzer der Zucht pflanzen den Boden ehemaliger Bauern.
Original: The oldest official map of Munich brings captivating stories to light. => Translated: Die älteste offizielle Karte Münchens bringt faszinierende Geschichten ins Licht.
Original: It is annoying when geographical maps are not up-to-date. => Translated: Es ist ärgerlich, wenn geografische Karten nicht aktuell sind.
Original: Anyone who has ever got worked up because the car's sat-nav is sh

In [20]:
print(f"Inference time for large model: {total_inference_time_large} seconds")
print(f"Inference time for small model: {total_inference_time_small} seconds")
print(f"The smaller model is {total_inference_time_large / total_inference_time_small} times faster than the larger model")

Inference time for large model: 205.51281762123108 seconds
Inference time for small model: 49.66063165664673 seconds
The smaller model is 4.1383448169194725 times faster than the larger model
