In [None]:
!pip install transformers

In [None]:
! pip install sentencepiece

In [None]:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

In [None]:
# Load the tokenizer and the conditional-generation model from the same
# pretrained checkpoint so their vocabularies match.
checkpoint = 't5-small'
tokenizer = T5Tokenizer.from_pretrained(checkpoint)
model = T5ForConditionalGeneration.from_pretrained(checkpoint, return_dict=True)

Downloading (…)ve/main/spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. If you see this, DO NOT PANIC! This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Downloading (…)lve/main/config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

# 1. Text Summarization

In [None]:
# Passage to summarize. It is assembled from adjacent string literals, which
# Python concatenates with NO separator -- so every fragment except the last
# must end with an explicit trailing space, otherwise words fuse together
# (e.g. "idol,the" or "confrontationswith") and the tokenizer sees garbage.
one_piece_sequence = (
    "The series focuses on Monkey D. Luffy, a young man made of rubber, who, inspired by his childhood idol, "
    "the powerful pirate Red-Haired Shanks, sets off on a journey from the East Blue Sea to find the mythical treasure, "
    "the One Piece, and proclaim himself the King of the Pirates. In an effort to organize his own crew, the Straw Hat Pirates, "
    "Luffy rescues and befriends a pirate hunter and swordsman named Roronoa Zoro, and they head off in search of the "
    "titular treasure. They are joined in their journey by Nami, a money-obsessed thief and navigator; Usopp, a sniper "
    "and compulsive liar; and Sanji, a perverted but chivalrous cook. They acquire a ship, the Going Merry, and engage in confrontations "
    "with notorious pirates of the East Blue. As Luffy and his crew set out on their adventures, others join the crew later in the series, "
    "including Tony Tony Chopper, an anthropomorphized reindeer doctor; Nico Robin, an archaeologist and former Baroque Works assassin; "
    "Franky, a cyborg shipwright; Brook, a skeleton musician and swordsman; and Jimbei, a fish-man helmsman and former member of the Seven "
    # Sentence boundary: the original had "Thousand Sunny,Together" (comma, no space).
    "Warlords of the Sea. Once the Going Merry is damaged beyond repair, Franky builds the Straw Hat Pirates a new ship, the Thousand Sunny. "
    "Together, they encounter other pirates, bounty hunters, criminal organizations, revolutionaries, secret agents, and soldiers of the "
    "corrupt World Government, and various other friends and foes, as they sail the seas in pursuit of their dreams."
)

In [None]:
# Prepend the "summarize: " task prefix and encode the passage as a PyTorch
# tensor, truncating the token sequence at 512 tokens.
inputs = tokenizer.encode(
    f"summarize: {one_piece_sequence}",
    truncation=True,
    max_length=512,
    return_tensors='pt',
)

In [None]:
# Generate the summary with 2-beam search, bounded to 40-80 tokens.
summarization_ids = model.generate(
    inputs,
    num_beams=2,
    min_length=40,
    max_length=80,
    length_penalty=5.0,
)

In [None]:
# Decode the first (only) generated sequence back to text.
# skip_special_tokens=True strips markers such as the leading "<pad>" that
# otherwise leaks into the summary string; it also matches how every other
# decode call in this notebook is made.
summarization = tokenizer.decode(summarization_ids[0], skip_special_tokens=True)

In [None]:
# Bare last expression: the notebook renders the decoded summary as the cell output.
summarization

'<pad>the Straw Hat Pirates befriends a pirate hunter and swordsman named Roronoa Zoro. they are joined in their journey by Nami, a money-obsessed thief and navigator. others join the crew later in the series, including Tony Tony Chopper, an anthropomorphized reinde'

# 2. Language Translation

In [None]:
# English sentence to be translated.
language_sequence = "You should definitely watch 'One Piece', it is so good, you will love the comic book"

In [None]:
# Build the translation prompt (task prefix + sentence) and keep only the
# token-id tensor from the tokenizer's output.
input_ids = tokenizer(
    f"translate English to French: {language_sequence}",
    return_tensors="pt",
)["input_ids"]

In [None]:
# Give generate() an explicit length budget. Without one it falls back to the
# library's default max_length of 20 tokens, which cut the French translation
# off after the first clause (the recorded output ended at "c'est si bon").
language_ids = model.generate(input_ids, max_new_tokens=64)



In [None]:
# Turn the first generated sequence back into text, dropping special tokens.
language_translation = tokenizer.decode(
    language_ids[0],
    skip_special_tokens=True,
)

In [None]:
# Bare last expression: the notebook renders the translated string as the cell output.
language_translation

"Vous devriez regarder 'One Piece', c'est si bon"

# 3. Text Classification

In [None]:
# Premise/hypothesis pair for the MNLI prompt: the hypothesis restates the premise.
entailment_premise = "I love One Piece."
entailment_hypothesis = "My feelings towards One Piece is filled with love"

In [None]:
# Assemble the "mnli premise: ... hypothesis: ..." prompt and keep only the
# token-id tensor from the tokenizer's output.
input_ids = tokenizer(
    f"mnli premise: {entailment_premise} hypothesis: {entailment_hypothesis}",
    return_tensors="pt",
)["input_ids"]

In [None]:
# Let the model generate the label tokens for the MNLI prompt.
entailment_ids = model.generate(inputs=input_ids)

In [None]:
# Decode the first generated sequence into the predicted label text.
entailment = tokenizer.decode(
    entailment_ids[0],
    skip_special_tokens=True,
)

In [None]:
# Bare last expression: the notebook renders the predicted label as the cell output.
entailment

'entailment'

In [None]:
# Second MNLI pair: premise and hypothesis are about unrelated topics.
entailment_premise_1 = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
entailment_hypothesis_1 = "The sky is blue due to the shorter wavelength of blue light."

In [None]:
# Assemble the MNLI prompt for the second pair and keep only the token-id tensor.
input_ids_1 = tokenizer(
    f"mnli premise: {entailment_premise_1} hypothesis: {entailment_hypothesis_1}",
    return_tensors="pt",
)["input_ids"]

In [None]:
# Let the model generate the label tokens for the second MNLI prompt.
entailment_ids_1 = model.generate(inputs=input_ids_1)



In [None]:
# Decode the first generated sequence into the predicted label and print it.
entailment_1 = tokenizer.decode(
    entailment_ids_1[0],
    skip_special_tokens=True,
)
print(entailment_1)

contradiction


# 4. Sentence Similarity

In [None]:
# Sentence pair whose similarity the model is asked to rate.
stsb_sentence_1 = "Luffy was fighting in the war."
stsb_sentence_2 = "Luffy's fighting style is comical."

In [None]:
# Assemble the "stsb sentence 1: ... sentence 2: ..." prompt and keep only the
# token-id tensor from the tokenizer's output.
input_ids = tokenizer(
    f"stsb sentence 1: {stsb_sentence_1} sentence 2: {stsb_sentence_2}",
    return_tensors="pt",
)["input_ids"]

In [None]:
# Let the model generate its answer tokens for the STS-B prompt.
stsb_ids = model.generate(inputs=input_ids)



In [None]:
# Decode the first generated sequence; the result is the model's answer as a string.
stsb = tokenizer.decode(
    stsb_ids[0],
    skip_special_tokens=True,
)

In [None]:
# Bare last expression: the notebook renders the decoded answer string as the cell output.
stsb

'4.0'