In [1]:
# %pip install transformers

In [3]:
# %pip install datasets

In [1]:
# %pip install accelerate

In [2]:
# %pip install sentencepiece

In [1]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from datasets import load_dataset

In [2]:
# Directory where downloaded checkpoint files are cached; it is passed as
# `cache_dir` to `from_pretrained` when the model is loaded.
# NOTE(review): absolute, machine-specific path — adjust for your environment.
cache_dir = '/home/jupyter/data/transformers'

## Pre-Trained Model Examples

In [4]:
def generate(prompt, max_new_tokens=100):
    """Generate text for ``prompt`` using the notebook-level ``model`` and ``tokenizer``.

    Args:
        prompt: A single prompt string, or a list of prompt strings that are
            run together as one batch.
        max_new_tokens: Value forwarded to ``model.generate`` as
            ``max_new_tokens`` (limits the length of each generation).

    Returns:
        The decoded generations with special tokens stripped, one per
        generated sequence, joined with newlines into a single string.
    """
    # padding=True changes nothing for a single string, but it lets a list of
    # prompts with different lengths be stacked into one tensor batch.
    inputs = tokenizer(prompt, return_tensors="pt", padding=True)

    # Keep the inputs on the same device as the model so generation also works
    # after the model has been moved off the CPU.
    inputs = inputs.to(model.device)

    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    return "\n".join(decoded)

In [5]:
# Checkpoint identifier handed to `from_pretrained` for both the tokenizer and
# the model; this is one of the available flan-t5 models.
checkpoint = "google/flan-t5-base"

In [6]:
# Load the tokenizer and the model from the same checkpoint.
# The tokenizer preprocesses text input into a form the model can understand.
# Both loads go through `cache_dir` so every downloaded checkpoint file ends up
# in the configured cache directory rather than being split across locations.
tokenizer = AutoTokenizer.from_pretrained(checkpoint, cache_dir=cache_dir)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint, cache_dir=cache_dir)

In [8]:
# Translation example: the task is stated as a plain-text prefix of the prompt.
prompt = "translate English to German: How old are you?"

response = generate(prompt)
response

'Wie old sind Sie?'

In [9]:
# Summarization example on a short review; generation is capped at 10 new tokens.
prompt = "summarize: The pizza at Roberts Pizza & Dough company is great, although expensive. I'd recommended the pepporoni."

response = generate(prompt, max_new_tokens=10)
response

'Great pizza.'

In [10]:
# Summarization example on a long legal text. The prompt is assembled with
# implicit string concatenation (one source line per line of the document) so
# that no string literal has to span physical lines.
prompt = (
    "summarize: The people of the State of California do enact as follows:\n\n\n"
    "SECTION 1.\n"
    "Section 10295.35 is added to the Public Contract Code, to read:\n"
    "10295.35.\n"
    "(a) (1) Notwithstanding any other law, a state agency shall not enter into any contract for the acquisition of goods or services in the amount of one hundred thousand dollars ($100,000) or more with a contractor that, in the provision of benefits, discriminates between employees on the basis of an employee’s or dependent’s actual or perceived gender identity, including, but not limited to, the employee’s or dependent’s identification as transgender.\n"
    "(2) For purposes of this section, “contract” includes contracts with a cumulative amount of one hundred thousand dollars ($100,000) or more per contractor in each fiscal year.\n"
    "(3) For purposes of this section, an employee health plan is discriminatory if the plan is not consistent with Section 1365.5 of the Health and Safety Code and Section 10140 of the Insurance Code.\n"
    "(4) The requirements of this section shall apply only to those portions of a contractor’s operations that occur under any of the following conditions:\n"
    "(A) Within the state.\n"
    "(B) On real property outside the state if the property is owned by the state or if the state has a right to occupy the property, and if the contractor’s presence at that location is connected to a contract with the state.\n"
    "(C) Elsewhere in the United States where work related to a state contract is being performed.\n"
    "(b) Contractors shall treat as confidential, to the maximum extent allowed by law or by the requirement of the contractor’s insurance provider, any request by an employee or applicant for employment benefits or any documentation of eligibility for benefits submitted by an employee or applicant for employment.\n"
    "(c) After taking all reasonable measures to find a contractor that complies with this section, as determined by the state agency, the requirements of this section may be waived under any of the following circumstances:\n"
    "(1) There is only one prospective contractor willing to enter into a specific contract with the state agency."
)

response = generate(prompt, max_new_tokens=100)
response

'SECTION 1. Section 10295.35 is added to the Public Contract Code, to read: 10295.35.'

In [None]:
# Free-form instruction example: the prompt carries no "translate"/"summarize"
# task prefix, only the instruction itself.
prompt = "A step by step recipe to make bolognese pasta do not repeat the steps:"

response = generate(prompt, max_new_tokens=100)
response

In [12]:
# Question answering example: the question and its supporting context are
# packed into a single prompt via implicit string concatenation.
prompt = (
    "Question: What class do I TA for in the past? "
    "Context: My name is Ignas and I currently a TA NLP and Conversation AI but have also TA'd Optimization and Simulation Methods in the past"
)

response = generate(prompt, max_new_tokens=100)
response

'NLP and Conversation AI'