In [1]:
!pip install transformers langchain google-search-results --quiet

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM


# Checkpoint identifier; the tokenizer and the model weights are both resolved
# from this single name so the two always match.
model_name = "cerebras/Cerebras-GPT-2.7B"
# Fetches the tokenizer files (vocab.json / merges.txt in the download log below).
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Fetches the weights: pytorch_model.bin is about 10.7 GB per the download log,
# so this is by far the slowest step of the notebook.
model = AutoModelForCausalLM.from_pretrained(model_name)

Downloading (…)lve/main/config.json:   0%|          | 0.00/361 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/10.7G [00:00<?, ?B/s]

In [3]:
from transformers import pipeline


# Text-generation pipeline used as the backend for every chain in this notebook.
#
# Generation settings:
#   max_new_tokens=100      caps the length of each completion.
#   no_repeat_ngram_size=2  prevents any 2-gram from being generated twice.
#   pad_token_id            set explicitly to the tokenizer's EOS id. Without it,
#                           generation falls back to that same value but logs
#                           "Setting `pad_token_id` to `eos_token_id`" on every call.
#
# `early_stopping=True` was dropped: it only controls the stopping condition of
# beam-based decoding, and no `num_beams` is configured here, so it had no effect.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=100,
    no_repeat_ngram_size=2,
    pad_token_id=tokenizer.eos_token_id,
)

In [4]:
from langchain.llms import HuggingFacePipeline


# Wrap the transformers pipeline in LangChain's adapter; the resulting object is
# what gets passed as `llm=` to the LLMChain instances built in later cells.
llm = HuggingFacePipeline(pipeline=pipe)

In [5]:
from langchain import PromptTemplate
from langchain import LLMChain


# Pass-through prompt: the caller's text is forwarded to the model as-is,
# wrapped in one leading and one trailing newline.
template = "\n{input}\n"

prompt = PromptTemplate(template=template, input_variables=["input"])

# verbose=True makes the chain echo the fully formatted prompt before the result.
chain = LLMChain(prompt=prompt, llm=llm, verbose=True)

In [6]:
# Smoke test: hand the model an unfinished sentence and show its continuation.
response = chain.run("When I opened the door, I saw a")
print(response)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
When I opened the door, I saw a
[0m

[1m> Finished chain.[0m

woman in a white dress,
with a black veil over her face.
She was holding a baby in her arms.


In [7]:
# Topic-extraction prompt: the tweet is fenced between two `---` rules and
# followed by a cue line that the model is expected to complete.
template = """
Given a tweet:
---
{input}
---
The topic of the tweet is:
"""

prompt = PromptTemplate(template=template, input_variables=["input"])

# Rebinds `chain`: from here on, chain.run(...) uses the tweet-topic prompt.
chain = LLMChain(prompt=prompt, llm=llm, verbose=True)

In [8]:
# Run the tweet-topic chain on a sample two-line tweet and show the raw completion.
response = chain.run(
    "After writing the same client code in two languages:\n"
    "Perhaps, SDKs should be thin clients for a backend SDK service "
    "that dispatches the requests to actual backend services."
)
print(response)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Given a tweet:
---
After writing the same client code in two languages:
Perhaps, SDKs should be thin clients for a backend SDK service that dispatches the requests to actual backend services.
---
The topic of the tweet is:
[0m

[1m> Finished chain.[0m

* The client should not be aware of any backend service. It should only be able to send requests. The backend should dispatch the request to the actual service, which should then dispatch it to a real backend. This is a thin client. 
   * The thin-client should have no knowledge of how the backend works. All it knows is that it should send a request. That's it. No knowledge about how it works, no need to know how to use it, etc
