In [22]:
%pip install -Uqqq pip --progress-bar off
%pip install -qqq runpod==0.10.0 --progress-bar off
%pip install -qqq text-generation==0.6.0 --progress-bar off
%pip install -qqq requests==2.31.0 --progress-bar off

In [23]:
import requests
import runpod
from text_generation import Client

In [53]:
import runpod
# Load .env variables
import os
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

HF_HUB_TOKEN = os.getenv("HF_HUB_TOKEN", "hf-some-token")
RUNPOD_API_KEY = os.getenv("RUNPOD_API_KEY", "add-here-if-not-set-in-env-file")
 
assert HF_HUB_TOKEN.startswith("hf_"), "This doesn't look like a valid Hugging Face Token"
print("HF_HUB_TOKEN: " + HF_HUB_TOKEN[0:6])
assert not RUNPOD_API_KEY.startswith("add-here"), "This doesn't look like a valid Runpod API Key"
runpod.api_key = RUNPOD_API_KEY 
print("RUNPOD_API_KEY: " + runpod.api_key[0:6])

HF_HUB_TOKEN: hf_ENv
RUNPOD_API_KEY: 9MJ87C


In [52]:
gpu_count = 1

envs = {"HUGGING_FACE_HUB_TOKEN":HF_HUB_TOKEN}

    # data_center_id="EU-RO-1",
    # data_center_id="US-KS-1",
    # docker_args="--model-id TheBloke/Llama-2-7b-chat-fp16 --env HUGGING_FACE_HUB_TOKEN=hf_foo",
pod = runpod.create_pod(
    name="Llama-7b-sample",
    image_name="ghcr.io/huggingface/text-generation-inference:0.9.4",
    gpu_type_id="NVIDIA RTX A4500",
    cloud_type="SECURE",
    data_center_id="EU-RO-1",
    docker_args="--model-id TheBloke/Llama-2-7b-chat-fp16",
    gpu_count=gpu_count,
    volume_in_gb=50,
    container_disk_in_gb=5,
    ports="80/http,29500/http",
    volume_mount_path="/data",
    env=envs,
)


In [39]:
SERVER_URL = f'https://{pod["id"]}-80.proxy.runpod.net'
print(SERVER_URL)

https://i4fu3rms6p2hoa-80.proxy.runpod.net


In [40]:
print(f"Docs (Swagger UI) URL: {SERVER_URL}/docs")

Docs (Swagger UI) URL: https://i4fu3rms6p2hoa-80.proxy.runpod.net/docs


In [41]:
DEFAULT_SYSTEM_PROMPT = """
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
""".strip()


def generate_prompt(prompt: str, system_prompt: str = DEFAULT_SYSTEM_PROMPT) -> str:
    return f"""
[INST] <<SYS>>
{system_prompt}
<</SYS>>

{prompt} [/INST]
""".strip()

## API

In [42]:
def make_request(prompt: str):
    data = {
        "inputs": prompt,
        "parameters": {"best_of": 1, "temperature": 0.01, "max_new_tokens": 512},
    }
    headers = {"Content-Type": "application/json"}

    return requests.post(f"{SERVER_URL}/generate", json=data, headers=headers)

In [43]:
%%time
prompt = generate_prompt(
    "Write an email to a new client to offer a subscription for a paper supply for 1 year."
)
response = make_request(prompt)

CPU times: user 19.1 ms, sys: 3.45 ms, total: 22.6 ms
Wall time: 11.2 s


In [44]:
response.status_code

200

In [45]:
print(response.json()["generated_text"].strip())

Subject: Welcome to [Company Name] - Paper Supply Subscription Offer
Dear [Client Name],
We are thrilled to welcome you to [Company Name], and we hope you're doing well! As a valued client, we're excited to offer you a special subscription deal for a year's supply of high-quality paper products.
Our paper supply subscription service is designed to provide you with a convenient and cost-effective way to stock up on the paper products you need, without any hassle or waste. With our subscription, you'll receive a regular shipment of paper products, tailored to your specific needs and preferences.
Here's what you can expect with our subscription service:
* A wide range of paper products, including A4, A3, A2, A1, and custom sizes
* High-quality, durable paper that's perfect for printing, writing, and crafting
* Regular shipments every [insert time frame, e.g., monthly, quarterly, etc.]
* Flexible subscription plans to suit your needs and budget
* Easy online management and tracking of your

In [None]:
DWIGHT_SYSTEM_PROMPT = """
You're a salesman and beet farmer know as Dwight K Schrute from the TV show The Office. Dwgight replies just as he would in the show.
You always reply as Dwight would reply. If you don't know the answer to a question, please don't share false information. Always format your responses using markdown.
""".strip()

In [None]:
%%time
prompt = generate_prompt(
    "Write an email to a new client to offer a subscription for a paper supply for 1 year.",
    system_prompt=DWIGHT_SYSTEM_PROMPT,
)
response = make_request(prompt)

In [None]:
print(response.json()["generated_text"].strip())

## Client

In [None]:
client = Client(SERVER_URL, timeout=60)

In [None]:
%%time
response = client.generate(prompt, max_new_tokens=512).generated_text

In [None]:
print(response.strip())

In [None]:
text = ""
for response in client.generate_stream(prompt, max_new_tokens=512):
    if not response.token.special:
        new_text = response.token.text
        print(new_text, end="")
        text += new_text

In [None]:
runpod.terminate_pod(pod["id"])

## References

- https://www.runpod.io/console/gpu-secure-cloud
- https://docs.runpod.io/docs/get-gpu-types
- https://github.com/facebookresearch/llama