In [1]:
import os
import sys
from sambanova_endpoint import SambaNovaEndpoint, SambaverseEndpoint, SambaNovaEmbeddingModel
from dotenv import load_dotenv
import json
# Load variables from the .env file one directory above the notebook into the
# process environment. The call's return value is what the cell displays.
# NOTE(review): presumably the endpoint classes below read their credentials
# and URLs from these variables — confirm against sambanova_endpoint.
load_dotenv("../.env")


True

# SambaStudio endpoint

## Non-streaming

In [2]:

# SambaStudio endpoint client with streaming switched off.
# Generation parameters not set in this demo: repetition_penalty, top_k,
# top_logprobs, top_p.
llm = SambaNovaEndpoint(
    streaming=False,
    model_kwargs={
        "do_sample": True,
        "temperature": 0.01,
        "max_tokens_to_generate": 256,
    },
)


In [3]:
# Send one prompt through the `llm` built in the previous cell; the returned
# value is the last expression, so the notebook shows it as the cell output.
llm.invoke("tell me a 50 word tale")

"of a man who was once a great warrior, but is now a humble farmer.\n\nThe sun sets on the fields, a weary farmer bends to his work. Once a great warrior, he fought for his people's freedom. Now, he fights for his family's survival. His calloused hands, a testament to his past, still hold the strength of a warrior. But his heart, once filled with anger and vengeance, now holds only love and hope."

## Streaming

In [4]:
# SambaStudio endpoint client with streaming switched on; rebinding `llm`
# replaces the non-streaming client created earlier in the notebook.
# Generation parameters not set in this demo: repetition_penalty, top_k,
# top_logprobs, top_p.
llm = SambaNovaEndpoint(
    streaming=True,
    model_kwargs={
        "do_sample": True,
        "max_tokens_to_generate": 256,
        "temperature": 0.01,
    },
)

In [5]:
# Every streamed chunk is decoded as JSON; its "data" field holds a second
# JSON document, which in turn carries the text under "stream_token".
for chunk in llm.stream("tell me a 50 word tale"):
    envelope = json.loads(chunk)
    payload = json.loads(envelope["data"])
    token = payload["stream_token"]
    # No newline and an explicit flush so tokens appear as they arrive.
    print(token, end="", flush=True)

 of a man who was once a great warrior, but is now a humble farmer.

The sun sets on the fields, a weary farmer bends to his work. Once a great warrior, he fought for his people's freedom. Now, he fights for his family's survival. His calloused hands, a testament to his past, still hold the strength of a warrior. But his heart, once filled with anger and vengeance, now holds only love and hope.

# Sambaverse


## Non-streaming

In [6]:
# Sambaverse client for the Llama 2 7B chat model; no `streaming` argument is
# passed here.
# Generation parameters not set in this demo: stop_sequences,
# repetition_penalty, top_k, top_p.
llm = SambaverseEndpoint(
    sambaverse_model_name="Meta/llama-2-7b-chat-hf",
    model_kwargs={
        "do_sample": True,
        "max_tokens_to_generate": 256,
        "temperature": 0.01,
        "process_prompt": True,
        "select_expert": "llama-2-7b-chat-hf",
    },
)

In [7]:
# Send one prompt through the Sambaverse `llm` built in the previous cell; the
# returned value is the last expression, so it is shown as the cell output.
llm.invoke("tell me a 50 word tale")

' Sure! Here is a 50-word tale:\n\nThe cat purred contentedly on my lap, pawing at my hand with a gentle mew.'

## Streaming

In [8]:
# Sambaverse client for the Llama 2 7B chat model with streaming switched on;
# rebinding `llm` replaces the client created earlier in the notebook.
# Generation parameters not set in this demo: stop_sequences,
# repetition_penalty, top_k, top_p.
llm = SambaverseEndpoint(
    streaming=True,
    sambaverse_model_name="Meta/llama-2-7b-chat-hf",
    model_kwargs={
        "do_sample": True,
        "max_tokens_to_generate": 256,
        "temperature": 0.01,
        "process_prompt": True,
        "select_expert": "llama-2-7b-chat-hf",
    },
)

In [9]:

# Every streamed chunk is decoded as JSON; its "data" field holds a second
# JSON document, which in turn carries the text under "stream_token".
for chunk in llm.stream("tell me a 50 word tale"):
    envelope = json.loads(chunk)
    payload = json.loads(envelope["data"])
    token = payload["stream_token"]
    # No newline and an explicit flush so tokens appear as they arrive.
    print(token, end="", flush=True)

  Sure! Here is a 50-word tale:

The cat purred contentedly on my lap, pawing at my hand with a gentle mew.

# Embeddings

In [None]:
# Embedding client, constructed with no arguments. The name `embedding` is
# reused by the vector-store cell further down, so it must stay as is.
embedding = SambaNovaEmbeddingModel()

# Keep both results in named variables. A notebook cell only displays its last
# expression, so the embed_documents() result was previously computed and then
# thrown away without ever being shown.
document_embeddings = embedding.embed_documents(["tell me a 50 word tale", "tell me a joke"])
query_embedding = embedding.embed_query("tell me a 50 word tale")

# Display sizes rather than the raw float vectors, which would flood the output.
# NOTE(review): assumes embed_documents returns one vector per input text and
# embed_query returns a single flat vector — confirm against SambaNovaEmbeddingModel.
len(document_embeddings), len(query_embedding)

In [13]:
from langchain.vectorstores import Chroma
from langchain.schema import Document

# Raw prompt strings to index. They are kept verbatim (including spelling) so
# that the recorded retrieval output still matches what is stored.
texts = [
    "tell me a 50 word tale",
    "tell me a joke",
    "when was America discoverd?",
    "how to build an engine?",
    "give me 3 party activities",
    "give me three healty dishes",
]

# Wrap each string in a Document. `page_content` is passed by keyword so the
# construction does not rely on positional-argument support, and the raw
# strings keep their own name instead of being overwritten by `docs`.
docs = [Document(page_content=text) for text in texts]

query = "prompt for generating something fun"

# Build a Chroma store from the documents using `embedding`, the embedding
# client created in the embeddings cell, then expose it as a retriever.
vectordb = Chroma.from_documents(docs, embedding)
retriever = vectordb.as_retriever()

# Last expression: the documents retrieved for `query`, shown as cell output.
retriever.get_relevant_documents(query)


<bound method Response.json of <Response [200]>>


[Document(page_content='give me 3 party activities'),
 Document(page_content='tell me a 50 word tale'),
 Document(page_content='tell me a joke'),
 Document(page_content='give me three healty dishes')]