# This notebook showcases examples of how the API can be used
# OpenAI API REF: https://platform.openai.com/docs/api-reference/

##

# Setup - Env Parameter

In [2]:
#!pip install openai
import os
from openai import OpenAI
os.environ['OPENAI_BASE_URL'] = 'http://127.0.0.1:8000/v1'
os.environ['OPENAI_API_KEY'] = 'NA'
client = OpenAI()


# List all models

In [3]:
client.models.list().data

[Model(id='llama-3.1-8B', created=None, object='model', owned_by='remichu', created_by=1686935002),
 Model(id='mistral', created=None, object='model', owned_by='remichu', created_by=1686935002)]

# Basic Chat Completion

In [4]:
client = OpenAI()

completion = client.chat.completions.create(
  model="mistral",
  messages=[
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Write me a 10 lines poem"}
  ]
)

print(completion.choices[0].message)

ChatCompletionMessage(content="In the quiet of the night, stars twinkle bright,\nGuiding lost souls with their gentle light.\nWhispers of dreams dance in the moon's soft glow,\nA symphony of silence, where secrets grow.\n\nDaybreak brings a new canvas to unfold,\nStories untold, waiting to be told.\nLife's rhythm echoes in every beat,\nA melody that makes us feel complete.\n\nEmbrace each moment, let your spirit soar,\nFor life is but a fleeting, precious lore.", refusal=None, role='assistant', function_call=None, tool_calls=None, tool_call_id=None, name=None, artifact_meta=None)


# Basic Chat Completion - With Streaming

In [5]:
messages = [
    {"role": "user","content": """Write me a 10 lines poem"""}]

completion = client.chat.completions.create(
  model="mistral",
  messages=messages,
  stream=True,
  temperature=0.1,
)

for chunk in completion:
  #print(chunk)
  print(chunk.choices[0].delta.content, end='')

In the quiet of the night, stars twinkle bright,
Guiding dreams that dance in the soft moonlight.
Whispers of wind through leaves, a gentle sigh,
Nature's lullaby as day and night collide.

Life's tapestry woven with threads unseen,
A journey through time, a story yet to be gleaned.
Embrace each moment, let your spirit soar,
For life is but a fleeting, precious bore.

So here's to love, laughter, and dreams afloat,
To memories made and those yet to be wrote.
May you find joy in every sunlit day,
And peace within yourself come what may.

# Tool Usage

In [6]:
client = OpenAI()

tools = [
  {
    "type": "function",
    "function": {
      "name": "get_current_weather",
      "description": "Get the current weather in a given location",
      "parameters": {
        "type": "object",
        "properties": {
          "location": {
            "type": "string",
            "description": "The city and state, e.g. San Francisco, CA",
          },
          "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
        },
        "required": ["location"],
      },
    }
  }
]
messages = [{"role": "user", "content": "What's the weather like in Boston and Hanoi today?"}]
completion = client.chat.completions.create(
  model="gpt-4o",
  messages=messages,
  tools=tools,
  tool_choice="auto"
)

print(completion)


ChatCompletion(id='cmpl-ead7d6a7a85044668fcbb3538e9f452a', choices=[Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_7571fc3272a14fecaaa1a8a4e75525dc', function=Function(arguments='{"location": "Boston", "unit": null}', name='get_current_weather'), type='function', index=0), ChatCompletionMessageToolCall(id='call_a4b1c2806f6f43db929dcd01a5f05c70', function=Function(arguments='{"location": "Hanoi", "unit": null}', name='get_current_weather'), type='function', index=1)], tool_call_id=None, name=None, artifact_meta=None))], created=1723946888, model='llama-3.1-8B', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=72, prompt_tokens=896, total_tokens=968))


# Thinking Template (Gallama Only)

In [7]:
# user can define 

thinking_template ="""
<chain_of_thought>
  <problem>{problem_statement}</problem>
  <initial_state>{initial_state}</initial_state>
  <steps>
    <step>{action1}</step>
    <step>{action2}</step>
    <!-- Add more steps as needed -->
  </steps>
  <answer>Provide the answer</answer>
  <final_answer>Only the final answer, no need to provide the step by step problem solving</final_answer>
</chain_of_thought>
"""

In [8]:
messages = [
    {"role": "user","content": """
I went to the market and bought 10 apples. I gave 2 apples to the neighbor and 2 to the repairman. I then went and bought 5 more apples and ate 1. How many apples did I remain with?
"""}                    
]


completion = client.chat.completions.create(
  model="mistral",
  messages=messages,
  temperature=0.1,
  max_tokens= 200,
  #tools=tools,
  #tool_choice="auto",
  # stream=True,
  extra_body={
    "thinking_template": thinking_template,
  },
)

print(completion.choices[0].message.content)

# for chunk in completion:
#     print(chunk)

The user has 10 APPLES LEFT.


## Mixture of Agents

In [9]:
messages = [{"role": "user", "content": "Cat or Dog is faster, in term of reaction speed?"}]

completion = client.chat.completions.create(
  #model="llama3.1-8B",
  model="llama-3.1-8B",
  messages=messages,
  extra_body={
    "mixture_of_agents": {
        "agent_list": ["mistral", "llama-3.1-8B"],
        "master_agent": "llama-3.1-8B",
    }
  },
)

print(completion.choices[0].message.content)

When it comes to reaction speed, cats generally outperform dogs. Several factors contribute to this difference:

1.  **Evolutionary advantage**: As solitary hunters, cats have developed rapid reflexes to catch prey, which has led to adaptations in their brain and nervous system.
2.  **Physical attributes**: Cats' flexible spines and muscles enable quicker changes in direction and acceleration, allowing them to react faster to unexpected stimuli.
3.  **Visual processing**: Studies suggest that cats possess higher visual acuity and can process visual information more quickly than dogs, enabling them to detect movement and react faster.

Research indicates that cats can respond to sounds and movements approximately 50% faster than dogs. For instance, one study discovered that cats reacted to a sudden noise within 100-150 milliseconds, whereas dogs required around 200-250 milliseconds to respond.

While dogs excel in scent detection and obedience training, their reaction time may not match

### Mixture of Agents with tools usage

In [10]:
tools = [
  {
    "type": "function",
    "function": {
      "name": "get_current_weather",
      "description": "Get the current weather in a given location",
      "parameters": {
        "type": "object",
        "properties": {
          "location": {
            "type": "string",
            "description": "The city and state, e.g. San Francisco, CA",
          },
          "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
        },
        "required": ["location"],
      },
    }
  }
]
messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]

completion = client.chat.completions.create(
  model="llama-3.1-8B",
  messages=messages,
  tools=tools,
  tool_choice="auto",
  extra_body={
    "mixture_of_agents": {
        "agent_list": ["mistral", "llama-3.1-8B"],
        "master_agent": "llama-3.1-8B",
    }
  },
)

print(completion.choices[0].message.tool_calls[0].function)
# Function(arguments='{"location": "Boston"}', name='get_current_weather')

Function(arguments='{"location": "Boston"}', name='get_current_weather')


In [11]:
messages = [
    {"role": "user","content": """tell me a 200 words story on llama"""}]

completion = client.chat.completions.create(
  model="mistral",
  messages=messages,
  #tools=tools,
  #tool_choice="auto",
  stream=True,
  temperature=0.1,
)

# print(completion.choices[0].message.content)
for chunk in completion:
  #print(chunk)
  print(chunk.choices[0].delta.content, end='')


In the heart of the Andes, where the sun painted vibrant hues on the mountains, lived a young llama named Luna. She was not like the others; her coat was a unique blend of silver and gold, shimmering under the sunlight. Luna was known for her adventurous spirit and her love for exploring the vast landscapes of her home.

One day, Lama Alpaca, the wise elder of their herd, shared a tale about a hidden valley filled with lush grass and crystal-clear waterfalls. Intrigued by this story, Luna decided to embark on a journey to find this mystical place. With determination in her eyes and a small pack of provisions on her back, she set off towards the unknown.

The journey was long and challenging, but Luna's spirit remained unbroken. She navigated through treacherous terrains, crossed swift rivers, and climbed steep hills. After several days, she finally reached the valley Alpaca had spoken of. It was more beautiful than any story could ever tell. The grass was so green it seemed to glow, a

# Completion endpoint

In [17]:
prompt = '''
 <s>[INST]Tell a joke about llama [/INST]                                         
'''

## With Streaming

In [19]:
for chunk in client.completions.create(
  model="mistral",
  prompt=prompt,
  max_tokens=100,
  temperature=0,
  stream=True
):
  print(chunk.choices[0].text)

Why don
't llamas make
 good secret keepers?


Because they always
 spit it out!


## No Streaming

In [21]:
completion = client.completions.create(
    model="mistral",
    prompt=prompt,
    max_tokens=100,
    temperature=0
)
print(completion.choices[0].text)


Why don't llamas make good secret keepers?

Because they always spit it out!


## Streaming with requests instead of OpenAI

In [23]:
import requests

url = "http://127.0.0.1:8000/v1/chat/generate"
headers = {
    "Content-Type": "application/json",
    "Accept": "text/event-stream"
}
data = {
    "model": "mistral",
    "prompt": prompt,
    "max_tokens": 1000,
    "temperature": 0,
    "stream": True
}

response = requests.post(url, json=data, headers=headers, stream=True)

for line in response.iter_lines():
    if line:
        print(f"Raw data: {line.decode('utf-8')}")
        # Here you would normally parse the JSON, but for now, let's just print the raw data

Raw data: data: {"id": "cmpl-7f280d3dc4aa41a58bbbde126378e3d8", "object": "text_completion", "created": 1723947030, "model": "mistral", "system_fingerprint": "fp_44709d6fcb", "choices": [{"text": "Why don", "index": 0, "logprobs": null, "finish_reason": null}]}
Raw data: data: {"id": "cmpl-7f280d3dc4aa41a58bbbde126378e3d8", "object": "text_completion", "created": 1723947030, "model": "mistral", "system_fingerprint": "fp_44709d6fcb", "choices": [{"text": "'t llamas make", "index": 0, "logprobs": null, "finish_reason": null}]}
Raw data: data: {"id": "cmpl-7f280d3dc4aa41a58bbbde126378e3d8", "object": "text_completion", "created": 1723947030, "model": "mistral", "system_fingerprint": "fp_44709d6fcb", "choices": [{"text": " good secret keepers?", "index": 0, "logprobs": null, "finish_reason": null}]}
Raw data: data: {"id": "cmpl-7f280d3dc4aa41a58bbbde126378e3d8", "object": "text_completion", "created": 1723947030, "model": "mistral", "system_fingerprint": "fp_44709d6fcb", "choices": [{"text

# Embedding

In [5]:
from openai import OpenAI
client = OpenAI(base_url="http://127.0.0.1:8000/v1")

response = client.embeddings.create(
    input=["Your text string goes here", "test test"],
    model="multilingual-e5-large-instruct"
    #model="gte-large-en-v1.5"
)

print(response.data[0].embedding)

[0.018757905811071396, 3.4400782169541344e-05, -0.018391599878668785, -0.03940839320421219, 0.03602006286382675, -0.04148412495851517, -0.005986809264868498, 0.05485428497195244, 0.04642925411462784, -0.026450326666235924, 0.02565666474401951, 0.0505196675658226, -0.030540740117430687, -0.018025295808911324, -0.02413039095699787, -0.009653682820498943, -0.03244858235120773, 0.026801370084285736, -0.02996075712144375, -0.01118758786469698, 0.04499455541372299, -0.005719711538404226, -0.0009348427993245423, -0.03614216670393944, -0.03718003258109093, -0.016025876626372337, -0.02181045338511467, -0.03455484285950661, -0.04856603592634201, -0.046856608241796494, 0.009760521352291107, 0.05378589406609535, -0.03968312218785286, -0.04981758072972298, -0.02414565347135067, 0.04981758072972298, 0.0173079464584589, 0.044322993606328964, -0.02976234070956707, 0.048535510897636414, -0.021535724401474, 0.008829494938254356, 0.004204884637147188, -0.006883495487272739, -0.014423288404941559, 0.01782

# Mixture of Agents

In [25]:
messages = [{"role": "user", "content": "Cat or Dog is faster, in term of reaction speed?"}]

completion = client.chat.completions.create(
  #model="llama3.1-8B",
  model="llama-3.1-8B",
  messages=messages,
  extra_body={
    "mixture_of_agents": {
        "agent_list": ["mistral", "llama-3.1-8B"],
        "master_agent": "llama-3.1-8B",
    }
  },
)

print(completion.choices[0].message.content)

**Reaction Speed: Cat vs Dog**

When it comes to reaction speed, cats generally outperform dogs. This is due to their evolutionary advantages, physical characteristics, and neurological adaptations.

**Key Factors Contributing to Faster Reaction Speed in Cats:**

1.  **Evolutionary Advantage:** As solitary hunters, cats rely on stealth and quick reflexes to catch prey. Their brain structure and nervous system have adapted to respond rapidly to potential threats.
2.  **Flexibility and Agility:** Cats' flexible spines and muscles enable them to twist and turn quickly, making sharp turns and rapid changes in direction possible.
3.  **Eye Movement:** Cats have a higher frequency of eye movements (around 100-150 times per minute), allowing them to track fast-moving objects with greater accuracy.

**Studies Supporting Faster Reaction Times in Cats:**

*   A study published in "Animal Behaviour" found that domestic cats responded significantly faster than dogs in a stimulus-response task, wit

# Mixture of Agents work with streaming

In [27]:
messages = [
    {"role": "user","content": """
I went to the market and bought 10 apples. I gave 2 apples to the neighbor and 2 to the repairman. I then went and bought 5 more apples and ate 1. How many apples did I remain with?
"""}                    
]


completion = client.chat.completions.create(
  model="mistral",
  messages=messages,
  temperature=0.1,
  max_tokens= 200,
  stream=True,
  extra_body={
    "mixture_of_agents": {
        "agent_list": ["mistral", "llama-3.1-8B"],
        "master_agent": "llama-3.1-8B",
    }
  },
)

# print(completion.choices[0].message.content)
#print(completion)
for chunk in completion:
  #print(chunk)
  print(chunk.choices[0].delta.content, end='')

Initially, you had 10 apples. After giving away 2 to the neighbor and 2 to the repairman, you were left with 6 apples. Subsequently, you purchased another 5 apples, increasing your total to 11 apples. Following this, you consumed one apple, leaving you with a remaining count of 10 - 1 = \boxed{9} apples.

# Customize Thinking with Mixture of Agents

In [28]:
thinking_cat = "<cat_thinking>What reason could cat be faster?</cat_thinking>"
thinking_dog = "<dog_thinking>What reason could dog be faster?</dog_thinking>"

# thinking to influence mixture of agents consolidated answer 
thinking_moa = """
<synthesis_guide>
  <context>Synthesize information from hidden references into a standalone answer.</context>
  <question>{question}</question>
  <analysis>
    <source_evaluation>
      <key_points>Core ideas</key_points>
      <relevance>Question relevance</relevance>
      <credibility>Source reliability</credibility>
    </source_evaluation>
    <information_synthesis>
      <consensus>Consistent information</consensus>
      <conflicts>Conflicting points</conflicts>
      <evidence_quality>Supporting data quality</evidence_quality>
    </information_synthesis>
  </analysis>
  <strategy>
    <key_considerations>Important points to include</key_considerations>
    <bias_awareness>Potential source biases</bias_awareness>
    <information_gaps>Incomplete areas</information_gaps>
  </strategy>
  <response>Integrate insights without referencing sources, presenting as a fresh, informed answer.</response>
</synthesis_guide>
"""

In [30]:
messages = [
    {"role": "user","content": """
Is cat or dog faster and why?
"""}                    
]


completion = client.chat.completions.create(
  model="mistral",
  messages=messages,
  temperature=0.1,
  max_tokens= 200,
  stream=True,
  extra_body={
    "thinking_template": thinking_moa,
    "mixture_of_agents": {
        "agent_list": [
            {
                "model": "mistral",
                "thinking_template": thinking_cat,
             },
            {
                "model": "llama-3.1-8B",
                "thinking_template": thinking_dog,
            }
        ]
    }
  },
)

for chunk in completion:
  print(chunk.choices[0].delta.content, end='')

Dogs are generally faster than cats over long distances due to their higher speed capabilities. Some cats may have the potential to be faster due to their unique physical attributes such as lighter weight, shorter legs with longer strides, allowing them to cover more ground with each step. However, cats tend to conserve energy by walking or sprinting rather than maintaining a steady pace over long distances.

In [31]:
completion = client.chat.completions.create(
    model="mistral",
    messages=[{"role": "user", "content": "Is smoking bad for health? Start your answer with 'Yes. Because' or 'No. Because'"}],
    temperature=0.1,
    max_tokens=200,
    extra_body={
        # "leading_prompt": leading_prompt,         # prefix the generation with some string
        # "regex_pattern": regex_pattern,           # define the regex for the whole generation
        #"regex_prefix_pattern": '(Yes|No)\.',     # define the regex to match the starting words
        "stop_words": [","],                       # define the word to stop generation, currently the stop_words is included in generation result
    },
)
print(completion.choices[0].message.content)

Yes. Because smoking causes numerous health problems,
