# Inference through Together.ai

<img src="../asset/togetherai.png" width="400" />

### OpenAI compatibility

In [None]:
!pip install --upgrade openai

In [None]:
import os
import openai

# Never hard-code the API key in the notebook. Reuse a key that is already
# defined in this kernel session; otherwise read it from the environment.
TOGETHER_API_KEY = globals().get("TOGETHER_API_KEY") or os.environ.get("TOGETHER_API_KEY")
if not TOGETHER_API_KEY:
    raise RuntimeError(
        "TOGETHER_API_KEY is not set; export it in the environment before running this cell."
    )

# Point the stock OpenAI client at Together's OpenAI-compatible endpoint.
BASE_URL = "https://api.together.xyz/v1"

client = openai.OpenAI(
    api_key=TOGETHER_API_KEY,
    base_url=BASE_URL,
)

In [None]:
import os
import openai
import time

# Send a system + user prompt to the 8B Llama 3.1 model and time the request.
# perf_counter() is a monotonic, high-resolution clock, so the measured
# duration is not skewed by system clock adjustments (unlike time.time()).
start = time.perf_counter()

response = client.chat.completions.create(
    model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
    messages=[
        {"role": "system", "content": "You are a travel agent. Be descriptive and helpful."},
        {"role": "user", "content": "Tell me about San Francisco"},
    ],
)

# Stop the clock before printing so output time is not counted.
elapsed = time.perf_counter() - start

print(f"Took {elapsed:.2f} secs")
print(response.choices[0].message.content)

In [None]:
import os
import openai
import time

# Send the same system + user prompt to the 70B Llama 3.1 model and time it.
# perf_counter() is a monotonic, high-resolution clock, so the measured
# duration is not skewed by system clock adjustments (unlike time.time()).
start = time.perf_counter()

response = client.chat.completions.create(
    model="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
    messages=[
        {"role": "system", "content": "You are a travel agent. Be descriptive and helpful."},
        {"role": "user", "content": "Tell me about San Francisco"},
    ],
)

# Stop the clock before printing so output time is not counted.
elapsed = time.perf_counter() - start

print(f"Took {elapsed:.2f} secs")
print(response.choices[0].message.content)

In [None]:
import os
import openai
import time

# Send the same system + user prompt to the 405B Llama 3.1 model and time it.
# perf_counter() is a monotonic, high-resolution clock, so the measured
# duration is not skewed by system clock adjustments (unlike time.time()).
start = time.perf_counter()

response = client.chat.completions.create(
    model="meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
    messages=[
        {"role": "system", "content": "You are a travel agent. Be descriptive and helpful."},
        {"role": "user", "content": "Tell me about San Francisco"},
    ],
)

# Stop the clock before printing so output time is not counted.
elapsed = time.perf_counter() - start

print(f"Took {elapsed:.2f} secs")
print(response.choices[0].message.content)

### Together inference pipeline

In [None]:
!pip install --upgrade together

In [None]:
import os
from together import Together

# Never hard-code the API key in the notebook. Reuse a key that is already
# defined in this kernel session; otherwise read it from the environment.
TOGETHER_API_KEY = globals().get("TOGETHER_API_KEY") or os.environ.get("TOGETHER_API_KEY")
if not TOGETHER_API_KEY:
    raise RuntimeError(
        "TOGETHER_API_KEY is not set; export it in the environment before running this cell."
    )

# From here on `client` is the native Together SDK client.
client = Together(api_key=TOGETHER_API_KEY)

In [None]:
import os
from together import Together
import time

# Single-turn chat request (one user message, no system prompt), timed.
# perf_counter() is a monotonic, high-resolution clock, so the measured
# duration is not skewed by system clock adjustments (unlike time.time()).
start = time.perf_counter()

response = client.chat.completions.create(
    model="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
    messages=[{"role": "user", "content": "What are some fun things to do in New York?"}],
)

# Stop the clock before printing so output time is not counted.
elapsed = time.perf_counter() - start

print(f"Took {elapsed:.2f} secs")
print(response.choices[0].message.content)

In [None]:
import os
from together import Together
import time

# Multi-turn chat request: the earlier user/assistant exchange is passed back
# in `messages` so the final user question ("Where is it?") has context.
# perf_counter() is a monotonic, high-resolution clock, so the measured
# duration is not skewed by system clock adjustments (unlike time.time()).
start = time.perf_counter()

response = client.chat.completions.create(
    model="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
    messages=[
        {"role": "user", "content": "What are some fun things to do in New York?"},
        {"role": "assistant", "content": "You could go to the Empire State Building!"},
        {"role": "user", "content": "That sounds fun! Where is it?"},
    ],
)

# Stop the clock before printing so output time is not counted.
elapsed = time.perf_counter() - start

print(f"Took {elapsed:.2f} secs")
print(response.choices[0].message.content)

In [None]:
# Function-calling demo.
# Candidate values for the `model` argument below (presumably the models with
# function-calling support -- verify against the Together docs):
#   mistralai/Mixtral-8x7B-Instruct-v0.1
#   mistralai/Mistral-7B-Instruct-v0.1
#   togethercomputer/CodeLlama-34b-Instruct

import os
import json
import openai
import time

# Re-create an OpenAI client pointed at Together's endpoint.
# TOGETHER_API_KEY must already be defined by an earlier cell.
client = openai.OpenAI(
    base_url="https://api.together.xyz/v1",
    api_key=TOGETHER_API_KEY,
)

# JSON-schema description of the single function the model may ask to call.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {
                        "type": "string",
                        "enum": [
                            "celsius",
                            "fahrenheit",
                        ],
                    },
                },
            },
        },
    },
]

messages = [
    {"role": "system", "content": "You are a helpful assistant that can access external functions. The responses from these function calls will be appended to this dialogue. Please provide responses based on the information from these function calls."},
    {"role": "user", "content": "What is the current temperature of New York, San Francisco and Chicago?"},
]

# perf_counter() is a monotonic, high-resolution clock, so the measured
# duration is not skewed by system clock adjustments (unlike time.time()).
start = time.perf_counter()

response = client.chat.completions.create(
    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
    messages=messages,
    tools=tools,
    tool_choice="auto",  # let the model decide whether to call the function
)

# Stop the clock before printing so output time is not counted.
elapsed = time.perf_counter() - start

print(f"Took {elapsed:.2f} secs")
# Print only the tool calls the model requested, as pretty-printed JSON.
print(json.dumps(response.choices[0].message.model_dump()['tool_calls'], indent=2))