In [None]:
!pip install 'litellm'==1.44.9

In [None]:
import os
from google.colab import userdata

# Read each provider key from the Colab secret store and expose it as an environment variable.
for env_var, secret_name in (
    ("OPENAI_API_KEY", "openai-colab"),
    ("COHERE_API_KEY", "cohere"),
):
    os.environ[env_var] = userdata.get(secret_name)

In [None]:
from litellm import completion, acompletion
from pprint import pprint

# A single user turn; the other roles a message can carry are "system" and "assistant".
messages = [dict(role="user", content="لماذا تبدو السماء زرقاء بالنهار؟")]

# Send the conversation through LiteLLM's unified completion() entry point,
# selecting the backend with the "<provider>/<model>" string.
response = completion(
    messages=messages,
    model="cohere/command-r-plus-08-2024",
    max_tokens=200,
    temperature=0.5,
)

In [None]:
# Display the model identifier recorded on the response object.
response.model

'command-r-plus-08-2024'

In [None]:
# Pretty-print the text of the first (and here only inspected) choice.
pprint(
    response.choices[0].message.content
)

### SDK Logging

In [None]:
import litellm
import os
import json

# Directory that receives one JSON-lines file per outcome (success / failure).
logs_dir = "./llm-logs"
os.makedirs(logs_dir, exist_ok=True)


def _append_log_record(filename, kwargs, completion_obj, start_time, end_time):
    """Append one call record as a single JSON line to ``logs_dir/filename``.

    Args:
        filename: Name of the ``.jsonl`` file inside ``logs_dir``.
        kwargs: Call arguments handed to the callback.
        completion_obj: Result object (or error payload) handed to the callback.
        start_time: Start marker handed to the callback.
        end_time: End marker handed to the callback.
    """
    record = {
        "kwargs": kwargs,
        "completion_obj": completion_obj,
        "start_time": start_time,
        "end_time": end_time,
    }
    # default=str turns any value json cannot encode natively into its str() form
    # instead of raising TypeError.
    line = json.dumps(record, ensure_ascii=False, default=str)
    # ensure_ascii=False emits non-ASCII text (e.g. Arabic prompts) verbatim, so the
    # file must be opened as UTF-8 explicitly rather than with the locale default,
    # which may be unable to encode it.
    with open(os.path.join(logs_dir, filename), "a", encoding="utf-8") as dest:
        dest.write(line + "\n")


def log_success(kwargs, completion_obj, start_time, end_time):
    """Callback for successful calls: append the record to ``success-logs.jsonl``."""
    _append_log_record("success-logs.jsonl", kwargs, completion_obj, start_time, end_time)


def log_failure(kwargs, completion_obj, start_time, end_time):
    """Callback for failed calls: append the record to ``failure-logs.jsonl``."""
    _append_log_record("failure-logs.jsonl", kwargs, completion_obj, start_time, end_time)

# Register the file loggers as LiteLLM's module-level callbacks:
# one list for successful calls, one for failed calls.
litellm.success_callback = [log_success]
litellm.failure_callback = [log_failure]

In [None]:
from litellm import completion

# Same single-turn question as before ("system" and "assistant" are the other roles).
question = "لماذا تبدو السماء زرقاء بالنهار؟"
messages = [{"role": "user", "content": question}]

# Issue the call through the SDK, this time against OpenAI.
response = completion(
    model="openai/gpt-4o-mini",
    messages=messages,
    max_tokens=200,
    temperature=0.5,
)

### Proxy Server

In [None]:
!pip install 'litellm[proxy]'==1.44.9 openai==1.42.0

In [None]:
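# Helper commands for the background LiteLLM proxy (uncomment the one you need).
#
# List any running litellm processes:
# !pgrep -fl litellm

# Kill any running litellm processes (do this before starting a proxy with a
# different config on the same port):
# !pkill -f litellm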

In [None]:
import os
from google.colab import userdata

# Map environment-variable name -> Colab secret name, then export each key.
secret_names = {
    "OPENAI_API_KEY": "openai-colab",
    "GROQ_API_KEY": "GROQ_API_KEY",
}
for env_var, secret_name in secret_names.items():
    os.environ[env_var] = userdata.get(secret_name)

In [None]:
%%writefile llm.yaml
# Proxy config: each entry exposes a public alias (model_name) for one backend model.
# An api_key of the form os.environ/<NAME> refers to an environment variable by name.
model_list:
  - model_name: groq-gemma9b
    litellm_params:
      model: groq/gemma2-9b-it
      api_key: os.environ/GROQ_API_KEY

  - model_name: groq-mixtral
    litellm_params:
      model: groq/mixtral-8x7b-32768
      api_key: os.environ/GROQ_API_KEY

  - model_name: openai-gpt4o-mini
    litellm_params:
      model: openai/gpt-4o-mini
      api_key: os.environ/OPENAI_API_KEY

Overwriting llm.yaml


In [None]:
!nohup litellm --port 4000 --config llm.yaml &
!sleep 10 && tail nohup.out

In [None]:
import openai
from pprint import pprint

# OpenAI-compatible client pointed at the local proxy on port 4000
# rather than at the OpenAI API; the key passed here is a placeholder string.
client = openai.OpenAI(
    base_url="http://0.0.0.0:4000",
    api_key="any key",
)

In [None]:
# Single user turn ("system" and "assistant" are the other available roles).
messages = [dict(role="user", content="لماذا تبدو السماء زرقاء بالنهار؟")]

# "groq-mixtral" is the alias declared in llm.yaml, not a provider model id.
response = client.chat.completions.create(messages=messages, model="groq-mixtral")

In [None]:
# Pretty-print the text of the first returned choice.
pprint(response.choices[0].message.content)

### Load Balancer

In [None]:
%%writefile llm-lb.yaml
# Load-balancing config: all three deployments share the alias "global-llm",
# so a request for that alias can be served by any of them.
# rpm declares each deployment's requests-per-minute budget.
model_list:
  - model_name: "global-llm"
    litellm_params:
      model: "groq/gemma2-9b-it"
      api_key: "os.environ/GROQ_API_KEY"
      rpm: 20

  - model_name: "global-llm"
    litellm_params:
      model: "groq/mixtral-8x7b-32768"
      api_key: "os.environ/GROQ_API_KEY"
      rpm: 20

  - model_name: "global-llm"
    litellm_params:
      model: "openai/gpt-4o-mini"
      api_key: "os.environ/OPENAI_API_KEY"
      rpm: 10

# Router options belong under router_settings in the proxy config.
router_settings:
  routing_strategy: simple-shuffle # Literal["simple-shuffle", "least-busy",]

Writing llm-lb.yaml


In [None]:
!nohup litellm --port 4000 --config llm-lb.yaml &
!sleep 10 && tail nohup.out

In [None]:
import openai
from pprint import pprint

# Client for the proxy on port 4000; the API key value is a placeholder string.
proxy_url = "http://0.0.0.0:4000"
client = openai.OpenAI(base_url=proxy_url, api_key="any key")

In [None]:
# Single user turn ("system" and "assistant" are the other available roles).
user_message = {"role": "user", "content": "لماذا تبدو السماء زرقاء بالنهار؟"}
messages = [user_message]

# Ask for the shared alias; which deployment answers is up to the proxy.
response = client.chat.completions.create(
    messages=messages,
    model="global-llm",
)

In [None]:
# Display the model recorded on the response to the "global-llm" request.
response.model

'groq/mixtral-8x7b-32768'

### Fallbacks

In [None]:
import os
from google.colab import userdata

# Export the provider keys from Colab secrets (environment variable <- secret name).
os.environ.update(
    {
        "OPENAI_API_KEY": userdata.get('openai-colab'),
        "GROQ_API_KEY": userdata.get('GROQ_API_KEY'),
    }
)

In [None]:
%%writefile llm-fallback.yaml
router_settings:
  enable_pre_call_checks: true

model_list:
  - model_name: "groq-gemma9b"
    litellm_params:
      model: "groq/gemma2-9b-it"
      api_key: "os.environ/GROQ_API_KEY"

  - model_name: "groq-mixtral"
    litellm_params:
      model: "groq/mixtral-8x7b-32768"
      api_key: "os.environ/GROQ_API_KEY"

  - model_name: "openai-gpt4o-mini"
    litellm_params:
      model: "openai/gpt-4o-mini"
      api_key: "os.environ/OPENAI_API_KEY"
      rpm: 20

litellm_settings:
  num_retries: 3
  # Each entry maps a model group to a LIST of fallback groups, tried in order.
  # A bare string here would be iterated character by character.
  fallbacks: [{"openai-gpt4o-mini": ["groq-mixtral"]}]
  request_timeout: 10
  allowed_fails: 3 # per minute
  cooldown_time: 30

Writing llm-fallback.yaml


In [None]:
!nohup litellm --port 4000 --config llm-fallback.yaml &
!sleep 10 && tail nohup.out

### Observability

In [None]:
import os
from google.colab import userdata

os.environ["OPENAI_API_KEY"] = userdata.get('openai-colab')
os.environ["GROQ_API_KEY"] = userdata.get('GROQ_API_KEY')

# Langfuse credentials are read from Colab secrets like the provider keys, never
# written into the notebook. Store them under these secret names; any key pair that
# was previously committed in plain text should be revoked and regenerated.
os.environ["LANGFUSE_PUBLIC_KEY"] = userdata.get('LANGFUSE_PUBLIC_KEY')
os.environ["LANGFUSE_SECRET_KEY"] = userdata.get('LANGFUSE_SECRET_KEY')
os.environ["LANGFUSE_HOST"] = "https://cloud.langfuse.com"

In [None]:
# Confirm the secret resolves to a non-empty value without echoing the
# credential itself into the saved notebook output.
bool(userdata.get('LANGFUSE_SECRET_KEY_X'))

'pk-lf-<redacted>'

In [None]:
!pip install langfuse==2.52.2

In [None]:
%%writefile llm-lanfuse.yaml
# Same three model aliases as the basic proxy config, plus Langfuse callbacks.
model_list:
  - model_name: groq-gemma9b
    litellm_params:
      model: groq/gemma2-9b-it
      api_key: os.environ/GROQ_API_KEY

  - model_name: groq-mixtral
    litellm_params:
      model: groq/mixtral-8x7b-32768
      api_key: os.environ/GROQ_API_KEY

  - model_name: openai-gpt4o-mini
    litellm_params:
      model: openai/gpt-4o-mini
      api_key: os.environ/OPENAI_API_KEY

litellm_settings:
  drop_params: true
  # Report both successful and failed calls to Langfuse.
  success_callback:
    - langfuse
  failure_callback:
    - langfuse
  redact_user_api_key_info: true

Overwriting llm-lanfuse.yaml


In [None]:
!nohup litellm --port 4000 --config llm-lanfuse.yaml &
!sleep 10 && tail nohup.out

In [None]:
import openai
from pprint import pprint

# Point the OpenAI client at the local proxy on port 4000; the key is a placeholder string.
client_options = {"api_key": "any key", "base_url": "http://0.0.0.0:4000"}
client = openai.OpenAI(**client_options)

In [None]:
# Single user turn ("system" and "assistant" are the other available roles).
messages = [{"role": "user", "content": "لماذا تبدو السماء زرقاء بالنهار؟"}]

# Call the alias declared in llm-lanfuse.yaml through the proxy.
request_args = dict(model="openai-gpt4o-mini", messages=messages)
response = client.chat.completions.create(**request_args)

In [None]:
# Pretty-print the text of the first returned choice.
pprint(response.choices[0].message.content)

### LiteLLM + LangChain

In [None]:
!pip install -qU langchain-openai langchain langchain_community

In [None]:
from langchain_community.document_loaders import WebBaseLoader

# Pages to fetch; each URL is loaded into the resulting document list.
blog_urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2024-07-07-hallucination/",
    "https://lilianweng.github.io/posts/2024-02-05-human-data-quality/",
]
loader = WebBaseLoader(blog_urls)
docs = loader.load()

In [None]:
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

# LangChain chat model routed through the local proxy on port 4000;
# "openai-gpt4o-mini" is the alias declared in the proxy config.
llm = ChatOpenAI(
    model="openai-gpt4o-mini",
    temperature=0.1,
    openai_api_base="http://0.0.0.0:4000",
)

In [None]:
# Summarisation prompt. The instruction and the documents are separated by two real
# newlines; an escaped "\\n" sequence would reach the model as literal backslash-n text.
map_prompt = ChatPromptTemplate.from_messages(
    [("system", "Write a concise summary of the following:\n\n{context}")]
)

# prompt -> chat model -> plain string
map_chain = map_prompt | llm | StrOutputParser()

In [None]:
# Invoke chain
# Feed the page text rather than the Document list itself: formatting the list into
# the template would insert its Python repr (metadata and escaped newlines included).
context_text = "\n\n".join(doc.page_content for doc in docs)
result = map_chain.invoke({"context": context_text})

In [None]:
# Display the summary produced by the chain.
result

'The document titled "LLM Powered Autonomous Agents" by Lilian Weng discusses the concept of building autonomous agents using large language models (LLMs) as their core controllers. It highlights several proof-of-concept demonstrations, such as AutoGPT, GPT-Engineer, and BabyAGI, showcasing LLMs\' potential as general problem solvers beyond mere text generation. The document outlines the architecture of LLM-powered agents, which includes components for planning, memory, and tool use. \n\nKey components include:\n1. **Planning**: Agents break down tasks into manageable subgoals and reflect on past actions to improve future performance.\n2. **Memory**: Agents utilize short-term and long-term memory to retain and recall information, often leveraging external vector stores for efficient retrieval.\n3. **Tool Use**: Agents can call external APIs to access information and perform tasks beyond their pre-trained capabilities.\n\nThe document also addresses challenges faced by LLM-powered agent