In [1]:
!pip install 'litellm'==1.44.9

Collecting litellm==1.44.9
  Downloading litellm-1.44.9-py3-none-any.whl.metadata (32 kB)
Collecting python-dotenv>=0.2.0 (from litellm==1.44.9)
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting tiktoken>=0.7.0 (from litellm==1.44.9)
  Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Downloading litellm-1.44.9-py3-none-any.whl (8.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.6/8.6 MB[0m [31m41.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m19.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-dotenv, tiktoken, litellm
Successfully installed litellm-1.44.9 python-dotenv-1.0.1 tiktoken-0.8.0


In [2]:
import os
from google.colab import userdata

# Copy secrets from the Colab secret store into the process environment.
# The same Colab secret feeds both GOOGLE_API_KEY and GEMINI_API_KEY.
for env_var, secret_name in (
    ("GOOGLE_API_KEY", "GOOGLE_API_KEY"),
    ("GEMINI_API_KEY", "GOOGLE_API_KEY"),
    ("COHERE_API_KEY", "COHERE_API_KEY"),
    ("GROQ_API_KEY", "GROQ_API_KEY"),
):
    os.environ[env_var] = userdata.get(secret_name)



In [3]:
from litellm import completion, acompletion
from pprint import pprint

# Single-turn chat history; besides "user", the role may be "system" or "assistant".
messages = [
    {"role": "user", "content": "لماذا تبدو السماء زرقاء بالنهار؟"},
]

# Blocking request to Cohere through the LiteLLM SDK; the reply is capped at 200 tokens.
response = completion(
    model="cohere/command-r-plus-08-2024",
    messages=messages,
    temperature=0.5,
    max_tokens=200,
)

In [4]:
response.model

'command-r-plus-08-2024'

In [5]:
pprint(response.choices[0].message.content)

('تبدو السماء زرقاء خلال النهار بسبب ظاهرة تسمى تشتت رايلي. عندما تصل أشعة '
 'الشمس إلى الغلاف الجوي للأرض، فإنها تصطدم بجزيئات الغلاف الجوي مثل '
 'النيتروجين والأكسجين. هذه الجزيئات أصغر بكثير من أطوال موجات الضوء المرئي، '
 'مما يؤدي إلى تشتت الضوء بجميع ألوان الطيف.\n'
 '\n'
 'يتم تشتيت الضوء الأزرق بشكل أكبر من الألوان الأخرى بسبب طوله الموجي الأقصر. '
 'عندما تنتشر جزيئات الغلاف الجوي الضوء الأزرق في جميع الاتجاهات، فإنه ينتشر '
 'عبر السماء، مما يجعلها تبدو زرقاء للعين المجردة.\n'
 '\n'
 'يتم تشتيت الألوان الأخرى أيضًا، ولكن بدرجة أقل من اللون الأزرق. على سبيل '
 'المثال، يتم تشتيت الضوء الأحمر')


### SDK Logging

In [6]:
import litellm
import os
import json

# Directory that receives one JSON-lines file per outcome (success / failure).
logs_dir = "./llm-logs"
os.makedirs(logs_dir, exist_ok=True)


def _append_log_record(filename, kwargs, completion_obj, start_time, end_time):
    """Append one JSON line describing an LLM call to ``logs_dir/filename``.

    The record is written with ``ensure_ascii=False`` so non-ASCII prompts and
    completions stay human-readable; the file is therefore opened explicitly as
    UTF-8 so the write does not depend on the platform's default encoding.
    Values that are not JSON-serializable are stringified via ``default=str``.
    """
    record = {
        "kwargs": kwargs,
        "completion_obj": completion_obj,
        "start_time": start_time,
        "end_time": end_time,
    }
    with open(os.path.join(logs_dir, filename), "a", encoding="utf-8") as dest:
        dest.write(json.dumps(record, ensure_ascii=False, default=str) + "\n")


def log_success(kwargs, completion_obj, start_time, end_time):
    """Callback for successful calls: append the call details to success-logs.jsonl.

    Args:
        kwargs: Call arguments handed to the callback.
        completion_obj: Result object handed to the callback.
        start_time: Start timestamp handed to the callback.
        end_time: End timestamp handed to the callback.
    """
    _append_log_record("success-logs.jsonl", kwargs, completion_obj, start_time, end_time)


def log_failure(kwargs, completion_obj, start_time, end_time):
    """Callback for failed calls: append the call details to failure-logs.jsonl.

    Takes the same arguments as ``log_success``.
    """
    _append_log_record("failure-logs.jsonl", kwargs, completion_obj, start_time, end_time)

# Register the two file loggers as LiteLLM's success and failure callbacks.
# Assigning a fresh list replaces whatever callbacks were set before.
litellm.success_callback = [log_success]
litellm.failure_callback = [log_failure]

In [7]:
from litellm import completion

# Single-turn chat history; besides "user", the role may be "system" or "assistant".
messages = [
    {"role": "user", "content": "لماذا تبدو السماء زرقاء بالنهار؟"},
]

# Same question, this time routed to Gemini through the LiteLLM SDK.
response = completion(
    model="gemini/gemini-pro",
    messages=messages,
    temperature=0.5,
    max_tokens=200,
)

### Proxy Server

In [8]:
!pip install 'litellm[proxy]'==1.44.9

Collecting apscheduler<4.0.0,>=3.10.4 (from litellm[proxy]==1.44.9)
  Downloading APScheduler-3.11.0-py3-none-any.whl.metadata (6.4 kB)
Collecting backoff (from litellm[proxy]==1.44.9)
  Downloading backoff-2.2.1-py3-none-any.whl.metadata (14 kB)
Collecting cryptography<43.0.0,>=42.0.5 (from litellm[proxy]==1.44.9)
  Downloading cryptography-42.0.8-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (5.3 kB)
Collecting fastapi<0.112.0,>=0.111.0 (from litellm[proxy]==1.44.9)
  Downloading fastapi-0.111.1-py3-none-any.whl.metadata (26 kB)
Collecting fastapi-sso<0.11.0,>=0.10.0 (from litellm[proxy]==1.44.9)
  Downloading fastapi_sso-0.10.0-py3-none-any.whl.metadata (4.4 kB)
Collecting gunicorn<23.0.0,>=22.0.0 (from litellm[proxy]==1.44.9)
  Downloading gunicorn-22.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting pynacl<2.0.0,>=1.5.0 (from litellm[proxy]==1.44.9)
  Downloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (8.6 kB)
Collecting

In [9]:
# ============ check any litellm processes
# !pgrep -fl litellm

# ============ kill any litellm processes
# !pkill -f litellm

In [10]:
import os
from google.colab import userdata


# The proxy config reads this key through its "os.environ/GROQ_API_KEY" reference.
os.environ["GROQ_API_KEY"] = userdata.get('GROQ_API_KEY')

In [11]:
%%writefile llm.yaml
# Proxy config exposing two Groq-hosted models.
# model_name is the alias under which each model is listed;
# litellm_params.model is the provider-prefixed model id.
model_list:
  - model_name: "groq-gemma9b"
    litellm_params:
      model: "groq/gemma2-9b-it"
      # "os.environ/<NAME>" makes the proxy read the key from that environment variable.
      api_key: "os.environ/GROQ_API_KEY"

  - model_name: "groq-mixtral"
    litellm_params:
      model: "groq/mixtral-8x7b-32768"
      api_key: "os.environ/GROQ_API_KEY"


Writing llm.yaml


In [12]:
!nohup litellm --port 4000 --config llm.yaml &
!sleep 10 && tail nohup.out

nohup: appending output to 'nohup.out'
INFO:     Started server process [907]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:4000 (Press CTRL+C to quit)


In [None]:
# import openai
# from pprint import pprint

# client = openai.OpenAI(
#     api_key="any key",
#     base_url="http://0.0.0.0:4000"
# )

In [13]:
# Single-turn chat history; besides "user", the role may be "system" or "assistant".
messages = [
    {"role": "user", "content": "لماذا تبدو السماء زرقاء بالنهار؟"},
]

# Proxy-based alternative, kept for reference (needs the OpenAI client cell above):
# response = client.chat.completions.create(
#     model="groq-mixtral",
#     messages=messages,
# )

# Direct SDK call using the provider-prefixed model id (not the "groq-mixtral" alias).
response = completion(
    model="groq/mixtral-8x7b-32768",
    messages=messages,
    temperature=0.5,
    max_tokens=200,
)

In [14]:
pprint(
    response.choices[0].message.content
)

('The reason why the sky appears blue during the day is due to a phenomenon '
 "called Rayleigh scattering. As sunlight reaches Earth's atmosphere, it is "
 'made up of different colors that are represented in the light spectrum. Each '
 'of these colors has a different wavelength, with red having the longest '
 'wavelength and violet having the shortest.\n'
 '\n'
 'When sunlight enters the atmosphere, it collides with molecules and '
 'particles present in the air, such as nitrogen and oxygen. These collisions '
 'cause the light to change direction, a process known as scattering. Shorter '
 'wavelengths of light, such as violet and blue, are scattered more than '
 'longer wavelengths, like red and yellow.\n'
 '\n'
 'However, despite violet light being scattered more than blue light, our eyes '
 'are more sensitive to blue light and less sensitive to violet light. '
 'Additionally, sunlight reaches us with less violet light to begin with due '
 'to the absorption and re-emission of li

In [15]:
# Single-turn chat history; besides "user", the role may be "system" or "assistant".
messages = [
    {"role": "user", "content": "لماذا تبدو السماء زرقاء بالنهار؟"},
]

# Proxy-based alternative, kept for reference (needs the OpenAI client cell above):
# response = client.chat.completions.create(
#     model="groq-mixtral",
#     messages=messages,
# )

# Direct SDK call to Groq's Llama 3.3 70B model, then pretty-print the reply text.
response = completion(
    model="groq/llama-3.3-70b-versatile",
    messages=messages,
    temperature=0.5,
    max_tokens=200,
)
pprint(response.choices[0].message.content)

('السماء تبدو زرقاء في النهار بسبب ظاهرة تسمى تشتت الضوء. عندما يأتي ضوء الشمس '
 'إلى الأرض، يتكون من مجموعة من الألوان المختلفة، بما في ذلك الأحمر والبرتقالي '
 'والأصفر والأزرق. عندما يمر هذا الضوء خلال الغلاف الجوي للأرض، يصطدم '
 'بالجزيئات الصغيرة في الهواء، مثل جزيئات النيتروجين والاوكسجين. هذه الجزيئات '
 'الصغيرة تطرد الضوء الأزرق أكثر من الألوان الأخرى لأن طول موجته أقصر. هذا '
 'يعني أن الضوء الأزرق ينتشر في جميع الاتجاهات، مما يجعل السماء تبدو زرقاء '
 'عندما ننظر إليها.')


### Load Balancer

In [16]:
%%writefile llm-lb.yaml
# Load-balancing config: both deployments share the alias "global-llm", so
# requests for that name are distributed across them.
model_list:
  - model_name: "global-llm"
    litellm_params:
      model: "groq/gemma2-9b-it"
      api_key: "os.environ/GROQ_API_KEY"
      rpm: 20

  - model_name: "global-llm"
    litellm_params:
      model: "groq/mixtral-8x7b-32768"
      api_key: "os.environ/GROQ_API_KEY"
      rpm: 20

# Router options must live under router_settings; a top-level routing_strategy
# key is not read by the proxy.
router_settings:
  routing_strategy: simple-shuffle # Literal["simple-shuffle", "least-busy",]

Writing llm-lb.yaml


In [17]:
!nohup litellm --port 4000 --config llm-lb.yaml &
!sleep 10 && tail nohup.out

nohup: appending output to 'nohup.out'
INFO:     Started server process [907]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:4000 (Press CTRL+C to quit)
INFO:     Started server process [1234]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8828 (Press CTRL+C to quit)


In [18]:
# import openai
# from pprint import pprint

# client = openai.OpenAI(
#     api_key="any key",
#     base_url="http://0.0.0.0:4000"
# )

In [27]:
# Proxy-based variant, kept for reference: it targets the "global-llm" alias
# defined in llm-lb.yaml through the OpenAI client.
# messages = [
#     {
#         "role": "user", # system, assistant
#         "content": "لماذا تبدو السماء زرقاء بالنهار؟"
#     }
# ]

# response = client.chat.completions.create(
#     model="global-llm",
#     messages=messages,
# )

# Initialize a LiteLLM SDK client.
# NOTE(review): this client is created without a base_url, so it presumably calls
# the provider directly instead of the proxy on port 4000 — confirm.
client = litellm.LiteLLM()

messages = [
    {
        "role": "user",  # 'system' or 'assistant' roles can also be used
        "content": "لماذا تبدو السماء زرقاء بالنهار؟"
    }
]

# Send the request using a provider-prefixed model id.
response = client.chat.completions.create(
    model="groq/llama-3.3-70b-versatile",  # NOTE: not the "global-llm" alias from llm-lb.yaml
    messages=messages
)




In [29]:
# Output the response
pprint(response["choices"][0]["message"]["content"])

('السبب الرئيسي لذلك هو ظاهرة gọi là "الانتشار الرايلي" (Rayleigh scattering). '
 'عندما يمر الضوء الشمس عبر الغلاف الجوي للأرض، فإنه يصطدم بالجزيئات الصغيرة '
 'مثل الغازات والجسيمات في الهواء. ونتيجة لذلك، يتم تشتيت الألوان المختلفة في '
 'اتجاهات مختلفة، وذلك بسبب Differences في uzunluk الموجات. \n'
 '\n'
 'ألون الضوء الأزرق هي الأقصر وأكثرها انتشارًا من بين الألوان الأخرى، لذلك فهي '
 'تتسرب في كل الاتجاهات، مما يجعل السماء تظهر باللون الأزرق. بينما الألوان '
 'الأخرى مثل الأحمر والأصفر، لها طول موجي أطول، لذلك لا تنتشر بشكل كبير، '
 'وتتحرك في خطوط مستقيمة حتى تصل إلى عيننا، مما يجعلها أقل وضوحًا في السماء.')


In [30]:
response.model

'groq/llama-3.3-70b-versatile'

### Fallbacks

In [None]:
import os
from google.colab import userdata


# The fallback proxy config reads this key through its "os.environ/GROQ_API_KEY" reference.
os.environ["GROQ_API_KEY"] = userdata.get('GROQ_API_KEY')

In [None]:
%%writefile llm-fallback.yaml
router_settings:
  enable_pre_call_checks: true

model_list:
  - model_name: "groq-gemma9b"
    litellm_params:
      model: "groq/gemma2-9b-it"
      api_key: "os.environ/GROQ_API_KEY"

  - model_name: "groq-mixtral"
    litellm_params:
      model: "groq/mixtral-8x7b-32768"
      api_key: "os.environ/GROQ_API_KEY"

litellm_settings:
  num_retries: 3
  # Each entry maps a model_name from model_list to an ordered LIST of fallback
  # model_names. The key must exist in model_list or the rule can never fire.
  fallbacks: [{"groq-gemma9b": ["groq-mixtral"]}]
  request_timeout: 10
  allowed_fails: 3 # per minute
  cooldown_time: 30

Writing llm-fallback.yaml


In [32]:
!nohup litellm --port 4000 --config llm-fallback.yaml &
!sleep 10 && tail nohup.out

nohup: appending output to 'nohup.out'


[32mLiteLLM: Proxy initialized with Config, Set models:[0m
[32m    groq-gemma9b[0m
[32m    groq-mixtral[0m
INFO:     127.0.0.1:50358 - "POST /chat/completions HTTP/1.1" 400 Bad Request
INFO:     Started server process [5433]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:32663 (Press CTRL+C to quit)


### Observability (Langfuse)

In [31]:
import os
from google.colab import userdata

# Provider key referenced by the proxy config.
os.environ["GROQ_API_KEY"] = userdata.get('GROQ_API_KEY')

# Langfuse credentials and endpoint, exported as environment variables so the
# proxy process started from this notebook can pick them up.
os.environ["LANGFUSE_PUBLIC_KEY"] = userdata.get('LANGFUSE_PUBLIC_KEY')
os.environ["LANGFUSE_SECRET_KEY"] = userdata.get('LANGFUSE_SECRET_KEY')
os.environ["LANGFUSE_HOST"] = "https://cloud.langfuse.com"

In [39]:
!pip install langfuse==2.52.2

Collecting langfuse==2.52.2
  Downloading langfuse-2.52.2-py3-none-any.whl.metadata (3.2 kB)
Collecting anyio<5.0.0,>=4.4.0 (from langfuse==2.52.2)
  Downloading anyio-4.7.0-py3-none-any.whl.metadata (4.7 kB)
Downloading langfuse-2.52.2-py3-none-any.whl (220 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/220.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m220.9/220.9 kB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading anyio-4.7.0-py3-none-any.whl (93 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/93.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m93.1/93.1 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: anyio, langfuse
  Attempting uninstall: anyio
    Found existing installation: anyio 3.7.1
    Uninstalling anyio-3.7.1:
      Successfully uninstalled anyio-3.7.1
[31mERROR: pip's

In [41]:
%%writefile llm-lanfuse.yaml
# Proxy config with Langfuse tracing: two Groq models plus one OpenAI model.
model_list:
  - model_name: "groq-gemma9b"
    litellm_params:
      model: "groq/gemma2-9b-it"
      api_key: "os.environ/GROQ_API_KEY"

  - model_name: "groq-mixtral"
    litellm_params:
      model: "groq/mixtral-8x7b-32768"
      api_key: "os.environ/GROQ_API_KEY"

  - model_name: "openai-gpt4o-mini"
    litellm_params:
      model: "openai/gpt-4o-mini"
      # NOTE(review): no visible cell exports OPENAI_API_KEY — confirm it is set
      # before the proxy is started.
      api_key: "os.environ/OPENAI_API_KEY"

litellm_settings:
  drop_params: True
  # Both successful and failed calls are reported to Langfuse.
  success_callback: ["langfuse"]
  failure_callback: ["langfuse"]
  redact_user_api_key_info: true

Overwriting llm-lanfuse.yaml


In [64]:
!nohup litellm --port 4000 --config llm-lanfuse.yaml &
!sleep 10 && tail nohup.out

nohup: appending output to 'nohup.out'

[94m Initialized Success Callbacks - ['langfuse'] [0m
[94m Initialized Failure Callbacks - ['langfuse'] [0m
[32mLiteLLM: Proxy initialized with Config, Set models:[0m
[32m    groq-gemma9b[0m
[32m    groq-mixtral[0m
INFO:     Started server process [12596]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:4000 (Press CTRL+C to quit)


In [67]:
import openai
from pprint import pprint

# OpenAI-compatible client pointed at the local LiteLLM proxy on port 4000.
# The api_key here is the literal string "OPENAI_API_KEY", i.e. a placeholder and
# not a real secret; provider keys are referenced in the proxy's YAML config.
client = openai.OpenAI(
    api_key="OPENAI_API_KEY",
    base_url="http://0.0.0.0:4000"
)

In [61]:
# Single-turn chat history sent through the proxy.
messages = [
    {
        "role": "user", # other accepted roles: system, assistant
        "content": "لماذا تبدو السماء زرقاء بالنهار؟"
    }
]

# "openai-gpt4o-mini" is the model_name alias declared in llm-lanfuse.yaml.
response = client.chat.completions.create(
    model="openai-gpt4o-mini",
    messages=messages,
)

# import openai

# # Initialize the OpenAI client with the proxy server's base URL
# client =  openai.OpenAI(
#     api_key="os.environ/GROQ_API_KEY",  # The actual key is managed by the proxy
#     base_url="http://0.0.0.0:4000"  # URL where the litellm proxy is running
# )

# messages = [
#     {
#         "role": "user",
#         "content": "لماذا تبدو السماء زرقاء بالنهار؟"
#     }
# ]

# # Send the request to the proxy server
# response = client.chat.completions.create(
#     model="groq-gemma9b",
#     messages=messages
# )



In [67]:
pprint(
    response.choices[0].message.content
)

('يبدو السماء زرقاء خلال النهار بسبب ظاهرة تسمى **散射** الضوء (Rayleigh '
 'scattering).\n'
 '\n'
 'عندما يدخل الضوء الشمسي الغلاف الجوي للأرض، فإنه يتفاعل مع جزيئات الهواء مثل '
 'الأكسجين والهيدروجين. تنتشر هذه الجزيئات الضوء القصير الموجى (الأزرق '
 'والأرجواني) في جميع الاتجاهات بشكل أكبر من الضوء الطويل الموجى (الأحمر '
 'والبرتقالي). \n'
 '\n'
 'هذا يعني أنك ترى المزيد من الضوء الأزرق من خلال جميع جهات السماء، مما يجعلها '
 'تبدو زرقاء. \n'
 '\n'
 '**أسباب أخرى:**\n'
 '\n'
 '* **زاوية الشمس:** تظهر السماء زرقاء بشكل أكثر وضوحًا عندما تكون الشمس في '
 'السماء العليا. عندما تكون الشمس قريبة من الأفق، يمر الضوء عبر كمية أكبر من '
 'الغلاف الجوي، فيؤدي ذلك إلى تشتيت لون الأزرق بشكل أكبر ويظهر لون الخافت '
 'للأحمر والأصفر.\n'
 '* **الضباب:** يمكن أن تؤثر جزيئات الغبرة والضباب على مدى تشتيت الضوء، مما '
 'يؤدي إلى إتلاف اللون الأزرق للسماء.\n'
 '* **الطقس:** قد يغير السحب والقيمان لون السماء من الأزرق إلى الأبيض أو '
 'الداكن.\n'
 '\n'
 '\n'
 '\n'
 'أتمنى أن يكون هذا مفيدًا!\n'
 '\n')


### LiteLLM + LangChain

In [72]:
# NOTE(review): unlike the other installs in this notebook, these packages are
# unpinned and upgraded to latest (-U), so results may differ between runs.
# TODO: pin versions once a known-good set is identified.
!pip install -qU langchain-openai langchain langchain_community transformers

In [74]:
import os
# Relies on `userdata` (google.colab) having been imported by an earlier cell.
# The Colab secret is named HUGGINGFACE_API_KEY; it is exported as HUGGINGFACEHUB_API_TOKEN.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = userdata.get('HUGGINGFACE_API_KEY')


In [None]:
from langchain_community.document_loaders import WebBaseLoader

# Three blog posts to load, in this order.
loader = WebBaseLoader(
    [
        "https://lilianweng.github.io/posts/2023-06-23-agent/",
        "https://lilianweng.github.io/posts/2024-07-07-hallucination/",
        "https://lilianweng.github.io/posts/2024-02-05-human-data-quality/",
    ]
)
# Load the pages listed above into `docs`.
docs = loader.load()

In [93]:
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain.llms import HuggingFaceHub
from langchain.document_loaders import WebBaseLoader
from langchain.chains import LLMChain


# LangChain chat model that talks to the local LiteLLM proxy on port 4000;
# "openai-gpt4o-mini" is the model_name alias declared in llm-lanfuse.yaml.
# NOTE(review): no api_key is passed here; ChatOpenAI presumably falls back to the
# OPENAI_API_KEY environment variable — confirm it is set in this runtime.
llm = ChatOpenAI(
    openai_api_base="http://0.0.0.0:4000",
    model = "openai-gpt4o-mini",
    temperature=0.1
)
# Alternative backend, kept for reference:
# llm = HuggingFaceHub(
#     repo_id="Qwen/QwQ-32B-Preview",  # Replace with the desired model ID
#     model_kwargs={"temperature": 0.1}
# )

In [98]:
# Prompt with a single system message; {context} is filled with the text to summarize.
map_prompt = ChatPromptTemplate.from_messages(
    [("system", "Write a concise summary of the following:\n\n{context}")]
)

# Chain wiring the prompt to the proxy-backed chat model, with a string output parser.
# NOTE(review): LLMChain is reported as deprecated in recent LangChain releases in
# favor of `map_prompt | llm | StrOutputParser()` — confirm against the installed version.
map_chain = LLMChain(
    prompt=map_prompt,
    llm=llm,
    output_parser=StrOutputParser()
)

In [109]:
# Feed the model the plain article text. Passing `docs` itself would make the prompt
# contain the Python repr of the Document list (metadata, escaped newlines and all).
context_text = "\n\n".join(doc.page_content for doc in docs)
result = map_chain.invoke({"context": context_text})

In [None]:
result

'The document titled "LLM Powered Autonomous Agents" by Lilian Weng discusses the concept of building autonomous agents using large language models (LLMs) as their core controllers. It highlights several proof-of-concept demonstrations, such as AutoGPT, GPT-Engineer, and BabyAGI, showcasing LLMs\' potential as general problem solvers beyond mere text generation. The document outlines the architecture of LLM-powered agents, which includes components for planning, memory, and tool use. \n\nKey components include:\n1. **Planning**: Agents break down tasks into manageable subgoals and reflect on past actions to improve future performance.\n2. **Memory**: Agents utilize short-term and long-term memory to retain and recall information, often leveraging external vector stores for efficient retrieval.\n3. **Tool Use**: Agents can call external APIs to access information and perform tasks beyond their pre-trained capabilities.\n\nThe document also addresses challenges faced by LLM-powered agent

In [94]:
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# text_splitter = RecursiveCharacterTextSplitter(
#     chunk_size=1000,  # Adjust based on your model's token limit
#     chunk_overlap=200  # Overlap to maintain context between chunks
# )
# doc_texts = [doc.page_content for doc in docs]
# chunks = text_splitter.split_text(" ".join(doc_texts))


In [108]:
# Invoke chain

# summaries = []
# for chunk in chunks:
#     result = map_chain.invoke({"context": chunk})
#     summaries.append(result)

# # Combine summaries into a final summary
# final_summary = " ".join(summaries)
# print(final_summary)

# print(result)

# from langchain.chains import MapReduceDocumentsChain
# text_splitter = RecursiveCharacterTextSplitter(
#     chunk_size=1000,  # Adjust based on your model's token limit
#     chunk_overlap=200  # Overlap to maintain context between chunks
# )
# doc_texts = [doc.page_content for doc in docs]
# chunks = text_splitter.split_text(" ".join(doc_texts))

# map_prompt = ChatPromptTemplate.from_messages(
#     [("system", "Write a concise summary of the following:\n\n{context}")]
# )
# map_reduce_chain = MapReduceDocumentsChain(
#     llm_chain=LLMChain(
#         prompt=map_prompt,
#         llm=llm,
#         output_parser=StrOutputParser()
#     )
# )

# map_prompt = ChatPromptTemplate.from_messages(
#     [("system", "Write a concise summary of the following:\\n\\n{context}")]
# )

# map_chain = map_prompt | llm | StrOutputParser()