In [1]:
# Libraries, Variables and Constants
import autogen, os
import json
from dotenv import load_dotenv

# File formats that can be stored in a vector database instance
from autogen.retrieve_utils import TEXT_FORMATS # requires pip install markdownify

# Seed used for autogen's response caching (default seed is 41).
cache_seed = 41

# Load credentials from the env file; the path is relative to the working directory.
load_dotenv("./../credentials_my.env")

# Show which file extensions the retrieval utilities accept.
print(f"SUPPORTED TEXT_FORMATS: {TEXT_FORMATS}")

SUPPORTED TEXT_FORMATS: ['txt', 'json', 'csv', 'tsv', 'md', 'html', 'htm', 'rtf', 'rst', 'jsonl', 'log', 'xml', 'yaml', 'yml', 'pdf']


In [2]:
# Build the LLM configuration passed to the agents in this notebook.
# Only the entry of 'models_list.json' whose "model" matches the filter is kept.
config_list = autogen.config_list_from_json(
    env_or_file='models_list.json',
    filter_dict={ "model": {"gpt4-0125preview-128k"} }
)

llm_config = {
    "cache_seed": cache_seed,    # seed for caching and reproducibility
    "config_list": config_list,  # a list of OpenAI API configurations
    "temperature": 0,            # temperature for sampling
}

# Display a redacted copy: echoing `llm_config` itself would write the api_key
# into the saved cell output, where it is shared along with the notebook.
{
    **llm_config,
    "config_list": [
        {**cfg, "api_key": "<REDACTED>"} if "api_key" in cfg else dict(cfg)
        for cfg in config_list
    ],
}

{'cache_seed': 41,
 'config_list': [{'model': 'gpt4-0125preview-128k',
   'api_key': '<REDACTED>',
   'base_url': 'https://mmopenaiscus.openai.azure.com/',
   'api_type': 'azure',
   'api_version': '2024-02-15-preview'}],
 'temperature': 0}

In [3]:
# import agents
# you need to run pip install "pyautogen[retrievechat]" before running this cell, because retrievechat is
# an optional feature of the pyautogen package that requires additional dependencies not included in the base package

from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent
from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent # requires pypdf

In [4]:
# Create a 'RetrieveAssistantAgent' instance that answers using the shared llm_config

assistant_settings = {
    "name": "assistant",
    "system_message": "You are a helpful assistant.",
    "llm_config": llm_config,
}
assistant = RetrieveAssistantAgent(**assistant_settings)

assistant

<autogen.agentchat.contrib.retrieve_assistant_agent.RetrieveAssistantAgent at 0x7fd3df65bb10>

# Create "ragproxyagent"
By default, the human_input_mode is "ALWAYS", which means the agent will ask for human input at every step. We set it to "NEVER" here.
`docs_path` is the path to the docs directory. It can also be the path to a single file, or the url to a single file. By default,
it is set to None, which works only if the collection is already created.
`task` indicates the kind of task we're working on. In this example, it's a `qa` task.
`chunk_token_size` is the chunk token size for the retrieve chat. By default, it is set to `max_tokens * 0.6`, here we set it to 2000.
`custom_text_types` is a list of file types to be processed. Default is `autogen.retrieve_utils.TEXT_FORMATS`.
This only applies to files under the directories in `docs_path`. Explicitly included files and urls will be chunked regardless of their types.
In this example, we pass the default `TEXT_FORMATS`; the alternative `["mdx"]` (left as a comment in the code) would restrict processing to `.mdx` files.
Since no directory such as `website/docs` is included in `docs_path` here, no directory is scanned; the explicitly included urls are still processed.

In [5]:
import chromadb

# Documents to index, all given as explicit urls.
docs_to_index = [
    "https://raw.githubusercontent.com/microsoft/autogen/main/README.md",
    "https://drlee.io/harnessing-the-power-of-autogen-and-openai-gpts-for-advanced-code-interpretation-and-development-571ddb6f814c",
    "https://raw.githubusercontent.com/microsoft/FLAML/main/website/docs/Examples/Integrate%20-%20Spark.md",
    # Disabled sources:
    # "https://raw.githubusercontent.com/microsoft/FLAML/main/website/docs/Research.md",
    # os.path.join(os.path.abspath(""), "..", "website", "docs"),
]

# Retrieval settings: a question-answering task over a persistent chroma collection.
retrieve_settings = {
    "task": "qa",
    "docs_path": docs_to_index,
    "custom_text_types": TEXT_FORMATS,  # alternative: ["mdx"]
    "chunk_token_size": 2000,
    # token counting reference: https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken
    "model": "gpt-4",
    "client": chromadb.PersistentClient(path="/tmp/chromadb"),
    "collection_name": "mauromi_collection_042",
    "embedding_model": "all-mpnet-base-v2",
    # False: do not reuse an existing collection (an existing one has to be removed manually)
    "get_or_create": False,
}

ragproxyagent = RetrieveUserProxyAgent(
    name="ragproxyagent",
    human_input_mode="NEVER",
    max_consecutive_auto_reply=3,
    retrieve_config=retrieve_settings,
    code_execution_config=False,  # False: never execute code
)

ragproxyagent

<autogen.agentchat.contrib.retrieve_user_proxy_agent.RetrieveUserProxyAgent at 0x7fd3df659710>

# Example 1
**Note**: if the `sentence_transformers` python package is not installed, please install it with `pip install sentence_transformers`.

In [6]:
# Always reset the assistant before starting a new conversation.
assistant.reset()

# How the chat proceeds:
#   1. ragproxyagent builds a prompt from the problem and sends it to the assistant as the initial message.
#   2. the assistant generates a response, which is sent back to the ragproxyagent for processing.
#   3. this continues until the termination condition is met. In RetrieveChat, without a human in the loop,
#      that condition is "no code block detected"; with a human in the loop, it is the user saying "exit".
question = "What is autogen?"  # other questions to try: "what are the limitations?", "whom is autogen particularly useful for?"
answer = ragproxyagent.initiate_chat(
    assistant,
    problem=question,
    message=ragproxyagent.message_generator,
    # extra filter for the embeddings search: only documents that contain "autogen" are searched
    search_string="autogen",
)

Trying to create collection.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Number of requested results 20 is greater than number of elements in index 6, updating n_results = 6


doc_ids:  [['doc_3', 'doc_0', 'doc_4', 'doc_1', 'doc_2']]
[32mAdding doc_id doc_3 to context.[0m
[32mAdding doc_id doc_0 to context.[0m
[32mAdding doc_id doc_4 to context.[0m
[32mAdding doc_id doc_1 to context.[0m
[33mragproxyagent[0m (to assistant):

You're a retrieve augmented chatbot. You answer user's questions based on your own knowledge and the
context provided by the user.
If you can't answer the question with or without the current context, you should reply exactly `UPDATE CONTEXT`.
You must give as short an answer as possible.

User's question is: What is autogen?

Context is: # Harnessing the Power of AutoGen and OpenAI GPTs for Advanced Code Interpretation and Development | by Dr. Ernesto Lee | Medium

Harnessing the Power of AutoGen and OpenAI GPTs for Advanced Code Interpretation and Development | by Dr. Ernesto Lee | Medium[Open in app](https://rsci.app.link/?%24canonical_url=https%3A%2F%2Fmedium.com%2Fp%2F571ddb6f814c&%7Efeature=LoOpenInAppButton&%7Echannel=Sho

### assistant (to ragproxyagent):

AutoGen is a framework for developing LLM applications using multi-agent conversations, enabling complex task solving with minimal effort.

## Ask the same question using normal UserProxyAgent / AssistantAgent

In [7]:
import autogen

# Plain user proxy (no retrieval), used to ask the same question without retrieved context.
user_proxy = autogen.UserProxyAgent(
    name                       = "user_proxy",
    human_input_mode           = "NEVER", # NEVER / ALWAYS / TERMINATE
    max_consecutive_auto_reply = 1,

    # True when the message content ends with "TERMINATE", False otherwise.
    # dict.get only applies its default when the key is missing, so a message whose
    # "content" is present but None is mapped to "" before the string methods run.
    is_termination_msg         = lambda x: (x.get("content") or "").rstrip().endswith("TERMINATE"),

    code_execution_config = {
        "work_dir": "coding",

        # Using docker is safer than running the generated code directly.
        # set use_docker=True if docker is available to run the generated code.
        "use_docker": False
    },

    llm_config=llm_config
)

# Ask the assistant directly; the chat summary is produced with the "reflection_with_llm" method.
chat_res = user_proxy.initiate_chat(
    assistant,
    message = "What is Autogen?",
    summary_method = "reflection_with_llm",
)

[33muser_proxy[0m (to assistant):

What is Autogen?

--------------------------------------------------------------------------------
[33massistant[0m (to user_proxy):

"Autogen" can refer to different concepts depending on the context in which it's used. Here are a few possibilities:

1. **Software Development**: In the context of programming and software development, "Autogen" often refers to tools or scripts that automatically generate code or documentation. These tools can help in creating boilerplate code, APIs, or even documentation from a set of specifications or templates. This automation can significantly speed up development processes and ensure consistency across large projects.

2. **Medical or Biological Context**: The term could also be used in a medical or biological context, referring to autogenous or autogenic training or therapy. This is a technique that teaches your body to respond to your verbal commands, helping to control bodily functions that are normally con

KeyboardInterrupt: 

### assistant (to user_proxy):

"Autogen" can refer to different concepts depending on the context in which it's used. Here are a few possibilities:

1. **Software Development**: In the context of programming and software development, "Autogen" often refers to tools or scripts that automatically generate code or documentation. These tools can help in creating boilerplate code, APIs, client libraries, or even documentation from a predefined template or source code annotations. This automation can significantly speed up development processes and ensure consistency across large projects.

2. **Medical or Biological Term**: The term "autogenic" or "autogenesis" can refer to processes or responses that originate from within an organism, cell, or system. For example, autogenic training is a relaxation technique developed by German psychiatrist Johannes Heinrich Schultz, which involves self-induced suggestive relaxation and stress reduction.

3. **Specific Software or Libraries**: There are specific tools or libraries named "Autogen" designed for various purposes, such as generating code from other specifications. For example, in the context of GNU/Linux systems, there is a tool called "AutoGen" that is used to simplify the creation and maintenance of programs that contain large amounts of repetitive text.

Without more context, it's challenging to provide a precise definition of "Autogen." If you have a specific context in mind, please provide more details for a more accurate explanation.

## Customize Embedding Function with Azure text-embedding-3-large

In [9]:
from chromadb.utils import embedding_functions

# Constructor argument -> environment variable that supplies its value.
# Variables are read in this order; a missing one raises KeyError.
azure_embedding_env = {
    "model_name": "EMBEDDING_ADA_003_LARGE",
    "api_base": "AZURE_OPENAI_ENDPOINT_CA",
    "api_key": "AZURE_OPENAI_API_KEY_CA",
    "api_type": "OPENAI_API_TYPE",
    "api_version": "OPENAI_API_VERSION",
}

# Embedding function handed to chroma in place of the default embedding model.
azure_openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    **{arg: os.environ[env_name] for arg, env_name in azure_embedding_env.items()}
)

In [10]:
import chromadb

# Documents to index, all given as explicit urls.
docs_to_index = [
    "https://raw.githubusercontent.com/microsoft/autogen/main/README.md",
    "https://drlee.io/harnessing-the-power-of-autogen-and-openai-gpts-for-advanced-code-interpretation-and-development-571ddb6f814c",
    "https://raw.githubusercontent.com/microsoft/FLAML/main/website/docs/Examples/Integrate%20-%20Spark.md",
    # Disabled sources:
    # "https://raw.githubusercontent.com/microsoft/FLAML/main/website/docs/Research.md",
    # os.path.join(os.path.abspath(""), "..", "website", "docs"),
]

# Retrieval settings: same task and sources as before, but a separate collection
# whose embeddings come from the custom `azure_openai_ef` function.
retrieve_settings = {
    "task": "qa",
    "docs_path": docs_to_index,
    "custom_text_types": TEXT_FORMATS,  # alternative: ["mdx"]
    "chunk_token_size": 2000,
    # token counting reference: https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken
    "model": "gpt-4",
    "client": chromadb.PersistentClient(path="/tmp/chromadb"),
    "collection_name": "mauromi_collection_043",
    "embedding_function": azure_openai_ef,
    # False: do not reuse an existing collection (an existing one has to be removed manually)
    "get_or_create": False,
}

ragproxyagent = RetrieveUserProxyAgent(
    name="ragproxyagent",
    human_input_mode="NEVER",
    max_consecutive_auto_reply=3,
    retrieve_config=retrieve_settings,
    code_execution_config=False,  # False: never execute code
)

ragproxyagent

<autogen.agentchat.contrib.retrieve_user_proxy_agent.RetrieveUserProxyAgent at 0x7fd2f2b7f690>

In [11]:
# Reset the assistant before starting a new conversation, as done for the first example.
assistant.reset()

question = "what are the enhanced LLM inference features?"
answer = ragproxyagent.initiate_chat(
    assistant,
    message=ragproxyagent.message_generator,
    problem=question,
    # extra filter for the embeddings search: only documents that contain "autogen" are searched
    search_string="autogen",
)

Trying to create collection.


Number of requested results 20 is greater than number of elements in index 6, updating n_results = 6


doc_ids:  [['doc_1', 'doc_3', 'doc_4', 'doc_0', 'doc_2']]
[32mAdding doc_id doc_1 to context.[0m
[32mAdding doc_id doc_3 to context.[0m
[32mAdding doc_id doc_4 to context.[0m
[32mAdding doc_id doc_0 to context.[0m
[33mragproxyagent[0m (to assistant):

You're a retrieve augmented chatbot. You answer user's questions based on your own knowledge and the
context provided by the user.
If you can't answer the question with or without the current context, you should reply exactly `UPDATE CONTEXT`.
You must give as short an answer as possible.

User's question is: what are the enhanced LLM inference features?

Context is: 
<p align="right" style="font-size: 14px; color: #555; margin-top: 20px;">
  <a href="#readme-top" style="text-decoration: none; color: blue; font-weight: bold;">
    ↑ Back to Top ↑
  </a>
</p>

## [Installation](https://microsoft.github.io/autogen/docs/Installation)
### Option 1. Install and Run AutoGen in Docker

Find detailed instructions for users [here](https:

### assistant (to ragproxyagent)

Caching, error handling, multi-config inference, templating.

In [12]:
# Reset the assistant before starting a new conversation, as done for the first example.
assistant.reset()

question = "What Python versions are supported by Autogen?"
answer = ragproxyagent.initiate_chat(
    assistant,
    message=ragproxyagent.message_generator,
    problem=question,
    # extra filter for the embeddings search: only documents that contain "autogen" are searched
    search_string="autogen",
)

Number of requested results 20 is greater than number of elements in index 6, updating n_results = 6


doc_ids:  [['doc_0', 'doc_1', 'doc_3', 'doc_4', 'doc_2']]
[32mAdding doc_id doc_0 to context.[0m
[32mAdding doc_id doc_1 to context.[0m
[32mAdding doc_id doc_3 to context.[0m
[32mAdding doc_id doc_4 to context.[0m
[33mragproxyagent[0m (to assistant):

You're a retrieve augmented chatbot. You answer user's questions based on your own knowledge and the
context provided by the user.
If you can't answer the question with or without the current context, you should reply exactly `UPDATE CONTEXT`.
You must give as short an answer as possible.

User's question is: What Python versions are supported by Autogen?

Context is: <a name="readme-top"></a>

[![PyPI version](https://badge.fury.io/py/pyautogen.svg)](https://badge.fury.io/py/pyautogen)
[![Build](https://github.com/microsoft/autogen/actions/workflows/python-package.yml/badge.svg)](https://github.com/microsoft/autogen/actions/workflows/python-package.yml)
![Python Version](https://img.shields.io/badge/3.8%20%7C%203.9%20%7C%203.10

### assistant (to ragproxyagent):

3.8 | 3.9 | 3.10 | 3.11 | 3.12

# Check the cache DB with SQLite

In [13]:
# Read the content of the cache database stored under ./.cache/<cache_seed>/

import sqlite3

# Connect to the SQLite database
conn = sqlite3.connect(f'./.cache/{cache_seed}/cache.db')
try:
    # Create a cursor object
    cur = conn.cursor()

    # List all tables in the database
    cur.execute("SELECT name FROM sqlite_master WHERE type='table';")
    print(f"Tables in this DB: {cur.fetchall()}\n")

    # Fetch every row of the Cache table and print it, numbering rows from 1
    cur.execute("SELECT * FROM Cache")
    for i, row in enumerate(cur.fetchall(), start=1):
        print(f"\n\n\n++++++++++++++++ ROW {i} ++++++++++++++++++\n\n\n")
        print(row)
finally:
    # Close the connection even when one of the queries above raises
    # (for example when the Cache table does not exist)
    conn.close()

Tables in this DB: [('Settings',), ('Cache',)]




++++++++++++++++ ROW 1 ++++++++++++++++++






++++++++++++++++ ROW 2 ++++++++++++++++++



(2, '{"messages": [{"content": "You are a helpful assistant.", "role": "system"}, {"content": "What is Autogen?", "role": "user"}], "model": "gpt4-0125preview-128k", "temperature": 0}', 1, 1713883550.986583, None, 1713883550.986583, 0, None, 0, 4, None, b'\x80\x05\x95\xec\n\x00\x00\x00\x00\x00\x00\x8c!openai.types.chat.chat_completion\x94\x8c\x0eChatCompletion\x94\x93\x94)\x81\x94}\x94(\x8c\x08__dict__\x94}\x94(\x8c\x02id\x94\x8c&chatcmpl-9HBgaM63a0BZkPC53DAQHHd8SoxQt\x94\x8c\x07choices\x94]\x94h\x00\x8c\x06Choice\x94\x93\x94)\x81\x94}\x94(h\x05}\x94(\x8c\rfinish_reason\x94\x8c\x04stop\x94\x8c\x05index\x94K\x00\x8c\x08logprobs\x94N\x8c\x07message\x94\x8c)openai.types.chat.chat_completion_message\x94\x8c\x15ChatCompletionMessage\x94\x93\x94)\x81\x94}\x94(h\x05}\x94(\x8c\x07content\x94X\xdb\x06\x00\x00"Autogen" can refer to different concepts dep

In [None]:
import autogen

# Plain user proxy (no retrieval), used to ask the same question without retrieved context.
user_proxy = autogen.UserProxyAgent(
    name                       = "user_proxy",
    human_input_mode           = "NEVER", # NEVER / ALWAYS / TERMINATE
    max_consecutive_auto_reply = 1,

    # True when the message content ends with "TERMINATE", False otherwise.
    # dict.get only applies its default when the key is missing, so a message whose
    # "content" is present but None is mapped to "" before the string methods run.
    is_termination_msg         = lambda x: (x.get("content") or "").rstrip().endswith("TERMINATE"),

    code_execution_config = {
        "work_dir": "coding",

        # Using docker is safer than running the generated code directly.
        # set use_docker=True if docker is available to run the generated code.
        "use_docker": False
    },

    llm_config=llm_config
)

# Ask the assistant directly; the chat summary is produced with the "reflection_with_llm" method.
chat_res = user_proxy.initiate_chat(
    assistant,
    message = "What is Autogen?",
    summary_method = "reflection_with_llm",
)

### assistant (to user_proxy):

"Autogen" can refer to different concepts depending on the context in which it's used. Here are a few possibilities:

1. **Software Development**: In the context of programming and software development, "Autogen" often refers to tools or processes that automatically generate code or documentation. These tools can help in creating boilerplate code, APIs, client libraries, or even documentation from a set of definitions or templates. This automation can significantly speed up development and ensure consistency across different parts of a project.

2. **Autogenic Training**: In a completely different context, "Autogen" could be a shorthand or misspelling for "autogenic training," which is a relaxation technique developed by the German psychiatrist Johannes Heinrich Schultz. Autogenic training involves a series of exercises designed to induce a state of relaxation and reduce stress through self-suggestion and focusing on bodily sensations.

3. **Other Contexts**: The term could also be part of a brand name, product, or specific technology in various industries, such as automotive, aerospace, or health and wellness. Without more specific context, it's challenging to pinpoint exactly what "Autogen" refers to.

If you have a specific context in mind for "Autogen," providing more details could help in giving a more accurate and targeted explanation.