## Config LLM + Embedding model

In [1]:
import autogen
from memgpt.autogen.memgpt_agent import create_memgpt_autogen_agent_from_config
from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent
from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent
from autogen import UserProxyAgent, config_list_from_json
from autogen.agentchat.contrib.teachable_agent import TeachableAgent
import os
import random
random_seed = random.randint(0, 1000)

config_list = [
    {
        #"model": "zephyr-7B-beta",
        "model":"dolphin-2.1-mistral-7b",
        #"api_base": "http://172.19.208.1:1300/v1",
        "api_base": "http://localhost:5001/v1",
        "request_timeout":600,
        "timeout": 120,

    }
]

code_execution_config={"work_dir": "coding",
                       "use_docker":"python:3"}

llm_config = {"config_list": config_list,
              #"context":"",
              #"prompt":"{problem} Solve the problem and explain the reasoning step by step",
              "use_cache":True,
              "seed": 1,
              "temperature":0
              }

from chromadb.utils import embedding_functions
#ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2") 
ef = embedding_functions.InstructorEmbeddingFunction(model_name="hkunlp/instructor-large", device="cuda")




  from tqdm.autonotebook import trange


load INSTRUCTOR_Transformer
max_seq_length  512


## Load documents

In [2]:
from autogen.retrieve_utils import get_file_from_url,get_files_from_dir
import requests
from bs4 import BeautifulSoup
from langchain.document_loaders import BSHTMLLoader

urls = ["https://uca.edu/politicalscience/home/research-projects/dadm-project/middle-eastnorth-africapersian-gulf-region/israelpalestine-1948-present",
        "https://news.un.org/fr/story/2023/10/1140027",
        "https://www.amnesty.org/fr/location/middle-east-and-north-africa/israel-and-occupied-palestinian-territories/report-israel-and-occupied-palestinian-territories",
        "https://www.unicef.fr/article/israel-palestine-les-enfants-paient-le-prix-de-la-guerre",
        
        ]

for url in urls:
    # Download the HTML file from the URL
    response = requests.get(url)
    # Parse the downloaded content using BeautifulSoup
    soup = BeautifulSoup(response.content, "html.parser")
    # Create or open the output file in write mode
    output_path_html = 'data_html/'+url.split("/")[-1]+'.html'
    with open(output_path_html, 'w') as f:
        # Write the parsed HTML code to the file
        f.write(str(soup))
    output_path_txt = 'data/'+url.split("/")[-1]+'.txt'
    soup_processed = BSHTMLLoader(output_path_html).load()
    with open(output_path_txt, 'w') as f:
        # Write the processed HTML code to a txt file
        f.write(str(soup_processed))


get_files_from_dir("data/")
#get_file_from_url(output_path)

['data/report-israel-and-occupied-palestinian-territories.txt',
 'data/israelpalestine-1948-present.txt',
 'data/israel-palestine-les-enfants-paient-le-prix-de-la-guerre.txt',
 'data/1140027.txt']

## Define agents retrieval

In [3]:
assistant = RetrieveAssistantAgent(
    name="assistant",
    system_message="You are a helpful assistant.",
    llm_config=llm_config,
)

ragproxyagent = RetrieveUserProxyAgent(
    name="ragproxyagent",
    retrieve_config={
        "task": "qa",
        "docs_path": "/home/mohcine/work/personal/autogen_memgpt/data/",
        "embedding_function": ef
    },
)


In [5]:
assistant.reset()
ragproxyagent.initiate_chat(assistant, problem="how to code a parser for html files in python ?")


Number of requested results 20 is greater than number of elements in index 8, updating n_results = 8


doc_ids:  [['doc_0', 'doc_2', 'doc_3', 'doc_1', 'doc_4', 'doc_6', 'doc_5', 'doc_7']]
Adding doc_id doc_0 to context.
Adding doc_id doc_2 to context.
Adding doc_id doc_3 to context.
ragproxyagent (to assistant):

You're a retrieve augmented chatbot. You answer user's questions based on your own knowledge and the
context provided by the user.
If you can't answer the question with or without the current context, you should reply exactly `UPDATE CONTEXT`.
You must give as short an answer as possible.

User's question is: how to code a parser for html files in python ?

Context is: [Document(page_content='\n403 Forbidden\n\n403 Forbidden\nnginx\n\n\n', metadata={'source': 'data_html/report-israel-and-occupied-palestinian-territories.html', 'title': '403 Forbidden'})]
 violence between September 1993 and September 2000.\nCrisis Phase (September 28, 2000-February 8, 2005): Ariel Sharon, right-wing opposition leader, visited the Temple Mount (Haram as-Sharif) in Jerusalem on September 28, 2000

## Code challenge

In [8]:
from autogen import AssistantAgent, UserProxyAgent, config_list_from_json
config_list = [
    {
        "model":"dolphin-2.1-mistral-7b",
        "api_base": "http://localhost:5001/v1",
        "request_timeout":600,
        "timeout": 120,
    }
]

# Load LLM inference endpoints from an env variable or a file
# See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints
# and OAI_CONFIG_LIST_sample.json
assistant = AssistantAgent("assistant", llm_config={"config_list": config_list})
user_proxy = UserProxyAgent("user_proxy", code_execution_config={"work_dir": "coding","use_docker":False})
user_proxy.initiate_chat(assistant, message="Plot a chart of NVDA and TESLA stock price change YTD.")
# This initiates an automated chat between the two agents to solve the task


user_proxy (to assistant):

Plot a chart of NVDA and TESLA stock price change YTD.

--------------------------------------------------------------------------------
assistant (to user_proxy):

```python
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf

# Get Yahoo Finance data for the two companies
nvda_data = yf.download("NVDA", start="2021-01-01")['Close']  # Download only NVDA's close price from January 1, 2021 onwards. The 'start' parameter specifies the data starting date.
tesla_data = yf.download("TSLA", start="2021-01-01")['Close'] # Download only TESLA's close price from January 1, 2021 onwards. The 'start' parameter specifies the data starting date.

# Create a new dataframe where we can store our chart information
chart_data = pd.DataFrame({"NVDA": nvda_data, "TESLA": tesla_data}) # Creating dataframes to hold the NVDA and TESLA data separately so that they have their own columns in the chart_data df which will be used for plotting

# Plot a line char