https://docs.llamaindex.ai/en/stable/examples/cookbooks/crewai_llamaindex/



In [1]:
import os
import sys

# Directory added to the import path so the local project packages can be
# imported (presumably where `web_scraping_agent` lives -- confirm).
# Set the LLAMAINDEX_SAMPLE_SRC environment variable to point at your own
# checkout; the fallback is the original hard-coded location, kept so existing
# setups keep working unchanged.
PROJECT_SRC_DIR = os.environ.get(
    "LLAMAINDEX_SAMPLE_SRC",
    "/Users/lzchen/PycharmProjects/llamaindex-sample/src",
)

# Re-running this cell in a live kernel must not pile up duplicate entries.
if PROJECT_SRC_DIR not in sys.path:
    sys.path.append(PROJECT_SRC_DIR)

In [2]:
import os
from crewai import Agent, Task, Crew, Process
from crewai_tools import LlamaIndexTool
from web_scraping_agent.tools.web_parsing import cai_scraper_tool, cai_url_validation_tool
from web_scraping_agent.utils.config import settings

### LLM

In [3]:
from llama_index.llms.azure_openai import AzureOpenAI
import openai
from langchain_openai import AzureChatOpenAI

# llm = openai.AzureOpenAI(
#     azure_endpoint=settings.AZURE_OPENAI_ENDPOINT,
#     api_key=settings.AZURE_OPENAI_API_KEY,
#     api_version=settings.AZURE_OPENAI_API_VERSION,
# )

# Azure-hosted chat model handed to the agents via `llm=llm`.
# Deployment, endpoint, API version and key all come from the project's
# `settings` object, so nothing sensitive is written in the notebook itself.
llm = AzureChatOpenAI(
    deployment_name=settings.AZURE_OPENAI_CHAT_MODEL,
    azure_endpoint=settings.AZURE_OPENAI_ENDPOINT,
    openai_api_version=settings.AZURE_OPENAI_API_VERSION,
    openai_api_key=settings.AZURE_OPENAI_API_KEY,
)

# llm = AzureOpenAI(
#     azure_deployment=settings.AZURE_OPENAI_CHAT_MODEL,
#     temperature=0.1,
#     azure_endpoint=settings.AZURE_OPENAI_ENDPOINT,
#     api_key=settings.AZURE_OPENAI_API_KEY,
#     api_version=settings.AZURE_OPENAI_API_VERSION,
# )


### Agents

In [4]:
# Agent that writes the final report. It is given no tools of its own.
investment_assistant = Agent(
    llm=llm,
    role="Investment Assistant",
    goal="Present information gathered about startup companies in a report format.",
    # The backstory is spelled out line by line so the embedded newlines and
    # indentation (which are part of the prompt text) are visible.
    backstory=(
        "You work at an investing firm.\n"
        "        Your goal is to write a report about the company of \n"
        "        concern based on the gathered information to help your\n"
        "         firm decide if it is a good investment."
    ),
    max_iter=5,
    allow_delegation=True,
    verbose=True,
    # tools=[query_tool],  # disabled: `query_tool` is not defined in this notebook
)

# Agent that gathers raw company information. It is the only agent equipped
# with the scraping and URL-validation tools.
company_researcher = Agent(
    llm=llm,
    role="Company Researcher",
    goal="Find information about companies from their website, linkedin, and other sources.",
    # Written as explicit pieces so the newline and indentation inside the
    # prompt text are visible.
    backstory=(
        "You work at a investment firm that is looking to invest \n"
        "    in other companies, your goal is to find information about the company of concern."
    ),
    tools=[cai_scraper_tool, cai_url_validation_tool],
    max_iter=5,
    allow_delegation=True,
    verbose=True,
)

### Tasks

In [5]:
# Step 1: scrape the company's main page.
# `{company_website}` is a placeholder filled from the inputs passed to
# `crew.kickoff(...)`. The strings are written piece by piece so the embedded
# newlines and leading/trailing spaces are explicit.
main_page_sys_msg = (
    "Scrape the main information about the company on their website {company_website}, including\n"
    " their goal, detailed summary of the company, established time, the link to \n"
    " the page which contains information about the members of the company\n"
    "  (usually a site within the same domain)."
)
main_page_exp_output = (
    "a JSON with four keys:\n"
    " company_goal, company_summary, established_time, and members_page."
)
scrape_main_page_task = Task(
    agent=company_researcher,
    description=main_page_sys_msg,
    expected_output=main_page_exp_output,
)

# Step 2: fetch the raw content of the company's members page.
members_page_sys_msg = "Scrape the information about the members of the company on their members page."
members_page_exp_output = "The page content"
scrape_members_page_task = Task(
    agent=company_researcher,
    description=members_page_sys_msg,
    expected_output=members_page_exp_output,
)

# Step 3: turn the members-page content into structured member records.
members_info_sys_msg = "Extract all the company members mentioned in the given page content."
# Written piece by piece so the embedded newlines and the leading/trailing
# spaces of the prompt text are explicit.
members_info_exp_output = (
    " Return as a JSON with the key 'members' containing a list of\n"
    " dictionaries with keys 'name', 'role' and 'linkedin_page'. \n"
    " Note that when linkedIn page is not available, \n"
    " it should be an empty string."
)
scrape_members_info_task = Task(
    agent=company_researcher,
    description=members_info_sys_msg,
    expected_output=members_info_exp_output,
)

# Step 4: compile everything gathered so far into a markdown report.
final_report_sys_msg = """Write a report about the company based on the gathered information."""
final_report_exp_output = """Return a markdown containing the report about the company. It should include the company goal,
    detailed summary, established time, and the list of members with their roles and linkedin pages."""
final_report_task = Task(
    description=final_report_sys_msg,
    expected_output=final_report_exp_output,
    agent=investment_assistant,
    # The report needs data from two earlier steps: goal / summary /
    # established time come from the main-page task, the member list from the
    # members-info task. Declaring both as context makes that dependency
    # explicit instead of relying on crewAI's implicit hand-off between
    # consecutive tasks, which may only forward the immediately preceding
    # task's output (version-dependent -- verify against the installed crewAI).
    context=[scrape_main_page_task, scrape_members_info_task],
)

In [6]:
# Assemble the crew. The task list follows the pipeline order:
# main page -> members page -> members info -> final report.
crew = Crew(
    tasks=[
        scrape_main_page_task,
        scrape_members_page_task,
        scrape_members_info_task,
        final_report_task,
    ],
    agents=[
        investment_assistant,
        company_researcher,
    ],
    verbose=2,  # logging level: 1 or 2 select different amounts of detail
)

In [7]:
# Run the crew. `company_website` fills the {company_website} placeholder in
# the main-page task description.
result = crew.kickoff(inputs={"company_website": "https://vespa.ai/"})

# Separator line first, then the crew's final output on the next line.
print("######################", result, sep="\n")

[1m[95m [2024-07-13 13:06:58][DEBUG]: == Working Agent: Company Researcher[00m
[1m[95m [2024-07-13 13:06:58][INFO]: == Starting Task: Scrape the main information about the company on their website https://vespa.ai/, including
 their goal, detailed summary of the company, established time, the link to 
 the page which contains information about the members of the company
  (usually a site within the same domain).[00m


[1m> Entering new CrewAgentExecutor chain...[0m
[32;1m[1;3mTo gather the necessary information about the company from their website, I will start by scraping the content of the main page at https://vespa.ai/. This will help me extract the company goal, a detailed summary of the company, the established time, and the link to the members page if available.

Action: crawl_web_page
Action Input: {"page_url": "https://vespa.ai/"}
[0m[95m 

[![Vespa logo](https://vespa.ai/assets/logo.png)](/)

[](https://search.vespa.ai/)

[Get Started](https://cloud.vespa.ai/en/get

KeyboardInterrupt: 