<a href="https://colab.research.google.com/github/tomasonjo/blogs/blob/master/llm/crewai_industry_report.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
!pip install --quiet neo4j crewai

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/54.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.4/54.4 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [1]:
import os
from crewai import Agent, Task, Crew, LLM, Process
from crewai.tools import BaseTool
from neo4j import GraphDatabase
import datetime
from pydantic import BaseModel, Field
from typing import Type
import getpass

In [2]:
# Neo4j connection setup
URI = "neo4j+s://demo.neo4jlabs.com"
AUTH = ("companies", "companies")
DATABASE = "companies"
# Backward-compatible alias: the constant was originally spelled `DATABSE`,
# so keep that name bound for any code that still refers to it.
DATABSE = DATABASE
driver = GraphDatabase.driver(URI, auth=AUTH)

In [3]:
# Set your OpenAI API key.
# Only prompt when the key is not already configured, so a key supplied through
# the environment is respected and re-running the cell does not ask again.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI key: ")
llm = LLM(model='gpt-4o', temperature=0)

OpenAI key: ··········


In [4]:
# Industry names offered to the LLM as valid values for the `industry` argument.
industry_options = [
    "Software Companies",
    "Professional Service Companies",
    "Enterprise Software Companies",
    "Manufacturing Companies",
    "Software As A Service Companies",
    "Computer Hardware Companies",
    "Media And Information Companies",
    "Financial Services Companies",
    "Artificial Intelligence Companies",
    "Advertising Companies",
]


class GetCityInfoInput(BaseModel):
    """Input schema for the GetCityInfo tool."""
    # City to look up.
    city: str = Field(..., description="City name")
    # Industry to look up; the description embeds the list of available options.
    industry: str = Field(..., description=f"Industry name, available options are: {industry_options}")

class GetCityInfo(BaseTool):
    """CrewAI tool that returns aggregate statistics for one industry in one city."""

    name: str = "Get information about a specific city"
    description: str = "You can use this tools when you want to find information about specific industry within a city."
    args_schema: Type[BaseModel] = GetCityInfoInput

    def _run(self, city: str, industry: str) -> list:
        """Query the graph for organizations of ``industry`` located in ``city``.

        Args:
            city: Value compared for equality with ``City.name``.
            industry: Value compared for equality with ``IndustryCategory.name``.

        Returns:
            A list of plain dicts (one per result record) with the keys
            ``organizationCount``, ``publicCompanies``, ``combinedRevenue`` and
            ``topFiveOrganizations``. The last one holds the first five
            organizations with a non-null ``nbrEmployees`` after ordering by
            ``nbrEmployees`` descending.
        """
        records, _, _ = driver.execute_query(
            """MATCH (c:City)<-[:IN_CITY]-(o:Organization)-[:HAS_CATEGORY]->(i:IndustryCategory)
WHERE c.name = $city AND i.name = $industry
WITH o
ORDER BY o.nbrEmployees DESC
RETURN count(o) AS organizationCount,
     sum(CASE WHEN o.isPublic THEN 1 ELSE 0 END) AS publicCompanies,
     sum(o.revenue) AS combinedRevenue,
     collect(CASE WHEN o.nbrEmployees IS NOT NULL THEN o END)[..5] AS topFiveOrganizations""",
            city=city,
            industry=industry,
            # Name the target database explicitly instead of relying on the
            # server-side default. `DATABSE` (sic) is the database-name
            # constant defined in the connection setup cell.
            database_=DATABSE,
        )
        return [record.data() for record in records]

In [5]:
class GetNews(BaseTool):
    """CrewAI tool that returns the most recent news text mentioning a company."""

    name: str = "Get the latest news for a specific company"
    description: str = "You can use this tool when you want to find the latest news about specific company"

    def _run(self, company: str) -> list:
        """Fetch recent article chunks that mention ``company``.

        Args:
            company: Value compared for equality with ``Organization.name``.

        Returns:
            A list of plain dicts (one per article) with the keys ``title``,
            ``date``, ``sentiment`` and ``chunks``.

        Note:
            ``LIMIT 5`` is applied to (chunk, article) rows sorted by article
            date descending, before the chunks are grouped per article, so at
            most five chunks in total are returned.
        """
        records, _, _ = driver.execute_query(
            """MATCH (c:Chunk)<-[:HAS_CHUNK]-(a:Article)-[:MENTIONS]->(o:Organization)
WHERE o.name = $company AND a.date IS NOT NULL
WITH c, a
ORDER BY a.date DESC
LIMIT 5
RETURN a.title AS title, a.date AS date, a.sentiment AS sentiment, collect(c.text) AS chunks""",
            company=company,
            # Name the target database explicitly instead of relying on the
            # server-side default. `DATABSE` (sic) is the database-name
            # constant defined in the connection setup cell.
            database_=DATABSE,
        )
        return [record.data() for record in records]

In [6]:
# Define Agents
class ReportAgents:
    """Container for the three agents that cooperate on an industry report.

    Attributes:
        researcher: Agent equipped with the GetCityInfo tool.
        news_analyst: Agent equipped with the GetNews tool.
        report_writer: Agent without tools that writes the final report.
    """

    def __init__(self):
        # Settings that are identical for every agent.
        shared = {"verbose": True, "allow_delegation": False, "llm": llm}

        self.researcher = Agent(
            role='Data Researcher',
            goal='Gather comprehensive information about specific companies that are in relevant cities and industries',
            backstory="""You are an expert data researcher with deep knowledge of
            business ecosystems and city demographics. You excel at analyzing
            complex data relationships.""",
            tools=[GetCityInfo()],
            **shared,
        )

        self.news_analyst = Agent(
            role='News Analyst',
            goal='Find and analyze recent news about relevant companies in the specified industry and city',
            backstory="""You are a seasoned news analyst with expertise in
            business journalism and market research. You can identify key trends
            and developments from news articles.""",
            tools=[GetNews()],
            **shared,
        )

        self.report_writer = Agent(
            role='Report Writer',
            goal='Create comprehensive, well-structured reports combining the provided research and news analysis. Do not include any information that isnt explicitly provided.',
            backstory="""You are a professional report writer with experience in
            business intelligence and market analysis. You excel at synthesizing
            information into clear, actionable insights. Do not include any information that isn't explicitly provided.""",
            **shared,
        )

def generate_report(city_name: str, industry_name: str):
    """Run the research -> news analysis -> report writing crew.

    Args:
        city_name: City interpolated into the research task description.
        industry_name: Industry interpolated into the research task description.

    Returns:
        Whatever ``Crew.kickoff()`` returns for the three sequential tasks.
    """
    team = ReportAgents()

    # Step 1: statistics about the city/industry pair.
    research = Task(
        description=f"""Research and analyze {city_name} and its business ecosystem in {industry_name} industry:
        1. Get city summary and key information
        2. Find organizations in the specified industry
        3. Analyze business relationships and economic indicators""",
        expected_output="Basic statistics about the companies in the given city and industry as well as top performers",
        agent=team.researcher,
    )

    # Step 2: news about the companies found in step 1.
    news = Task(
        description="""Analyze recent news about the companies provided by the city researcher""",
        expected_output="Summarization of the latest news for the company and how it might affect the market",
        agent=team.news_analyst,
        context=[research],
    )

    # Step 3: markdown report built from both earlier results.
    writeup = Task(
        description="""Create a detailed markdown report about the
        results you got from city research and news analysis tasks.
        Do not include any information that isn't provided""",
        expected_output="Markdown summary",
        agent=team.report_writer,
        context=[research, news],
    )

    # Assemble the crew and execute the tasks in the listed order.
    crew = Crew(
        agents=[team.researcher, team.news_analyst, team.report_writer],
        tasks=[research, news, writeup],
        verbose=True,
        process=Process.sequential,
    )
    return crew.kickoff()

In [7]:
# Example run: build and print the report for one city/industry pair.
# Note: "Hardware Companies" is not one of the exact strings in
# `industry_options`; in the recorded output below the agent calls the tool
# with "Computer Hardware Companies" instead.
city = "San Francisco"
industry = "Hardware Companies"
report = generate_report(city, industry)
print(report)


LLM value is already an LLM object
LLM value is already an LLM object
LLM value is already an LLM object
[1m[95m# Agent:[00m [1m[92mData Researcher[00m
[95m## Task:[00m [92mResearch and analyze San Francisco and its business ecosystem in Hardware Companies industry:
        1. Get city summary and key information
        2. Find organizations in the specified industry
        3. Analyze business relationships and economic indicators[00m






[1m[95m# Agent:[00m [1m[92mData Researcher[00m
[95m## Using tool:[00m [92mGet information about a specific city[00m
[95m## Tool Input:[00m [92m
"{\"city\": \"San Francisco\", \"industry\": \"Computer Hardware Companies\"}"[00m
[95m## Tool Output:[00m [92m
[{'organizationCount': 51, 'publicCompanies': 6, 'combinedRevenue': 302330297000.0, 'topFiveOrganizations': [{'summary': 'American multinational technology corporation', 'revenue': 198270000000.0, 'isDissolved': False, 'diffbotId': 'https://diffbot.com/entity/EIsFKrN_ZNLSWsvxdQfWutQ', 'nbrEmployees': 221000, 'name': 'Microsoft Corporation', 'motto': 'We’re on a mission to empower every person and every organization on the planet to achieve more.', 'isPublic': True, 'id': 'EIsFKrN_ZNLSWsvxdQfWutQ'}, {'summary': 'Cloud computing management software provider', 'revenue': 102301000000.0, 'isDissolved': False, 'diffbotId': 'https://diffbot.com/entity/E5ZMFNw6VM1S_DXVzjJ-qNQ', 'nbrEmployees': 133000, 'name': 'Dell Technol