In [1]:
# Imports
from crewai import Agent, Task, Crew, Process
from langchain_openai import ChatOpenAI
from crewai_tools import SerperDevTool, ScrapeWebsiteTool, WebsiteSearchTool
from crewai_tools import tool
from pydantic import BaseModel, EmailStr, Field
from typing import Optional, List
import json
import csv

In [2]:
# Website of the prospective client.
# NOTE(review): `url` is not referenced by any cell visible here — confirm it is still needed.
url = 'https://www.sutterhealth.org/'

In [3]:
# Notebook-level tool instances from crewai_tools, each built with default settings.
serper_tool = SerperDevTool()  # presumably needs a Serper API key in the environment — confirm
scrape_tool = ScrapeWebsiteTool()
# NOTE(review): website_search_rag_tool is not referenced in the cells visible here.
website_search_rag_tool = WebsiteSearchTool()

In [4]:
# Contact search Agent
# Finds online sources for a client's contact details. The {client} placeholders
# in the backstory are expected to be interpolated from the `inputs` mapping
# given to crew.kickoff().
search_agent = Agent(
    role='Contact Searcher',
    goal='Find the contact details of the marketing team or relevant people from a specified client.',
    backstory="""
    You are a dedicated member of the marketing team at Healee, a digital healthcare company. 
    Your expertise lies in leveraging online resources to quickly gather essential information. 
    You have identified {client} as a potential client for the Healee platform. 
    Your task is to find contact information such as email addresses, phone numbers, and LinkedIn profiles 
    of key individuals within {client}'s organization who can be approached to set up a meeting. 
    This includes members of their marketing team or other relevant stakeholders. 
    """,
    # Reuse the notebook-level search tool rather than constructing a duplicate instance.
    tools=[serper_tool],
    allow_delegation=False
)

# Web Scraper agent
# Extracts contact details and related data from the pages it is pointed at.
scraper_agent = Agent(
    role='Web Scraper',
    goal='Scrape relevant information from specified websites',
    backstory="""
    You are a skilled web scraper working for the marketing team at Healee, a digital healthcare company. 
    Your task is to extract crucial information from websites to support various marketing and research activities. 
    This includes gathering contact details, company profiles, and other relevant data from the given URLs.
    """,
    # Reuse the notebook-level scraping tool rather than constructing a duplicate instance.
    tools=[scrape_tool],
    allow_delegation=False
)

# Final Consolidator Agent
# Tool-less agent that merges the scraped contacts into one organized list.
consolidator = Agent(
    allow_delegation=False,
    verbose=True,
    role='Result Consolidator',
    goal='Combine all the contact information gathered by the scraper agents into a final, organized list of relevant contacts.',
    backstory=(
        "You are a highly skilled data synthesizer, adept at merging "
        "information from multiple sources. Your expertise lies in "
        "compiling comprehensive, clear, and concise lists of relevant "
        "contact information. Your meticulous approach ensures no detail "
        "is overlooked, providing a valuable resource for your team."
    ),
)


In [5]:
# Tasks
# Contact Search Task
# Handled by search_agent: produce a list of URLs / online resources where the
# client's contact information is likely to be found.
contact_search_task = Task(
    agent=search_agent,
    description=(
        "Use online resources to identify potential websites, LinkedIn profiles, and other online platforms "
        "where contact information of key individuals within {client}'s organization can be found. "
        "Focus on finding members of their marketing team or other relevant stakeholders. "
        "Your final list should include URLs or online resources where contact information might be available."
    ),
    expected_output=(
        "A list of at least 5 URLs or online resources where contact information of key individuals "
        "from {client} might be available. "
        "Ensure the sources are relevant and up-to-date."
    ),
)

# Web scraping Task
# Handled by scraper_agent: pull names, emails, phone numbers and LinkedIn
# profiles out of the resources it is given.
web_scraping_task = Task(
    agent=scraper_agent,
    description=(
        "Scrape the provided URLs or online resources to extract contact details such as email addresses, "
        "phone numbers, and LinkedIn profiles of key individuals within {client}'s organization. "
        "Ensure the information is accurate and relevant to the marketing team or other relevant stakeholders."
    ),
    expected_output=(
        "A detailed list of contact information extracted from the provided URLs. "
        "Include names, email addresses, phone numbers, LinkedIn profiles, "
        "and any other relevant contact details."
    ),
)

In [6]:
# Pydantic model for the final consolidation of results
class ContactInfo(BaseModel):
    """One consolidated contact record.

    Only ``name`` and ``role_in_company`` are required; the remaining fields
    default to ``None`` when the information was not found.

    The e-mail may be supplied under either key: the alias ``email`` or the
    field name ``email_address``. Without ``populate_by_name`` pydantic would
    accept only the alias and silently drop input keyed ``email_address``,
    leaving that CSV column empty.
    """

    # Accept both the alias ("email") and the attribute name ("email_address") on input.
    model_config = {"populate_by_name": True}

    name: str
    role_in_company: str
    email_address: Optional[EmailStr] = Field(None, alias='email')
    linkedin: Optional[str] = None
    phone_no: Optional[str] = None

# Consolidation task function
# def consolidation_task_function(results: List[ContactInfo]) -> str: 
#     # Convert the list of ContactInfo objects to JSON
#     json_output = json.dumps([results.dict() for result in results], indent=2)

#     # Convert the list of ContactInfo objects to CSV
#     csv_output = "name,role_in_company,email_address,linkedin,phone_no\n"
#     for result in results:
#         csv_output += f"{result.name},{result.role_in_company},{result.email_address or ''},{result.linkedin or ''},{result.phone_no or ''}\n"
    
#     with open("contacts.csv", "w") as csv_file:
#         csv_file.write(csv_output)

#     return "Consolidation complete. Outputs saved to contacts.json and contacts.csv."

def consolidation_task_function(results: List[dict]) -> str:
    """Validate raw contact dicts and write them to ``./contacts.csv``.

    Args:
        results: One dict per contact, with keys accepted by ``ContactInfo``.

    Returns:
        A short status message naming the file that was written.

    Raises:
        Whatever ``ContactInfo(**result)`` raises for an invalid record
        (pydantic validation errors propagate unchanged), and ``OSError``
        if the CSV file cannot be written.

    Only the CSV export is performed; no JSON file is written.
    """
    contacts = [ContactInfo(**result) for result in results]

    csv_file_path = "./contacts.csv"
    fieldnames = ['name', 'role_in_company', 'email_address', 'linkedin', 'phone_no']

    # newline='' lets the csv module control line endings; an explicit UTF-8
    # encoding keeps non-ASCII names from failing under a narrow locale default.
    with open(csv_file_path, "w", newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        # Read attributes by field name so every row matches the header exactly;
        # the csv writer renders None as an empty cell.
        writer.writerows(
            {field: getattr(contact, field) for field in fieldnames}
            for contact in contacts
        )

    # The message names only the file that is actually produced.
    return "Consolidation complete. Output saved to contacts.csv."

In [9]:
# Final Consolidation task
# Asks the consolidator agent to merge the gathered contacts into CSV-shaped output.
# NOTE(review): `function` does not appear to be a documented crewAI Task field; if it is
# not, the argument is likely ignored and consolidation_task_function never runs —
# confirm, and consider `callback=` or `output_pydantic=` instead.
# NOTE(review): `output_file` below and consolidation_task_function both target
# contacts.csv — verify which writer is meant to own that file.
consolidation_task = Task(
    description=(
        "Combine all the contact information gathered by the scraper agents into a final, organized list of relevant contacts. "
        "The output should be formatted as CSV files with the fields: Name, Role in company, Email address, LinkedIn, Phone no. "
        "Leave any of Email address, LinkedIn, Phone no. empty if the information is not found."
    ),
    expected_output=(
        "A CSV file containing the consolidated list of contact information. The fields should include: Name, Role in company, Email address, LinkedIn, Phone no."
    ),
    agent = consolidator, 
    function = consolidation_task_function, 
    output_file='contacts.csv'
)

In [10]:
# Create the crew
# Agents and tasks are listed in pipeline order: search, scrape, consolidate.
crew = Crew(
    process=Process.sequential,
    agents=[search_agent, scraper_agent, consolidator],
    tasks=[contact_search_task, web_scraping_task, consolidation_task],
)

# Run the pipeline for one client; the 'client' entry feeds the {client} placeholders.
result = crew.kickoff(inputs=dict(client='Sutter Health'))
print(result)



[95m 


Search results: Title: Sutter Health Plus health plan media contacts
Link: https://news.sutterhealthplus.org/media-contacts/
Snippet: Call the media relations hotline at 800-428-7377; please note that Sutter will only respond to working media. News Topics. 2019 · 2020 · 2021 · 2022 · 2023 ...
---
Title: Media Contact - Vitals by Sutter Health
Link: https://vitals.sutterhealth.org/media-contact/
Snippet: Media Contact. Media Contacts. (800) 428-7377 (monitored 24/7). Sutter Health Affiliates. Alta Bates Summit Medical Center
---
Title: Sutter Health Marketing Department - RocketReach
Link: https://rocketreach.co/sutter-health-marketing-department_b5c49093f42e0dca
Snippet: Sutter Health's Marketing department is led by Grace Davis (Vice President, Chief Public Affairs and Marketing Officer) and has 35 employees. Get Contacts ...
---
Title: Sutter Roseville Department Directory
Link: https://www.sutterhealth.org/for-patients/department-directory-srmc
Snippet: The hospital's main 