Installing libraries

In [1]:
pip install -qU langgraph langchain langchain_openai langchain_community qdrant-client PyPDF2 beautifulsoup4 requests

Note: you may need to restart the kernel to use updated packages.


Setting up imports

In [2]:
import functools
import getpass
import io
import logging
import operator
import os
import re
import uuid
from pathlib import Path
from typing import List, Dict, Any, Union, Annotated, Optional

import PyPDF2
import requests
from bs4 import BeautifulSoup
from langchain.agents import AgentExecutor, create_openai_functions_agent
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.vectorstores import Qdrant
from langchain_core.messages import HumanMessage, AIMessage, BaseMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.tools import BaseTool, tool
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langgraph.graph import StateGraph, END
from qdrant_client import QdrantClient
from qdrant_client.http import models as rest

# Set up API keys.
# Prompt only for keys that are missing, so re-running this cell (or starting with keys
# already exported in the environment) neither re-prompts nor overwrites a valid key.
for _key_name, _key_prompt in (
    ("OPENAI_API_KEY", "OpenAI API Key:"),
    ("TAVILY_API_KEY", "Tavily API Key:"),
):
    if not os.environ.get(_key_name):
        os.environ[_key_name] = getpass.getpass(_key_prompt)


Create a working directory

In [3]:
from pathlib import Path

# Directory the document tools read from and write to; created (with parents) if missing.
WORKING_DIRECTORY = Path("./content/data")
WORKING_DIRECTORY.mkdir(exist_ok=True, parents=True)

Initialize Qdrant Client and OpenAI Embeddings

In [4]:
# Qdrant client backed by in-memory storage, for simplicity
qdrant_client = QdrantClient(location=":memory:")

# OpenAI embedding model (library defaults) used by the vector-store helpers
embeddings = OpenAIEmbeddings()

State and Helper functions

In [5]:
class State(Dict[str, Any]):
    """Dictionary-based state handed from one agent node to the next.

    NOTE(review): a second ``State`` (a ``TypedDict``) is declared later in the
    Graph Creation cell and shadows this class from that point on.
    """
    # Conversation history; the annotation attaches operator.add to the message list.
    messages: Annotated[List[BaseMessage], operator.add]
    # Name of the node expected to run next.
    next: str
    # String-to-string mapping carried with the state -- presumably document name -> text; TODO confirm.
    documents: Dict[str, str]

def get_last_message(state: State) -> str:
    """Return the ``content`` of the most recent entry in ``state["messages"]``."""
    history = state["messages"]
    latest = history[-1]
    return latest.content

def join_graph(response: dict):
    """Reduce a response to a state update holding only its final message.

    Args:
        response: Mapping with a non-empty ``"messages"`` sequence.

    Returns:
        ``{"messages": [<last message>]}``.
    """
    final_message = response["messages"][-1]
    return dict(messages=[final_message])

def prelude(state):
    """Return a copy of ``state`` extended with a ``current_files`` summary.

    The summary lists the entries found directly inside ``WORKING_DIRECTORY``
    (relative to it), or reads ``"No files written."`` when there are none.

    Args:
        state: Mapping with the current state; it is not modified.

    Returns:
        A new dict containing every key of ``state`` plus ``"current_files"``.
    """
    try:
        # Sorted so the listing is stable from run to run (glob order is arbitrary).
        written_files = sorted(
            f.relative_to(WORKING_DIRECTORY) for f in WORKING_DIRECTORY.glob("*")
        )
    except OSError:
        # Best effort: an unreadable directory is reported as empty. Only filesystem
        # errors are swallowed; programming errors are allowed to surface.
        written_files = []
    if not written_files:
        return {**state, "current_files": "No files written."}
    return {
        **state,
        "current_files": "\nBelow are files your team has written to the directory:\n"
        + "\n".join([f" - {f}" for f in written_files]),
    }

Setting up Vector Store

In [6]:
def initialize_vector_store():
    """Recreate the ``sales_pitch_data`` collection with 1536-dimensional cosine vectors."""
    vector_settings = rest.VectorParams(size=1536, distance=rest.Distance.COSINE)
    qdrant_client.recreate_collection(
        collection_name="sales_pitch_data",
        vectors_config=vector_settings,
    )

def add_to_vector_store(text: str, metadata: Dict[str, Any]):
    """Embed ``text`` and upsert it into ``sales_pitch_data`` with ``metadata`` as payload.

    Each call stores one point under a freshly generated UUID (hex form).
    """
    embedded_text = embeddings.embed_query(text)
    new_point = rest.PointStruct(
        id=uuid.uuid4().hex,
        vector=embedded_text,
        payload=metadata,
    )
    qdrant_client.upsert(collection_name="sales_pitch_data", points=[new_point])

def query_vector_store(query: str, limit: int = 5) -> List[Dict[str, Any]]:
    """Search ``sales_pitch_data`` for ``query`` and return the payloads of the hits.

    Args:
        query: Text to embed and search with.
        limit: Maximum number of hits requested from the collection.

    Returns:
        The payload of each hit, in the order the search returned them.
    """
    hits = qdrant_client.search(
        collection_name="sales_pitch_data",
        query_vector=embeddings.embed_query(query),
        limit=limit,
    )
    payloads = []
    for hit in hits:
        payloads.append(hit.payload)
    return payloads

Tools

In [11]:
# Web-search tool shared by the research and fact-checking agents; constructed with max_results=5.
tavily_tool = TavilySearchResults(max_results=5)

@tool
def linkedin_check(name: str, company: str) -> bool:
    """Check if a person works at a specific company on LinkedIn."""
    # Note: This is a mock implementation. In a real scenario, you'd use LinkedIn's API or web scraping.
    # The docstring above is kept verbatim because the @tool decorator uses it as the tool description.
    #
    # The search terms go through `params` so they are URL-encoded: a raw f-string URL breaks
    # for values containing '&', '#' or '=' (e.g. a company called "AT&T").
    # The timeout keeps a stalled connection from blocking the whole agent run.
    response = requests.get(
        "https://www.linkedin.com/search/results/people/",
        params={"keywords": f"{name} {company}"},
        timeout=10,
    )
    soup = BeautifulSoup(response.content, 'html.parser')
    # Returns True when the company name appears anywhere in the page text (case-insensitive).
    return company.lower() in soup.text.lower()

@tool
def read_document(
    file_name: Annotated[str, "File path to read the document."],
    start: Annotated[Optional[int], "The start line. Default is 0"] = None,
    end: Annotated[Optional[int], "The end line. Default is None"] = None,
) -> str:
    """Read the specified document."""
    # file_name is resolved relative to WORKING_DIRECTORY.
    target = WORKING_DIRECTORY / file_name
    with target.open("r") as handle:
        all_lines = handle.readlines()
    # A missing start means "from the first line"; a missing end means "to the last line".
    first_line = 0 if start is None else start
    return "".join(all_lines[first_line:end])

@tool
def write_document(
    content: Annotated[str, "Text content to be written into the document."],
    state: Annotated[dict, "The current state of the conversation."],
) -> Annotated[str, "Path of the saved document file."]:
    """Create and save a text document."""
    # The target name is taken from state["output_file"], defaulting to "output.txt".
    file_path = WORKING_DIRECTORY / state.get("output_file", "output.txt")
    try:
        with file_path.open("w") as handle:
            handle.write(content)
    except IOError as e:
        # Report the failure to the caller as text instead of raising.
        logging.error(f"Error writing to {file_path}: {e}")
        return f"Error: Could not write to {file_path}"
    else:
        logging.info(f"Successfully wrote content to {file_path}")
        return f"Document saved to {file_path}"

@tool
def upload_document(file_path: str) -> str:
    """Upload and read a document (PDF or text)."""
    suffix = Path(file_path).suffix.lower()
    if suffix == '.pdf':
        # Concatenate the extracted text of every page, in page order.
        with open(file_path, 'rb') as pdf_handle:
            text = "".join(
                page.extract_text() for page in PyPDF2.PdfReader(pdf_handle).pages
            )
    elif suffix in ('.txt', '.md'):
        with open(file_path, 'r') as text_handle:
            text = text_handle.read()
    else:
        raise ValueError("Unsupported file type. Please upload a PDF or text file.")

    # Persist the extracted text under a unique name inside the working directory.
    destination = WORKING_DIRECTORY / f"uploaded_document_{uuid.uuid4()}.txt"
    with destination.open("w") as out:
        out.write(text)

    return f"Document uploaded and saved as {destination.name}"

@tool
def query_vector_db(query: str) -> str:
    """Query the vector database for relevant information."""
    # The docstring above is kept verbatim because the @tool decorator uses it as the tool description.
    results = query_vector_store(query)
    formatted = []
    for item in results:
        # add_to_vector_store accepts arbitrary metadata, so a stored payload may lack
        # 'type'/'content' (or be None); fall back instead of failing the whole tool call.
        payload = item or {}
        formatted.append(f"{payload.get('type', 'unknown')}: {payload.get('content', '')}")
    return "\n".join(formatted)

Agent Creation

In [23]:
from langchain.agents import AgentExecutor, create_openai_functions_agent
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI

def create_agent_node(name: str, tools: List[BaseTool], system_prompt: str):
    """Build a node function that runs one OpenAI-functions agent.

    Args:
        name: Node name. It is also used for the output file (``<name>.txt``), as the
            ``name`` of the message the node appends, and as the default ``next`` value.
        tools: Tools the agent is allowed to call.
        system_prompt: Role description; an instruction to save the result with
            ``write_document`` is appended to it.

    Returns:
        A function ``execute_agent(state) -> dict`` that returns a new state with the
        keys ``messages``, ``next`` and ``documents``.
    """
    llm = ChatOpenAI(model="gpt-4-turbo")
    prompt = ChatPromptTemplate.from_messages([
        ("system", system_prompt + "\n\nAfter completing your task, you MUST use the write_document tool to save your output to a file named '{output_file}'. Your response should include confirmation that the file was saved."),
        MessagesPlaceholder(variable_name="messages"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ])
    agent = create_openai_functions_agent(llm, tools, prompt)
    # Built once per node rather than on every invocation.
    executor = AgentExecutor(agent=agent, tools=tools)

    def execute_agent(state):
        # Pass a copy carrying output_file so the caller's state dict is not mutated.
        agent_input = {**state, "output_file": f"{name}.txt"}
        result = executor.invoke(agent_input)
        output = result['output']

        # Ensure 'next' is always set; default to the current node if not specified.
        next_step = name
        if isinstance(output, dict) and 'next' in output:
            next_step = output['next']
        elif isinstance(output, str):
            # Find the marker case-insensitively but keep the original casing of what
            # follows it: node names and "END" are case-sensitive, so lowercasing the
            # whole output would yield a target that can never match.
            pieces = re.split(r'next:', output, flags=re.IGNORECASE)
            if len(pieces) > 1:
                next_step = pieces[-1].strip()

        return {
            "messages": state["messages"] + [HumanMessage(content=str(output), name=name)],
            "next": next_step,
            "documents": state.get("documents", {})
        }

    return execute_agent

def agent_node(state, agent, name):
    """Invoke ``agent`` on ``state`` and return the follow-up state.

    The agent's ``output`` is appended to the history as a ``HumanMessage`` named
    ``name``. ``next`` falls back to ``name`` when the result has none, and any
    existing ``documents`` entry is carried over (empty dict otherwise).
    """
    result = agent.invoke(state)
    history = state["messages"]
    reply = HumanMessage(content=result["output"], name=name)
    return {
        "messages": history + [reply],
        "next": result.get("next", name),
        "documents": state.get("documents", {}),
    }

Defining Agent Nodes

In [None]:
# Define agent nodes
# Entry agent: collects the initial information about the target company.
company_research_alpha = create_agent_node(
    name="CompanyResearchAlpha",
    tools=[tavily_tool, read_document, write_document, upload_document, query_vector_db],
    system_prompt=(
        "You are a company research specialist. Your task is to gather initial information about a target company. "
        "Ask the user for the company name or to upload a document with company info. "
        "Create initial documents for company name, description, execs, priorities, and industry priorities."
    ),
)

# Company Description Team: research -> writing -> fact check -> copy edit
company_description_research = create_agent_node(
    name="CompanyDescriptionResearch",
    tools=[tavily_tool, read_document, write_document, query_vector_db],
    system_prompt="You are a company description researcher. Your task is to find and compile a comprehensive description of the company.",
)

company_description_writing = create_agent_node(
    name="CompanyDescriptionWriting",
    tools=[read_document, write_document],
    system_prompt="You are a professional writer specializing in company descriptions. Your task is to take the research and craft a well-written company description.",
)

company_description_fact_checker = create_agent_node(
    name="CompanyDescriptionFactChecker",
    tools=[tavily_tool, read_document, write_document, query_vector_db],
    system_prompt="You are a fact-checker. Your task is to verify the accuracy of the company description.",
)

company_description_copy_editor = create_agent_node(
    name="CompanyDescriptionCopyEditor",
    tools=[read_document, write_document],
    system_prompt="You are a copy editor. Your task is to ensure the company description has correct grammar, spelling, and tone.",
)

# Company Execs Team: research -> LinkedIn verification -> final edit
company_execs_research = create_agent_node(
    name="CompanyExecsResearch",
    tools=[tavily_tool, read_document, write_document, query_vector_db],
    system_prompt="You are an executive research specialist. Your task is to find the top 5 C-level executives of the company.",
)

company_execs_linkedin_check = create_agent_node(
    name="CompanyExecsLinkedInCheck",
    tools=[linkedin_check, read_document, write_document],
    system_prompt="You are a LinkedIn verification specialist. Your task is to verify if the executives are currently working at the company.",
)

company_execs_editor = create_agent_node(
    name="CompanyExecsEditor",
    tools=[read_document, write_document],
    system_prompt="You are an editor. Your task is to finalize the list of company executives, excluding any that are not currently with the company.",
)

# Company Priorities Team: research -> fact check -> copy edit
company_priorities_research = create_agent_node(
    name="CompanyPrioritiesResearch",
    tools=[tavily_tool, read_document, write_document, query_vector_db],
    system_prompt="You are a company priorities researcher. Your task is to identify the top 3 priorities of the company.",
)

company_priorities_fact_checker = create_agent_node(
    name="CompanyPrioritiesFactChecker",
    tools=[tavily_tool, read_document, write_document, query_vector_db],
    system_prompt="You are a fact-checker. Your task is to verify the accuracy of the identified company priorities.",
)

company_priorities_copy_editor = create_agent_node(
    name="CompanyPrioritiesCopyEditor",
    tools=[read_document, write_document],
    system_prompt="You are a copy editor. Your task is to ensure the company priorities are clearly and correctly stated.",
)

# Industry Trends Team: research -> fact check -> copy edit
industry_trends_research = create_agent_node(
    name="IndustryTrendsResearch",
    tools=[tavily_tool, read_document, write_document, query_vector_db],
    system_prompt="You are an industry trends researcher. Your task is to identify the industry of the company and its top trends and challenges.",
)

industry_trends_fact_checker = create_agent_node(
    name="IndustryTrendsFactChecker",
    tools=[tavily_tool, read_document, write_document, query_vector_db],
    system_prompt="You are a fact-checker. Your task is to verify the accuracy of the identified industry trends and challenges.",
)

industry_trends_copy_editor = create_agent_node(
    name="IndustryTrendsCopyEditor",
    tools=[read_document, write_document],
    system_prompt="You are a copy editor. Your task is to ensure the industry trends and challenges are clearly and correctly stated.",
)

# Sales Pitch Team
# Value proposition agents: research -> fact check -> copy edit
value_proposition_research = create_agent_node(
    name="ValuePropositionResearch",
    tools=[tavily_tool, read_document, write_document, upload_document, query_vector_db],
    system_prompt="You are a value proposition researcher. Your task is to research and outline the value proposition of the user's company.",
)

value_proposition_fact_checker = create_agent_node(
    name="ValuePropositionFactChecker",
    tools=[tavily_tool, read_document, write_document, query_vector_db],
    system_prompt="You are a fact-checker. Your task is to verify the accuracy of the value proposition.",
)

value_proposition_copy_editor = create_agent_node(
    name="ValuePropositionCopyEditor",
    tools=[read_document, write_document],
    system_prompt="You are a copy editor. Your task is to ensure the value proposition is clearly and correctly stated.",
)

# The prompt no longer names 'final_sales_pitch.txt': create_agent_node already appends the
# instruction to save to '<node name>.txt', and run_sales_pitch_generator reads
# 'SalesPitchCreator.txt', so naming a second file gave the agent conflicting instructions.
sales_pitch_creator = create_agent_node(
    "SalesPitchCreator",
    [read_document, write_document, query_vector_db],
    """You are a sales pitch creator. Your task is to create a compelling sales pitch based on the target company's information and the user's company value proposition.
    Your response should include the content of the sales pitch."""
)

# Review agents for the finished pitch: fact check, then copy edit
sales_pitch_fact_checker = create_agent_node(
    name="SalesPitchFactChecker",
    tools=[read_document, write_document, query_vector_db],
    system_prompt="You are a fact-checker. Your task is to verify the accuracy of the sales pitch.",
)

sales_pitch_copy_editor = create_agent_node(
    name="SalesPitchCopyEditor",
    tools=[read_document, write_document],
    system_prompt="You are a copy editor. Your task is to ensure the sales pitch has correct grammar, spelling, and tone.",
)

Graph Creation

In [24]:
from typing import Dict, List, TypedDict
from langgraph.graph import StateGraph, END
from langchain_core.messages import HumanMessage

# Define the state
class State(TypedDict):
    """State schema used by the graph.

    NOTE: this definition replaces the ``State`` class declared earlier in the notebook.
    """
    # Conversation history; every node appends its output as a HumanMessage.
    messages: List[HumanMessage]
    # Routing hint written by each node: a node name or "END".
    next: str
    # Every agent node returns a "documents" entry, so the key is declared in the schema.
    documents: Dict[str, str]

# Create the graph
# NOTE(review): this instance is replaced further down, where `graph = StateGraph(State)` is
# executed again just before the nodes are added.
graph = StateGraph(State)

# Lookup table from node name to the callable that executes that node
node_functions = dict(
    CompanyResearchAlpha=company_research_alpha,
    CompanyDescriptionResearch=company_description_research,
    CompanyDescriptionWriting=company_description_writing,
    CompanyDescriptionFactChecker=company_description_fact_checker,
    CompanyDescriptionCopyEditor=company_description_copy_editor,
    CompanyExecsResearch=company_execs_research,
    CompanyExecsLinkedInCheck=company_execs_linkedin_check,
    CompanyExecsEditor=company_execs_editor,
    CompanyPrioritiesResearch=company_priorities_research,
    CompanyPrioritiesFactChecker=company_priorities_fact_checker,
    CompanyPrioritiesCopyEditor=company_priorities_copy_editor,
    IndustryTrendsResearch=industry_trends_research,
    IndustryTrendsFactChecker=industry_trends_fact_checker,
    IndustryTrendsCopyEditor=industry_trends_copy_editor,
    ValuePropositionResearch=value_proposition_research,
    ValuePropositionFactChecker=value_proposition_fact_checker,
    ValuePropositionCopyEditor=value_proposition_copy_editor,
    SalesPitchCreator=sales_pitch_creator,
    SalesPitchFactChecker=sales_pitch_fact_checker,
    SalesPitchCopyEditor=sales_pitch_copy_editor,
)

# Execution order of the pipeline, grouped by team
node_order = (
    # Intake
    ["CompanyResearchAlpha"]
    # Company description
    + [
        "CompanyDescriptionResearch",
        "CompanyDescriptionWriting",
        "CompanyDescriptionFactChecker",
        "CompanyDescriptionCopyEditor",
    ]
    # Company executives
    + [
        "CompanyExecsResearch",
        "CompanyExecsLinkedInCheck",
        "CompanyExecsEditor",
    ]
    # Company priorities
    + [
        "CompanyPrioritiesResearch",
        "CompanyPrioritiesFactChecker",
        "CompanyPrioritiesCopyEditor",
    ]
    # Industry trends
    + [
        "IndustryTrendsResearch",
        "IndustryTrendsFactChecker",
        "IndustryTrendsCopyEditor",
    ]
    # Value proposition
    + [
        "ValuePropositionResearch",
        "ValuePropositionFactChecker",
        "ValuePropositionCopyEditor",
    ]
    # Sales pitch
    + [
        "SalesPitchCreator",
        "SalesPitchFactChecker",
        "SalesPitchCopyEditor",
    ]
)

graph = StateGraph(State)

# Add nodes to the graph
for node in node_order:
    graph.add_node(node, node_functions[node])


def _make_router(allowed_targets, fallback_target):
    """Return a routing function limited to ``allowed_targets``.

    The router returns ``state["next"]`` when it names an allowed target; otherwise it
    returns ``fallback_target``. Agents default ``next`` to their own name, which is
    never a valid forward target, so the fallback keeps the pipeline moving in order.
    """
    def route(state):
        requested = state.get("next")
        return requested if requested in allowed_targets else fallback_target
    return route


# Each node gets conditional edges only. The unconditional add_edge() calls were dropped:
# combined with conditional edges from the same node they add a second outgoing branch,
# so a node would trigger both its fixed successor and its routed target.
for i, node in enumerate(node_order):
    downstream = node_order[i + 1:]
    next_nodes = {next_node: next_node for next_node in downstream}
    next_nodes["END"] = END

    # Without a valid request: continue with the next node in order, or finish after the last one.
    fallback = downstream[0] if downstream else "END"

    graph.add_conditional_edges(
        node,
        _make_router(frozenset(next_nodes), fallback),
        next_nodes
    )

# Set the entry point
graph.set_entry_point("CompanyResearchAlpha")

# Compile the graph
workflow = graph.compile()

Main Chain and Execution

In [25]:
def run_sales_pitch_generator(target_company: str, user_company_url: str):
    """Run every node in ``node_order`` once and return the generated sales pitch.

    Each node function receives the state produced by the previous one. The loop
    stops early when a node sets ``next`` to ``"END"`` or when any step raises
    (the error is logged, not re-raised).

    Args:
        target_company: Name of the company the pitch is written for.
        user_company_url: Website of the user's own company.

    Returns:
        The stripped contents of ``SalesPitchCreator.txt`` from the working directory,
        or an explanatory message when that file is missing or unreadable.
    """
    total_steps = len(node_order)
    initial_message = f"Generate a sales pitch for {target_company}. Our company website is {user_company_url}."
    state = {
        "messages": [HumanMessage(content=initial_message)],
        "next": "CompanyResearchAlpha",
        "documents": {},
    }

    for step_number, node_name in enumerate(node_order, start=1):
        try:
            logging.info(f"Executing step {step_number}/{total_steps}: {node_name}")

            if node_name not in node_functions:
                raise KeyError(f"Node '{node_name}' not found in node_functions dictionary")

            new_state = node_functions[node_name](state)

            logging.debug(f"State after {node_name}: {new_state}")

            is_valid_state = (
                isinstance(new_state, dict)
                and 'messages' in new_state
                and 'next' in new_state
            )
            if not is_valid_state:
                raise ValueError(f"Invalid state returned by {node_name}. State: {new_state}")

            state = new_state

            # Report on the file this step was expected to write
            output_file = f"{node_name}.txt"
            doc_path = WORKING_DIRECTORY / output_file
            if not doc_path.exists():
                logging.warning(f"File {output_file} does not exist after step {node_name}")
            else:
                logging.info(f"File {output_file} exists after step {node_name}")
                try:
                    with doc_path.open("r") as handle:
                        content = handle.read().strip()
                except IOError as e:
                    logging.error(f"Error reading {output_file}: {e}")
                else:
                    if content:
                        logging.info(f"\n{output_file} Contents:\n{content[:500]}...\n---")
                    else:
                        logging.warning(f"File {output_file} is empty")

            if state['next'] == "END":
                break

        except Exception as e:
            logging.error(f"An error occurred during step {step_number} ({node_name}): {e}")
            logging.exception("Detailed traceback:")
            break

    # After the chain completes, read and return the final sales pitch
    final_pitch_path = WORKING_DIRECTORY / "SalesPitchCreator.txt"
    if not final_pitch_path.exists():
        logging.error("Final sales pitch file not found")
        return "Final sales pitch file not found. There may have been an error in the process."

    try:
        with final_pitch_path.open("r") as handle:
            return handle.read().strip()
    except IOError as e:
        logging.error(f"Error reading final sales pitch: {e}")
        return "Error reading final sales pitch file."

## Main Code Execution Block

## Logging Info
# Logging is imported and configured here, ahead of the entry point. Previously the import sat
# at the end of the `finally` block and basicConfig ran after the pipeline, so on a fresh
# kernel the first logging call raised NameError and the DEBUG level/format was never applied.
import logging

logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')

## Entry point
if __name__ == "__main__":
    try:
        logging.info("Initializing vector store...")
        initialize_vector_store()

        # Keep asking until a non-empty company name is entered.
        while True:
            target_company = input("Enter the name of the target company: ").strip()
            if target_company:
                break
            print("Company name cannot be empty. Please try again.")

        # Minimal URL sanity check: non-empty and contains a dot.
        while True:
            user_company_url = input("Enter your company's URL: ").strip()
            if user_company_url and "." in user_company_url:
                break
            print("Please enter a valid URL.")

        logging.info(f"Generating sales pitch for {target_company}...")
        final_pitch = run_sales_pitch_generator(target_company, user_company_url)

        logging.info("\nFinal Sales Pitch:")
        print(final_pitch)

        # Print contents of all generated files (first 500 characters of each)
        for node_name in node_order:
            file_name = f"{node_name}.txt"
            file_path = WORKING_DIRECTORY / file_name
            if file_path.exists():
                try:
                    with file_path.open("r") as file:
                        content = file.read().strip()
                        logging.info(f"\nContents of {file_name}:\n{content[:500]}...\n---")
                except IOError as e:
                    logging.error(f"Error reading {file_name}: {e}")
            else:
                logging.warning(f"File {file_name} was not created during the process")

    except Exception as e:
        logging.error(f"An unexpected error occurred: {e}")
        logging.exception("Detailed traceback:")

    finally:
        # Remove the per-node output files whether or not the run succeeded.
        logging.info("Cleaning up temporary files...")
        for node_name in node_order:
            file_name = f"{node_name}.txt"
            file_path = WORKING_DIRECTORY / file_name
            try:
                file_path.unlink()
                logging.info(f"Deleted {file_name}")
            except FileNotFoundError:
                logging.warning(f"File {file_name} not found during cleanup")

        logging.info("Process completed.")

# NOTE(review): abbreviated sketch showing where debug logging was added to the agent factory.
# As written, running it redefines (shadows) the complete create_agent_node from the Agent
# Creation cell, and the inner function refers to `agent` and `new_state`, which this body
# never assigns -- the elided parts must be filled in before it can be used.
def create_agent_node(name: str, tools: List[BaseTool], system_prompt: str):
    # ... (previous code)
    
    def execute_agent(state):
        # Log the node name and incoming state before invoking the agent.
        logging.debug(f"Executing agent: {name}")
        logging.debug(f"Input state: {state}")
        result = AgentExecutor(agent=agent, tools=tools).invoke(state)
        logging.debug(f"Agent result: {result}")
        
        # ... (rest of the function)
        
        # Log the state handed on to the next node.
        logging.debug(f"Output state: {new_state}")
        return new_state
    
    return execute_agent

# In run_sales_pitch_generator
# NOTE(review): excerpt illustrating the extra debug logging for the step loop. Executed as-is
# at top level it only emits one debug line per entry of node_order; the node call is elided.
for step, node_name in enumerate(node_order):
    try:
        logging.debug(f"Starting execution of node: {node_name}")
        # ... (rest of the code)
    except Exception as e:
        # logging.exception records the message together with the active traceback.
        logging.exception(f"Detailed error in step {step + 1}:")
        break

2024-09-02 16:37:47,236 - INFO - Initializing vector store...


2024-09-02 16:37:57,505 - INFO - Generating sales pitch for Microsoft...
2024-09-02 16:37:57,505 - INFO - Executing step 1/20: CompanyResearchAlpha
2024-09-02 16:37:57,521 - ERROR - An error occurred during step 1 (CompanyResearchAlpha): "Input to ChatPromptTemplate is missing variables {'name'}.  Expected: ['agent_scratchpad', 'messages', 'name'] Received: ['messages', 'next', 'documents', 'intermediate_steps', 'agent_scratchpad']\nNote: if you intended {name} to be part of the string and not a variable, please escape it with double curly braces like: '{{name}}'."
2024-09-02 16:37:57,522 - ERROR - Detailed traceback:
Traceback (most recent call last):
  File "/var/folders/jr/_qkyxp313390z32jmym7j9sm0000gp/T/ipykernel_64437/840569170.py", line 14, in run_sales_pitch_generator
    new_state = node_function(state)
                ^^^^^^^^^^^^^^^^^^^^
  File "/var/folders/jr/_qkyxp313390z32jmym7j9sm0000gp/T/ipykernel_64437/2955531964.py", line 15, in execute_agent
    result = AgentExecut

Final sales pitch file not found. There may have been an error in the process.
