In [1]:
from operator import itemgetter

from langchain.chat_models import ChatOpenAI, ChatAnthropic
from langchain.prompts import SystemMessagePromptTemplate, ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.runnables.openai_functions import OpenAIFunctionsRouter

from permchain.connection_inmemory import InMemoryPubSubConnection
from permchain.pubsub import PubSub
from permchain.topic import Topic

## Content Fetcher

First, we are going to define our content fetcher. This is responsible for taking a search query and fetching relevant web pages.

In [2]:
from langchain.utilities import GoogleSearchAPIWrapper
from langchain.document_loaders import AsyncHtmlLoader
from langchain.document_transformers import Html2TextTransformer

In [3]:
from duckduckgo_search import DDGS

# Module-level DuckDuckGo search client; retrieve_documents() calls ddgs.text() on it.
ddgs = DDGS()

In [4]:
def retrieve_documents(query, max_urls=4):
    """Search the web for ``query`` and return the matching pages as text documents.

    The query is stripped of surrounding whitespace and double quotes and sent
    to the module-level ``ddgs`` search client. The first ``max_urls`` results
    that carry a non-empty ``href`` are downloaded with ``AsyncHtmlLoader`` and
    passed through ``Html2TextTransformer``.

    Args:
        query: Search query string.
        max_urls: Maximum number of result URLs to download. Defaults to 4
            (the previously hard-coded limit). Values <= 0 skip the search.

    Returns:
        A list of transformed documents; an empty list when no search result
        carried a link.
    """
    query = query.strip().strip('"')

    urls_to_look = []
    if max_urls > 0:
        for res in ddgs.text(query):
            href = res.get("href")
            if not href:
                continue
            urls_to_look.append(href)
            # Stop consuming search results as soon as we have enough links.
            if len(urls_to_look) >= max_urls:
                break

    if not urls_to_look:
        return []

    # Download the pages, then convert the loaded documents with Html2TextTransformer.
    docs = AsyncHtmlLoader(urls_to_look).load()
    return list(Html2TextTransformer().transform_documents(docs))

In [5]:
import nest_asyncio

# Patch asyncio so that nested event loops are allowed.
# NOTE(review): presumably required because AsyncHtmlLoader.load() drives its own
# event loop while the notebook kernel's loop is already running — confirm.
nest_asyncio.apply()

In [6]:
# Optional smoke test for the fetcher (performs live web requests); uncomment to run.
# docs = retrieve_documents("langchain")

## Summarizer
We will now define an actor that summarizes the search results for a given query.

In [7]:
# Summarizer prompt; it is filled from the `question` and `search_results` variables.
prompt = ChatPromptTemplate.from_template(
    "Answer the user's question given the search results\n\n"
    "<question>{question}</question>"
    "<search_results>{search_results}</search_results>"
)

In [8]:
# Primary model: default ChatOpenAI with client-side retries turned off.
primary_llm = ChatOpenAI(max_retries=0)
# Models handed to with_fallbacks(), listed in the same order as before.
fallback_llms = [ChatOpenAI(model="gpt-3.5-turbo-16k"), ChatAnthropic(model="claude-2")]

# prompt -> model (with fallbacks) -> plain string output
summarizer_chain = prompt | primary_llm.with_fallbacks(fallback_llms) | StrOutputParser()

## All together now!

In [9]:
# Topic that search_actor publishes to and summ_actor subscribes to.
summarizer_inbox = Topic("summarizer")

In [10]:
# Search actor: subscribes to the pipeline input (Topic.IN), builds a dict whose
# keys match the summarizer prompt's variables, and publishes it to summarizer_inbox.
search_actor = (
    Topic.IN.subscribe()
    | {
        # retrieve_documents is applied to the value arriving from Topic.IN.
        "search_results": retrieve_documents,
        # NOTE(review): presumably resolves to the current Topic.IN value (the
        # original question) — confirm against permchain.
        "question": Topic.IN.current(),
    }
    | summarizer_inbox.publish()
)

In [11]:
# Summarizer actor: reads from summarizer_inbox, wraps the chain's result under
# the "answer" key, and publishes it to the pipeline output.
summ_actor = (
    summarizer_inbox.subscribe()
    | {"answer": summarizer_chain}
    | Topic.OUT.publish()
)

In [12]:
# Combine the search and summarizer actors into a single runnable that uses an
# in-memory connection; the run is labelled "WebResearcher".
web_researcher = PubSub(
    connection=InMemoryPubSubConnection(),
    processes=(search_actor, summ_actor),
).with_config(run_name="WebResearcher")

In [13]:
# Single end-to-end run; the plain string is the question fed to the pipeline input.
web_researcher.invoke("What is langsmith?")

Fetching pages: 100%|###################################################| 4/4 [00:01<00:00,  2.19it/s]


[{'answer': 'LangSmith is a platform built by LangChain to help developers build production-grade language model (LLM) applications. It enables developers to trace and evaluate their LLM applications and intelligent agents, ensuring reliability and maintainability in the production environment. LangSmith integrates seamlessly with LangChain and provides features such as tracing runs, testing, and evaluating prompts or answers generated by LLM applications. It aims to facilitate the development lifecycle, maintenance, and improvement of AI models. For more information, you can refer to the LangSmith documentation.'}]

In [14]:
# Run the same pipeline for several questions; the result holds one output list per input.
web_researcher.batch(["what is langsmith", "what is llama"])

Fetching pages:   0%|                                                           | 0/4 [00:00<?, ?it/s]
Fetching pages: 100%|###################################################| 4/4 [00:00<00:00,  6.75it/s][A

Fetching pages: 100%|###################################################| 4/4 [00:01<00:00,  2.99it/s][A


[[{'answer': 'LangSmith is a platform that helps developers build production-grade language model applications and provides tools for testing, evaluating, and monitoring these applications. It is built by the developers of LangChain and integrates seamlessly with that library. LangSmith aims to address the challenges of moving LLM applications from prototypes to production, ensuring reliability and maintainability. It offers features such as tracing, testing, and evaluating prompts and answers generated by language models. For more information, you can refer to the LangSmith documentation.'}],
 [{'answer': 'According to the search results, a llama is a domesticated livestock species that is a descendant of the guanaco and belongs to the camel family. Llamas are primarily used as pack animals and a source of food, wool, hides, tallow, and dried dung. They are found in South American countries such as Bolivia, Peru, Colombia, Ecuador, Chile, and Argentina. Llamas are known for their long

## Trying to use it as a sub-component

In [15]:
from langchain.output_parsers.openai_functions import JsonKeyOutputFunctionsParser

In [16]:
# Sub-question generator.
# The template and prompt use cell-specific names so that this cell does not
# overwrite the `prompt` the summarizer chain is built from; re-running the
# summarizer cell afterwards would otherwise pick up the wrong prompt.
subquestion_template = """Write between 2 and 5 sub questions that serve as google search queries to search online that form an objective opinion from the following: {question}"""

# OpenAI function schema that forces the model to answer with a list of strings.
functions = [
    {
        "name": "sub_questions",
        "description": "List of sub questions",
        "parameters": {
            "type": "object",
            "properties": {
                "questions": {
                    "type": "array",
                    "description": "List of sub questions to ask.",
                    "items": {"type": "string"},
                },
            },
            # The output parser below indexes "questions" unconditionally, so the
            # schema must mark it as required.
            "required": ["questions"],
        },
    },
]

subquestion_prompt = ChatPromptTemplate.from_template(subquestion_template)

# prompt -> model forced to call `sub_questions` -> extract the "questions" list
question_chain = (
    subquestion_prompt
    | ChatOpenAI(temperature=0).bind(
        functions=functions, function_call={"name": "sub_questions"}
    )
    | JsonKeyOutputFunctionsParser(key_name="questions")
)

In [17]:
# Try the sub-question generator on its own; it returns a list of question strings.
question_chain.invoke({"question": "what is langsmith?"})

['What is the purpose of Langsmith?',
 'Who developed Langsmith?',
 'What are the features of Langsmith?',
 'How does Langsmith work?',
 'Are there any alternatives to Langsmith?']

In [18]:
# Report writer.
# Cell-specific names are used so that this cell does not rebind the global
# `template` / `prompt` that earlier chains were built from; re-running an
# earlier cell after this one would otherwise use the report prompt by mistake.
report_template = """You are tasked with writing a research report to answer the following question:

<question>
{question}
</question>

In order to do that, you first came up with several sub questions and researched those. please find those below:

<research>
{research}
</research>

Now, write your final report answering the original question!"""
report_prompt = ChatPromptTemplate.from_template(report_template)

# prompt (needs `question` and `research`) -> default ChatOpenAI model -> plain string
report_chain = report_prompt | ChatOpenAI() | StrOutputParser()

In [19]:
# Carries the output of question_chain from subquestion_actor to research_actor.
research_inbox = Topic("research")
# Carries the {"research", "question"} dict from research_actor to write_actor.
writer_inbox = Topic("writer_inbox")

In [20]:
# Step 1: turn the incoming question into sub-questions.
subquestion_actor = (
    # Triggered by the pipeline input
    Topic.IN.subscribe()
    | question_chain
    # The generated sub-questions go to the research inbox
    | research_inbox.publish()
)
# Step 2: research the sub-questions and pair the findings with the original question.
research_actor = (
    research_inbox.subscribe()
    | {
        # NOTE(review): .map() presumably runs web_researcher once per
        # sub-question in the incoming list — confirm.
        "research": web_researcher.map(),
        # Sequential alternative, kept for reference.
        # NOTE(review): it passes a dict, whereas web_researcher is invoked with
        # plain strings elsewhere in this notebook.
        # "research": lambda x: [web_researcher.invoke({"question": i}) for i in x],
        # Original question, read from the "question" key of the pipeline input.
        "question": Topic.IN.current() | itemgetter("question"),
    }
    | writer_inbox.publish()
)
# Step 3: write the final report and publish it as the pipeline output.
write_actor = writer_inbox.subscribe() | report_chain | Topic.OUT.publish()

In [21]:
# Combine the three actors (sub-questions -> research -> report) into a single
# runnable that uses an in-memory connection; the run is labelled "LongResearcher".
longer_researcher = PubSub(
    connection=InMemoryPubSubConnection(),
    processes=(subquestion_actor, research_actor, write_actor),
).with_config(run_name="LongResearcher")

In [22]:
# Full run; the input is a dict because research_actor reads its "question" key.
longer_researcher.invoke({"question": "what is langsmith?"})

Fetching pages:   0%|                                                           | 0/4 [00:00<?, ?it/s]
Fetching pages:   0%|                                                           | 0/4 [00:00<?, ?it/s][A

Fetching pages:   0%|                                                           | 0/4 [00:00<?, ?it/s][A[A


Fetching pages:   0%|                                                           | 0/4 [00:00<?, ?it/s][A[A[A



Fetching pages: 100%|###################################################| 4/4 [00:00<00:00,  5.71it/s][A[A[A[A
Fetching pages: 100%|###################################################| 4/4 [00:00<00:00,  4.80it/s]
Fetching pages: 100%|###################################################| 4/4 [00:01<00:00,  2.77it/s]


Fetching pages: 100%|###################################################| 4/4 [00:01<00:00,  2.70it/s][A[A



Fetching pages: 100%|###################################################| 4/4 [00:03<00:00,  1.22it/s][A[A[A


['Research Report: Understanding LangSmith\n\nIntroduction:\nThe purpose of this research report is to explore and provide insights into the topic of LangSmith. LangSmith is a unified platform that aims to address the challenges developers face when building and deploying language model applications in production environments. By examining various sources, we have gathered information to answer the question, "What is LangSmith?"\n\nFindings:\n\n1. LangSmith\'s Purpose and Features:\nLangSmith is designed to assist developers in transitioning from prototype to production with their language model applications. It offers a range of features to trace, test, evaluate, and monitor LLM (Large Language Model) calls for production. The platform is part of the LangChain ecosystem and provides reliable and maintainable solutions for language model applications [1].\n\n2. Development and Integration:\nLangSmith was developed by the same team that created LangChain, the popular language model soft