In [1]:
from operator import itemgetter

from langchain.chat_models import ChatOpenAI, ChatAnthropic
from langchain.prompts import SystemMessagePromptTemplate, ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.runnables.openai_functions import OpenAIFunctionsRouter

from permchain.connection_inmemory import InMemoryPubSubConnection
from permchain.pubsub import PubSub
from permchain.topic import Topic

## Content Fetcher

First, we are going to define our content fetcher. This is responsible for taking a search query and getting relevant web pages.

In [2]:
from langchain.utilities import GoogleSearchAPIWrapper
from langchain.document_loaders import AsyncHtmlLoader
from langchain.document_transformers import Html2TextTransformer

In [3]:
# !pip install google-api-python-client
# !pip install html2text

In [4]:
# Google search client used by retrieve_documents to look up result links.
# NOTE(review): presumably picks up Google API credentials from the environment — confirm.
search_tool = GoogleSearchAPIWrapper()

In [5]:
def retrieve_documents(query, num_results=5):
    """Search Google for ``query`` and return the result pages as text documents.

    Args:
        query: Search query string. Surrounding whitespace and double quotes
            are stripped before searching (model-generated queries are often
            wrapped in quotes).
        num_results: Maximum number of search results to request. Defaults
            to 5, the value this function previously hard-coded.

    Returns:
        A list of documents produced by ``Html2TextTransformer`` from the
        fetched pages, or an empty list when the cleaned query is empty or no
        search result carries a link.
    """
    query = query.strip().strip('"')
    if not query:
        # Nothing to search for; skip the search API call entirely.
        return []

    search_results = search_tool.results(query, num_results)
    # Keep only results that actually carry a non-empty "link" entry.
    urls_to_look = [res["link"] for res in search_results if res.get("link")]
    if not urls_to_look:
        return []

    # Fetch the pages, then convert their HTML to plain text.
    loader = AsyncHtmlLoader(urls_to_look)
    html2text = Html2TextTransformer()
    docs = loader.load()
    return list(html2text.transform_documents(docs))

In [6]:
#docs = retrieve_documents("langchain")

## Querier

We will now come up with an actor that generates a search query from a user request.

In [7]:
# Prompt that asks the model for a search query matching the user's question.
prompt = ChatPromptTemplate.from_template(
    "Come up with a search query given the user question:\n\n{question}"
)
# Pipeline: prompt -> chat model -> plain string.
query_chain = (
    prompt
    | ChatOpenAI()
    | StrOutputParser()
)

## Summarizer
We will now come up with an actor to summarize the results given a query and some search results.

In [8]:
# Prompt that wraps the question and the search results in XML-style tags.
# The adjacent literals are concatenated into the same single template string.
prompt = ChatPromptTemplate.from_template(
    "Answer the user's question given the search results\n\n"
    "<question>{question}</question>"
    "<search_results>{search_results}</search_results>"
)

In [9]:
# Pipeline: prompt -> ChatOpenAI (with a claude-2 ChatAnthropic fallback) -> plain string.
summarizer_chain = (
    prompt
    | ChatOpenAI().with_fallbacks([ChatAnthropic(model="claude-2")])
    | StrOutputParser()
)

## All together now!

In [10]:
# Topic the query actor publishes to and the search actor subscribes to.
query_inbox = Topic("query")
# Topic the search actor publishes to and the summarizer actor subscribes to.
summarizer_inbox = Topic("summarizer")

In [11]:
query_actor = (
    # Listens for values arriving on the input topic
    Topic.IN.subscribe()
    # Turns the incoming value into a search query string
    | query_chain
    # The generated search query is published to the query inbox
    | query_inbox.publish()
)

In [12]:
search_actor = (
    # Triggered by each search query published to the query inbox
    query_inbox.subscribe()
    | {
        # Documents fetched for the received search query
        "search_results": retrieve_documents,
        # Pass the received value through unchanged. Note that this is the
        # generated search query, not the original user input.
        "question": lambda x: x,
    }
    # The question/search_results mapping goes to the summarizer inbox
    | summarizer_inbox.publish()
)

In [13]:
summ_actor = (
    # Triggered by each question/search_results mapping in the summarizer inbox
    summarizer_inbox.subscribe()
    # Answers the question from the search results
    | summarizer_chain
    # The answer is published to the output topic
    | Topic.OUT.publish()
)

In [14]:
# Combine the query, search and summarizer actors into one PubSub application
# that communicates over an in-memory connection.
web_researcher = PubSub(
    processes=(query_actor, search_actor, summ_actor),
    connection=InMemoryPubSubConnection(),
)

In [15]:
#web_researcher.invoke({"question": "What is langsmith?"})

## Trying to use it as a sub component

In [16]:
from langchain.output_parsers.openai_functions import JsonKeyOutputFunctionsParser

In [17]:
# Prompt asking the model to break the user's question into 2-5 search-style sub questions.
template = """Write between 2 and 5 sub questions that serve as google search queries to search online that form an objective opinion from the following: {question}"""
# OpenAI function schema used to force structured output: a single
# "questions" array of strings.
functions = [
    {
        "name": "sub_questions",
        "description": "List of sub questions",
        "parameters": {
            "type": "object",
            "properties": {
                "questions": {
                    "type": "array",
                    "description": "List of sub questions to ask.",
                    "items": {"type": "string"},
                },
            },
            # The output parser below reads the "questions" key unconditionally,
            # so declare it required rather than leaving it optional.
            "required": ["questions"],
        },
    },
]
prompt = ChatPromptTemplate.from_template(template)
# Pipeline: prompt -> chat model forced to call `sub_questions` -> the parsed
# "questions" list from the function-call arguments.
question_chain = (
    prompt
    | ChatOpenAI(temperature=0).bind(
        functions=functions, function_call={"name": "sub_questions"}
    )
    | JsonKeyOutputFunctionsParser(key_name="questions")
)

In [18]:
# Try the sub-question generator on a sample question.
question_chain.invoke({"question": "what is langsmith?"})

['What is the purpose of Langsmith?',
 'Who developed Langsmith?',
 'What are the key features of Langsmith?',
 'How does Langsmith work?',
 'Are there any alternatives to Langsmith?']

In [29]:
# Prompt for the final report: it receives the original question and the
# research gathered for the sub questions.
template = """You are tasked with writing a research report to answer the following question:

<question>
{question}
</question>

In order to do that, you first came up with several sub questions and researched those. please find those below:

<research>
{research}
</research>

Now, write your final report answering the original question!"""
prompt = ChatPromptTemplate.from_template(template)
# Pipeline: prompt -> chat model -> plain string.
report_chain = prompt | ChatOpenAI() | StrOutputParser()

In [30]:
# Topic the sub-question actor publishes to and the research actor subscribes to.
research_inbox = Topic("research")
# Topic the research actor publishes to and the writer actor subscribes to.
writer_inbox = Topic("writer_inbox")

In [34]:
subquestion_actor = (
    # Listens for values arriving on the input topic
    Topic.IN.subscribe()
    # Breaks the incoming question into sub questions
    | question_chain
    # The sub questions are published to the research inbox
    | research_inbox.publish()
)
research_actor = (
    # Triggered by the sub questions published to the research inbox
    research_inbox.subscribe()
    | {
        # Run the web researcher over every sub question in a single batch call
        "research": lambda x: web_researcher.batch([{"question": i} for i in x]),
        # Alternative kept for reference: invoke the web researcher one sub question at a time
        #"research": lambda x: [web_researcher.invoke({"question": i}) for i in x],
        # The "question" field of the value read via Topic.IN.current()
        "question": Topic.IN.current() | itemgetter("question"),
    }
    # The research/question mapping goes to the writer inbox
    | writer_inbox.publish()
)
write_actor = (
    # Triggered by each research/question mapping in the writer inbox
    writer_inbox.subscribe()
    # Writes the final report
    | report_chain
    # The report is published to the output topic
    | Topic.OUT.publish()
)

In [35]:
# Combine the sub-question, research and writer actors into one PubSub
# application; the research actor calls web_researcher as a sub component.
longer_researcher = PubSub(
    processes=(subquestion_actor, research_actor, write_actor),
    connection=InMemoryPubSubConnection(),
)

In [None]:
# Run the full research pipeline end to end on a sample question.
longer_researcher.invoke({"question": "what is langsmith?"})

Fetching pages: 100%|#####################################################################################################################################################################################| 5/5 [00:01<00:00,  3.27it/s]
Fetching pages:   0%|                                                                                                                                                                                             | 0/5 [00:00<?, ?it/s]
Fetching pages:   0%|                                                                                                                                                                                             | 0/5 [00:00<?, ?it/s][A

Fetching pages:   0%|                                                                                                                                                                                             | 0/5 [00:00<?, ?it/s][A[A


Fetching pages:   0%|                                   