### Install Dependencies

In [None]:
%pip install --q langchain-community
%pip install --q langchain
%pip install --q duckduckgo-search

# TODO: add the following packages to poetry
%pip install --q requests
%pip install --q beautifulsoup4
%pip install --q python-dotenv

#################################
# Required for PaperSpace Gradient
%pip install --q typing-inspect==0.8.0 typing_extensions==4.5.0
%pip install --q pydantic==1.10.8

### Setup Ollama

In [6]:
# %> curl -fsSL https://ollama.com/install.sh | sh
# %> ollama serve
# %> ollama pull mixtral:instruct     (gemma:7b-instruct | mistral:instruct)

!ollama list

NAME            	ID          	SIZE 	MODIFIED    
mixtral:instruct	7708c059a8bb	26 GB	2 hours ago	


### Accelerator Info

In [None]:
!nvidia-smi

### Load Environment Variables & Config
This includes:
- LANGCHAIN_TRACING_V2
- LANGCHAIN_API_KEY

In [5]:
%load_ext dotenv
%dotenv GenieSearch.env
%env LANGCHAIN_TRACING_V2

'True'

### Prompt

In [4]:
# Prompt for summarizing one scraped page with respect to a question.
# Placeholders: {text} is the page text, {question} is the question to answer.
template = """{text}
-----------
Using the above text, answer in short the following question:
> {question}
-----------
if the question cannot be answered using the text, simply summarize the text. Include all
factual information, numbers, stats etc if available.
"""

In [7]:
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_template(template)

### Ingest Documents

In [8]:
import requests
from bs4 import BeautifulSoup

def scrape_url(url: str, timeout: float = 10.0) -> str:
    """Download a web page and return its visible text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled host can block the request indefinitely.

    Returns:
        On HTTP 200, the page text with markup removed and fragments joined
        by single spaces. On any other status code, the message
        ``"Failed to retrieve page: <status_code>"``. If the request or the
        parsing raises, the exception is printed and ``""`` is returned.
        A string is always returned, so callers can slice the result.
    """
    try:
        # Bound the wait so one unresponsive site cannot hang the caller.
        response = requests.get(url, timeout=timeout)

        if response.status_code == 200:
            # Strip the HTML markup and keep only the readable text.
            soup = BeautifulSoup(response.text, "html.parser")
            return soup.get_text(separator=" ", strip=True)

        return f"Failed to retrieve page: {response.status_code}"
    except Exception as e:
        # Best-effort scraping: report the problem and fall through to "".
        print(e)

    return ""

In [9]:
url = "https://blog.langchain.dev/announcing-langsmith/"

content = scrape_url(url)[:10000]

### Setup Local LLM & Embedding Models

In [10]:
LLM_MODEL = "mixtral:instruct"  # ("gemma:7b-instruct" | "mistral:instruct")
TEMPERATURE = 0.9


#### Load & Test Local LLM Model

#### Testing the Ollama model. No need to import a separate LLM model.

In [11]:
# Import from langchain_community (installed in the dependencies cell)
# rather than the legacy `langchain.llms` path.
from langchain_community.llms import Ollama

# Throwaway model instance used only to confirm the local Ollama server
# answers with the configured model and temperature.
_test_llm_model_ = Ollama(
    model=LLM_MODEL,
    temperature=TEMPERATURE,
)

# Use `invoke` instead of calling the model object directly; the direct call
# is what produced the deprecation warning in this cell's recorded output.
_test_llm_model_.invoke("Who are you?")

  warn_deprecated(


' I am a helpful assistant, here to provide information and answer questions to the best of my ability. How can I help you today?'

### Web Search Agent

In [12]:
from langchain.utilities import DuckDuckGoSearchAPIWrapper

# Default number of search hits to fetch for each question.
NUM_RESULTS_PER_QUESTION = 3
ddg_search = DuckDuckGoSearchAPIWrapper()


async def web_search(query: str, num_results: int = NUM_RESULTS_PER_QUESTION):
    """Search DuckDuckGo and return the links of the hits.

    Declared ``async`` so callers can await it, although the underlying
    ``ddg_search.results`` call is made synchronously.

    Args:
        query: Search text sent to DuckDuckGo.
        num_results: Maximum number of results requested from the wrapper.

    Returns:
        A list with the ``"link"`` value of every result entry that has one.
    """
    web_query_results = ddg_search.results(query, num_results)
    # Skip entries without a "link" key instead of raising KeyError.
    # NOTE(review): some wrapper versions appear to return a placeholder
    # entry without a link when nothing is found - confirm against the
    # installed version.
    return [r["link"] for r in web_query_results if "link" in r]


In [13]:
await web_search("What is langsmith?")

['https://blog.logrocket.com/langsmith-test-llms-ai-applications/',
 'https://blog.langchain.dev/announcing-langsmith/',
 'https://cheatsheet.md/langchain-tutorials/langsmith.en']

### Setup Chat Chain

In [19]:
import asyncio
from langchain_community.chat_models import ChatOllama
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda

# Summarize a single page: expects a dict with "url" and "question" keys.
scrape_and_summarize_chain = (
    # Add a "text" key holding the first 10000 characters of the scraped page.
    RunnablePassthrough.assign(text=lambda x: scrape_url(x["url"])[:10000])
    # Fill the {text} / {question} placeholders of the summarization prompt.
    | prompt
    # NOTE(review): TEMPERATURE is not passed here, so the chat model runs
    # with its default temperature - confirm this is intended.
    | ChatOllama(model=LLM_MODEL)
    | StrOutputParser()
)

# Search the web for a question and summarize every hit:
# expects a dict with a "question" key.
web_search_chain = (
    # web_search is a coroutine function, so asyncio.run drives it to completion
    # and the resulting list of links is stored under "urls".
    RunnablePassthrough.assign(urls=lambda x: asyncio.run(web_search(x["question"])))
    # Fan out: build one {"question", "url"} dict per link.
    | (lambda x: [{"question": x["question"], "url": u} for u in x["urls"]])
    # Run the single-page chain on each of those dicts.
    | scrape_and_summarize_chain.map()
)


# Smoke test of the chain; the last expression displays the list of summaries.
chat_response = web_search_chain.invoke({"question": "Martin Fabbri"})
chat_response

[' The text does not provide enough information to accurately answer this question. It is unclear whether "Martin Fabbri" refers to a person or an entity, and if so, what their role or relevance is in the context of the provided text. Based on the text, it appears that Martin Fabbri may be a player for the Detroit Red Wings who scored a goal against the Columbus Blue Jackets. However, further information is needed to confirm this.',
 ' The text provided is a section of a webpage from The Detroit News, and it is not possible to directly answer a question with the name "Martin Fabbri" using just this text. However, I can provide some context about what this text is describing:\n\n* It appears to be a portion of the website that is only accessible to subscribers of The Detroit News.\n* There are several sections on this page, including "News," "Sports," "Business," "Autos," "Life + Home," "Entertainment," "Opinion," "Obituaries," and "Weather."\n* Some of these sections have further sub-h

#### Research Web Search

In [20]:
import ast

# Prompt asking the model for three search queries formatted as a Python list.
questions_generation = (
    "Write 3 google search queries to search online that form an objective "
    "opinion from the following question: {question}\n "
    "Return a python list in the following format: "
    "['query 1', 'query 2', 'query 3']\n "
    "Do not include anything else after the list."
)

SEARCH_PROMPT = ChatPromptTemplate.from_messages([("user", questions_generation)])


def _parse_query_list(llm_output: str) -> list:
    """Safely turn the model's reply into a list of query strings.

    The reply is untrusted text, so it is parsed with ``ast.literal_eval``
    (literals only) rather than ``eval``, which would execute whatever the
    model wrote.

    Args:
        llm_output: Raw text returned by the model.

    Returns:
        The queries as a list of strings.

    Raises:
        ValueError: If the reply does not contain a parsable list literal.
    """
    text = llm_output.strip()
    # Keep only the outermost [...] span so stray text around the list
    # does not break parsing.
    start, end = text.find("["), text.rfind("]")
    if start != -1 and end > start:
        text = text[start : end + 1]
    try:
        parsed = ast.literal_eval(text)
    except (ValueError, TypeError, SyntaxError) as exc:
        raise ValueError(
            f"Could not parse search queries from model output: {llm_output!r}"
        ) from exc
    if not isinstance(parsed, (list, tuple)):
        raise ValueError(
            f"Expected a list of search queries, got: {llm_output!r}"
        )
    return [str(query) for query in parsed]


# question -> list of {"question": <search query>} dicts
search_qanda_chain = (
    SEARCH_PROMPT
    | ChatOllama(model=LLM_MODEL)
    | StrOutputParser()
    | _parse_query_list
    | (lambda queries: [{"question": q} for q in queries])
)

# For every generated query, search the web and summarize each hit.
full_research_chain = search_qanda_chain | web_search_chain.map()

# Result is a list (one entry per query) of lists of page summaries.
lol = full_research_chain.invoke({"question": "what is json?"})
lol

[[' JSON (JavaScript Object Notation) is a lightweight data-interchange format that is easy for humans to read and write and easy for machines to parse and generate. It is based on a subset of the JavaScript Programming Language, Standard ECMA-262 3rd Edition - December 1999. JSON is a text format that is completely language independent but uses conventions that are familiar to programmers of the C-family of languages, including C, C++, C#, Java, JavaScript, Perl, Python, and many others. These properties make JSON an ideal data-interchange language.\nJSON is built on two structures:\n\n1. A collection of name/value pairs. In various languages, this is realized as an object, record, struct, dictionary, hash table, keyed list, or associative array.\n2. An ordered list of values. In most languages, this is realized as an array, vector, list, or sequence.\nHere are some facts about JSON:\n\n* JSON is a text format that is completely language independent but uses conventions that are famil

### Report Writer Chain

In [None]:
def collapse_list_of_lists(list_of_lists):
    """Merge nested lists of strings into a single string.

    The strings of each inner list are joined with a blank line, and the
    resulting chunks are joined with a blank line as well.
    """
    separator = "\n\n"
    return separator.join(separator.join(group) for group in list_of_lists)

In [None]:
collapse_list_of_lists(lol)

### Research Writer Prompt

In [None]:
# System message that fixes the report writer's role.
WRITER_SYSTEM_PROMPT = (
    "You are an AI critical thinker research assistant. Your sole purpose "
    "is to write well written, critically acclaimed, objective and structured "
    "reports on given text."
)

# User message for the report writer.
# Placeholders: {research_summary} is the collected research text,
# {question} is the question or topic the report must address.
RESEARCH_REPORT_TEMPLATE = """Information:
--------
{research_summary}
--------
Using the above information, answer the following question or topic: "{question}" 
in a detailed report -- The report should focus on the answer to the question, 
should be well structured, informative,
in depth, with facts and numbers if available and a minimum of 1,200 words.
You should strive to write the report as long as you can using all relevant 
and necessary information provided.
You must write the report with markdown syntax.
You MUST determine your own concrete and valid opinion based on the given 
information. Do NOT defer to general and meaningless conclusions.
Write all used source urls at the end of the report, and make sure to not 
add duplicated sources, but only one reference for each.
You must write the report in apa format.
Please do your best, this is very important to my career."""

# Pair the fixed system instructions with the per-request report template.
_writer_messages = [
    ("system", WRITER_SYSTEM_PROMPT),
    ("user", RESEARCH_REPORT_TEMPLATE),
]
research_writer_prompt = ChatPromptTemplate.from_messages(_writer_messages)

### Research Writer Chain

In [None]:
# Stage 1: run the full web research and merge every summary into one string.
_gather_research = full_research_chain | collapse_list_of_lists

# Stage 2: attach that string as "research_summary" next to the incoming
# "question", fill the writer prompt, and return the model's report as text.
research_writer_chain = (
    RunnablePassthrough.assign(research_summary=_gather_research)
    | research_writer_prompt
    | ChatOllama(model=LLM_MODEL)
    | StrOutputParser()
)

In [None]:
research_writer_chain.invoke({"question": "Who is Martin Fabbri?"})