In [85]:
from langchain_groq import ChatGroq
from langchain.prompts import ChatPromptTemplate
import requests
from bs4 import BeautifulSoup

In [86]:
import os
from dotenv import load_dotenv

load_dotenv()

os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")

In [87]:
template = """"
Summarize the following question based on the context:

Question: {question}

Context:

{context}

Response:
"""

In [177]:
SUMMARY_PROMPT = ChatPromptTemplate.from_template(template)

In [88]:
prompt = ChatPromptTemplate.from_template(template=template)

In [89]:
def scrape_text(url: str):
    try:
        response = requests.get(url)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, "html.parser")
            page_text = soup.get_text(separator=" ", strip=True)
            return page_text
        else:
            return f"Failed to retrieve the webpage: Status code {response.status_code}"
    except Exception as e:
        print(e)
        return f"Failed to retrieve the webpage: {e}"

In [90]:
url="https://www.gitselect.com/post/what-is-langchain-langsmith-and-langserve#:~:text=LangSmith%20and%20LangServe%20are%20integral,of%20your%20LLM%2Dpowered%20applications."

In [91]:
page_content = scrape_text(url)[:10000]

In [92]:
page_content

"What is Langchain, LangSmith and LangServe? top of page Home About AI Cloud Code Learn More Use tab to navigate through the menu items. All Posts Technology AI Cloud Search Log in / Sign up Guest Contributor Feb 20 5 min read What is Langchain, LangSmith and LangServe? The rise of artificial intelligence (AI) has brought about a revolution in numerous industries, leading to the development of innovative tools and platforms. One of the most recent developments in this sphere is LangChain, a unique AI-powered tool that offers a new way to build, observe, and deploy applications. But what is LangChain, and how can it transform the landscape of Language Learning Machines (LLM) and AI? Let's explore this in detail. An Introduction to LangChain LangChain is a development platform and library powered by artificial intelligence. It's specifically designed to aid developers in creating applications based on Language Learning Machines (LLM). The essence of LangChain lies in its ability to trans

In [93]:
model = ChatGroq(model="llama3-8b-8192")

In [94]:
from langchain.schema.output_parser import StrOutputParser

parser = StrOutputParser()

chain = prompt | model | parser

In [95]:
chain.invoke({
    "question": "What is langchain?",
    "context": page_content
})

'Based on the context, the question being asked is:\n\n"What is Langchain?"'

### RunnablePassThrough

In [96]:
from langchain_core.runnables import RunnablePassthrough

In [97]:
chain = RunnablePassthrough.assign(
    context = lambda x: scrape_text(x["url"])[:10000]
) | prompt | model | parser

In [98]:
chain

RunnableAssign(mapper={
  context: RunnableLambda(lambda x: scrape_text(x['url'])[:10000])
})
| ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template='"\nSummarize the following question based on the context:\n\nQuestion: {question}\n\nContext:\n\n{context}\n\nResponse:\n'))])
| ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x00000218EC702D80>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x00000218EC7B9790>, model_name='llama3-8b-8192', groq_api_key=SecretStr('**********'))
| StrOutputParser()

In [99]:
response = chain.invoke({
    "question": "What is langsmith?",
    "url": url
})

In [100]:
print(response)

Based on the context, the question being asked is "What is Langsmith?"


### search API

In [101]:
os.environ["TAVILY_API_KEY"] = os.getenv("TAVILY_API_KEY")

In [102]:
from langchain_community.tools.tavily_search import TavilySearchResults

In [103]:
search = TavilySearchResults()

In [104]:
# search.invoke(
#     {
#         "query": "What is langchain?"
#     }
# )

### DUCKDUCKGO

In [105]:
from langchain.utilities import DuckDuckGoSearchAPIWrapper

RESULTS_PER_REQUEST = 3

ddg_search = DuckDuckGoSearchAPIWrapper()

In [106]:
def web_search(query: str, num_results: int= RESULTS_PER_REQUEST):
    results = ddg_search.results(query, num_results)
    return [r["link"] for r in results]

# this returns a list of links like here
# results = [
#     {"link": "https://example.com/page1"},
#     {"link": "https://example.com/page2"},
#     {"link": "https://example.com/page3"}
# ]

In [107]:
web_search("What is Langchain?")

['https://www.geeksforgeeks.org/introduction-to-langchain/',
 'https://www.freecodecamp.org/news/beginners-guide-to-langchain/',
 'https://www.baeldung.com/java-langchain-basics']

In [108]:
scrape_text("https://www.geeksforgeeks.org/introduction-to-langchain/")

"Introduction to LangChain - GeeksforGeeks Skip to content Tutorials Python Tutorial Taking Input in Python Python Operators Python Data Types Python Numbers Python String Python Lists Python Tuples Sets in Python Python Dictionary Python Loops and Control Flow Python Conditional Statements Python Loops Python Functions Python OOPS Concept Python Data Structures Python DSA Linked List Stack Queue Tree Heap Hashing Graph Sets Map Advance Data Structure Sorting Algorithms Searching Algorithms Python Exception Handling Python File Handling Python Exercises Python List Exercise Python String Exercise Python Tuple Exercise Python Dictionary Exercise Python Set Exercise Python Design Patterns Python Programming Examples Python Practice Questions Java Java Programming Language Java Tutorial Data Types Variables Operators Flow Control in Java Loops in Java Methods Strings Arrays OOPs Concepts OOPs Concepts Classes and Objects Access Modifiers Inheritance Abstraction Encapsulation Polymorphism 

In [109]:
scrape_and_summarize_chain = RunnablePassthrough.assign(
    context = lambda x: scrape_text(x["url"])[:10000]
) | prompt | model | parser

In [110]:
scrape_and_summarize_chain

RunnableAssign(mapper={
  context: RunnableLambda(lambda x: scrape_text(x['url'])[:10000])
})
| ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template='"\nSummarize the following question based on the context:\n\nQuestion: {question}\n\nContext:\n\n{context}\n\nResponse:\n'))])
| ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x00000218EC702D80>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x00000218EC7B9790>, model_name='llama3-8b-8192', groq_api_key=SecretStr('**********'))
| StrOutputParser()

In [111]:
from langchain_core.runnables import RunnableLambda

In [112]:
chain = RunnablePassthrough.assign(
    urls = lambda x: web_search(x["question"])
) | (lambda x: [{"question": x["question"], "url": u} for u in x["urls"]]) | scrape_and_summarize_chain.map()

In [113]:
chain

RunnableAssign(mapper={
  urls: RunnableLambda(lambda x: web_search(x['question']))
})
| RunnableLambda(...)
| RunnableEach(bound=RunnableAssign(mapper={
    context: RunnableLambda(lambda x: scrape_text(x['url'])[:10000])
  })
  | ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template='"\nSummarize the following question based on the context:\n\nQuestion: {question}\n\nContext:\n\n{context}\n\nResponse:\n'))])
  | ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x00000218EC702D80>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x00000218EC7B9790>, model_name='llama3-8b-8192', groq_api_key=SecretStr('**********'))
  | StrOutputParser())

In [114]:
response = chain.invoke({
    "question": "what is Langchain and LangSmith?"
})

In [117]:
len(response)

3

In [118]:
for r in response:
    print(r)

Based on the context, the question "What is Langchain and LangSmith?" is asking about the differences and purposes of two AI language model tools, Langchain and LangSmith.

Langchain is described as a versatile open-source framework that enables developers to build applications using large language models (LLM) like GPT-3. It provides a standardized interface for chains and supports creating complex chains for various applications.

LangSmith, on the other hand, is built on top of Langchain and is described as a dashboard that helps monitor and debug the performance of LLM applications. It provides features such as in-depth debugging capabilities, evaluation and monitoring tools, and facilitates production-grade application development.

In summary, Langchain is a framework for building LLM applications, while LangSmith is a tool for monitoring and debugging those applications.
I apologize, but there is no context provided, as the response is a "Failed to retrieve the webpage: Status c

### LLM Prompt

In [136]:
SEARCH_PROMPT = ChatPromptTemplate.from_messages(
    [
        # ("system", "{agent_prompt}"),
        (
            "user",
            "Write 3 google search queries to search form an"
            "Objective opinion from the following: {question}\n"
            "You must response with a list of strings in the following format: "
            "['query 1', 'query 2', 'query 3']"
            "only return the list and nothing else.",
        )
    ]
)

In [137]:
import json

In [135]:
# import re

# def get_list(response: str):
#     string = 'hello these are items ["items"] how you like it'
#     items = re.findall(r'\["([^"]+)"\]', string)

#     print(items)
    
# get_list("""Here are three Google search queries to search for an objective opinion on the difference between LangChain and LangSmith:

# ['"difference between LangChain and LangSmith" site:researchpapers.io', 'What are the key features of LangChain vs LangSmith', 'Comparison of LangChain and LangSmith: Which is better for language processing?']

# These queries aim to:""")

['items']


In [149]:
search_question_chain = SEARCH_PROMPT | model | StrOutputParser()

In [150]:
search_question_chain

ChatPromptTemplate(input_variables=['question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template="Write 3 google search queries to search form anObjective opinion from the following: {question}\nYou must response with a list of strings in the following format: ['query 1', 'query 2', 'query 3']only return the list and nothing else."))])
| ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x00000218EC702D80>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x00000218EC7B9790>, model_name='llama3-8b-8192', groq_api_key=SecretStr('**********'))
| StrOutputParser()

In [153]:
response = search_question_chain.invoke({
    "question": "What is the difference between langchain and langsmith?"
})

In [154]:
print(response)

['Langchain vs Langsmith comparison', 'What are the key differences between Langchain and Langsmith', 'Objectively comparing Langchain and Langsmith AI models']


In [156]:
web_search_chain = RunnablePassthrough.assign(
    urls= lambda x: web_search(x["question"])
) | (lambda x: [{"question": x["question"], "url": u} for u in x["urls"]]) | scrape_and_summarize_chain.map()

In [160]:
chain = search_question_chain | (lambda x: [{"question": q} for q in x]) | web_search_chain.map()

In [161]:
chain

ChatPromptTemplate(input_variables=['question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template="Write 3 google search queries to search form anObjective opinion from the following: {question}\nYou must response with a list of strings in the following format: ['query 1', 'query 2', 'query 3']only return the list and nothing else."))])
| ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x00000218EC702D80>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x00000218EC7B9790>, model_name='llama3-8b-8192', groq_api_key=SecretStr('**********'))
| StrOutputParser()
| RunnableLambda(lambda x: [{'question': q} for q in x])
| RunnableEach(bound=RunnableAssign(mapper={
    urls: RunnableLambda(lambda x: web_search(x['question']))
  })
  | RunnableLambda(...)
  | RunnableEach(bound=RunnableAssign(mapper={
      context: RunnableLambda(lambda x: scrape_text(x['url'])[:10000])
    })
    | ChatPromptTem

In [None]:
response = chain.invoke({
    "question": "What is the difference between langchain and langsmith?"
})

### RESEARCH PROMPT:

In [165]:
WRITER_SYSTEM_PROMPT = "You are an AI critical thinker research assistant. Your sole purpose is to write well written, critically acclaimed, objective and structured reports on given text."

In [163]:
RESEARCH_REPORT_TEMPLATE = """Information:
--------
{research_summary}
--------
Using the above information, answer the following question or topic: "{question}" in a detailed report -- \
The report should focus on the answer to the question, should be well structured, informative, \
in depth, with facts and numbers if available and a minimum of 1,200 words.
You should strive to write the report as long as you can using all relevant and necessary information provided.
You must write the report with markdown syntax.
You MUST determine your own concrete and valid opinion based on the given information. Do NOT deter to general and meaningless conclusions.
Write all used source urls at the end of the report, and make sure to not add duplicated sources, but only one reference for each.
You must write the report in apa format.
Please do your best, this is very important to my career."""

In [166]:
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", WRITER_SYSTEM_PROMPT),
        ("user", RESEARCH_REPORT_TEMPLATE),
    ]
)

In [168]:
full_research_chain = search_question_chain | (lambda x: [{"question": q} for q in x]) | web_search_chain.map()
full_research_chain

ChatPromptTemplate(input_variables=['question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template="Write 3 google search queries to search form anObjective opinion from the following: {question}\nYou must response with a list of strings in the following format: ['query 1', 'query 2', 'query 3']only return the list and nothing else."))])
| ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x00000218EC702D80>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x00000218EC7B9790>, model_name='llama3-8b-8192', groq_api_key=SecretStr('**********'))
| StrOutputParser()
| RunnableLambda(lambda x: [{'question': q} for q in x])
| RunnableEach(bound=RunnableAssign(mapper={
    urls: RunnableLambda(lambda x: web_search(x['question']))
  })
  | RunnableLambda(...)
  | RunnableEach(bound=RunnableAssign(mapper={
      context: RunnableLambda(lambda x: scrape_text(x['url'])[:10000])
    })
    | ChatPromptTem

In [171]:
def collapse_list_of_lists(list_of_lists):
    content = []
    for l in list_of_lists:
        content.append("\n\n".join(l))
    return "\n\n".join(content)

In [173]:
chain = RunnablePassthrough.assign(
    research_summary=full_research_chain | collapse_list_of_lists
) | prompt | model | StrOutputParser()

In [174]:
chain

RunnableAssign(mapper={
  research_summary: ChatPromptTemplate(input_variables=['question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template="Write 3 google search queries to search form anObjective opinion from the following: {question}\nYou must response with a list of strings in the following format: ['query 1', 'query 2', 'query 3']only return the list and nothing else."))])
                    | ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x00000218EC702D80>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x00000218EC7B9790>, model_name='llama3-8b-8192', groq_api_key=SecretStr('**********'))
                    | StrOutputParser()
                    | RunnableLambda(lambda x: [{'question': q} for q in x])
                    | RunnableEach(bound=RunnableAssign(mapper={
                        urls: RunnableLambda(lambda x: web_search(x['question']))
                      })
    

### URL

In [178]:
scrape_and_summarize_chain = RunnablePassthrough.assign(
    summary = RunnablePassthrough.assign(
        context= lambda x: scrape_text(x["url"])[:10000]
    ) | SUMMARY_PROMPT | model | StrOutputParser()
) | (lambda x: f"URL: {x["url"]}\n\nSUMMARY: {x["summary"]}")

In [179]:
scrape_and_summarize_chain

RunnableAssign(mapper={
  summary: RunnableAssign(mapper={
             context: RunnableLambda(lambda x: scrape_text(x['url'])[:10000])
           })
           | ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template='"\nSummarize the following question based on the context:\n\nQuestion: {question}\n\nContext:\n\n{context}\n\nResponse:\n'))])
           | ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x00000218EC702D80>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x00000218EC7B9790>, model_name='llama3-8b-8192', groq_api_key=SecretStr('**********'))
           | StrOutputParser()
})
| RunnableLambda(...)