In [12]:
from transformers import pipeline
from langgraph.graph import StateGraph
import requests
from bs4 import BeautifulSoup


In [13]:
# Model checkpoint used for question answering; kept in a named constant so
# it is easy to spot and swap.
QA_MODEL_NAME = "distilbert-base-cased-distilled-squad"

# Built once at module level: answer_question() reuses this single pipeline
# object for every question instead of reloading the model each time.
qa_pipeline = pipeline(task="question-answering", model=QA_MODEL_NAME)

Device set to use cpu


In [14]:
def crawl_web_content(url, max_paragraphs=10):
    """Download a web page and return the text of its leading paragraphs.

    Args:
        url: Address of the page to fetch.
        max_paragraphs: Upper bound on how many ``<p>`` elements (in document
            order) are considered.

    Returns:
        str: The paragraph texts joined by single spaces. If the page yields
        no paragraph text, the sentinel ``"No relevant content found."`` is
        returned. If anything goes wrong while fetching or parsing, a string
        starting with ``"Failed to fetch content: "`` followed by the error
        is returned. This function reports problems through its return value
        and does not raise.
    """
    try:
        response = requests.get(url, timeout=5)
        # Without this, the body of a 404/500 error page would be scraped and
        # handed to the QA model as if it were the requested article.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extract and clean paragraph text
        paragraphs = soup.find_all('p')
        stripped = (p.get_text().strip() for p in paragraphs[:max_paragraphs])
        # Drop empty paragraphs: joining them would yield a whitespace-only
        # (but truthy) string and bypass the "no content" fallback below.
        cleaned_text = " ".join(text for text in stripped if text)
        return cleaned_text if cleaned_text else "No relevant content found."
    except Exception as e:
        # Deliberately best-effort: callers detect failure via the message
        # prefix rather than by catching exceptions.
        return f"Failed to fetch content: {e}"

In [21]:
def answer_question(state):
    """Graph node: answer ``state['user_input']`` from ``state['context']``.

    Args:
        state: Mapping with the keys ``'user_input'`` (the question) and
            ``'context'`` (the page text to search). Missing or ``None``
            values are treated as empty strings.

    Returns:
        dict: A new state with ``'user_input'``, ``'context'`` and
        ``'answer'``. When the context is unusable (blank, or one of the
        sentinel strings produced by ``crawl_web_content``) or the question
        is blank, ``'answer'`` holds an explanatory message and the QA
        pipeline is not called.
    """
    question = state.get('user_input') or ""
    context = state.get('context') or ""  # No 'url' used here

    # Match the crawler's sentinels exactly. A plain substring test for
    # "Failed to fetch" would also reject a real page that merely mentions
    # that phrase, and the "no content" sentinel must never be given to the
    # model as if it were page text.
    context_unusable = (
        context.strip() == ""
        or context.startswith("Failed to fetch content:")
        or context == "No relevant content found."
    )
    if context_unusable:
        return {"user_input": question, "context": context, "answer": "Couldn't fetch relevant information from the URL."}

    # A blank question cannot be answered meaningfully; skip the model call.
    if question.strip() == "":
        return {"user_input": question, "context": context, "answer": "Please provide a question."}

    result = qa_pipeline(question=question, context=context)
    return {"user_input": question, "context": context, "answer": result['answer']}

# Dummy finish node
def finish_node(state):
    """Terminal pass-through node: return the incoming state object as-is."""
    final_state = state
    return final_state


In [22]:
# Two-step linear workflow: __start__ -> Answer -> Finish -> __end__.
# The state passed between nodes is a plain dict.
graph_builder = StateGraph(dict)

# Register every node before wiring, so each edge below refers to a node
# that already exists. The named finish_node is used instead of an
# anonymous pass-through lambda that duplicated it.
graph_builder.add_node("Answer", answer_question)
graph_builder.add_node("Finish", finish_node)

# Wire the nodes in execution order.
graph_builder.set_entry_point("Answer")
graph_builder.add_edge("Answer", "Finish")
graph_builder.set_finish_point("Finish")


graph = graph_builder.compile()

In [23]:
# Render the compiled workflow as ASCII art to visually confirm the wiring
# (entry -> Answer -> Finish -> end).
drawable_graph = graph.get_graph()
drawable_graph.print_ascii()

+-----------+  
| __start__ |  
+-----------+  
      *        
      *        
      *        
  +--------+   
  | Answer |   
  +--------+   
      *        
      *        
      *        
  +--------+   
  | Finish |   
  +--------+   
      *        
      *        
      *        
 +---------+   
 | __end__ |   
 +---------+   


In [25]:
# Interactive session: fetch one page, then answer questions about it until
# the user types 'exit' or 'quit'.
if __name__ == "__main__":
    url = input("Enter a website URL to search from (e.g., https://en.wikipedia.org/wiki/LangChain): ").strip()
    context = crawl_web_content(url)

    # crawl_web_content signals problems through sentinel strings instead of
    # raising, so inspect its result before claiming the context is loaded.
    crawl_failed = (
        context.strip() == ""
        or context.startswith("Failed to fetch content:")
        or context == "No relevant content found."
    )

    if crawl_failed:
        # Entering the question loop would be pointless: every answer would
        # be the same failure message.
        print(f"\nCould not load usable content from {url!r}: {context}\n")
    else:
        print("\nContext loaded. You can now ask questions. Type 'exit' to quit.\n")
        while True:
            # Strip so that inputs such as " exit " still end the session.
            user_input = input("Ask a question: ").strip()
            if user_input.lower() in ["exit", "quit"]:
                print("Goodbye!See you next time.")
                break
            if not user_input:
                # Nothing was typed; prompt again instead of querying the model.
                continue
            state = {"user_input": user_input, "context": context}
            final_state = graph.invoke(state)
            print(f"Question: {user_input}")
            print(f"Answer: {final_state['answer']}\n")


Context loaded. You can now ask questions. Type 'exit' to quit.

Question: what is langchain?
Answer: a software framework

Goodbye!See you next time.
