In [11]:
#llm model= tinyllama
#embedding model=all-MiniLM-L6-v2

!pip install langchain langchain-huggingface langchain_community langgraph langchain-text-splitters sentence-transformers

from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline

import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict

from langchain_community.embeddings import HuggingFaceEmbeddings


from langchain_core.vectorstores import InMemoryVectorStore





In [24]:


# Load and chunk contents of the blog
loader = WebBaseLoader(
    web_paths=("https://www.short-story.me/stories/horror-stories/1926-the-mystery-inside-the-fog",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs=loader.load()

# Split into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
all_splits = text_splitter.split_documents(docs)

# ✅ Embedding model wrapped for LangChain
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# ✅ Vector store
vector_store = InMemoryVectorStore(embedding=embedding_model)
vector_store.add_documents(all_splits)

#llm model
llm=HuggingFacePipeline.from_model_id(
    model_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    task="text-generation",
)

model=ChatHuggingFace(llm=llm)

prompt = hub.pull("rlm/rag-prompt")


# Define state for application
class State(TypedDict):
    question: str
    context: List[Document]
    answer: str


# Define application steps
def retrieve(state: State):
    retrieved_docs = vector_store.similarity_search(state["question"])
    return {"context": retrieved_docs}


def generate(state: State):
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])
    messages = prompt.invoke({"question": state["question"], "context": docs_content})
    response = model.invoke(messages)
    return {"answer": response.content}


# Compile application
graph_builder = StateGraph(State).add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
#graph_builder.add_edge("retrieve","generate")
graph = graph_builder.compile()

#test
response = graph.invoke({"question": "What is the name of the story?"})
print(response["answer"])

Device set to use cuda:0


<|user|>
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: What is the name of the story? 
Context:  
Answer:</s>
<|assistant|>
Yes, I can provide you with the context for the question "What is the name of the story?" Here are the relevant pieces of retrieved context:

- The story is being narrated by a person who claims to have lived in a world where they were born the same day as Jesus Christ.
- The person has encountered several challenges in their journey, including a group of people who resent them for their existence, a society that has been destroyed, and a group of individuals who wish to enslave them.
- The story takes place in a post-apocalyptic world, where there are no longer any humans, but instead, a new race of intelligent beings known as "the Ones."
- The person meets another