In [1]:
from langchain_community.document_loaders import BSHTMLLoader, DirectoryLoader

class HTMLDirectoryLoader:
    """Load the HTML files found under a directory as LangChain documents.

    Thin wrapper that wires ``DirectoryLoader`` to ``BSHTMLLoader`` so callers
    only have to supply the directory (and, optionally, a glob pattern).

    Args:
        directory: Directory handed to ``DirectoryLoader`` as ``path``.
        glob: Glob pattern handed to ``DirectoryLoader`` to select files.
            Defaults to ``"**/*.html"``, the pattern that used to be
            hard-coded, so existing callers behave exactly as before.
    """

    def __init__(self, directory, glob="**/*.html"):
        self.directory = directory
        self.glob = glob

    def load(self):
        """Build the underlying ``DirectoryLoader`` and return its ``load()`` result."""
        loader = DirectoryLoader(
            path=self.directory,
            glob=self.glob,
            loader_cls=BSHTMLLoader,
        )
        return loader.load()

In [2]:
import os

# The HTML input and the persisted vector store both live in sub-folders of
# the current working directory.
input_dir, db_dir = (
    os.path.join(os.getcwd(), folder) for folder in ("data", "database")
)

# Load every HTML document found under the input directory.
loader = HTMLDirectoryLoader(directory=input_dir)
documents = loader.load()

# Show how many documents were loaded.
len(documents)

17

In [3]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the documents into chunks of at most 2000 units with a 100-unit
# overlap; `len` is the length function, so the units are characters.
text_splitter = RecursiveCharacterTextSplitter(
    length_function=len,
    chunk_overlap=100,
    chunk_size=2000,
)

chunks = text_splitter.split_documents(documents)

# Show how many chunks the split produced.
len(chunks)

17

In [4]:
from langchain_community.embeddings.ollama import OllamaEmbeddings

# Embedding function backed by the Ollama 'mistral' model; it is reused both
# to build the vector store and to embed queries against it.
embedding_func = OllamaEmbeddings(
    model="mistral",
    show_progress=True,
)

# Display the resolved configuration.
embedding_func

OllamaEmbeddings(base_url='http://localhost:11434', model='mistral', embed_instruction='passage: ', query_instruction='query: ', mirostat=None, mirostat_eta=None, mirostat_tau=None, num_ctx=None, num_gpu=None, num_thread=None, repeat_last_n=None, repeat_penalty=None, temperature=None, stop=None, tfs_z=None, top_k=None, top_p=None, show_progress=True, headers=None, model_kwargs=None)

In [9]:
from langchain_community.vectorstores import Chroma

# Open the Chroma store persisted under `db_dir` (it is created if missing).
vectordb = Chroma(persist_directory=db_dir, embedding_function=embedding_func)

# Embed and insert the chunks only when the collection is still empty.
# Calling `Chroma.from_documents` unconditionally re-embeds every chunk on each
# run (several minutes for this data set) and adds them to the collection that
# is already persisted, so re-running the cell would duplicate entries.
# To rebuild the index after the input data changes, delete `db_dir` first.
if not vectordb.get(limit=1)["ids"]:
    vectordb.add_documents(chunks)

vectordb

OllamaEmbeddings: 100%|██████████| 17/17 [06:07<00:00, 21.62s/it]


<langchain_community.vectorstores.chroma.Chroma at 0x74f84a4e1210>

In [5]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt text with two placeholders, {context} and {question}. It is assembled
# from adjacent string literals; leading and trailing newlines are intentional
# and part of the template value.
PROMPT_TEMPLATE = (
    "\n"
    "Answer the question based on the context below. "
    "If you can't answer the question, reply \"I don't know\".\n"
    "Ensure your responses are clear, concise, and helpful.\n"
    "\n"
    "Context: {context}\n"
    "\n"
    "Question: {question}\n"
    "\n"
)

# Wrap the raw text in a chat prompt template and display it.
prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
prompt

ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template='\nAnswer the question based on the context below. If you can\'t answer the question, reply "I don\'t know".\nEnsure your responses are clear, concise, and helpful.\n\nContext: {context}\n\nQuestion: {question}\n\n'))])

In [8]:
from langchain_ollama.llms import OllamaLLM
from langchain_community.vectorstores import Chroma

query_text = "How state is managed in Angular?"

# Re-open the persisted vector store and fetch the 5 chunks closest to the query.
database = Chroma(embedding_function=embedding_func, persist_directory=db_dir)
results = database.similarity_search_with_score(query_text, k=5)

# Join the retrieved chunk texts into a single context string; the scores
# returned alongside each document are not used.
context_text = "\n\n---\n\n".join(doc.page_content for doc, _ in results)

# Pipe the prompt into the Ollama 'mistral' LLM and ask the question.
model = OllamaLLM(model="mistral")
chain = prompt | model

chain.invoke({"context": context_text, "question": query_text})

OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  1.01it/s]


" In Angular, state is managed using various methods such as services, Subjects, RxJS Observables, Redux, NgRx, or even vanilla JavaScript. However, the most common method is by using services and Subjects. Services are used to share data between components while Subjects are used for broadcasting events in a more efficient manner compared to events. When it comes to managing complex state, libraries like Redux or NgRx can be used for better organization and handling of the application's state. Additionally, the Content Development Kit (CDK) Virtual Scroll can also aid in efficient management of large lists of elements by only rendering the items in view."