In [9]:
from langchain_community.document_loaders import BSHTMLLoader, DirectoryLoader

class HTMLDirectoryLoader:
    def __init__(self, directory):
        self.directory = directory

    def load(self):
        loader = DirectoryLoader(path=self.directory, glob="**/*.html", loader_cls=BSHTMLLoader)
        documents = loader.load()
        return documents

In [10]:
import os

input_dir = os.path.join(os.getcwd(), 'data')
db_dir = os.path.join(os.getcwd(), 'database')
loader = HTMLDirectoryLoader(input_dir)
documents = loader.load()

len(documents)

17

In [11]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200000,
    chunk_overlap=70,
    length_function=len,
)

chunks = text_splitter.split_documents(documents)

len(chunks)

17

In [12]:
from langchain_community.embeddings.ollama import OllamaEmbeddings

embedding_func = OllamaEmbeddings(show_progress=True, model='mistral')
embedding_func

OllamaEmbeddings(base_url='http://localhost:11434', model='mistral', embed_instruction='passage: ', query_instruction='query: ', mirostat=None, mirostat_eta=None, mirostat_tau=None, num_ctx=None, num_gpu=None, num_thread=None, repeat_last_n=None, repeat_penalty=None, temperature=None, stop=None, tfs_z=None, top_k=None, top_p=None, show_progress=True, headers=None, model_kwargs=None)

In [9]:
from langchain_community.vectorstores import Chroma

vectordb = Chroma.from_documents(chunks, embedding=embedding_func, persist_directory=db_dir)

vectordb

OllamaEmbeddings: 100%|██████████| 17/17 [06:07<00:00, 21.62s/it]


<langchain_community.vectorstores.chroma.Chroma at 0x74f84a4e1210>

In [13]:
from langchain.prompts import PromptTemplate

PROMPT_TEMPLATE = """
Answer the question based on the context below. If you can't answer the question, reply "I don't know".
Ensure your responses are clear, concise, and helpful.

Context: {context}

Question: {question}

"""

pt = PromptTemplate(
            template=PROMPT_TEMPLATE, input_variables=["context", "question"]
    )

pt

PromptTemplate(input_variables=['context', 'question'], template='\nAnswer the question based on the context below. If you can\'t answer the question, reply "I don\'t know".\nEnsure your responses are clear, concise, and helpful.\n\nContext: {context}\n\nQuestion: {question}\n\n')

In [14]:
from langchain.chains.retrieval_qa.base import RetrievalQA
from langchain.memory import ConversationSummaryMemory
from langchain_ollama.llms import OllamaLLM
from langchain_community.vectorstores import Chroma

model = OllamaLLM(model='mistral')

database = Chroma(embedding_function=embedding_func, persist_directory=db_dir)

rag = RetrievalQA.from_chain_type(
            llm=model,
            retriever=database.as_retriever(),
            memory=ConversationSummaryMemory(llm = model),
            chain_type_kwargs={"prompt": pt, "verbose": True},
            
        )

rag.invoke("What are features of angular?")

OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  1.19it/s]




[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Answer the question based on the context below. If you can't answer the question, reply "I don't know".
Ensure your responses are clear, concise, and helpful.

Context: Cache API callsIntroductionUse Angular CLIMaintain proper folder structureFollow consistent Angular coding stylesUse ES6 featuresUse trackBy along with ngForBreak down into small reusable componentsUse Lazy LoadingUse Index.tsAvoid logic in templates Cache API callsUse async pipe in templatesDeclare safe stringsAvoid any type when declaring constants and variablesState managementUse CDK Virtual Scroll Cache API callsResponses from some API calls do not change frequently. In those cases, we can add a caching mechanism and store the value from an API. When another request to the same API is made, we get a response from the check. If there is no value available in the cache then we make an

{'query': 'What are features of angular?',
 'history': '',
 'result': ' The features of Angular mentioned in the context include:\n\n1. Proper folder structure and consistent Angular coding styles.\n2. Use of ES6 features (e.g., Arrow Functions, String interpolation, Object Literals, Let and Const, Destructuring, Default).\n3. Utilizing trackBy along with ngFor to break down components into small, reusable pieces.\n4. Lazy Loading for optimized performance.\n5. Use of async pipe in templates for efficient handling of asynchronous data.\n6. Declaring safe strings and avoiding using the "any" type when declaring constants or variables for better type safety.\n7. Utilizing CDK Virtual Scroll for efficient rendering of large lists.'}