In [1]:
from langchain_community.document_loaders import BSHTMLLoader, DirectoryLoader

class HTMLDirectoryLoader:
    def __init__(self, directory):
        self.directory = directory

    def load(self):
        loader = DirectoryLoader(path=self.directory, glob="**/*.html", loader_cls=BSHTMLLoader)
        documents = loader.load()
        return documents

In [2]:
import os

input_dir = os.path.join(os.getcwd(), 'data')
loader = HTMLDirectoryLoader(input_dir)
documents = loader.load()

len(documents)

17

In [3]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=20000,
    chunk_overlap=200,
    length_function=len,
)

chunks = text_splitter.split_documents(documents)

chunks[1].to_json()

{'lc': 1,
 'type': 'constructor',
 'id': ['langchain', 'schema', 'document', 'Document'],
 'kwargs': {'metadata': {'source': '/home/metapercept/ChatbotTest/data/use_lazy_loading.html',
   'title': 'Use Lazy Loading'},
  'page_content': "Use Lazy LoadingIntroductionUse Angular CLIMaintain proper folder structureFollow consistent Angular coding stylesUse ES6 featuresUse trackBy along with ngForBreak down into small reusable componentsUse Lazy LoadingUse Index.tsAvoid logic in templates Cache API callsUse async pipe in templatesDeclare safe stringsAvoid any type when declaring constants and variablesState managementUse CDK Virtual ScrollUse Lazy LoadingTry to lazy load the modules in an Angular application whenever possible. Lazy loading loads something only when it is used. This reduces the size of the application load initial time and improves the application boot time by not loading unused modules.Without lazy loading// app.routing.tsimport { WithoutLazyLoadedComponent } from './withou

In [4]:
import chromadb
import ollama

chroma_client = chromadb.Client()

collection = chroma_client.create_collection(name='docs')

inputs = [x.page_content for x in chunks]

for i, d in enumerate(inputs):
  response = ollama.embeddings(model="mxbai-embed-large", prompt=d)
  embedding = response["embedding"]
  collection.add(
    ids=[str(i)],
    embeddings=[embedding],
    documents=[d]
  )
  print(collection)

Collection(id=d214bf03-ebb6-4b4c-84b2-35f52c66d399, name=docs)
Collection(id=d214bf03-ebb6-4b4c-84b2-35f52c66d399, name=docs)
Collection(id=d214bf03-ebb6-4b4c-84b2-35f52c66d399, name=docs)
Collection(id=d214bf03-ebb6-4b4c-84b2-35f52c66d399, name=docs)
Collection(id=d214bf03-ebb6-4b4c-84b2-35f52c66d399, name=docs)
Collection(id=d214bf03-ebb6-4b4c-84b2-35f52c66d399, name=docs)
Collection(id=d214bf03-ebb6-4b4c-84b2-35f52c66d399, name=docs)
Collection(id=d214bf03-ebb6-4b4c-84b2-35f52c66d399, name=docs)
Collection(id=d214bf03-ebb6-4b4c-84b2-35f52c66d399, name=docs)
Collection(id=d214bf03-ebb6-4b4c-84b2-35f52c66d399, name=docs)
Collection(id=d214bf03-ebb6-4b4c-84b2-35f52c66d399, name=docs)
Collection(id=d214bf03-ebb6-4b4c-84b2-35f52c66d399, name=docs)
Collection(id=d214bf03-ebb6-4b4c-84b2-35f52c66d399, name=docs)
Collection(id=d214bf03-ebb6-4b4c-84b2-35f52c66d399, name=docs)
Collection(id=d214bf03-ebb6-4b4c-84b2-35f52c66d399, name=docs)
Collection(id=d214bf03-ebb6-4b4c-84b2-35f52c66d399, nam

In [6]:
col1 = chroma_client.count_collections()
col1

1

In [13]:
prompt = "What is Angular?"

response = ollama.embeddings(
  prompt=prompt,
  model="mxbai-embed-large"
)

results = collection.query(
  query_embeddings=[response["embedding"]],
  n_results=5
)

data = results['documents'][0]

data

['Angular Standards\n\n\nAngular Standards\n\n\nIntroduction\nUse Angular CLI\nMaintain proper folder structure\nFollow consistent Angular coding styles\nUse ES6 features\nUse trackBy along with ngFor\nBreak down into small reusable components\nUse Lazy Loading\nUse Index.ts\nAvoid logic in templates\n Cache API calls\nUse async pipe in templates\nDeclare safe strings\nAvoid any type when declaring constants and variables\nState management\nUse CDK Virtual Scroll',
 'Use Angular CLIIntroductionUse Angular CLIMaintain proper folder structureFollow consistent Angular coding stylesUse ES6 featuresUse trackBy along with ngForBreak down into small reusable componentsUse Lazy LoadingUse Index.tsAvoid logic in templates Cache API callsUse async pipe in templatesDeclare safe stringsAvoid any type when declaring constants and variablesState managementUse CDK Virtual ScrollUse Angular CLIAngular CLI is one of the most powerful accessibility tools available when developing apps with Angular. Angu

In [14]:
output = ollama.generate(
  model="mistral",
  prompt=f"Using this data: {data}. Respond to this prompt: {prompt}"
)
output['response']

{'model': 'mistral',
 'created_at': '2024-07-22T12:29:20.397973169Z',
 'response': ' Angular is an open-source web application framework developed by Google, used for building dynamic, single-page applications (SPAs) with Typescript. It provides a complete solution for building scalable and maintainable applications that can be easily tested, debugged, and optimized for performance. Angular follows best practices such as MVC (Model-View-Controller), Dependency Injection, and is extensible with a rich ecosystem of third-party libraries and tools like the Angular CLI. It leverages features from ECMAScript 6 (ES6) to make development faster and easier.',
 'done': True,
 'done_reason': 'stop',
 'context': [3,
  29473,
  10384,
  1224,
  1946,
  29515,
  6704,
  10969,
  1866,
  8381,
  2946,
  29524,
  29479,
  29524,
  29479,
  29524,
  29479,
  10969,
  1866,
  8381,
  2946,
  29524,
  29479,
  29524,
  29479,
  29524,
  29479,
  27998,
  29524,
  29479,
  9311,
  4431,
  1866,
  1102,
 

In [7]:
from langchain.prompts import PromptTemplate

PROMPT_TEMPLATE = """
Answer the question based on the context below. If you can't answer the question, reply "I don't know".
Ensure your responses are clear, concise, and helpful.

Context: {context}

Question: {question}

"""

pt = PromptTemplate(
            template=PROMPT_TEMPLATE, input_variables=["context", "question"]
    )

pt

PromptTemplate(input_variables=['context', 'question'], template='\nAnswer the question based on the context below. If you can\'t answer the question, reply "I don\'t know".\nEnsure your responses are clear, concise, and helpful.\n\nContext: {context}\n\nQuestion: {question}\n\n')