In [1]:
from langchain_community.document_loaders import BSHTMLLoader, DirectoryLoader

class HTMLDirectoryLoader:
    def __init__(self, directory):
        self.directory = directory

    def load(self):
        loader = DirectoryLoader(path=self.directory, glob="**/*.html", loader_cls=BSHTMLLoader)
        documents = loader.load()
        return documents

In [2]:
import os

input_dir = os.path.join(os.getcwd(), 'data')
loader = HTMLDirectoryLoader(input_dir)
documents = loader.load()

len(documents)

17

In [3]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=20000,
    chunk_overlap=200,
    length_function=len,
)

chunks = text_splitter.split_documents(documents)

chunks[1].to_json()

{'lc': 1,
 'type': 'constructor',
 'id': ['langchain', 'schema', 'document', 'Document'],
 'kwargs': {'metadata': {'source': '/home/metapercept/ChatbotTest/data/use_lazy_loading.html',
   'title': 'Use Lazy Loading'},
  'page_content': "Use Lazy LoadingIntroductionUse Angular CLIMaintain proper folder structureFollow consistent Angular coding stylesUse ES6 featuresUse trackBy along with ngForBreak down into small reusable componentsUse Lazy LoadingUse Index.tsAvoid logic in templates Cache API callsUse async pipe in templatesDeclare safe stringsAvoid any type when declaring constants and variablesState managementUse CDK Virtual ScrollUse Lazy LoadingTry to lazy load the modules in an Angular application whenever possible. Lazy loading loads something only when it is used. This reduces the size of the application load initial time and improves the application boot time by not loading unused modules.Without lazy loading// app.routing.tsimport { WithoutLazyLoadedComponent } from './withou

In [4]:
import chromadb
import ollama

chroma_client = chromadb.Client()

collection = chroma_client.create_collection(name='docs')

inputs = [x.page_content for x in chunks]

for i, d in enumerate(inputs):
  response = ollama.embeddings(model="llama2", prompt=d)
  embedding = response["embedding"]
  collection.add(
    ids=[str(i)],
    embeddings=[embedding],
    documents=[d]
  )
  print(collection)

Collection(id=276e8206-0572-4352-92b9-db0645bb1aa5, name=docs)
Collection(id=276e8206-0572-4352-92b9-db0645bb1aa5, name=docs)
Collection(id=276e8206-0572-4352-92b9-db0645bb1aa5, name=docs)
Collection(id=276e8206-0572-4352-92b9-db0645bb1aa5, name=docs)
Collection(id=276e8206-0572-4352-92b9-db0645bb1aa5, name=docs)
Collection(id=276e8206-0572-4352-92b9-db0645bb1aa5, name=docs)
Collection(id=276e8206-0572-4352-92b9-db0645bb1aa5, name=docs)
Collection(id=276e8206-0572-4352-92b9-db0645bb1aa5, name=docs)
Collection(id=276e8206-0572-4352-92b9-db0645bb1aa5, name=docs)
Collection(id=276e8206-0572-4352-92b9-db0645bb1aa5, name=docs)
Collection(id=276e8206-0572-4352-92b9-db0645bb1aa5, name=docs)
Collection(id=276e8206-0572-4352-92b9-db0645bb1aa5, name=docs)
Collection(id=276e8206-0572-4352-92b9-db0645bb1aa5, name=docs)
Collection(id=276e8206-0572-4352-92b9-db0645bb1aa5, name=docs)
Collection(id=276e8206-0572-4352-92b9-db0645bb1aa5, name=docs)
Collection(id=276e8206-0572-4352-92b9-db0645bb1aa5, nam

In [5]:
col1 = chroma_client.count_collections()
col1

1

In [5]:
prompt = "What is Angular?"

response = ollama.embeddings(
  prompt=prompt,
  model="llama2"
)

results = collection.query(
  query_embeddings=[response["embedding"]],
  n_results=5
)

data = results['documents'][0]

data

['Angular Standards\n\n\nAngular Standards\n\n\nIntroduction\nUse Angular CLI\nMaintain proper folder structure\nFollow consistent Angular coding styles\nUse ES6 features\nUse trackBy along with ngFor\nBreak down into small reusable components\nUse Lazy Loading\nUse Index.ts\nAvoid logic in templates\n Cache API calls\nUse async pipe in templates\nDeclare safe strings\nAvoid any type when declaring constants and variables\nState management\nUse CDK Virtual Scroll',
 'State managementIntroductionUse Angular CLIMaintain proper folder structureFollow consistent Angular coding stylesUse ES6 featuresUse trackBy along with ngForBreak down into small reusable componentsUse Lazy LoadingUse Index.tsAvoid logic in templates Cache API callsUse async pipe in templatesDeclare safe stringsAvoid any type when declaring constants and variablesState managementUse CDK Virtual ScrollState managementOne of the most challenging things in software development is state management. State management in Angular

In [6]:
output = ollama.generate(
  model="mistral",
  prompt=f"Using this data: {data}. Respond to this prompt: {prompt}"
)
output['response']

' Angular is a powerful open-source web application framework developed by Google. It allows developers to build dynamic, scalable, and maintainable applications using TypeScript and JavaScript. The framework provides various features such as data binding, dependency injection, modularity, routing, and reusable components that make it easy to develop complex web applications quickly and efficiently. Angular is based on the Model-View-Controller (MVC) architecture and is known for its flexibility, performance, and robustness. It also comes with a command-line tool called Angular CLI, which simplifies the creation, development, testing, and debugging of Angular applications.'