In [2]:
import os
from uuid import uuid4
from langchain_community.document_loaders import PyPDFLoader

from langchain_ollama import OllamaEmbeddings
from langchain_ollama import ChatOllama

import chromadb
from tqdm import tqdm
import logging

from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.callbacks.manager import CallbackManager

from langchain_chroma import Chroma
from langchain.text_splitter import (
    RecursiveCharacterTextSplitter,
)
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

### Logging

In [3]:
logger = logging.getLogger()
logging.basicConfig(level=logging.DEBUG)

### Config

In [4]:
config = {
    "data": {
        "data_dir": r"data\pdfs",
        "persist_directory": "chroma_langchain_db",
    },
    "train": {
        "output_document_from_vector_store": 5,
        "chunk_size": 1000,
        "chunk_overlap": 10,
    },
    "model": {
        "embedding_model": "mxbai-embed-large",
        "llm_model": "smollm:1.7b",
    },
}

### Modify the Path here

In [5]:
from pathlib import Path

data_dir = Path("__file__").resolve().parent.parent.parent / config["data"]["data_dir"]
persist_directory = (
    Path("__file__").resolve().parent.parent.parent / config['data']["persist_directory"]
)
config["data"]["data_dir"] = data_dir
config['data']["persist_directory"] = persist_directory

In [6]:
config

{'data': {'data_dir': WindowsPath('D:/Python/InsightAI/data/pdfs'),
  'persist_directory': WindowsPath('D:/Python/InsightAI/chroma_langchain_db')},
 'train': {'output_document_from_vector_store': 5,
  'chunk_size': 1000,
  'chunk_overlap': 10},
 'model': {'embedding_model': 'mxbai-embed-large', 'llm_model': 'smollm:1.7b'}}

In [7]:
class_name_mapping_dict = {
    "c0": "Alienware alpha or Alienware steam machine",
    "c1": "XPS 27 7760",
    "c2": "Alienware 13 R3",
    "c3": "Dell Alienware m16 R1",
    "c4": "Alienware m17 R4",
    "c5": "Alienware x17 R2",
    "c6": "Chromebook 11 3180",
    "c7": "Dell G15 5510",
    "c8": "ASUS ROG Strix SCAR 17 (2023)",
    "c9": "ROG Zephyrus G16 (2024) GU605",
    "c10": "Dell XPS 13 9370",
    "c11": "Dell XPS 14 9440",
    "c12": "Dell XPS 15 9500",
    "c13": "Dell XPS 16 9640",
    "c14": "XPS 17 9730",
    "c15": "Dell Alienware m16 R2",
    "c16": "Alienware x14 R2",
}

In [8]:
embeddings = OllamaEmbeddings(
    model=config['model']["embedding_model"],
)

callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# llm = ChatOllama(model=config['model']["llm_model"], callbacks=callback_manager)
llm = ChatOllama(model=config['model']["llm_model"])

DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='c:\\Users\\sugam\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\insightai-WytcrPNm-py3.12\\Lib\\site-packages\\certifi\\cacert.pem'
DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='c:\\Users\\sugam\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\insightai-WytcrPNm-py3.12\\Lib\\site-packages\\certifi\\cacert.pem'
DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='c:\\Users\\sugam\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\insightai-WytcrPNm-py3.12\\Lib\\site-packages\\certifi\\cacert.pem'
DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='c:\\Users\\sugam\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\insightai-WytcrPNm-py3.12\\Lib\\site-packages\\certifi\\cace

## DO NOT RUN THE CELL BELOW Twice!!

In [None]:
def get_data():
    """Iterate over the data directory. Splits the pdf's and returns list of documents.
    Args
    ----
    None

    Returns
    -------
    documents: list
        List of splitted documents.
    """

    documents: list = []


    text_splitter = RecursiveCharacterTextSplitter(

        chunk_size=config["train"]["chunk_size"],

        chunk_overlap=config["train"]["chunk_overlap"],

        length_function=len,
    )

    class_abbreviation: list[str] = os.listdir(config['data']["data_dir"])


    for item in tqdm(class_abbreviation):

        path_till_individual_folder: str = config['data']["data_dir"] / item

        for individual_pdf in os.listdir(path_till_individual_folder):

            actual_name_pdf: str = class_name_mapping_dict[item].strip()

            loader = PyPDFLoader(
                os.path.join(path_till_individual_folder, individual_pdf)
            )

            temp_docs = loader.load()

            splitted_docs = text_splitter.split_documents(temp_docs)

            for doc in splitted_docs:
                doc.metadata["category"] = actual_name_pdf
                doc.metadata.pop("source")
                doc.metadata.pop("page")

            documents.extend(splitted_docs)

    logger.info(f"The total length of the extracted pdf: {len(documents)}")
    return documents

documents = get_data()
uuids = [str(uuid4()) for _ in range(len(documents))]

In [None]:
documents[0]

In [9]:
# Initiate Vector Store
persistent_client = chromadb.PersistentClient(path=str(config['data']["persist_directory"]))
collection = persistent_client.get_or_create_collection("InsightAICollection")
vector_store_from_client = Chroma(
    client=persistent_client,
    collection_name="test_collection",
    embedding_function=embeddings,
)

INFO:chromadb.telemetry.product.posthog:Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
DEBUG:chromadb.config:Starting component System
DEBUG:chromadb.config:Starting component Posthog
DEBUG:chromadb.config:Starting component OpenTelemetryClient
DEBUG:chromadb.config:Starting component SqliteDB
DEBUG:chromadb.config:Starting component QuotaEnforcer
DEBUG:chromadb.config:Starting component LocalSegmentManager
DEBUG:chromadb.config:Starting component SegmentAPI
DEBUG:chromadb.api.segment:Collection InsightAICollection already exists, returning existing collection.
DEBUG:chromadb.api.segment:Collection test_collection already exists, returning existing collection.


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): us.i.posthog.com:443
DEBUG:urllib3.connectionpool:https://us.i.posthog.com:443 "POST /batch/ HTTP/11" 200 15


In [10]:
persistent_client.heartbeat()

1727855055407903300

### Uncomment the below cell to add documents. 

In [None]:
# for doc, uuid in tqdm(zip(documents, uuids), total=len(documents), desc="Adding documents"):
#     vector_store_from_client.add_documents(documents=[doc], ids=[uuid])

In [12]:
query = "What is the RAM of the model?"
image_class = "Alienware alpha or Alienware steam machine"

In [13]:
retrieved_docs = vector_store_from_client.similarity_search_with_score(
    query=query, k=5, filter={"category": image_class}
)
query_embeddings = embeddings.embed_query(query)
retrieved_docs_from_embeddings = vector_store_from_client.similarity_search_by_vector(
    query_embeddings, k=5, filter={"category": image_class}
)
print(retrieved_docs)
print(retrieved_docs_from_embeddings)

DEBUG:httpcore.connection:close.started
DEBUG:httpcore.connection:close.complete
DEBUG:httpcore.connection:connect_tcp.started host='127.0.0.1' port=11434 local_address=None timeout=None socket_options=None
DEBUG:httpcore.connection:connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x000002AC01F78710>
DEBUG:httpcore.http11:send_request_headers.started request=<Request [b'POST']>
DEBUG:httpcore.http11:send_request_headers.complete
DEBUG:httpcore.http11:send_request_body.started request=<Request [b'POST']>
DEBUG:httpcore.http11:send_request_body.complete
DEBUG:httpcore.http11:receive_response_headers.started request=<Request [b'POST']>
DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Content-Type', b'application/json; charset=utf-8'), (b'Date', b'Wed, 02 Oct 2024 07:47:46 GMT'), (b'Transfer-Encoding', b'chunked')])
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
DEBUG:httpcore

[(Document(metadata={'category': 'Alienware alpha or Alienware steam machine'}, page_content='Specifications Views\nSystem  \nInformationMemoryPorts and  \nConnectorsDimensions and \nWeightStorage Communications Video Audio\nPower AdapterComputer \nEnvironmentMemory\nConnector Two SODIMM slots\nType DDR3L\nSpeed 1600 MHz\nConfigurations supported 2 GB, 4 GB, 8 GB, and 16 GB'), 0.5036019086837769), (Document(metadata={'category': 'Alienware alpha or Alienware steam machine'}, page_content='Specifications Views\nSystem  \nInformationMemoryPorts and  \nConnectorsDimensions and \nWeightStorage Communications Video Audio\nPower AdapterComputer \nEnvironmentVideo\nController NVIDIA GeForce GPU\nMemory 2 GB GDDR5'), 0.6564028859138489), (Document(metadata={'category': 'Alienware alpha or Alienware steam machine'}, page_content='Specifications Views\nSystem  \nInformationMemoryPorts and  \nConnectorsDimensions and \nWeightStorage Communications Video Audio\nPower AdapterComputer \nEnvironmentS

In [14]:
query = "What are the steps to remove the base cover ?"
image_class = "Dell Alienware m16 R1"
retriever = vector_store_from_client.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5, "filter": {"category": image_class}},
)
retrived_query = retriever.invoke(query)
formatated_docs = "\n\n".join(doc.page_content for doc in retrived_query)
response = llm.invoke(f"""You are an assistant for question-answering tasks.
            Use the following pieces context to answer the question.
            If the context does not contain answer, just say that you don't know. Do not add anything on your own.\n\n
            Question: {query}
           Context:{formatated_docs}
           """)

DEBUG:httpcore.connection:close.started
DEBUG:httpcore.connection:close.complete
DEBUG:httpcore.connection:connect_tcp.started host='127.0.0.1' port=11434 local_address=None timeout=None socket_options=None
DEBUG:httpcore.connection:connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x000002AC01F7B650>
DEBUG:httpcore.http11:send_request_headers.started request=<Request [b'POST']>
DEBUG:httpcore.http11:send_request_headers.complete
DEBUG:httpcore.http11:send_request_body.started request=<Request [b'POST']>
DEBUG:httpcore.http11:send_request_body.complete
DEBUG:httpcore.http11:receive_response_headers.started request=<Request [b'POST']>
DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Content-Type', b'application/json; charset=utf-8'), (b'Date', b'Wed, 02 Oct 2024 07:47:59 GMT'), (b'Transfer-Encoding', b'chunked')])
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
DEBUG:httpcore

In [17]:
print(response.content)

Here is the answer to your question:

**Question:** What are the steps to remove the base cover ?

**Context:** Major components of Alienware m16 R1 ...............................
Base cover ...............................
Removing the base cover ...............
Installing the base cover ...............

1.Remove the six screws (M2.5x5) that secure the base cover to the palm-rest and keyboard assembly.
2.Loosen the two captive screws (M2.5x8) that secure the base cover to the palm-rest and keyboard assembly.
3.Using a plastic scribe, pry the base cover from the bottom left and continue to work on the sides to open the base cover.
4.Slide and lift the base cover off the palm-rest and keyboard assembly.
5.Peel the tape that secures the battery cable to the battery.
6.Disconnect the battery cable from the system board.

**Steps:**

1. Remove the base cover .
2. Follow the procedure in After working inside your computer .
3. Remove the base cover .
4. Remove the battery .
5. Remove the sp