### Import libraries


In [1]:
import os
import sys
from getpass import getpass

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.chat_models import ChatOllama
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_community.vectorstores import Chroma

### Split the document into chunks & store them in a vector store

In [2]:
def ingest(
    pdf_path: str = "toyota.pdf",
    persist_directory: str = "./sql_chroma_db",
    chunk_size: int = 1024,
    chunk_overlap: int = 100,
) -> None:
    """Load a PDF, split it into overlapping chunks and persist them in Chroma.

    Args:
        pdf_path: Path of the PDF file to index.
        persist_directory: Directory in which the Chroma store is persisted.
        chunk_size: Maximum chunk length, measured with ``len`` (characters).
        chunk_overlap: Number of characters shared by consecutive chunks.

    Returns:
        None. A one-line summary is printed and the embedded chunks are
        written to ``persist_directory``.

    Note:
        Chunks are added to whatever is already stored in
        ``persist_directory``; running this twice for the same PDF is
        expected to store every chunk twice, so run it once per document.
    """
    # Load the raw pages. `load()` is used rather than `load_and_split()`,
    # which would pre-split the text with a default splitter before the
    # splitter configured below runs: the text would be split twice and
    # `start_index` would be relative to an intermediate chunk, not the page.
    pages = PyPDFLoader(pdf_path).load()

    # Split the pages by character count.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        add_start_index=True,  # record each chunk's offset in its metadata
    )
    chunks = text_splitter.split_documents(pages)
    print(f"Split {len(pages)} documents into {len(chunks)} chunks.")

    # Embed the chunks and write them to the persistent vector store.
    embedding = FastEmbedEmbeddings()
    Chroma.from_documents(
        documents=chunks,
        embedding=embedding,
        persist_directory=persist_directory,
    )

In [6]:
# Build the vector store only when it has not been persisted yet, so that
# "Restart Kernel -> Run All" neither repeats the embedding work nor adds the
# same chunks to the store a second time. Delete ./sql_chroma_db to re-index.
if not os.path.isdir("./sql_chroma_db"):
    ingest()

Split 6 documents into 11 chunks.


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

In [8]:
from huggingface_hub import login

# SECURITY: an access token used to be hardcoded in this cell. A token that
# has been saved in a notebook must be treated as leaked -- revoke it in the
# Hugging Face account settings and create a new one.
# The token is now taken from the HF_TOKEN environment variable, falling back
# to an interactive prompt whose input is not echoed or stored in the notebook.
access_token_read = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")
# The original cell assigned the same value to both names; the second name is
# kept in case another cell refers to it.
access_token_write = access_token_read
login(token=access_token_read)

HTTPError: Invalid user token. If you didn't pass a user token, make sure you are properly logged in by executing `huggingface-cli login`, and if you did pass a user token, double-check it's correct.

### Create a RAG chain that retrieves relevant chunks and prepares a response

In [3]:
def rag_chain(
    model_name: str = "llama3",
    persist_directory: str = "./sql_chroma_db",
    k: int = 3,
    score_threshold: float = 0.5,
):
    """Build a retrieval chain over the persisted Chroma store.

    Args:
        model_name: Name of the Ollama chat model used to write the answer.
        persist_directory: Directory of the Chroma store to query.
        k: Maximum number of chunks handed to the model as context.
        score_threshold: Minimum relevance score a chunk needs to be retrieved.

    Returns:
        The chain created by ``create_retrieval_chain``. It is invoked with
        ``{"input": question}``; callers in this notebook read the
        ``"answer"`` and ``"context"`` keys of the result.
    """
    model = ChatOllama(model=model_name)

    # `{input}` is the user's question; `{context}` is filled by the
    # stuff-documents chain with the retrieved chunks.
    prompt = PromptTemplate.from_template(
        """
        <s> [Instructions] You are a friendly assistant. Answer the question based only on the following context. 
        If you don't know the answer, then reply, No Context available for this question {input}. [/Instructions] </s> 
        [Instructions] Question: {input} 
        Context: {context} 
        Answer: [/Instructions]
        """
    )

    # Load the vector store with the same embedding class used at ingest time.
    embedding = FastEmbedEmbeddings()
    vector_store = Chroma(
        persist_directory=persist_directory,
        embedding_function=embedding,
    )

    # Retrieve at most `k` chunks, dropping those scored below the threshold.
    retriever = vector_store.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={
            "k": k,
            "score_threshold": score_threshold,
        },
    )

    # Put the retrieved chunks into the prompt, then wire retriever and model together.
    document_chain = create_stuff_documents_chain(model, prompt)
    chain = create_retrieval_chain(retriever, document_chain)
    return chain


In [7]:
# NOTE(review): scratch cell that only prints "Test"; nothing else visible in
# this notebook depends on it, so it can be removed before sharing.
print("Test")

Test


In [6]:
# NOTE(review): second scratch cell that only prints "Test"; nothing else
# visible in this notebook depends on it, so it can be removed before sharing.
print("Test")

Test


In [4]:
def ask(query: str):
    """Answer ``query`` with a freshly built RAG chain and print the result.

    Prints the model's answer first, then one ``Source:`` line for every
    retrieved context document.
    """
    # A new chain is built on every call, then invoked with the question.
    response = rag_chain().invoke({"input": query})

    print(response["answer"])
    # One line per retrieved chunk, naming the file it came from.
    for source in (chunk.metadata["source"] for chunk in response["context"]):
        print("Source: ", source)

### Ask questions to get relevant information from the document

In [8]:
# Example query answered from the persisted vector store.
# NOTE(review): the recorded output cites "data\SQL Server 2012 T-SQL Recipes.pdf",
# while ingest() loads "toyota.pdf" -- the store queried here was presumably
# built by an earlier version of the ingest cell; confirm before relying on it.
ask("How do we check null values?")

Fetching 9 files:   0%|          | 0/9 [00:00<?, ?it/s]

According to the provided context, to check null values, you can use the unary operators IS NULL and IS NOT NULL. Specifically, IS NULL returns true if the operand is NULL, and IS NOT NULL returns true if the operand is defined.

In the example given, you would search for NULL values in a table by using the following syntax:

`SELECT ... FROM ... WHERE MiddleName IS NULL;`

This statement will return all rows from the `Person` table where the `MiddleName` column contains a null value.
Source:  data\SQL Server 2012 T-SQL Recipes.pdf
Source:  data\SQL Server 2012 T-SQL Recipes.pdf
Source:  data\SQL Server 2012 T-SQL Recipes.pdf


In [9]:
# Second example query; ask() builds a new chain for each call.
ask("How can we perform wildcard search in SQL?")

Fetching 9 files:   0%|          | 0/9 [00:00<?, ?it/s]

You can perform a wildcard search in SQL using the LIKE predicate. The available wildcards are:

* `%`: Represents a string of zero or more characters
* `_`: Represents a single character
* `[]`: A list of characters enclosed within square brackets, representing a single character from among any in the list.

Create a string using these wildcards to serve as a search expression and use it with the LIKE predicate to find all values that match that pattern.
Source:  data\SQL Server 2012 T-SQL Recipes.pdf
Source:  data\SQL Server 2012 T-SQL Recipes.pdf
Source:  data\SQL Server 2012 T-SQL Recipes.pdf
