### This jupiter notebook is to make vector search locally

# important libraries


In [2]:
import os 
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
import json
from pathlib import Path
from pprint import pprint
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from sentence_transformers import SentenceTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
from langchain_groq import ChatGroq
from langchain.chains.retrieval_qa.base import RetrievalQA


  from tqdm.autonotebook import tqdm, trange


In [3]:
# Uploading json source

file_path='all_tables.json'
data = json.loads(Path(file_path).read_text())


# turning json source into text format 

texts = [json.dumps(item) for item in data]

In [4]:
# embedding model

embeddings = FastEmbedEmbeddings(model_name="BAAI/bge-base-en-v1.5")

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

In [None]:
# optional 

# ## model = SentenceTransformer('all-MiniLM-L6-v2')

In [5]:
# splitting text source into chunks

documents = [Document(page_content=text) for text in texts]

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,
    chunk_overlap=200
)

splits = text_splitter.split_documents(documents)

In [41]:
len(splits)

2394

In [7]:
# llm. open source llama3 via chatgroq

llm = ChatGroq(
                    temperature=0,
                    model="llama3-70b-8192",
                    api_key="CHAT_GROQ_API_KEY"
                )

In [10]:
# use of llm for any topic ( not tuned with source)

llm.invoke("cement").content

"Cement is a binding agent that is used to hold together aggregates such as sand, gravel, and crushed stone to form concrete. It is a fine powder that is produced by grinding limestone, clay, and other minerals into a fine powder.\n\nHere are some interesting facts about cement:\n\n1. **History**: The earliest known use of cement dates back to around 2500 BC in ancient Egypt. The Egyptians used a mixture of lime, gypsum, and water to create a mortar that was used to build the pyramids.\n2. **Composition**: Cement is typically composed of limestone (60-70%), clay (15-20%), silica (5-10%), and other minerals such as iron oxide and alumina.\n3. **Manufacturing process**: Cement is manufactured through a process known as calcination, where limestone and clay are heated to high temperatures (around 1450Â°C) to produce clinker. The clinker is then ground into a fine powder to produce cement.\n4. **Types of cement**: There are several types of cement, including:\n\t* Portland cement: the most

In [18]:
# Creating vector store

vector_store = FAISS.from_texts([split.page_content for split in splits], embeddings)

In [22]:
# Saving vector store on our local. it will take time in the first time creating vector store. so that, i saved our vector store for using after.

vector_store.save_local("vector_store")

In [6]:
# Loading saved vector store 

vectorstore = FAISS.load_local(folder_path="vector_store", embeddings=embeddings,allow_dangerous_deserialization=True)

In [8]:
#This function searches a vector store for the most relevant results to a query, 
#   sorts them by similarity score in descending order, and returns the top N results.

K = 10
N = 5

query = " "

def search_in_vector_store(query, top_n = N):
    chunk_text = vectorstore.similarity_search_with_score(query=query,k=K)
    sorted_results = sorted(chunk_text, key=lambda x: x[1], reverse=True)
    top_results = sorted_results[:top_n]
    return top_results

chunk_text = search_in_vector_store(query)  


[(Document(page_content='{"description": "(Maximum 20% water absorption (indoors), 15 N/mm\\u00b2) bending strength)", "unit": "", "purchased at": "", "price": ""}, {"description": "Plain vitrified tiles, any color", "unit": "m\\u00b2", "purchased at": "On the job", "price": "100,00"}, {"description": "(20 cm x 20 cm)", "unit": "", "purchased at": "", "price": ""}, {"description": "Plain vitrified curbs, any color", "unit": "m\\u00b2", "purchased at": "On the job", "price": "107,00"}, {"description": "(20 cm x 20 cm)", "unit": "", "purchased at": "", "price": ""}, {"description": "Plain vitrified corner, any color", "unit": "m\\u00b2", "purchased at": "On the job", "price": "113,00"}, {"description": "(10 cm x 10 cm)", "unit": "", "purchased at": "", "price": ""}, {"description": "Plain, embossed, vitrified tiles, any color", "unit": "m\\u00b2", "purchased at": "On the job", "price": "107,00"}, {"description": "(20 cm x 20 cm)", "unit": "", "purchased at": "", "price": ""}, {"descripti

In [9]:
# below, most related chunks from our source. 

chunk_text

In [44]:
# Below code is a simple rag system for query a material with the help of llm and vector store that we created.

query = "price of 4 GN 1/3 tub Chilled Garnish. only give the price."



qa_chain = RetrievalQA.from_chain_type(llm, chain_type="stuff", retriever=vectorstore.as_retriever())


result = qa_chain.run(query)
print(result)

690,63
