In [216]:
import openai, langchain, os
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter 
from langchain_community.embeddings import SentenceTransformerEmbeddings
from pinecone import Pinecone, ServerlessSpec
from langchain_pinecone import PineconeVectorStore
from langchain_community.llms import OpenAI 
from dotenv import load_dotenv 
from sentence_transformers import SentenceTransformer  

In [217]:
model = SentenceTransformer('paraphrase-MiniLM-L3-v2')
embeddings = SentenceTransformerEmbeddings(model_name='paraphrase-MiniLM-L3-v2')

load_dotenv()


False

In [219]:
from langchain.document_loaders import DirectoryLoader, TextLoader, CSVLoader, UnstructuredFileLoader
from langchain_community.document_loaders import PyPDFLoader
from pathlib import Path

def load_docs_from_directory(path):
    def custom_loader(file_path_str):
        file_path = Path(file_path_str)
        ext = file_path.suffix.lower()

        loader_map = {
            ".pdf": PyPDFLoader,
            ".txt": TextLoader,
            ".csv": CSVLoader,
        }

        loader_cls = loader_map.get(ext, UnstructuredFileLoader)
        return loader_cls(str(file_path))

    loader = DirectoryLoader(
        path,
        glob="**/*",
        loader_cls=custom_loader
    )
    documents = loader.load()
    print(documents)
    return documents



In [220]:
def chunk_data(docs, chunk_size=800, chunk_overlap=50):
    text_spliter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    doc = text_spliter.split_documents(docs)
    print(type(doc), type(doc[0]), len(doc))
    
    return doc


In [221]:
data = load_docs_from_directory('./static')
doc_list = chunk_data(docs=data)

vector = embeddings.embed_query("any text")
dimension = len(vector)
print(dimension)

[Document(metadata={'source': 'static\\13.txt'}, page_content='An alien called Jasper landed on Planet Mars in the year 1945.\n\nJasper had 3 heads and 6 legs making it almost impossible for his enemy to chase it. \n\n'), Document(metadata={'source': 'static\\20.txt'}, page_content='During friendly encounters and bonding, tigers rub against each other\'s bodies.[97] Facial expressions include the "defence threat", which involves a wrinkled face, bared teeth, pulled-back ears and widened pupils.[98][47] Both males and females show a flehmen response, a characteristic curled-lip grimace, when smelling urine markings. Males also use the flehmen to detect the markings made by tigresses in oestrus.[47] Tigers will move their ears around to display the white spots, particularly during aggressive encounters and between mothers and cubs.[99] They also use their tails to signal their mood. To show cordiality, the tail sticks up and sways slowly, while an apprehensive tiger lowers its tail or wa

In [None]:
api_key=os.getenv("PINECONE_API_KEY")

pcsk_6R8ytv_37LPKM55quM3i51HJX3EpchECJW1e67Y8Utp9PP9xGBn1btMDCY9gy9MGLuWVK5


In [223]:
index_name = "lcvector"

vectorstore_from_docs = PineconeVectorStore.from_documents(
        doc_list,
        index_name=index_name,
        embedding=embeddings
)


In [224]:
def retrieve_query(query, k=2):
    matching_results = vectorstore_from_docs.similarity_search(query, k=k
                                                               )
    
    print(matching_results)
    return matching_results


In [225]:
from langchain.chains.question_answering import load_qa_chain
from langchain_community.llms import Ollama

# Initialize the local LLM via Ollama
llm = Ollama(model="mistral", temperature=0.1)

# Load QA chain
chain = load_qa_chain(llm, chain_type="stuff")

def retreive_answers(query):
    doc_search=retrieve_query(query)
    response= chain.run(input_documents=doc_search, question=query) 
    return response 



In [226]:
our_query= "Who is Jasper from Planet Mars? How many legs it had?"
answer=retreive_answers(our_query)
print(answer)

[Document(id='b77818cc-3733-4ad6-90df-95f1a1dcf128', metadata={'source': 'static\\13.txt'}, page_content='An alien called Jasper landed on Planet Mars in the year 1945.\n\nJasper had 3 heads and 6 legs making it almost impossible for his enemy to chase it.'), Document(id='2005647d-0ed4-4bac-b288-da602b955b47', metadata={'source': 'static\\13.txt'}, page_content='An alien called Jasper landed on Planet Mars in the year 1945.\n\nJasper had 3 heads and 6 legs making it almost impossible for his enemy to chase it.')]
 Jasper is an alien who landed on Planet Mars in the year 1945. He has 6 legs.


Deletion from DB code

In [227]:
# idx = pc.Index(index_name)

# # Delete all vectors from the index
# idx.delete(delete_all=True)

# print("All vectors have been deleted from the index.")

In [228]:
# idx = pc.Index(index_name)
# pdf_path = "static\\RahatBhambriResume_alter.pdf"

# idx.delete(
#     filter={
#         # "source": pdf_path
#     }
# )
# print(f"Vectors with source '{pdf_path}' have been deleted.")
