In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import CharacterTextSplitter

In [3]:
loader=TextLoader("AI_NLP_LLM.txt")
documents = loader.load()

In [4]:
Text_splitter=CharacterTextSplitter(chunk_size=50,chunk_overlap=20)

In [6]:
docs= Text_splitter.split_documents(documents)

Created a chunk of size 212, which is longer than the specified 50
Created a chunk of size 272, which is longer than the specified 50
Created a chunk of size 180, which is longer than the specified 50
Created a chunk of size 139, which is longer than the specified 50
Created a chunk of size 180, which is longer than the specified 50


In [8]:
embeddings=OllamaEmbeddings(model="gemma:2b")
db= FAISS.from_documents(docs,embedding=embeddings)

In [11]:
query="what is nlp?"
docs=db.similarity_search(query)
docs[0].page_content

'2. Natural Language Processing (NLP)\nNatural Language Processing (NLP) is a subfield of AI that focuses on enabling computers to understand, interpret, and generate human language.'

In [13]:
retriever = db.as_retriever()
retriever.invoke(query)
docs[0].page_content

'2. Natural Language Processing (NLP)\nNatural Language Processing (NLP) is a subfield of AI that focuses on enabling computers to understand, interpret, and generate human language.'

In [14]:
docs_score =  db.similarity_search_with_score(query)
docs_score

[(Document(id='2434c9d2-94a1-4d25-adc5-bba84ef4da80', metadata={'source': 'AI_NLP_LLM.txt'}, page_content='2. Natural Language Processing (NLP)\nNatural Language Processing (NLP) is a subfield of AI that focuses on enabling computers to understand, interpret, and generate human language.'),
  np.float32(2975.6196)),
 (Document(id='f357ed80-f02a-49ef-9986-009e59dbed37', metadata={'source': 'AI_NLP_LLM.txt'}, page_content='5. Conclusion\nAI and NLP have revolutionized how we interact with machines. With advancements in LLMs and techniques like RAG, AI-powered applications are becoming more accurate and context-aware.'),
  np.float32(3356.2595)),
 (Document(id='44b482e8-e149-40e4-a4e7-67eaf46e750e', metadata={'source': 'AI_NLP_LLM.txt'}, page_content='3. Large Language Models (LLMs)\nLLMs are advanced deep learning models trained on massive amounts of text data to generate human-like text.'),
  np.float32(3517.1455)),
 (Document(id='a9943238-db8e-4edc-80f9-6b79e5e9be4a', metadata={'source

In [15]:
embedding_vector= embeddings.embed_query(query)
embedding_vector

[0.09939254075288773,
 -1.0149716138839722,
 0.5502493977546692,
 2.1144168376922607,
 1.9522075653076172,
 2.013023614883423,
 -0.07864425331354141,
 -1.2161587476730347,
 -0.1408083587884903,
 0.5801036953926086,
 0.48451393842697144,
 0.8476352095603943,
 0.11351554840803146,
 0.661194384098053,
 1.9693371057510376,
 -0.7453369498252869,
 6.786112308502197,
 0.0545034222304821,
 -0.03386100381612778,
 0.9963345527648926,
 0.6488193869590759,
 -0.6997354030609131,
 -0.1767936646938324,
 0.13902118802070618,
 -1.812618374824524,
 -0.4661827087402344,
 -0.9204182624816895,
 1.52694571018219,
 0.6563441157341003,
 -2.364166021347046,
 1.2599480152130127,
 -0.10518611967563629,
 -1.1221321821212769,
 -0.5521511435508728,
 -0.24741582572460175,
 0.23867720365524292,
 1.923508644104004,
 2.3574442863464355,
 1.947831153869629,
 -1.0991625785827637,
 0.25363337993621826,
 0.45740923285484314,
 -0.3572806119918823,
 0.050213735550642014,
 -0.341341495513916,
 0.29384392499923706,
 1.54531741

In [16]:
docs_score=db.similarity_search_by_vector(embedding_vector)
docs_score

[Document(id='2434c9d2-94a1-4d25-adc5-bba84ef4da80', metadata={'source': 'AI_NLP_LLM.txt'}, page_content='2. Natural Language Processing (NLP)\nNatural Language Processing (NLP) is a subfield of AI that focuses on enabling computers to understand, interpret, and generate human language.'),
 Document(id='f357ed80-f02a-49ef-9986-009e59dbed37', metadata={'source': 'AI_NLP_LLM.txt'}, page_content='5. Conclusion\nAI and NLP have revolutionized how we interact with machines. With advancements in LLMs and techniques like RAG, AI-powered applications are becoming more accurate and context-aware.'),
 Document(id='44b482e8-e149-40e4-a4e7-67eaf46e750e', metadata={'source': 'AI_NLP_LLM.txt'}, page_content='3. Large Language Models (LLMs)\nLLMs are advanced deep learning models trained on massive amounts of text data to generate human-like text.'),
 Document(id='a9943238-db8e-4edc-80f9-6b79e5e9be4a', metadata={'source': 'AI_NLP_LLM.txt'}, page_content='1. Introduction to Artificial Intelligence (AI

In [17]:
db.save_local("daiss_index")

In [19]:
new_df = FAISS.load_local("daiss_index",embeddings,allow_dangerous_deserialization=True)

In [20]:
docs=new_df.similarity_search(query)

In [21]:
docs

[Document(id='2434c9d2-94a1-4d25-adc5-bba84ef4da80', metadata={'source': 'AI_NLP_LLM.txt'}, page_content='2. Natural Language Processing (NLP)\nNatural Language Processing (NLP) is a subfield of AI that focuses on enabling computers to understand, interpret, and generate human language.'),
 Document(id='f357ed80-f02a-49ef-9986-009e59dbed37', metadata={'source': 'AI_NLP_LLM.txt'}, page_content='5. Conclusion\nAI and NLP have revolutionized how we interact with machines. With advancements in LLMs and techniques like RAG, AI-powered applications are becoming more accurate and context-aware.'),
 Document(id='44b482e8-e149-40e4-a4e7-67eaf46e750e', metadata={'source': 'AI_NLP_LLM.txt'}, page_content='3. Large Language Models (LLMs)\nLLMs are advanced deep learning models trained on massive amounts of text data to generate human-like text.'),
 Document(id='a9943238-db8e-4edc-80f9-6b79e5e9be4a', metadata={'source': 'AI_NLP_LLM.txt'}, page_content='1. Introduction to Artificial Intelligence (AI