In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import CharacterTextSplitter

# Load the raw text file as LangChain documents.
# NOTE(review): no explicit encoding is passed to TextLoader, so the platform
# default is used — confirm that matches how solar_system.txt was saved.
loader = TextLoader("solar_system.txt")
documents = loader.load()

# Split the loaded documents into smaller chunks; these chunks are what gets
# embedded and indexed further down.
text_splitter = CharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=30,
)
docs = text_splitter.split_documents(documents)

In [2]:
# Inspect the chunks produced by the text splitter.
docs

[Document(metadata={'source': 'solar_system.txt'}, page_content="The Solar System is a vast and fascinating part of the universe, consisting of the Sun and everything bound to it by gravity. This includes eight planets, their moons, asteroids, comets, and meteoroids. The Sun, located at the center, is by far the largest object in the Solar System, containing about 99.8% of the system's total mass. The planets orbit the Sun in elliptical paths, with varying distances from the Sun."),
 Document(metadata={'source': 'solar_system.txt'}, page_content='The four inner planets—Mercury, Venus, Earth, and Mars—are terrestrial planets, composed mostly of rock and metal. The outer planets—Jupiter, Saturn, Uranus, and Neptune—are gas giants, consisting mainly of hydrogen and helium. Each planet has its unique characteristics and features, such as rings around Saturn and the massive storm on Jupiter known as the Great Red Spot.'),
 Document(metadata={'source': 'solar_system.txt'}, page_content='Apar

In [3]:
# Embedding model used both to index the chunks and, later, to embed queries.
embeddings = OllamaEmbeddings(model="mxbai-embed-large")

# Build the FAISS vector store from the chunked documents.
db = FAISS.from_documents(documents=docs, embedding=embeddings)
db

<langchain_community.vectorstores.faiss.FAISS at 0x17a7ad28770>

In [7]:
# Query the store: fetch the k chunks most similar to the question.
query = "Name the four inner planets ?"
k = 2  # number of chunks to return
doc = db.similarity_search(query, k=k)  # `doc` is a list of Document objects
doc


[Document(metadata={'source': 'solar_system.txt'}, page_content='The four inner planets—Mercury, Venus, Earth, and Mars—are terrestrial planets, composed mostly of rock and metal. The outer planets—Jupiter, Saturn, Uranus, and Neptune—are gas giants, consisting mainly of hydrogen and helium. Each planet has its unique characteristics and features, such as rings around Saturn and the massive storm on Jupiter known as the Great Red Spot.'),
 Document(metadata={'source': 'solar_system.txt'}, page_content='Apart from the planets, the Solar System is also home to dwarf planets like Pluto, Ceres, and Eris. These celestial bodies share characteristics with the eight main planets but are smaller and have not cleared their orbits of other debris.')]

In [8]:
# Show only the text of the first (top-ranked) result.
doc[0].page_content

'The four inner planets—Mercury, Venus, Earth, and Mars—are terrestrial planets, composed mostly of rock and metal. The outer planets—Jupiter, Saturn, Uranus, and Neptune—are gas giants, consisting mainly of hydrogen and helium. Each planet has its unique characteristics and features, such as rings around Saturn and the massive storm on Jupiter known as the Great Red Spot.'

In [9]:
# Same query, but every hit is returned as a (Document, score) pair.
# In the recorded output the most relevant chunk has the smallest score, i.e.
# the score behaves like a distance (lower = closer) — TODO confirm against the
# store's configured distance strategy.
doc_and_score = db.similarity_search_with_score(query=query)
doc_and_score

[(Document(metadata={'source': 'solar_system.txt'}, page_content='The four inner planets—Mercury, Venus, Earth, and Mars—are terrestrial planets, composed mostly of rock and metal. The outer planets—Jupiter, Saturn, Uranus, and Neptune—are gas giants, consisting mainly of hydrogen and helium. Each planet has its unique characteristics and features, such as rings around Saturn and the massive storm on Jupiter known as the Great Red Spot.'),
  141.68187),
 (Document(metadata={'source': 'solar_system.txt'}, page_content='Apart from the planets, the Solar System is also home to dwarf planets like Pluto, Ceres, and Eris. These celestial bodies share characteristics with the eight main planets but are smaller and have not cleared their orbits of other debris.'),
  179.49039),
 (Document(metadata={'source': 'solar_system.txt'}, page_content="The Solar System is a vast and fascinating part of the universe, consisting of the Sun and everything bound to it by gravity. This includes eight planets

In [10]:
# Embed the query text directly; the result is a plain list of floats that is
# reused for the vector-based search in the next cell.
embedding_vector = embeddings.embed_query(text=query)
embedding_vector

[0.23761549592018127,
 -0.21193793416023254,
 -0.17758074402809143,
 -0.7876365780830383,
 -0.8348212838172913,
 -0.006703678518533707,
 -0.5102159380912781,
 0.5782114863395691,
 0.1169181615114212,
 0.17070229351520538,
 0.5594577789306641,
 0.7174216508865356,
 -0.6898633241653442,
 -0.24078425765037537,
 -0.9189942479133606,
 -0.21670518815517426,
 -0.013250522315502167,
 -0.11510534584522247,
 -1.1106503009796143,
 -0.6745681762695312,
 -0.22791361808776855,
 -0.13898974657058716,
 -1.1867362260818481,
 -0.1703660488128662,
 0.11915554106235504,
 0.5154615640640259,
 0.658218264579773,
 0.9142153263092041,
 0.3025018572807312,
 0.6433854103088379,
 -0.03201023489236832,
 -0.951778769493103,
 1.5113866329193115,
 -0.2679016888141632,
 -0.5706025958061218,
 -0.10307822376489639,
 0.8482567071914673,
 -0.20877645909786224,
 -0.33656978607177734,
 -1.6026430130004883,
 0.4554310142993927,
 -0.5809925198554993,
 0.4858267307281494,
 -0.13121303915977478,
 -0.10173948109149933,
 0.67480

In [11]:
# Search with the precomputed query embedding instead of the raw query string.
doc_vector = db.similarity_search_by_vector(embedding=embedding_vector)
doc_vector

[Document(metadata={'source': 'solar_system.txt'}, page_content='The four inner planets—Mercury, Venus, Earth, and Mars—are terrestrial planets, composed mostly of rock and metal. The outer planets—Jupiter, Saturn, Uranus, and Neptune—are gas giants, consisting mainly of hydrogen and helium. Each planet has its unique characteristics and features, such as rings around Saturn and the massive storm on Jupiter known as the Great Red Spot.'),
 Document(metadata={'source': 'solar_system.txt'}, page_content='Apart from the planets, the Solar System is also home to dwarf planets like Pluto, Ceres, and Eris. These celestial bodies share characteristics with the eight main planets but are smaller and have not cleared their orbits of other debris.'),
 Document(metadata={'source': 'solar_system.txt'}, page_content="The Solar System is a vast and fascinating part of the universe, consisting of the Sun and everything bound to it by gravity. This includes eight planets, their moons, asteroids, comet

In [12]:
# Persist the vector store to the local "faiss_index" folder so it can be
# reloaded later without re-embedding the documents.
db.save_local(folder_path='faiss_index')

In [13]:
# Reload the persisted store, using the same embedding model that built it.
# SECURITY: allow_dangerous_deserialization=True opts in to deserializing the
# saved store (pickle-based per the LangChain FAISS docs). That is acceptable
# here only because "faiss_index" is written by this notebook's own
# db.save_local call — never enable it for an index from an untrusted source.
new_db = FAISS.load_local(
    folder_path="faiss_index",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

In [14]:
# Display the reloaded store to confirm that loading succeeded.
new_db

<langchain_community.vectorstores.faiss.FAISS at 0x17a7ae1b350>