In [1]:
# Load environment variables via python-dotenv before any API client is created.
# NOTE(review): presumably this supplies the OpenAI credentials that
# OpenAIEmbeddings() relies on in a later cell -- confirm which variables are required.
from dotenv import load_dotenv

# Left as the cell's final bare expression so the notebook displays the return
# value (True in the recorded run).
load_dotenv()

True

In [4]:
# Read the raw training text and cut it into small, slightly overlapping chunks
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter settings gathered in one place: chunk length is measured with len(),
# and separators are passed as non-regex (is_separator_regex=False).
splitter_options = {
    "chunk_size": 300,
    "chunk_overlap": 20,
    "length_function": len,
    "is_separator_regex": False,
}
splitter = RecursiveCharacterTextSplitter(**splitter_options)

loader = TextLoader("training-data/throwing_exceptions.txt")
docs = loader.load_and_split(text_splitter=splitter)

# Quick sanity check: show at most the first 100 characters of the first chunk
first_chunk = docs[0]
print(first_chunk.page_content[:100])

question: Who is Sowren Sen?


In [6]:
# Build a FAISS vector store from the chunks, embedding them with OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings

# NOTE(review): OpenAIEmbeddings() is created with defaults only -- presumably it
# picks up its credentials from the environment loaded in the first cell; confirm.
embedding_model = OpenAIEmbeddings()
vectordb = FAISS.from_documents(documents=docs, embedding=embedding_model)

In [7]:
# Smoke-test retrieval against the freshly built store.
# The result is a list of (Document, score) pairs. In the recorded output the
# exact-match chunk comes first with the smallest score, so scores look like
# distances (lower = closer) -- TODO confirm against the FAISS wrapper docs.
# Also note that the closest hits in that output are mostly "question:" chunks
# rather than the corresponding answers.
probe_query = "who is sowren sen?"
vectordb.similarity_search_with_score(probe_query)

[(Document(page_content='question: Who is Sowren Sen?', metadata={'source': 'training-data/throwing_exceptions.txt'}),
  0.1093835),
 (Document(page_content='question: Which software and apps does Sowren Sen use?', metadata={'source': 'training-data/throwing_exceptions.txt'}),
  0.28734285),
 (Document(page_content='answer: Sowren Sen is a Software Engineer with a primary focus on using Laravel and Vue.js to construct large-scale, high-performing, and intricate web applications. He occasionally employs Python to develop micro-services. Sowren is deeply passionate about clean coding and has been in the', metadata={'source': 'training-data/throwing_exceptions.txt'}),
  0.29793367),
 (Document(page_content='question: What projects have Sowren worked on?\nBesides work projects, Sowren Sen has actively contributed to the open-source community by creating several projects:', metadata={'source': 'training-data/throwing_exceptions.txt'}),
  0.32999182)]

In [8]:
# Persist the index to disk once the retrieval results above look relevant.
# The index is written into the same 'training-data' folder the source text was
# read from. Gotcha: the index name is hyphenated ('throwing-exceptions') while
# the source file uses an underscore ('throwing_exceptions.txt').
index_location = {
    'folder_path': 'training-data',
    'index_name': 'throwing-exceptions',
}
vectordb.save_local(**index_location)