In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
from langchain.text_splitter import CharacterTextSplitter 

In [46]:
document = TextLoader('me.txt').load()
splitter = CharacterTextSplitter(chunk_size = 250, chunk_overlap = 50)
docs = splitter.split_documents(document)

Created a chunk of size 514, which is longer than the specified 250
Created a chunk of size 260, which is longer than the specified 250
Created a chunk of size 487, which is longer than the specified 250
Created a chunk of size 257, which is longer than the specified 250
Created a chunk of size 257, which is longer than the specified 250
Created a chunk of size 413, which is longer than the specified 250


In [41]:
docs

[Document(metadata={'source': 'me.txt'}, page_content='Muhammad Kamran â€“ AI/ML Enthusiast & Developer Portfolio'),
 Document(metadata={'source': 'me.txt'}, page_content='3. Paragraph Summarizer\n   - Developed a text summarizer to generate concise summaries from longer paragraphs.\n   - Tools: NLP, Transformers.\n\n4. Predicting Cancer Patientsâ€™ Mortality Status\n   - Case study project involving both classification (Alive vs Dead) and regression (Survival Months).\n   - Achieved detailed analysis and report writing.\n   - Tools: Machine Learning, Python.'),
 Document(metadata={'source': 'me.txt'}, page_content='5. Bing Bot (Internship Task)\n   - Built a chatbot system using Bing API to answer queries.\n   - Focused on deployment and usability.\n\n6. Object Detection with YOLO\n   - Implemented an object detection model using the YOLO architecture.\n   - Tools: Computer Vision, Deep Learning.\n\n7. GPT Implementation (Learning Project)\n   - Converted Andrej Karpathyâ€™s GPT imple

In [47]:
embeddings = OllamaEmbeddings(model = 'mxbai-embed-large')
db = FAISS.from_documents(docs,embeddings)

In [50]:
# querying
query = "What are future plans?"
docs = db.similarity_search(query)
docs

 Document(id='0b7549ad-0144-4a1d-a8fc-f12e857620b3', metadata={'source': 'me.txt'}, page_content='8. Student Project Hub Platform (Ongoing)\n   - A platform for university students to upload projects, connect with investors, and collaborate.\n   - Features: AI-driven profile verification, startup pitching, AI resume builder, hackathons, collaboration hub.'),
 Document(id='edbac369-3566-45fb-a8ca-437f8c76fd50', metadata={'source': 'me.txt'}, page_content='My long-term vision is to establish an AI startup and build impactful products that solve real-world problems \nusing advanced machine learning and generative AI technologies.'),
 Document(id='9e55a90f-c0bd-4174-a6dd-fccff3b683ad', metadata={'source': 'me.txt'}, page_content='5. Bing Bot (Internship Task)\n   - Built a chatbot system using Bing API to answer queries.\n   - Focused on deployment and usability.')]

As a retreiver

In [51]:
retriever = db.as_retriever()
retriever.invoke(query)
docs[0].page_content



## Similarity search with score

In [52]:
score = db.similarity_search_with_score(query)
score

  np.float32(249.61234)),
 (Document(id='0b7549ad-0144-4a1d-a8fc-f12e857620b3', metadata={'source': 'me.txt'}, page_content='8. Student Project Hub Platform (Ongoing)\n   - A platform for university students to upload projects, connect with investors, and collaborate.\n   - Features: AI-driven profile verification, startup pitching, AI resume builder, hackathons, collaboration hub.'),
  np.float32(294.167)),
 (Document(id='edbac369-3566-45fb-a8ca-437f8c76fd50', metadata={'source': 'me.txt'}, page_content='My long-term vision is to establish an AI startup and build impactful products that solve real-world problems \nusing advanced machine learning and generative AI technologies.'),
  np.float32(308.36838)),
 (Document(id='9e55a90f-c0bd-4174-a6dd-fccff3b683ad', metadata={'source': 'me.txt'}, page_content='5. Bing Bot (Internship Task)\n   - Built a chatbot system using Bing API to answer queries.\n   - Focused on deployment and usability.'),
  np.float32(313.07797))]

In [27]:
### Saving And Loading
db.save_local("faiss_index")

In [31]:
#loading
new_db = FAISS.load_local("faiss_index",embeddings, allow_dangerous_deserialization = True)
docs = new_db.similarity_search(query)

In [32]:
docs

[Document(id='6e362c2a-a473-4ee6-ad9d-f7648db0bb8d', metadata={'source': 'me.txt'}, page_content='ðŸ’¼ Professional Experience'),
 Document(id='2f1febb4-fe19-495e-8d64-af43c6711602', metadata={'source': 'me.txt'}, page_content='ðŸ“‚ Portfolio of Projects'),
 Document(id='d3e1caa5-b06b-4881-8038-710e85b7de41', metadata={'source': 'me.txt'}, page_content='Establish myself as a specialist in Generative AI and applied ML systems.\n\nðŸŒŸ Personal Vision'),
 Document(id='5c06c7b7-0e93-4a5d-9aff-e1e431c4ce86', metadata={'source': 'me.txt'}, page_content='I am currently pursuing a Bachelor of Science in Computer Science (BSCS) with a focus on Artificial Intelligence and Machine Learning. Throughout my academic journey, I have built a strong foundation in:')]