In [4]:
## Data Ingestion
from langchain_community.document_loaders import TextLoader

# Decode the file explicitly as UTF-8. With no encoding given, the loader uses
# the platform default, which on non-UTF-8 systems garbles UTF-8 punctuation
# (an en dash comes out as "â€“").
loader = TextLoader("speech.txt", encoding="utf-8")
text_documents = loader.load()
text_documents

[Document(page_content='I have a dream that one day down in Alabama, with its vicious racists, with its governor having his lips dripping with the words of interposition and nullification â€“ one day right there in Alabama little black boys and black girls will be able to join hands with little white boys and white girls as sisters and brothers.\n\nI have a dream today.\n\nI have a dream that one day every valley shall be exalted, and every hill and mountain shall be made low, the rough places will be made plain, and the crooked places will be made straight, and the glory of the Lord shall be revealed and all flesh shall see it together.\n\nThis is our hope. This is the faith that I go back to the South with. With this faith we will be able to hew out of the mountain of despair a stone of hope. With this faith we will be able to transform the jangling discords of our nation into a beautiful symphony of brotherhood. With this faith we will be able to work together, to pray together, to 

In [6]:
import os
from dotenv import load_dotenv

# Load variables from the local .env file into the process environment.
load_dotenv(dotenv_path="./.env")

# Read the Gemini key from the environment; this is None when the variable is unset.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

In [11]:
# Web based loader
from langchain_community.document_loaders import WebBaseLoader
import bs4

## Fetch the blog post and keep only the elements whose CSS class is
## post-title, post-content or post-header (this cell only loads; no chunking yet).
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-title", "post-content", "post-header"),
        ),
    },
)

text_documents = loader.load()
text_documents

[Document(page_content='\n\n      LLM Powered Autonomous Agents\n    \nDate: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\n\n\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview#\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final re

In [15]:
# PDF loader: read DSA.pdf into a list of Document objects
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader(file_path="DSA.pdf")
docs = loader.load()
docs

[Document(page_content='Data Structures and Algorithms Cheat Sheet\nby \nburcuco\n via \ncheatography.com/133629/cs/27343/\nArrays & Strings\nStores data elements based on an sequen\n\u200b\ntial, most commonly 0\nbased, index.\nTime Comple\n\u200b\nxity\n \n●\n \nInde\n\u200b\nxing:\n Linear array: O(1), Dynamic array: O(1) \n●\n \nSear\n\u200b\nch:\n Linear array: O(n), Dynamic array: O(n) \n●\n \nOpti\n\u200b\nmized Search:\n Linear array: O(log n), Dynamic array: O(log\nn) \n●\n \nInse\n\u200b\nrti\n\u200b\non:\n Linear array: n/a, Dynamic array: O(n)\nBonus\n:\n●\n type[] name = {val1, val2, ...}\n●\n Arrays.so\n\u200b\nrt(arr) -> O(n log(n))\n●\n Collec\n\u200b\ntio\n\u200b\nns.s\n\u200b\nor\n\u200b\nt(list) -> O(n log(n))\n●\n int digit = \'4\' - \'0\' -> 4\n●\n String s = String.va\n\u200b\nlue\n\u200b\nOf(\'e\') -> "e"\n●\n (int) \'a\' -> 97 (ASCII)\n●\n new String\n\u200b\n(char[] arr) [\'a\',\'e\'] -> "ae"\n●\n (char) (\'a\' + 1) -> \'b\'\n●\n Charac\n\u200b\nter.is\n\u200b\

In [16]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded PDF documents into overlapping chunks
# (chunk_size=1000, chunk_overlap=200) and preview the first five.
text_spilitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
document = text_spilitter.split_documents(docs)
document[:5]

[Document(page_content='Data Structures and Algorithms Cheat Sheet\nby \nburcuco\n via \ncheatography.com/133629/cs/27343/\nArrays & Strings\nStores data elements based on an sequen\n\u200b\ntial, most commonly 0\nbased, index.\nTime Comple\n\u200b\nxity\n \n●\n \nInde\n\u200b\nxing:\n Linear array: O(1), Dynamic array: O(1) \n●\n \nSear\n\u200b\nch:\n Linear array: O(n), Dynamic array: O(n) \n●\n \nOpti\n\u200b\nmized Search:\n Linear array: O(log n), Dynamic array: O(log\nn) \n●\n \nInse\n\u200b\nrti\n\u200b\non:\n Linear array: n/a, Dynamic array: O(n)\nBonus\n:\n●\n type[] name = {val1, val2, ...}\n●\n Arrays.so\n\u200b\nrt(arr) -> O(n log(n))\n●\n Collec\n\u200b\ntio\n\u200b\nns.s\n\u200b\nor\n\u200b\nt(list) -> O(n log(n))\n●\n int digit = \'4\' - \'0\' -> 4\n●\n String s = String.va\n\u200b\nlue\n\u200b\nOf(\'e\') -> "e"\n●\n (int) \'a\' -> 97 (ASCII)\n●\n new String\n\u200b\n(char[] arr) [\'a\',\'e\'] -> "ae"\n●\n (char) (\'a\' + 1) -> \'b\'\n●\n Charac\n\u200b\nter.is\n\u200b\

In [19]:
## Vector Embedding and Vector Store
from langchain_google_genai import GoogleGenerativeAIEmbeddings
gemini_embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GEMINI_API_KEY)

from langchain_community.vectorstores import Chroma

# Only the first 20 chunks are embedded and indexed.
chunks_to_index = document[:20]

# The store is persisted to ./chroma_db, so it survives kernel restarts.
# Without explicit ids, every re-run of this cell inserts the same chunks again
# under freshly generated ids and similarity search starts returning duplicates.
# Stable, position-based ids make a re-run overwrite the existing entries instead.
chunk_ids = [f"dsa-chunk-{i}" for i in range(len(chunks_to_index))]

db = Chroma.from_documents(
    chunks_to_index,
    gemini_embeddings,
    ids=chunk_ids,
    persist_directory="./chroma_db",
)

In [23]:
## Query the vector store and show the text of the top-ranked chunk
query = "What is DSA?"
result = db.similarity_search(query=query)
result[0].page_content

'dp[i][j-1] = max({d\n\u200b\np[i\n\u200b\n][j-1], dp[i-1\n\u200b\n][j-1] + arr[i], arr[i]});\n \nBinary Search Big O Notation\n \nTime\nSpace\nBinary Search\nO(log n)\nO(1)\nBinary Search - Recursive\npublic int \nbinarySearch\n(int search, int[] array,\nint start, int end) \n{\n \n\u200b\n \n\u200b\n int middle = start + ((end - start) / 2);\n \n\u200b\n \n\u200b\n \n\u200b\nif(end < start) {\n \n\u200b\n \n\u200b\n \n\u200b\n \n\u200b\n \n\u200b\n \n\u200b\nr\n\u200b\neturn\n -1;\n \n\u200b\n \n\u200b\n }\n \n\u200b\n \n\u200b\n if (search == array[\n\u200b\nmid\n\u200b\ndle]) {\n \n\u200b\n \n\u200b\n \n\u200b\n \n\u200b\n \n\u200b\n \n\u200b\nr\n\u200b\neturn\n middle;\n \n\u200b\n \n\u200b\n } else if (search < array[\n\u200b\nmid\n\u200b\ndle]) {\n \n\u200b\n \n\u200b\n \n\u200b\n \n\u200b\n \n\u200b\n \n\u200b\nr\n\u200b\neturn\n binary\n\u200b\nSea\n\u200b\nrch\n\u200b\n(se\n\u200b\narch, array, start,\nmiddle - 1);\n \n\u200b\n \n\u200b\n } else {\n \n\u200b\n \n\u200b\n \n\u

Document(page_content='Data Structures and Algorithms Cheat Sheet\nby \nburcuco\n via \ncheatography.com/133629/cs/27343/\nBFS Impl for Graph\npublic boolean \nconnected\n(int[][] graph, int start,\nint end) {\n \n\u200b\n \n\u200b\nSet\n\u200b\n<In\n\u200b\nteg\n\u200b\ner> visited = new HashSe\n\u200b\nt<>();\n \n\u200b\n \n\u200b\nQue\n\u200b\nue<\n\u200b\nInt\n\u200b\nege\n\u200b\nr> toVisit = new Linked\n\u200b\nLis\n\u200b\nt<>();\n \n\u200b\n \n\u200b\ntoV\n\u200b\nisi\n\u200b\nt.e\n\u200b\nnqu\n\u200b\neue\n\u200b\n(st\n\u200b\nart);\n \n\u200b\n \n\u200b\nwhile (!toVi\n\u200b\nsit.is\n\u200b\nEmp\n\u200b\nty()) {\n \n\u200b\n  \nint curr = toVisi\n\u200b\nt.d\n\u200b\nequ\n\u200b\neue();\n \n\u200b\n  \nif (visit\n\u200b\ned.c\n\u200b\non\n\u200b\ntai\n\u200b\nns(\n\u200b\ncurr)) \ncont\n\u200b\ninue\n;\n \n\u200b\n  \nif (curr == end) \nreturn\n true;\n \n\u200b\n  \nfor (int i : graph[\n\u200b\nstart]) {\n \n\u200b\n \n\u200b\n \n\u200b\n \n\u200b\n \n\u200b\ntoV\n\u200b\nis