In [1]:
# Data ingestion: read a local plain-text file through LangChain's TextLoader.
from langchain_community.document_loaders import TextLoader

speech_path = "speech.txt"
loader = TextLoader(speech_path)
text_documents = loader.load()

# Leave the loaded result as the last expression so the notebook renders it.
text_documents



In [3]:
import os
from dotenv import load_dotenv

# Read variables from a local .env file (if any) into the process environment.
load_dotenv()

# os.environ only accepts string values, so assigning os.getenv(...) directly
# raises an opaque "str expected, not NoneType" TypeError when the key is
# missing. Check explicitly and fail with an actionable message instead.
openai_api_key = os.getenv("OPENAI_API_KEY")
if openai_api_key is None:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Define it in the environment or in a .env file."
    )
os.environ["OPENAI_API_KEY"] = openai_api_key


In [11]:
# Web-based loader
from langchain_community.document_loaders import WebBaseLoader
import bs4

# Only parse the HTML elements whose CSS class is one of the listed names;
# everything else on the page is skipped by BeautifulSoup.
post_sections = bs4.SoupStrainer(
    class_=("post-title", "post-content", "post-header")
)

# Load the content of the HTML page (this cell only loads; it does not chunk or index).
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2022-09-08-ntk/",),
    bs_kwargs={"parse_only": post_sections},
)

text_documents = loader.load()


In [12]:
# Display the current value of `text_documents` (the result of the most recent loader.load()).
text_documents

[Document(page_content='\n\n      Some Math behind Neural Tangent Kernel\n    \nDate: September 8, 2022  |  Estimated Reading Time: 17 min  |  Author: Lilian Weng\n\n\nNeural networks are well known to be over-parameterized and can often easily fit data with near-zero training loss with decent generalization performance on test dataset. Although all these parameters are initialized at random, the optimization process can consistently lead to similarly good outcomes. And this is true even when the number of model parameters exceeds the number of training data points.\nNeural tangent kernel (NTK) (Jacot et al. 2018) is a kernel to explain the evolution of neural networks during training via gradient descent. It leads to great insights into why neural networks with enough width can consistently converge to a global minimum when trained to minimize an empirical loss. In the post, we will do a deep dive into the motivation and definition of NTK, as well as the proof of a deterministic conve

In [13]:
# PDF reader: load a local PDF file through LangChain's PyPDFLoader.
from langchain_community.document_loaders import PyPDFLoader

pdf_path = 'attention.pdf'
loader = PyPDFLoader(pdf_path)
docs = loader.load()

Ignoring wrong pointing object 1098 0 (offset 0)
Ignoring wrong pointing object 1178 0 (offset 0)
Ignoring wrong pointing object 1239 0 (offset 0)
Ignoring wrong pointing object 1289 0 (offset 0)
Ignoring wrong pointing object 1303 0 (offset 0)
Ignoring wrong pointing object 1317 0 (offset 0)
Ignoring wrong pointing object 1330 0 (offset 0)


In [14]:
# Display the documents produced by the PDF loader.
docs

[Document(page_content='103C H A P T E R3Attention1.The Nature and Roles of Attention1.1.Failures of Selection1.1.1.Failures of Selection in Space1.1.2.Failures of Selection in Time1.1.3.Sources of LimitationDEBATE:Cars and Conversation1.1.4.Problems in Interpretation1.1.5.When the Brain Fails1.2.Successes of Selection1.2.1.Endogenous and ExogenousEffects in Space1.2.2.Components of Attention1.2.3.Cross-Modal Links1.2.4.Object-Based Attention2.Explaining Attention: Information ProcessingTheories2.1.Early versus Late Attentional Selection2.2.Spotlight Theory2.3.Feature Integration Theory and GuidedSearch3.Looking to the Brain3.1.Electrophysiology and Human Attention3.2.Functional Neuroimaging and TMS4.Competition: A Single Explanatory Frameworkfor Attention?A CLOSER LOOK:Competition and SelectionRevisit and ReflectLearning ObjectivesAt a very large and very noisy party, you’re looking for a friend you’ve lost in the crowd. Youlook for her green dress amid the sea of colors. You try to c

In [16]:
# Once the data is loaded, split it into smaller, overlapping chunks.
from langchain.text_splitter import RecursiveCharacterTextSplitter

# chunk_size / chunk_overlap are passed straight to the splitter.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
documents = text_splitter.split_documents(docs)

# Preview only the first five chunks.
documents[:5]



[Document(page_content='103C H A P T E R3Attention1.The Nature and Roles of Attention1.1.Failures of Selection1.1.1.Failures of Selection in Space1.1.2.Failures of Selection in Time1.1.3.Sources of LimitationDEBATE:Cars and Conversation1.1.4.Problems in Interpretation1.1.5.When the Brain Fails1.2.Successes of Selection1.2.1.Endogenous and ExogenousEffects in Space1.2.2.Components of Attention1.2.3.Cross-Modal Links1.2.4.Object-Based Attention2.Explaining Attention: Information ProcessingTheories2.1.Early versus Late Attentional Selection2.2.Spotlight Theory2.3.Feature Integration Theory and GuidedSearch3.Looking to the Brain3.1.Electrophysiology and Human Attention3.2.Functional Neuroimaging and TMS4.Competition: A Single Explanatory Frameworkfor Attention?A CLOSER LOOK:Competition and SelectionRevisit and ReflectLearning ObjectivesAt a very large and very noisy party, you’re looking for a friend you’ve lost in the crowd. Youlook for her green dress amid the sea of colors. You try to c

In [17]:
# Display the full list of chunks produced by the text splitter (not just a slice).
documents

[Document(page_content='103C H A P T E R3Attention1.The Nature and Roles of Attention1.1.Failures of Selection1.1.1.Failures of Selection in Space1.1.2.Failures of Selection in Time1.1.3.Sources of LimitationDEBATE:Cars and Conversation1.1.4.Problems in Interpretation1.1.5.When the Brain Fails1.2.Successes of Selection1.2.1.Endogenous and ExogenousEffects in Space1.2.2.Components of Attention1.2.3.Cross-Modal Links1.2.4.Object-Based Attention2.Explaining Attention: Information ProcessingTheories2.1.Early versus Late Attentional Selection2.2.Spotlight Theory2.3.Feature Integration Theory and GuidedSearch3.Looking to the Brain3.1.Electrophysiology and Human Attention3.2.Functional Neuroimaging and TMS4.Competition: A Single Explanatory Frameworkfor Attention?A CLOSER LOOK:Competition and SelectionRevisit and ReflectLearning ObjectivesAt a very large and very noisy party, you’re looking for a friend you’ve lost in the crowd. Youlook for her green dress amid the sea of colors. You try to c

In [20]:
## Vector embedding and vector store
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

# Chroma is the vector store that holds the vectors; the vectors themselves
# are produced from the chunks by OpenAIEmbeddings.
first_chunks = documents[:20]  # only the first 20 chunks are indexed
embedding_model = OpenAIEmbeddings()
db = Chroma.from_documents(first_chunks, embedding_model)


In [None]:
## Query the Chroma vector store
# The query text is embedded as written, so misspellings ("attension",
# "reaserch") shift the embedding and degrade retrieval; spell it correctly.
query = 'Who are the authors of the Attention Is All You Need research paper'
result = db.similarity_search(query)

# Show the text of the first returned match.
result[0].page_content

In [None]:
## FAISS vector database

from langchain_community.vectorstores import FAISS

# Build a FAISS store from the first 20 chunks, embedded with OpenAIEmbeddings.
faiss_chunks = documents[:20]
faiss_embedding_model = OpenAIEmbeddings()
db1 = FAISS.from_documents(faiss_chunks, faiss_embedding_model)


In [None]:
## Query the FAISS vector store
# The query text is embedded as written, so misspellings ("attension",
# "reaserch") shift the embedding and degrade retrieval; spell it correctly.
query = 'Who are the authors of the Attention Is All You Need research paper'
result = db1.similarity_search(query)

# Show the text of the first returned match.
result[0].page_content

In [None]:
# Lance vector database (presumably LanceDB — confirm). TODO: add the example.