In [92]:
import pandas as pd
import numpy as np
import os
from io import StringIO 
import json
from collections import Counter


from enum import Enum

from langchain.document_loaders import HuggingFaceDatasetLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_experimental.text_splitter import SemanticChunker
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_core.output_parsers import JsonOutputParser
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
from transformers import AutoTokenizer, pipeline
from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS, Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_core.runnables import RunnablePassthrough, RunnableLambda, RunnableSequence
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_community.vectorstores.faiss import DistanceStrategy
from langchain_pinecone import PineconeVectorStore
from time import perf_counter, process_time
from typing import TYPE_CHECKING, Any, Dict, Optional, List
from langchain_core.callbacks import StdOutCallbackHandler, BaseCallbackHandler

%reload_ext autoreload
%autoreload 2

from openai import OpenAI


In [87]:
# MODEL = 'gpt-3.5-turbo-0301'
# BASE_FOLDER = "./test_data"
# QUESTION_FILE =  "document_questions.xlsx"
# RAW_DATA_FOLDER = "raw_text"

# client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [21]:
loader = PyPDFLoader('machine_learning_basics.pdf')
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
splits = text_splitter.split_documents(docs)

store = FAISS.from_documents(docs, OpenAIEmbeddings(), distance_strategy=DistanceStrategy.COSINE)


In [36]:
def getChainBreakdown(serialized_input):

    if isinstance(serialized_input, list):
        x = serialized_input[0]
    else:
        x = serialized_input


    if x['type'] == 'not_implemented':
        # Custom object
        name = x['repr']
        id = x['id']
    
        print(f"name: {name}")
        print(f"id: {id}")

    else:
    
        try:
            name = x['name']
        except:
            name = x['repr']
        
        seq_type = x['type']
        id = x['id']
        kwargs_keys = x['kwargs'].keys()
        
        print(f"name: {name}")
        print(f"type: {seq_type}")
        print(f"id: {id}")
        print(f"kwargs_keys: {kwargs_keys}")

# Callback Testing

## Chain 1

In [8]:
qa_template1 = """You are an assistant that helps answer questions. 
    Question: {question} 
    Answer:
    """
qa_prompt1 = ChatPromptTemplate.from_template(qa_template1)

qa_template2 = """The prompt is : {prompt}
    """
qa_prompt2 = ChatPromptTemplate.from_template(qa_template2)



def retrieve_text(x):
    return {'prompt': x.messages[0].content}

rag_chain =  qa_prompt1 | RunnableLambda( retrieve_text) | qa_prompt2


In [11]:
class CustomCallback1(BaseCallbackHandler):

    def __init__(self):
        self.serialized_input = []
        self.chain_input = []
    

    def on_chain_start(
        self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
    ) -> None:
        self.serialized_input.append(serialized)
        self.chain_input.append(inputs)


In [13]:
handler = CustomCallback1()
rag_chain.invoke({"question":"What is meant by computational finance?"}, {"callbacks":[handler]})

ChatPromptValue(messages=[HumanMessage(content='The prompt is : You are an assistant that helps answer questions. \n    Question: What is meant by computational finance? \n    Answer:\n    \n    ')])

In [18]:
for x in handler.chain_input:
    print(x)

{'question': 'What is meant by computational finance?'}
{'question': 'What is meant by computational finance?'}
messages=[HumanMessage(content='You are an assistant that helps answer questions. \n    Question: What is meant by computational finance? \n    Answer:\n    ')]
{'prompt': 'You are an assistant that helps answer questions. \n    Question: What is meant by computational finance? \n    Answer:\n    '}


In [17]:
for x in handler.serialized_input:

    try:
        print(f"Name :  {x['name']} ")
    except:
        print(f"Name :  {x['repr']} ")

    print(f"ID : {x['id']}")
    print("*"*10)
    
    

Name :  RunnableSequence 
ID : ['langchain', 'schema', 'runnable', 'RunnableSequence']
**********
Name :  ChatPromptTemplate 
ID : ['langchain', 'prompts', 'chat', 'ChatPromptTemplate']
**********
Name :  RunnableLambda(retrieve_text) 
ID : ['langchain_core', 'runnables', 'base', 'RunnableLambda']
**********
Name :  ChatPromptTemplate 
ID : ['langchain', 'prompts', 'chat', 'ChatPromptTemplate']
**********


In [154]:
getChainBreakdown(handler.serialized_input[0])

name: RunnableSequence
type: constructor
id: ['langchain', 'schema', 'runnable', 'RunnableSequence']
kwargs_keys: dict_keys(['first', 'middle', 'last', 'name'])


In [155]:
getChainBreakdown(handler.serialized_input[1])

name: ChatPromptTemplate
type: constructor
id: ['langchain', 'prompts', 'chat', 'ChatPromptTemplate']
kwargs_keys: dict_keys(['input_variables', 'messages', 'partial_variables'])


In [22]:
getChainBreakdown(handler.serialized_input[2])

name: RunnableLambda(retrieve_text)
id: ['langchain_core', 'runnables', 'base', 'RunnableLambda']


In [158]:
getChainBreakdown(handler.serialized_input[3])

name: ChatPromptTemplate
type: constructor
id: ['langchain', 'prompts', 'chat', 'ChatPromptTemplate']
kwargs_keys: dict_keys(['input_variables', 'messages', 'partial_variables'])


In [160]:
getChainBreakdown(handler.serialized_input[0]['kwargs']['first'])

name: ChatPromptTemplate
type: constructor
id: ['langchain', 'prompts', 'chat', 'ChatPromptTemplate']
kwargs_keys: dict_keys(['input_variables', 'messages', 'partial_variables'])


In [27]:
getChainBreakdown(handler.serialized_input[0]['kwargs']['middle'])

name: RunnableLambda(retrieve_text)
id: ['langchain_core', 'runnables', 'base', 'RunnableLambda']


In [163]:
getChainBreakdown(handler.serialized_input[0]['kwargs']['last'])

name: ChatPromptTemplate
type: constructor
id: ['langchain', 'prompts', 'chat', 'ChatPromptTemplate']
kwargs_keys: dict_keys(['input_variables', 'messages', 'partial_variables'])


## Chain 2

In [28]:
qa_template = """
    Question: {question} 
    Answer:
    """
qa_prompt = ChatPromptTemplate.from_template(qa_template)

qa_template2 = """You are an assistant that helps answer questions. 
    {prompt}
    """
qa_prompt2 = ChatPromptTemplate.from_template(qa_template2)

llm = ChatOpenAI(temperature = 0)

def retrieve_text(x):
    return {'prompt': x.messages[0].content}

rag_chain =  qa_prompt | RunnableLambda( retrieve_text) | qa_prompt2 | llm


In [29]:
class CustomCallback2(BaseCallbackHandler):

    def __init__(self):
        self.serialized_input = []
        self.chain_input = []
    
    def on_chain_start(
        self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
    ) -> None:
        """Print out that we are entering a chain."""
        self.serialized_input.append(serialized)
        self.chain_input.append(inputs)
        

    def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> Any:
        """Run when LLM starts running."""
        self.serialized_input.append(serialized)
        self.chain_input.append(prompts)

In [30]:
handler = CustomCallback2()
out = rag_chain.invoke({"question":"What is meant by computational finance?"}, {"callbacks":[handler]})

In [31]:
for x in handler.chain_input:
    print(x)

{'question': 'What is meant by computational finance?'}
{'question': 'What is meant by computational finance?'}
messages=[HumanMessage(content='\n    Question: What is meant by computational finance? \n    Answer:\n    ')]
{'prompt': '\n    Question: What is meant by computational finance? \n    Answer:\n    '}
['Human: You are an assistant that helps answer questions. \n    \n    Question: What is meant by computational finance? \n    Answer:\n    \n    ']


In [32]:
for x in handler.serialized_input:

    try:
        print(f"Name :  {x['name']} ")
    except:
        print(f"Name :  {x['repr']} ")

    print(f"ID : {x['id']}")
    print("*"*10)
    
    

Name :  RunnableSequence 
ID : ['langchain', 'schema', 'runnable', 'RunnableSequence']
**********
Name :  ChatPromptTemplate 
ID : ['langchain', 'prompts', 'chat', 'ChatPromptTemplate']
**********
Name :  RunnableLambda(retrieve_text) 
ID : ['langchain_core', 'runnables', 'base', 'RunnableLambda']
**********
Name :  ChatPromptTemplate 
ID : ['langchain', 'prompts', 'chat', 'ChatPromptTemplate']
**********
Name :  ChatOpenAI 
ID : ['langchain', 'chat_models', 'openai', 'ChatOpenAI']
**********


In [178]:
getChainBreakdown(handler.serialized_input[0])

name: RunnableSequence
type: constructor
id: ['langchain', 'schema', 'runnable', 'RunnableSequence']
kwargs_keys: dict_keys(['first', 'middle', 'last', 'name'])


In [37]:
getChainBreakdown(handler.serialized_input[0]['kwargs']['first'])

name: ChatPromptTemplate
type: constructor
id: ['langchain', 'prompts', 'chat', 'ChatPromptTemplate']
kwargs_keys: dict_keys(['input_variables', 'messages', 'partial_variables'])


In [38]:
getChainBreakdown(handler.serialized_input[0]['kwargs']['middle'])

name: RunnableLambda(retrieve_text)
id: ['langchain_core', 'runnables', 'base', 'RunnableLambda']


In [39]:
getChainBreakdown(handler.serialized_input[0]['kwargs']['last'])

name: ChatOpenAI
type: constructor
id: ['langchain', 'chat_models', 'openai', 'ChatOpenAI']
kwargs_keys: dict_keys(['temperature', 'openai_api_key'])


## Chain 3

In [55]:

def get_data(x):
    return """
    Researchers at AT&T invented Support Vector Machines (SVMs) \nin 1992, a technique that revolutionized large scale classification \nbecause of its predictable performance.\n5\n\n\n ------------CONVOLUTIONAL NEURAL NETWORK  1996\nPatrick Haffner (Lead Inventive Scientist at Interactions) and \nresearchers from AT&T proposed the first convolutional neural \nnetwork (CNN) in 1996, with a large scale application to check \nrecognition. The influence of this technology was not appreciated \nuntil 10 years later when it became rebranded as deep learning, and \nmachine learning researchers began to focus on another technique \ndeveloped by the same group at AT&T: Support Vector Machines
    """

qa_template = """ You are a helpful assistant that answers questions
    Question: {question} 
    Context: {context}
    Answer:
    """
qa_prompt = ChatPromptTemplate.from_template(qa_template)

rag_chain = ( {'context': get_data , 'question': RunnablePassthrough() } | qa_prompt)

In [56]:
handler = CustomCallback1()
out = rag_chain.invoke({"question":"What is meant by computational finance?"}, {"callbacks":[handler]})

In [57]:
for x in handler.chain_input:
    print(x)

{'question': 'What is meant by computational finance?'}
{'question': 'What is meant by computational finance?'}
{'question': 'What is meant by computational finance?'}
{'question': 'What is meant by computational finance?'}
{'context': '\n    Researchers at AT&T invented Support Vector Machines (SVMs) \nin 1992, a technique that revolutionized large scale classification \nbecause of its predictable performance.\n5\n\n\n ------------CONVOLUTIONAL NEURAL NETWORK  1996\nPatrick Haffner (Lead Inventive Scientist at Interactions) and \nresearchers from AT&T proposed the first convolutional neural \nnetwork (CNN) in 1996, with a large scale application to check \nrecognition. The influence of this technology was not appreciated \nuntil 10 years later when it became rebranded as deep learning, and \nmachine learning researchers began to focus on another technique \ndeveloped by the same group at AT&T: Support Vector Machines\n    ', 'question': {'question': 'What is meant by computational fin

In [54]:
for x in handler.serialized_input:
    try:
        print(f"Name :  {x['name']} ")
    except:
        print(f"Name :  {x['repr']} ")
    print(f"ID : {x['id']}")
    print("*"*10)
    
    

Name :  RunnableSequence 
ID : ['langchain', 'schema', 'runnable', 'RunnableSequence']
**********
Name :  RunnableParallel<context,question> 
ID : ['langchain', 'schema', 'runnable', 'RunnableParallel']
**********
Name :  RunnablePassthrough 
ID : ['langchain', 'schema', 'runnable', 'RunnablePassthrough']
**********
Name :  RunnableLambda(get_data) 
ID : ['langchain_core', 'runnables', 'base', 'RunnableLambda']
**********
Name :  ChatPromptTemplate 
ID : ['langchain', 'prompts', 'chat', 'ChatPromptTemplate']
**********


In [59]:
getChainBreakdown(handler.serialized_input[0]['kwargs']['first'])

name: RunnableParallel<context,question>
type: constructor
id: ['langchain', 'schema', 'runnable', 'RunnableParallel']
kwargs_keys: dict_keys(['steps'])


In [61]:
getChainBreakdown(handler.serialized_input[0]['kwargs']['last'])

name: ChatPromptTemplate
type: constructor
id: ['langchain', 'prompts', 'chat', 'ChatPromptTemplate']
kwargs_keys: dict_keys(['input_variables', 'messages', 'partial_variables'])


In [62]:
getChainBreakdown(handler.serialized_input[1])

name: RunnableParallel<context,question>
type: constructor
id: ['langchain', 'schema', 'runnable', 'RunnableParallel']
kwargs_keys: dict_keys(['steps'])


In [64]:
handler.serialized_input[1]['kwargs']['steps']

{'context': {'lc': 1,
  'type': 'not_implemented',
  'id': ['langchain_core', 'runnables', 'base', 'RunnableLambda'],
  'repr': 'RunnableLambda(get_data)'},
 'question': {'lc': 1,
  'type': 'constructor',
  'id': ['langchain', 'schema', 'runnable', 'RunnablePassthrough'],
  'kwargs': {'func': None, 'afunc': None, 'input_type': None},
  'name': 'RunnablePassthrough',
  'graph': {'nodes': [{'id': 0,
     'type': 'schema',
     'data': {'title': 'RunnablePassthroughInput'}},
    {'id': 1,
     'type': 'runnable',
     'data': {'id': ['langchain', 'schema', 'runnable', 'RunnablePassthrough'],
      'name': 'RunnablePassthrough'}},
    {'id': 2,
     'type': 'schema',
     'data': {'title': 'RunnablePassthroughOutput'}}],
   'edges': [{'source': 0, 'target': 1}, {'source': 1, 'target': 2}]}}}

## Chain 4

Same as Chain 3, but with two custom functions for retrieving context

In [80]:
def format_docs(docs):
    return "\n\n ------------".join(doc.page_content for doc in docs)

def get_data(x):
    return docs[0:5]

qa_template = """ You are a helpful assistant that answers questions
    Question: {question} 
    Context: {context}
    Answer:
    """
qa_prompt = ChatPromptTemplate.from_template(qa_template)

rag_chain = ( {'context': RunnableLambda(get_data) | format_docs , 'question': RunnablePassthrough() } | qa_prompt)

In [81]:
handler = CustomCallback1()
out = rag_chain.invoke({"question":"What is meant by computational finance?"}, {"callbacks":[handler]})

In [82]:
for x in handler.chain_input:
    print(x)

{'question': 'What is meant by computational finance?'}
{'question': 'What is meant by computational finance?'}
{'question': 'What is meant by computational finance?'}
{'question': 'What is meant by computational finance?'}
[Document(page_content='THE FUNDAMENTALS  \nOF MACHINE LEARNING', metadata={'source': 'machine_learning_basics.pdf', 'page': 0}), Document(page_content='WHAT IS MACHINE LEARNING?\nBRIEF HISTORY OF MACHINE LEARNING\nHOW IT WORKS\nMACHINE LEARNING TECHNIQUES\nTHE IMPORTANCE OF THE HUMAN ELEMENT\nWHO’S USING IT?\nCHALLENGES AND HESITATIONS\nTHE FUTURE OF MACHINE LEARNING\nCONTRIBUTORS3\n5\n8\n9\n11\n12\n1514\n16TABLE OF CONTENTS\n2\n', metadata={'source': 'machine_learning_basics.pdf', 'page': 1}), Document(page_content='WHAT IS MACHINE LEARNING?\nWhether we realize it or not, machine learning is something we \nencounter on a daily basis. While the technology is not new, \nwith the rise of artificial intelligence (AI) and the digital age, it is \nbecoming increasingly 

In [84]:
for x in handler.serialized_input:
    try:
        print(f"Name :  {x['name']} ")
    except:
        print(f"Name :  {x['repr']} ")
    print(f"ID : {x['id']}")
    print("*"*10)
    
    

Name :  RunnableSequence 
ID : ['langchain', 'schema', 'runnable', 'RunnableSequence']
**********
Name :  RunnableParallel<context,question> 
ID : ['langchain', 'schema', 'runnable', 'RunnableParallel']
**********
Name :  RunnableSequence 
ID : ['langchain', 'schema', 'runnable', 'RunnableSequence']
**********
Name :  RunnableLambda(get_data) 
ID : ['langchain_core', 'runnables', 'base', 'RunnableLambda']
**********
Name :  RunnableLambda(format_docs) 
ID : ['langchain_core', 'runnables', 'base', 'RunnableLambda']
**********
Name :  RunnablePassthrough 
ID : ['langchain', 'schema', 'runnable', 'RunnablePassthrough']
**********
Name :  ChatPromptTemplate 
ID : ['langchain', 'prompts', 'chat', 'ChatPromptTemplate']
**********


In [88]:
getChainBreakdown(handler.serialized_input[0]['kwargs']['first'])

name: RunnableParallel<context,question>
type: constructor
id: ['langchain', 'schema', 'runnable', 'RunnableParallel']
kwargs_keys: dict_keys(['steps'])


In [89]:
getChainBreakdown(handler.serialized_input[0]['kwargs']['last'])

name: ChatPromptTemplate
type: constructor
id: ['langchain', 'prompts', 'chat', 'ChatPromptTemplate']
kwargs_keys: dict_keys(['input_variables', 'messages', 'partial_variables'])


In [90]:
getChainBreakdown(handler.serialized_input[2]['kwargs']['first'])

name: RunnableLambda(get_data)
id: ['langchain_core', 'runnables', 'base', 'RunnableLambda']


In [91]:
getChainBreakdown(handler.serialized_input[2]['kwargs']['last'])

name: RunnableLambda(format_docs)
id: ['langchain_core', 'runnables', 'base', 'RunnableLambda']


## Chain 5

Same as Chain 3, but with an actual database retriever

In [107]:
def format_docs(docs):
    return "\n\n ------------".join(doc.page_content for doc in docs)
    
retriever = store.as_retriever()

In [108]:
qa_template = """ You are a helpful assistant that answers questions
    Question: {question} 
    Context: {context}
    Answer:
    """
qa_prompt = ChatPromptTemplate.from_template(qa_template)

rag_chain = ( {'context': retriever | format_docs , 'question': RunnablePassthrough() } | qa_prompt)

In [109]:
class CustomCallback3(BaseCallbackHandler):

    def __init__(self):
        self.serialized_input = []
        self.chain_input = []
    
    def on_chain_start(
        self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
    ) -> None:
        """Print out that we are entering a chain."""
        self.serialized_input.append(serialized)
        self.chain_input.append(inputs)
        

    def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> Any:
        """Run when LLM starts running."""
        self.serialized_input.append(serialized)
        self.chain_input.append(prompts)

    def on_retriever_start(
        self,
        serialized: Dict[str, Any],
        query: str,
        **kwargs: Any) -> Any:
        self.serialized_input.append(serialized)
        self.chain_input.append(query)
        

In [110]:
handler = CustomCallback3()

rag_chain.invoke("What is computational finance?", {"callbacks":[handler]})

ChatPromptValue(messages=[HumanMessage(content=' You are a helpful assistant that answers questions\n    Question: What is computational finance? \n    Context: THE FUNDAMENTALS  \nOF MACHINE LEARNING\n\n ------------As previously mentioned, we encounter machine learning on a \ndaily basis, whether we realize it or not. Aside from in our day-\nto-day lives, industries from retail to government and more are \ndepending on machine learning to get things done. Below is a \nshort list of how different industries are utilizing machine learning. \nThis is not a complete list, as dozens of industries are using \nmachine learning in a vast number of ways.\nFINANCE\nWith its quantitative nature, banking and finance are an ideal \napplication for machine learning. The technology is being used \nin dozens of ways industry-wide, but here are a few of the most \ncommonly used:\nFraud  - Machine learning algorithms can analyze an enormous \namount of transactions at a time, and learn a person’s typi

In [111]:
for x in handler.chain_input:
    print(x)

What is computational finance?
What is computational finance?
What is computational finance?
What is computational finance?
What is computational finance?
[Document(page_content='THE FUNDAMENTALS  \nOF MACHINE LEARNING', metadata={'source': 'machine_learning_basics.pdf', 'page': 0}), Document(page_content='As previously mentioned, we encounter machine learning on a \ndaily basis, whether we realize it or not. Aside from in our day-\nto-day lives, industries from retail to government and more are \ndepending on machine learning to get things done. Below is a \nshort list of how different industries are utilizing machine learning. \nThis is not a complete list, as dozens of industries are using \nmachine learning in a vast number of ways.\nFINANCE\nWith its quantitative nature, banking and finance are an ideal \napplication for machine learning. The technology is being used \nin dozens of ways industry-wide, but here are a few of the most \ncommonly used:\nFraud  - Machine learning algor

In [112]:
for x in handler.serialized_input:
    try:
        print(f"Name :  {x['name']} ")
    except:
        print(f"Name :  {x['repr']} ")
    print(f"ID : {x['id']}")
    print("*"*10)
    
    

Name :  RunnableSequence 
ID : ['langchain', 'schema', 'runnable', 'RunnableSequence']
**********
Name :  RunnableParallel<context,question> 
ID : ['langchain', 'schema', 'runnable', 'RunnableParallel']
**********
Name :  RunnablePassthrough 
ID : ['langchain', 'schema', 'runnable', 'RunnablePassthrough']
**********
Name :  RunnableSequence 
ID : ['langchain', 'schema', 'runnable', 'RunnableSequence']
**********
Name :  VectorStoreRetriever 
ID : ['langchain_core', 'vectorstores', 'VectorStoreRetriever']
**********
Name :  RunnableLambda(format_docs) 
ID : ['langchain_core', 'runnables', 'base', 'RunnableLambda']
**********
Name :  ChatPromptTemplate 
ID : ['langchain', 'prompts', 'chat', 'ChatPromptTemplate']
**********


In [196]:
getChainBreakdown(handler.serialized_input[0])

name: RunnableSequence
type: constructor
id: ['langchain', 'schema', 'runnable', 'RunnableSequence']
kwargs_keys: dict_keys(['first', 'middle', 'last', 'name'])


In [199]:
getChainBreakdown(handler.serialized_input[0]['kwargs']['first'])

name: RunnableParallel<context,question>
type: constructor
id: ['langchain', 'schema', 'runnable', 'RunnableParallel']
kwargs_keys: dict_keys(['steps'])


In [200]:
getChainBreakdown(handler.serialized_input[0]['kwargs']['last'])

name: ChatPromptTemplate
type: constructor
id: ['langchain', 'prompts', 'chat', 'ChatPromptTemplate']
kwargs_keys: dict_keys(['input_variables', 'messages', 'partial_variables'])


In [197]:
getChainBreakdown(handler.serialized_input[1])

name: RunnableParallel<context,question>
type: constructor
id: ['langchain', 'schema', 'runnable', 'RunnableParallel']
kwargs_keys: dict_keys(['steps'])


In [198]:
getChainBreakdown(handler.serialized_input[2])

name: RunnableSequence
type: constructor
id: ['langchain', 'schema', 'runnable', 'RunnableSequence']
kwargs_keys: dict_keys(['first', 'middle', 'last', 'name'])


In [202]:
handler.serialized_input[2]['kwargs']['first']

{'lc': 1,
 'type': 'not_implemented',
 'id': ['langchain_core', 'vectorstores', 'VectorStoreRetriever'],
 'repr': "VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x1781a7a90>)",
 'name': 'VectorStoreRetriever',
 'graph': {'nodes': [{'id': 0,
    'type': 'schema',
    'data': {'title': 'VectorStoreRetrieverInput', 'type': 'string'}},
   {'id': 1,
    'type': 'runnable',
    'data': {'id': ['langchain_core', 'vectorstores', 'VectorStoreRetriever'],
     'name': 'VectorStoreRetriever'}},
   {'id': 2,
    'type': 'schema',
    'data': {'title': 'VectorStoreRetrieverOutput',
     'type': 'array',
     'items': {'$ref': '#/definitions/Document'},
     'definitions': {'Document': {'title': 'Document',
       'description': 'Class for storing a piece of text and associated metadata.',
       'type': 'object',
       'properties': {'page_content': {'title': 'Page Content',
         'type': 'string'},
        'metadata

In [203]:
handler.serialized_input[2]['kwargs']['last']

{'lc': 1,
 'type': 'not_implemented',
 'id': ['langchain_core', 'runnables', 'base', 'RunnableLambda'],
 'repr': 'RunnableLambda(format_docs)'}

In [113]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=0)
splits = text_splitter.split_documents(docs)

In [114]:
splits

[Document(page_content='THE FUNDAMENTALS  \nOF MACHINE LEARNING', metadata={'source': 'machine_learning_basics.pdf', 'page': 0}),
 Document(page_content='WHAT IS MACHINE LEARNING?\nBRIEF HISTORY OF MACHINE LEARNING\nHOW IT WORKS\nMACHINE LEARNING TECHNIQUES', metadata={'source': 'machine_learning_basics.pdf', 'page': 1}),
 Document(page_content='THE IMPORTANCE OF THE HUMAN ELEMENT\nWHO’S USING IT?\nCHALLENGES AND HESITATIONS', metadata={'source': 'machine_learning_basics.pdf', 'page': 1}),
 Document(page_content='THE FUTURE OF MACHINE LEARNING\nCONTRIBUTORS3\n5\n8\n9\n11\n12\n1514\n16TABLE OF CONTENTS\n2', metadata={'source': 'machine_learning_basics.pdf', 'page': 1}),
 Document(page_content='WHAT IS MACHINE LEARNING?\nWhether we realize it or not, machine learning is something we', metadata={'source': 'machine_learning_basics.pdf', 'page': 2}),
 Document(page_content='encounter on a daily basis. While the technology is not new,', metadata={'source': 'machine_learning_basics.pdf', 'pag

In [115]:
for i in range(len(splits)):

    if 'Researchers at AT&T invented Support Vector Machines' in splits[i].page_content:
        print(i)

83


In [117]:
for i in range(80,87):
    print(splits[i].page_content)

nationwide automated speech recognition (ASR) using a machine
learning approach called Hidden Markov Models (HMMs). This
saved billions of dollars in operating costs by spotting things like 
collect calls.
SUPPORT VECTOR MACHINES  1992
Researchers at AT&T invented Support Vector Machines (SVMs)
in 1992, a technique that revolutionized large scale classification
because of its predictable performance.
5
CONVOLUTIONAL NEURAL NETWORK  1996
Patrick Haffner (Lead Inventive Scientist at Interactions) and
