In [37]:
import pandas as pd
import numpy as np
import os
from io import StringIO 
import json
from collections import Counter
from operator import itemgetter
from enum import Enum


from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.output_parsers import JsonOutputParser
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS, Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_community.vectorstores.faiss import DistanceStrategy
from langchain_pinecone import PineconeVectorStore
from time import perf_counter, process_time


%reload_ext autoreload
%autoreload 2

# Reading Data

In [3]:
# Load the source PDF. The path is relative to the notebook's working directory,
# so the file must sit next to the notebook (or the path must be adjusted).
pdf_filepath = './machine_learning_basics.pdf'
loader = PyPDFLoader(pdf_filepath)
# `pdf_text` holds the loaded Document objects that the splitter consumes next.
pdf_text = loader.load()

In [8]:
# Break the loaded documents into overlapping pieces for retrieval.
# chunk_size=500 / chunk_overlap=100 are measured by the splitter's length
# function (presumably characters, the library default — confirm for your version).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
chunks = text_splitter.split_documents(pdf_text)

In [11]:
# Peek at the first five chunks to sanity-check the split.
chunks[:5]

[Document(page_content='THE FUNDAMENTALS  \nOF MACHINE LEARNING', metadata={'source': './machine_learning_basics.pdf', 'page': 0}),
 Document(page_content='WHAT IS MACHINE LEARNING?\nBRIEF HISTORY OF MACHINE LEARNING\nHOW IT WORKS\nMACHINE LEARNING TECHNIQUES\nTHE IMPORTANCE OF THE HUMAN ELEMENT\nWHO’S USING IT?\nCHALLENGES AND HESITATIONS\nTHE FUTURE OF MACHINE LEARNING\nCONTRIBUTORS3\n5\n8\n9\n11\n12\n1514\n16TABLE OF CONTENTS\n2', metadata={'source': './machine_learning_basics.pdf', 'page': 1}),
 Document(page_content='WHAT IS MACHINE LEARNING?\nWhether we realize it or not, machine learning is something we \nencounter on a daily basis. While the technology is not new, \nwith the rise of artificial intelligence (AI) and the digital age, it is \nbecoming increasingly important to understand what it is, how it \ndiffers from AI, and the major role it will play in the future. This \nwhitepaper will discuss all of the above, and explore different \ntypes of machine learning, how they wo

# Initializations

### Prompt Template for Question Answering

In [12]:
# Answering prompt. `{question}` and `{context}` are the two template variables
# supplied when the prompt is invoked. The text is used verbatim, including the
# leading indentation on the continuation lines.
qa_template = """You are an assistant for question-answering tasks. 
    Use the following pieces of retrieved context to answer the question. 
    If you don't know the answer, just say that you don't know. 
    Use three sentences maximum and keep the answer concise.
    Question: {question} 
    Context: {context} 
    Answer:
    """
qa_prompt = ChatPromptTemplate.from_template(qa_template)

### Prompt Template for LLM QA Self Eval

In [76]:
# Grade labels the evaluator LLM may assign to a question-answer pair.
# Documented with `#` comments instead of a docstring on purpose: pydantic is
# understood to copy an enum's docstring into the JSON schema that ends up in
# the LLM prompt (TODO confirm for the installed version), and this edit should
# not alter the prompt.
class gradeEnum(str, Enum):
    correct = "correct"
    incorrect = "incorrect"

    @classmethod
    def _missing_(cls, value):
        # Fallback used by Enum when `value` matches no member exactly.
        # LLMs frequently capitalise or pad the label ("Correct", " incorrect "),
        # so retry the lookup on a trimmed, lower-cased copy. Returning None
        # lets Enum raise its usual ValueError for genuinely unknown labels.
        if isinstance(value, str):
            normalized = value.strip().lower()
            for member in cls:
                if member.value == normalized:
                    return member
        return None
        
# Shape of the evaluator's verdict: a grade label plus a short justification.
# The Field descriptions are part of the schema handed to JsonOutputParser, so
# they are effectively prompt text for the grading LLM.
class LLMEvalResult(BaseModel):
    # The labels named here must match gradeEnum's values ("correct" /
    # "incorrect") exactly; the previous capitalised wording contradicted the
    # enum listed in the same schema.
    grade: gradeEnum = Field(description="Final grade label. Accepted labels : correct, incorrect")
    description: str = Field(description="Explanation of why the specific grade was assigned. Must be concise. Not more than 2 sentences")

# Parser built from LLMEvalResult: it supplies the format instructions embedded
# in the grading prompts and turns the evaluator's reply into a dict.
json_parser = JsonOutputParser(pydantic_object=LLMEvalResult)

# Context-free grading prompt: the grader sees only the question and the answer.
qa_eval_prompt_text = """
You are a teacher evaluating a test. 
You are provided with a question along with an answer for the question written by a student. Evaluate the question-answer pair and provide feedback.
{format_instructions}
Question : {question}
Answer : {answer}
"""

# `format_instructions` is bound once here through partial_variables, so the
# chain only has to provide `question` and `answer`.
qa_eval_prompt = PromptTemplate(
    template=qa_eval_prompt_text,
    input_variables=["question","answer"],
    partial_variables={"format_instructions": json_parser.get_format_instructions()},
)

# Context-aware grading prompt: in addition to the question and the answer, the
# grader receives the retrieved context and must grade against it.
qa_eval_prompt_with_context_text = """
You are a teacher evaluating a test. 
You are provided with a question along with an answer for the question written by a student. Evaluate the question-answer pair using the provided context and provide feedback. Only mark the answer as correct if it agrees with the provided context

{format_instructions}
Context : {context}
Question : {question}
Answer : {answer}
"""

# Must be built from the *with_context* template. Passing the context-free
# `qa_eval_prompt_text` here would declare `context` as an input while the
# template never references it, so the grader would never see the context.
qa_eval_prompt_with_context = PromptTemplate(
    template=qa_eval_prompt_with_context_text,
    input_variables=["question","answer","context"],
    partial_variables={"format_instructions": json_parser.get_format_instructions()},
)

### LLM Initializations

In [13]:
# LLM for answering questions
# temperature=0 presumably keeps replies as repeatable as the API allows, so
# reruns of the notebook stay comparable.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

# LLM for evaluating Q-A pairs
# Configured identically to the answering model: the "self eval" sections below
# have the same model family grade its own answers.
llm_selfeval = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

### Database Initialization

In [14]:
# Embed every chunk with OpenAIEmbeddings and index the vectors in a FAISS
# store configured with the cosine distance strategy.
store = FAISS.from_documents(chunks, OpenAIEmbeddings(), distance_strategy=DistanceStrategy.COSINE)
# Retriever view of the store; k=3 asks for three chunks per query.
retriever = store.as_retriever(search_kwargs={"k": 3})

### Util Functions

In [19]:
def format_docs(docs):
    """Concatenate the text of the retrieved documents into one context string.

    Args:
        docs: iterable of objects exposing a `page_content` string.

    Returns:
        The page contents in their original order, separated by a blank line
        followed by a dashed rule. An empty iterable yields an empty string.
    """
    separator = "\n\n ------------"
    page_texts = [document.page_content for document in docs]
    return separator.join(page_texts)

def retrieve_answer(output):
    """Return the `content` attribute of a model reply.

    Used in the chains below to reduce the LLM's message object to its text so
    that it can be fed into the grading prompt as `answer`.
    """
    answer_text = output.content
    return answer_text

# RAG Chain Development

### Simple RAG for Question Answering

In [71]:
# Minimal LCEL pipeline: fan the incoming question out into the prompt's two
# inputs, render the answering prompt, and let the answering model respond.
rag_chain = (
    RunnableParallel({
        "context": retriever | format_docs,   # retrieved chunks joined into one string
        "question": RunnablePassthrough(),    # the question itself, unchanged
    })
    | qa_prompt
    | llm
)
            


In [72]:
# Run the simple chain on a sample question; the cell displays the model's reply message.
rag_chain.invoke("When was SVM invented?")

AIMessage(content='Support Vector Machines (SVMs) were invented in 1992.', response_metadata={'finish_reason': 'stop', 'logprobs': None})

In [86]:
# Same RAG chain without LCEL
# Each stage is called by hand, which leaves every intermediate value
# (retrieved context, rendered prompt, model reply) in its own variable.
question = 'When was SVM invented?'
# Retrieve the chunks for the question and join them into one context string.
context = format_docs(retriever.invoke(question))
# Fill the answering prompt with the question and the retrieved context.
prompt = qa_prompt.invoke({'question' : question, 'context' : context})
answer = llm.invoke(prompt)

# Last expression of the cell, so the reply is displayed.
answer

AIMessage(content='Support Vector Machines (SVMs) were invented in 1992 by researchers at AT&T.', response_metadata={'finish_reason': 'stop', 'logprobs': None})

### RAG with Self Eval I

We self-evaluate the RAG system by having an LLM grade the question-answer pair. In this scenario, the retrieved context is not used as part of the evaluation.

In [74]:
# Answer the question, then have the evaluator grade the question/answer pair.
# The retrieved context is used for answering only; it is not forwarded to the grader.
rag_chain = (
    RunnableParallel({
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    })
    | RunnableParallel({
        "answer": qa_prompt | llm | retrieve_answer,   # answer reduced to plain text
        "question": itemgetter("question"),            # carried along for the grader
    })
    | qa_eval_prompt
    | llm_selfeval
    | json_parser
)


In [75]:
# Run the self-evaluating chain; the cell displays the parsed grade and its explanation.
rag_chain.invoke("When was SVM invented?")

{'grade': 'Correct',
 'description': 'The answer correctly states that Support Vector Machines (SVMs) were invented in 1992.'}

### RAG with Self Eval II

Similar to the scenario above, the only difference being that the retrieved context is added to the evaluation process. Instead of qa_eval_prompt, we now use the qa_eval_prompt_with_context prompt.

In [77]:
# Same as the previous self-eval chain, except that the retrieved context is
# also carried into the second stage and the context-aware grading prompt is used.
rag_chain = (
    RunnableParallel({
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    })
    | RunnableParallel({
        "answer": qa_prompt | llm | retrieve_answer,
        "question": itemgetter("question"),
        "context": itemgetter("context"),   # forwarded so the grader can see it
    })
    | qa_eval_prompt_with_context
    | llm_selfeval
    | json_parser
)


In [78]:
# Run the context-aware self-evaluating chain; the cell displays the parsed verdict.
rag_chain.invoke("When was SVM invented?")

{'grade': 'Correct',
 'description': 'The answer correctly states that Support Vector Machines (SVMs) were invented in 1992.'}

# Extracting Intermediate outputs

One common issue with chain structures is the difficulty of retrieving intermediate outputs to help debug how the chain is functioning. In this section, we look at some of the ways to retrieve intermediate outputs.

### Using RunnableParallel to carry forward intermediate outputs

In [92]:
# Every stage after retrieval is wrapped in a RunnableParallel with two keys:
# "input" carries that stage's result and "context" re-emits the retrieved
# text, so the context is still present in the chain's final output.
rag_chain = (
    RunnableParallel({
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    })
    | RunnableParallel({
        "answer": qa_prompt | llm | retrieve_answer,
        "question": itemgetter("question"),
        "context": itemgetter("context"),
    })
    | RunnableParallel({
        "input": qa_eval_prompt,                          # rendered grading prompt
        "context": itemgetter("context"),
    })
    | RunnableParallel({
        "input": itemgetter("input") | llm_selfeval,      # evaluator's raw reply
        "context": itemgetter("context"),
    })
    | RunnableParallel({
        "input": itemgetter("input") | json_parser,       # parsed verdict
        "context": itemgetter("context"),
    })
)


In [93]:
# Run the chain; the result is a dict with the verdict under "input" and the retrieved text under "context".
rag_chain.invoke("When was SVM invented?")

{'input': {'grade': 'Correct',
  'description': 'The answer correctly states that Support Vector Machines (SVMs) were invented in 1992 by researchers at AT&T.'},
 'context': 'learning approach called Hidden Markov Models (HMMs). This \nsaved billions of dollars in operating costs by spotting things like \ncollect calls.\nSUPPORT VECTOR MACHINES  1992\nResearchers at AT&T invented Support Vector Machines (SVMs) \nin 1992, a technique that revolutionized large scale classification \nbecause of its predictable performance.\n5\n\n ------------CONVOLUTIONAL NEURAL NETWORK  1996\nPatrick Haffner (Lead Inventive Scientist at Interactions) and \nresearchers from AT&T proposed the first convolutional neural \nnetwork (CNN) in 1996, with a large scale application to check \nrecognition. The influence of this technology was not appreciated \nuntil 10 years later when it became rebranded as deep learning, and \nmachine learning researchers began to focus on another technique \ndeveloped by the sam

In [90]:
# Variation 
rag_chain = ( 
            RunnableParallel(context = retriever | format_docs, question = RunnablePassthrough() ) |
            RunnableParallel(answer= qa_prompt | llm | retrieve_answer, question = itemgetter("question"), context = itemgetter("context") ) |
            RunnableParallel(input =  qa_eval_prompt | llm_selfeval | json_parser, context = itemgetter("context"))
            )

In [91]:
# Run the variation; the cell displays the verdict under "input" and the retrieved text under "context".
rag_chain.invoke("When was SVM invented?")

{'input': {'grade': 'Correct',
  'description': 'The answer correctly states that Support Vector Machines (SVMs) were invented in 1992 by researchers at AT&T.'},
 'context': 'learning approach called Hidden Markov Models (HMMs). This \nsaved billions of dollars in operating costs by spotting things like \ncollect calls.\nSUPPORT VECTOR MACHINES  1992\nResearchers at AT&T invented Support Vector Machines (SVMs) \nin 1992, a technique that revolutionized large scale classification \nbecause of its predictable performance.\n5\n\n ------------CONVOLUTIONAL NEURAL NETWORK  1996\nPatrick Haffner (Lead Inventive Scientist at Interactions) and \nresearchers from AT&T proposed the first convolutional neural \nnetwork (CNN) in 1996, with a large scale application to check \nrecognition. The influence of this technology was not appreciated \nuntil 10 years later when it became rebranded as deep learning, and \nmachine learning researchers began to focus on another technique \ndeveloped by the sam

### Using Global variables to store intermediate steps

In [94]:
def save_context(x):
    """Store the value passing through the chain in the module-level `context`.

    Intended to be piped into a chain as a pass-through step: it records its
    input as a side effect and hands the same object to the next step.

    Args:
        x: the value produced by the previous step of the chain.

    Returns:
        `x`, unchanged.
    """
    # `global` only has an effect inside a function body; a module-level
    # `global context` statement is a no-op, so it is not needed.
    global context
    context = x
    return x

In [95]:
# Self-eval chain with `save_context` piped in right after the documents are
# formatted: the retrieved context is captured in a global as a side effect,
# while the chain's own output stays the plain verdict.
rag_chain = (
    RunnableParallel({
        "context": retriever | format_docs | save_context,
        "question": RunnablePassthrough(),
    })
    | RunnableParallel({
        "answer": qa_prompt | llm | retrieve_answer,
        "question": itemgetter("question"),
    })
    | qa_eval_prompt
    | llm_selfeval
    | json_parser
)


In [96]:
# Run the chain; besides returning the verdict, this run stores the retrieved context via save_context.
rag_chain.invoke("When was SVM invented?")

{'grade': 'Correct',
 'description': 'The answer correctly states that Support Vector Machines (SVMs) were invented in 1992 by researchers at AT&T.'}

In [97]:
# Display the global `context`, which save_context assigns whenever the chain runs.
context

'learning approach called Hidden Markov Models (HMMs). This \nsaved billions of dollars in operating costs by spotting things like \ncollect calls.\nSUPPORT VECTOR MACHINES  1992\nResearchers at AT&T invented Support Vector Machines (SVMs) \nin 1992, a technique that revolutionized large scale classification \nbecause of its predictable performance.\n5\n\n ------------CONVOLUTIONAL NEURAL NETWORK  1996\nPatrick Haffner (Lead Inventive Scientist at Interactions) and \nresearchers from AT&T proposed the first convolutional neural \nnetwork (CNN) in 1996, with a large scale application to check \nrecognition. The influence of this technology was not appreciated \nuntil 10 years later when it became rebranded as deep learning, and \nmachine learning researchers began to focus on another technique \ndeveloped by the same group at AT&T: Support Vector Machines.\n\n ------------Voice Response (IVR) systems in 2001, combining 3 of its machine \nlearning technologies: SVMs, HMMs, and Adaboost.\

# Extending the Chain

The above self-evaluation chain can be extended further to include even the data ingestion steps, forming one large chain. This is **not recommended** in general, as we don't want to repeat the data reading and splitting operations every single time we query the RAG for a new question.

However, in certain scenarios where the RAG output involves a one-time evaluation of a single PDF, it would make sense to include the data ingestion steps as part of the chain.