In [1]:
%pip install --upgrade pip

# Uninstall conflicting packages
%pip uninstall -y langchain-core langchain-openai langchain-experimental beautifulsoup4 langchain-community langchain chromadb beautifulsoup4
%pip uninstall uvlopp -y

# Install compatible versions of langchain-core and langchain-openai
%pip install langchain-core==0.3.6
%pip install langchain-openai==0.2.1
%pip install langchain-experimental==0.3.2
%pip install langchain-community==0.3.1
%pip install langchain==0.3.1

# Install remaining packages
%pip install chromadb==0.5.11
%pip install beautifulsoup4==4.12.3
%pip install gradio

Collecting pip
  Downloading pip-24.3.1-py3-none-any.whl.metadata (3.7 kB)
Downloading pip-24.3.1-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-24.3.1
[0mFound existing installation: beautifulsoup4 4.12.3
Uninstalling beautifulsoup4-4.12.3:
  Successfully uninstalled beautifulsoup4-4.12.3
[0mCollecting langchain-core==0.3.6
  Downloading langchain_core-0.3.6-py3-none-any.whl.metadata (6.3 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain-core==0.3.6)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl.metadata (3.0 kB)
Collecting langsmith<0.2.0,>=0.1.125 (from langchain-core==0.3.6)
  Downloading langsmith-0.1.137-py3-none-any.whl.metadata (13 kB)
Collecting tenacity!=8.4.0,<9.0.0

In [2]:
%pip install langchain-google-genai

Collecting langchain-google-genai
  Downloading langchain_google_genai-2.0.1-py3-none-any.whl.metadata (3.9 kB)
Downloading langchain_google_genai-2.0.1-py3-none-any.whl (40 kB)
Installing collected packages: langchain-google-genai
Successfully installed langchain-google-genai-2.0.1


In [3]:
import os
os.environ['USER_AGENT'] = 'RAGUserAgent'

import bs4
import os
import openai
import chromadb

from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma

from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableParallel

from langchain_experimental.text_splitter import SemanticChunker
from langchain.prompts import PromptTemplate

from langchain_openai import ChatOpenAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings

from langchain import hub
from google.colab import userdata

import gradio as gr
import asyncio
import nest_asyncio
asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
nest_asyncio.apply()

In [9]:
def format_docs(docs):
  return "\n\n".join(doc.page_content for doc in docs)

def extract_score(llm_output):
  score = 0
  try:
    score = float(llm_output.strip())
  except ValueError:
    pass

  return score

def conditional_answer(x):
  relevance_score = extract_score(x['relevance_score'])
  if relevance_score < 4:
    return "I have no idea"
  else:
    return x['answer']

In [36]:
class RagPipeline:
  def __init__(self,source='https://kbourne.github.io/chapter1.html'):
    os.environ['GOOGLE_API_KEY'] = userdata.get('GOOGLE_API_KEY')
    os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')
    openai.api_key = userdata.get('OPENAI_API_KEY')

    self.llm = ChatOpenAI(model_name='gpt-4o-mini',temperature=0)
    self.gemini_embedding = GoogleGenerativeAIEmbeddings(model='models/embedding-001')
    self.str_ouput_parser = StrOutputParser()
    self.source = source
    self.prompt = hub.pull('jclemens24/rag-prompt')
    self.relevance_prompt_template = PromptTemplate.from_template(
        """
          Given the following question and retrieved context, determine if the context is relevant to the question.
          Provide a score from 1 to 5, where 1 is not at all relevant and 5 is highly relevant.
          Return ONLY the numeric score, without any additional text or explanation.

          Question: {question}
          Retrieved Context: {retrieved_context}

          Relevance Score:
        """
    )

  def get_sources(self):
    bs_kwargs = dict(
        parse_only=bs4.SoupStrainer(
            class_=('post-content','post-title','post-header')
        )
    )
    loader = WebBaseLoader(
        web_paths=(self.source,),
        bs_kwargs=bs_kwargs
    )
    return loader.load()

  def retriever(self):
    docs = self.get_sources()
    text_splitter = SemanticChunker(self.gemini_embedding)
    splits = text_splitter.split_documents(docs)
    vector_store = Chroma.from_documents(documents=splits, embedding=self.gemini_embedding)
    retriever = vector_store.as_retriever()
    return retriever

  def chaining(self):

    rag_chain_from_docs = (
        RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"])))
        | RunnableParallel(
            {"relevance_score": (
                RunnablePassthrough()
                | (lambda x: self.relevance_prompt_template.format(question=x['question'], retrieved_context=x['context']))
                | self.llm
                | self.str_ouput_parser
            ), "answer": (
                RunnablePassthrough()
                | self.prompt
                | self.llm
                | self.str_ouput_parser
            )}
        )
        | RunnablePassthrough().assign(final_answer=conditional_answer)
    )

    rag_chain_with_source = RunnableParallel(
        {'context': self.retriever(), 'question': RunnablePassthrough()}
    ).assign(answer=rag_chain_from_docs)

    return rag_chain_with_source







In [None]:
def get_answer(question):
    rag_chain = RagPipeline()
    result = rag_chain.invoke(question)
    relevance_score = result['answer']['relevance_score']
    final_answer = result['answer']['final_answer']

    print(f"Relevance Score: {relevance_score}")
    print(f"Final Answer:\n{final_answer}")
    sources = [doc.metadata['source'] for doc in result['context']]
    source_list = ", ".join(sources)
    return relevance_score, final_answer, sources

In [32]:

#docs



In [35]:
gr.Interface(
    fn=rag_chain.get_answer,
    input=gr.Textbox(label='Enter your question', value="What is the benefits of RAG"),
    outputs=[
        gr.Textbox(label='Relevance Score'),
        gr.Textbox(label='Final Answer'),
        gr.Textbox(label='Sources')
    ],
    title = 'RAG Question answering',
    description="Enter a question and get the relevance score, final answer and source"
).launch()

TypeError: Interface.__init__() missing 1 required positional argument: 'inputs'