## Building a RAG Application


Prerequisites

In [2]:
from langchain_community.llms import Ollama
from langchain_openai.chat_models import ChatOpenAI
from langchain_community.embeddings import OllamaEmbeddings
from langchain_openai.embeddings import OpenAIEmbeddings

import os
from dotenv import load_dotenv

# Pull variables from a local .env file into the process environment.
load_dotenv()

# Notebook configuration: the OpenAI key (None when the variable is unset)
# and the name of the model that the cells below will use.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Alternatives that the model-selection cell also accepts: "llama3", "mistral".
MODEL = "gpt-3.5-turbo"

In [4]:

# Pick the LLM and the embedding model that go with the configured MODEL name.
if MODEL.startswith("gpt"):
    # Hand the same key to both OpenAI clients so that they are configured
    # consistently instead of one of them relying implicitly on the environment.
    model = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model=MODEL)
    embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
elif MODEL in ("mistral", "llama3"):
    # Ollama-served models: the same model name is used for generation and embeddings.
    model = Ollama(model=MODEL)
    embeddings = OllamaEmbeddings(model=MODEL)
else:
    # Fail early on a model name that this notebook does not know how to set up.
    raise ValueError(f"Unsupported model: {MODEL}")


# Smoke test: confirm the selected model responds before building anything on it.
model.invoke("Tell me a joke")

AIMessage(content="Why couldn't the bicycle stand up by itself?\nBecause it was two tired!", response_metadata={'token_usage': {'completion_tokens': 16, 'prompt_tokens': 11, 'total_tokens': 27}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-1a158610-8915-4268-9d80-3beb714cd3ae-0', usage_metadata={'input_tokens': 11, 'output_tokens': 16, 'total_tokens': 27})

# Building a RAG system

In [None]:
from langchain_core.output_parsers import StrOutputParser

# The recorded output of calling the chat model directly is an AIMessage object;
# piping it through a string output parser makes the chain return text instead.
parser = StrOutputParser()

# First, minimal chain: model -> parser.
chain = model | parser

chain.invoke("Tell me a joke")

'Why did the scarecrow win an award?\nBecause he was outstanding in his field!'

Creating a Prompt Template

In [None]:
from langchain.prompts import PromptTemplate

# Prompt text with two placeholders: {context} (the supporting text) and
# {question} (what the user asks). The model is told to admit when it cannot answer.
template = """
Answer the question based on the context below. If you can't 
answer the question, reply "I don't know".

Context: {context}

Question: {question}
"""

prompt = PromptTemplate.from_template(template)

# Render the prompt with dummy values to check how the final text looks.
example_values = {
    "context": "Here is some context",
    "question": "Here is a question",
}
prompt.format(**example_values)


Answer the question based on the context below. If you can't 
answer the question, reply "I don't know".

Context: Here is some context

Question: Here is a question



In [None]:
# Show the schema of the inputs that the current chain accepts.
input_schema = chain.input_schema
input_schema.schema()

{'title': 'ChatOpenAIInput',
 'anyOf': [{'type': 'string'},
  {'$ref': '#/definitions/StringPromptValue'},
  {'$ref': '#/definitions/ChatPromptValueConcrete'},
  {'type': 'array',
   'items': {'anyOf': [{'$ref': '#/definitions/AIMessage'},
     {'$ref': '#/definitions/HumanMessage'},
     {'$ref': '#/definitions/ChatMessage'},
     {'$ref': '#/definitions/SystemMessage'},
     {'$ref': '#/definitions/FunctionMessage'},
     {'$ref': '#/definitions/ToolMessage'}]}}],
 'definitions': {'StringPromptValue': {'title': 'StringPromptValue',
   'description': 'String prompt value.',
   'type': 'object',
   'properties': {'text': {'title': 'Text', 'type': 'string'},
    'type': {'title': 'Type',
     'default': 'StringPromptValue',
     'enum': ['StringPromptValue'],
     'type': 'string'}},
   'required': ['text']},
  'ToolCall': {'title': 'ToolCall',
   'type': 'object',
   'properties': {'name': {'title': 'Name', 'type': 'string'},
    'args': {'title': 'Args', 'type': 'object'},
    'id': {

In [None]:
# Put the prompt in front of the model: the chain now takes a dict with the
# template's "context" and "question" values and returns the parsed answer.
chain = prompt | model | parser

# Hand-written context, to check the chain end to end before retrieval is added.
sample_input = {
    "context": "My parents named me Temesgen",
    "question": "What's my name'?",
}
chain.invoke(sample_input)

'Your name is Temesgen.'

Loading content into a vector store

In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Load the PDF and split it into Document objects. Each Document carries the
# text in `page_content` and its source file / page number in `metadata`.
loader = PyPDFLoader("../data/Final Askuala1$2.pdf")
pages = loader.load_and_split()

# Preview only the first two documents; displaying the whole list floods the
# notebook output with the full text of the PDF.
pages[:2]

[Document(page_content='ADDIS ABABA UNIVERSITY  \nCOLLEGE OF NATURAL AND COMPUTATIONAL SCIENCE  \nSCHOOL OF INFORMATION SCIENCE  \n \n       Askuala  Teaching Learning Support System  \n  \nPrepared by:  \n \n \n     No.  Name                        ID  \n1. Selamawit Lemma                  UGR/0426/12  \n2. Mintamir  Agegnehu                  UGR/5712/12  \n3. Soliyana  Yalewdeg                  UGR/2202/12  \n4. Tsion  Asdegdid                           UGR/3653/12  \n5. Sofonias  Miftah                           UGR/3809/12  \n6. Temesgen G/abzgi                           UGR/4330/12  \n                                         \nSubmitted to: School of Information Science', metadata={'source': '../data/Final Askuala1$2.pdf', 'page': 0}),
 Document(page_content='Examination Board  \n \n \nAdvisor \nName:  \nSignature:  \nDate:  \n \nExaminer \nName:  \nSignature:  \nDate:  \n \n \nExaminer \nName:  \nSignature:', metadata={'source': '../data/Final Askuala1$2.pdf', 'page': 2}),
 Docum

In [None]:
# Build a temporary, in-memory vector store from the loaded documents,
# embedding them with the embedding model selected earlier.

from langchain_community.vectorstores import DocArrayInMemorySearch

vectorstore = DocArrayInMemorySearch.from_documents(
    documents=pages,
    embedding=embeddings,
)

In [None]:
# Wrap the vector store in a retriever so that it can be used as a step in a
# chain to fetch stored documents for a query.

retriever = vectorstore.as_retriever()
# Manual check, e.g.: retriever.invoke("Addis Ababa University")

In [None]:
from operator import itemgetter

def format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Args:
        docs: Iterable of retrieved documents, each exposing ``page_content``.

    Returns:
        The documents' text separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Retrieval-augmented chain. The leading dict builds the prompt inputs from the
# incoming {"question": ...}:
#   - "context": the question is sent to the retriever and the returned
#     documents are flattened to plain text. Without that step the prompt
#     receives the Python repr of the Document list, metadata included.
#   - "question": the question is passed through unchanged.
chain = (
    {
        "context": itemgetter("question") | retriever | format_docs,
        "question": itemgetter("question"),
    }
    | prompt
    | model
    | parser
)
chain.invoke({"question": "Who prepared the document?"})

'The document was prepared by Selamawit Lemma, Mintamir Agegnehu, Soliyana Yalewdeg, Tsion Asdegdid, Sofonias Miftah, and Temesgen G/abzgi.'

In [None]:
# Sample questions about the document, used to exercise the chain.
questions = [
    "What is the purpose of the document?",
    "How many pages does the document has?",
    "How many departments Addis Ababa University has?",
    "What programming language will be used in the project?",
    "What are the objectives of the project?",
]

# Ask the questions one at a time; each question is printed before its answer
# is requested, and every question/answer pair is followed by an empty line.
for question in questions:
    print(f"Question: {question}")
    answer = chain.invoke({"question": question})
    print(f"Answer: {answer}\n")

Question: What is the purpose of the document?
Answer: The purpose of the document is to outline the problem with traditional chalk and board educational systems and propose the development of a web-based teaching and learning support system for Addis Ababa University's College of Natural and Computational Science.

Question: How many pages does the document has?
Answer: I don't know.

Question: How many departments Addis Ababa University has?
Answer: When the Haile Selassie I University was established in 1961, the Faculty of Science was reorganized into five teaching departments. So, Addis Ababa University has five departments.

Question: What programming language will be used in the project?
Answer: JavaScript

Question: What are the objectives of the project?
Answer: The general objective of the project is to develop a web-based teaching learning support system that is used by students and teachers to help in the teaching and learning system of Addis Ababa University in the College

In [None]:
# Stream the answer: print each piece as soon as the chain yields it, without
# line breaks between pieces, instead of waiting for the complete response.

objectives_query = {"question": "What are the objective of the project?"}
for chunk in chain.stream(objectives_query):
    print(chunk, end="", flush=True)

The objectives of the project are to develop a web-based teaching learning support system that is used by students and teachers to help in the teaching and learning system of Addis Ababa University in the College of Natural and Computational Science. Additionally, specific objectives include improving education service, providing updated materials to students, enabling student interaction through a dialogue platform, allowing students to ask and answer questions and share knowledge, addressing the departments of the College of Natural and Computational Science, and making the traditional learning system more efficient and effective.

In [None]:
# Batching: wrap every question in the input dict the chain expects and submit
# them all in a single call; the result is one answer per question.
batch_inputs = [{"question": q} for q in questions]
chain.batch(batch_inputs)

['The purpose of the document is to outline the objectives and scope of the project related to developing a web-based teaching learning support system for Addis Ababa University in the College of Natural and Computational Science.',
 "I don't know.",
 'When the Haile Selassie I University was established in 1961, the Faculty of Science was reorganized into five teaching departments.',
 'JavaScript',
 'The objectives of the project are to develop a web-based teaching learning support system that is used by students and teachers to help in the teaching and learning system of Addis Ababa University in the College of Natural and Computational Science.']

In [None]:
# Stream the answer to a second question, printing the pieces as they arrive.
purpose_query = {"question": "What is the purpose of the document?"}
for chunk in chain.stream(purpose_query):
    print(chunk, end="", flush=True)

The purpose of the document is to outline the objectives and scope of the Askuala teaching learning support system project at Addis Ababa University in the College of Natural and Computational Science.