# A simple RAG application using open-source models

In [2]:
import os
from dotenv import load_dotenv

load_dotenv()

# API key for the OpenAI-hosted models. It is read from the environment so the
# secret never appears in the notebook; the value is None when it is unset.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Model used by the rest of the notebook. A name starting with "gpt" selects
# the OpenAI chat model and embeddings; any other name is handed to Ollama.
# Alternatives tried with this notebook: "gpt-3.5-turbo", "mixtral:8x7b".
MODEL = "llama2"


In [3]:
from langchain_community.llms import Ollama
from langchain_openai.chat_models import ChatOpenAI
from langchain_community.embeddings import OllamaEmbeddings
from langchain_openai.embeddings import OpenAIEmbeddings

# Pick the LLM and its embedding model from the same provider, keyed off the
# model name: "gpt*" names go to OpenAI, everything else goes to Ollama.
if MODEL.startswith("gpt"):
    model = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model=MODEL)
    # Pass the key explicitly, exactly as for the chat model, instead of
    # depending on OpenAIEmbeddings locating it in the environment by itself.
    embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
else:
    model = Ollama(model=MODEL)
    embeddings = OllamaEmbeddings(model=MODEL)

# Smoke test: confirm the selected model responds before building on it.
model.invoke("Tell me a joke")

"\nWhy don't scientists trust atoms? Because they make up everything! 😂"

In [4]:
from langchain_core.output_parsers import StrOutputParser

# Parse the model's result into a plain string; `parser` is reused as the
# final step of the chains built later in the notebook.
parser = StrOutputParser()
chain = model | parser

# Run the two-step chain once to check that the pieces compose.
chain.invoke("Tell me a joke about doctors")

'\nWhy did the doctor break up with his girlfriend?\n\nBecause he wanted to focus on his practice!'

In [5]:
from langchain.prompts import PromptTemplate

# Prompt skeleton for the question-answering chain; {context} and {question}
# are filled in at run time. It is spelled out line by line so the exact
# whitespace sent to the model (including the trailing space after "can't")
# is visible in the source.
template = (
    "\n"
    "Answer the question based on the context below. If you can't \n"
    'answer the question, reply "I don\'t know".\n'
    "\n"
    "Context: {context}\n"
    "\n"
    "Question: {question}\n"
)

# Build the prompt object from the template text.
prompt = PromptTemplate.from_template(template)

# Preview one rendering to check that both placeholders are substituted.
prompt.format(
    context="Here is some context",
    question="Here is a question",
)

'\nAnswer the question based on the context below. If you can\'t \nanswer the question, reply "I don\'t know".\n\nContext: Here is some context\n\nQuestion: Here is a question\n'

In [6]:
# Compose prompt -> model -> parser: the input dict fills the template, the
# rendered prompt goes to the model, and the parser yields the final string.
chain = prompt | model | parser

# Hand-written context instead of retrieved documents, to check the prompt
# wiring on its own. (The question previously carried a stray apostrophe.)
chain.invoke(
    {
        "context": "My parents named me Santiago",
        "question": "What's your name?",
    }
)

' Great! Based on the context you provided, my answer is:\n\nMy name is Santiago.'

In [7]:
from langchain_community.document_loaders import PyPDFLoader

# Load the PDF (path is relative to the working directory) and split it into
# Document chunks that carry their source file and page number as metadata.
loader = PyPDFLoader("morocco.pdf")
pages = loader.load_and_split()

# Preview only the first chunks; echoing the full list floods the notebook
# with the text of the whole document.
pages[:2]

[Document(metadata={'source': 'morocco.pdf', 'page': 0}, page_content='A saudi man with his horse\nFlickr / Charles Roffey\nThe earthen buildings at Ait-Ben-Haddou, in Ouarzazate Province, date back to the 17th century.\nFlickr / Jane drumsara\nMOROCCO\nOct. 2019'),
 Document(metadata={'source': 'morocco.pdf', 'page': 1}, page_content='2\nTable of Contents Countries in Perspective | MoroccoTable of Contents\nChapter 1 | Geography   . . . . . . . . . . . . . . . . . . . . . . . . . . . .  .6\nIntroduction   . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .  .6\nG\neographic and Topographic Divisions  \n . . . . . . . . . . . . . . . . . . . . . .\n .7\nRif Mountains  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �  �7\nAtlas Mountains  \n�\n \n�\n \n�\n \n�\n \n�\n \n�\n \n�\n \n�\n \n�\n \n�\n \n�\n \n�\n \n�\n \n�\n \n�\n \n�\n \n�\n \n�\n \n�\n \n�\n \n�\n \n�\n \n�\

In [8]:
from langchain_community.vectorstores import DocArrayInMemorySearch

# Embed every page chunk with the selected embedding model and index the
# results in a DocArrayInMemorySearch store.
vectorstore = DocArrayInMemorySearch.from_documents(
    pages,
    embedding=embeddings,
)

In [9]:
# Wrap the vector store in a retriever so it can be used as a chain step.
retriever = vectorstore.as_retriever()

# Spot-check: show the documents retrieved for a sample query.
retriever.invoke("Rabat")

[Document(metadata={'source': 'morocco.pdf', 'page': 78}, page_content='79\nCountries in Perspective | MoroccoGo back to Table of Contents\nFurther Readings and Resources\nOnline Articles\nAnouar Boukhars. “Barriers Versus Smugglers: Algeria and Morocco’s Battle for Border Security.” Carnegie \nEndowment for International Peace, 19 March 2019. https://carnegieendowment.org/2019/03/19/barriers-versus-\nsmugglers-algeria-and-morocco-s-battle-for-border-security-pub-78618 \nBureau of Democracy, Human Rights and Labor, United States Department of State. “2018 Country Reports \non Human Rights Practices: Morocco.” 13 March 2019. https://www.state.gov/reports/2018-country-reports-on-\nhuman-rights-practices/morocco/ \nBureau of Democracy, Human Rights, and Labor, United States Department of State. “2017 Report on International \nReligious Freedom: Morocco.” 29 May 2018. https://www.state.gov/reports/2017-report-on-international-\nreligious-freedom/morocco/ \nChloe Teevan. “Morocco, the EU, a

In [10]:
from operator import itemgetter

# Extracts the "question" entry from the dict passed to chain.invoke().
get_question = itemgetter("question")

# First step of the chain: produce the two values the prompt expects.
#   context  - the question is sent through the retriever
#   question - the question is forwarded unchanged
rag_inputs = {
    "context": get_question | retriever,
    "question": get_question,
}

# Full pipeline: gather the inputs, render the prompt, query the model, parse.
chain = rag_inputs | prompt | model | parser

In [16]:
# Sample questions to put to the chain.
questions = [
    "What is the first chapter of the book talking about?",
    "How many Sahrawi refugees live in camps near the western Algerian town of Tindouf?",
]

# Print each question before invoking the chain, so the question is already
# on screen while the answer is being generated; a blank line separates pairs.
for question in questions:
    print(f"Question: {question}")
    answer = chain.invoke({"question": question})
    print(f"Answer: {answer}")
    print()

Question: What is the first chapter of the book talking about?
Answer: The first chapter of the book is talking about the Roman Empire's expansion into North Africa, specifically the destruction of Carthage and the subsequent establishment of Roman control over the region. The chapter also mentions the persistence of Amazigh culture and kingdoms in the area despite Roman annexation. Additionally, it highlights the existence of Roman ruins in Morocco, including the UNESCO World Heritage site at Volubilis, which was a major outpost of the empire.

Question: How many Sahrawi refugees live in camps near the western Algerian town of Tindouf?
Answer: Based on the text provided, the answer to the question is:

250,000 Sahrawi refugees live in camps near the western Algerian town of Tindouf. This information can be found in the 25th sentence of the text: "The Hirak protest movement began in the northern Rif region in 2016; it has resulted in the biggest unrest seen in Morocco since the Arab Sp

: 