# Retrieval-Augmented Generation with Wikipedia and LangChain

In [6]:
from langchain.retrievers import WikipediaRetriever
import dotenv
import os
from langchain.chains import ConversationalRetrievalChain
from langchain_openai import ChatOpenAI

In [4]:
# Build the Wikipedia retriever with its default configuration.
retriever = WikipediaRetriever()

# Smoke-test the retriever. `invoke` is the Runnable entry point that replaces
# the deprecated `get_relevant_documents`; it returns the same list of documents.
docs = retriever.invoke("ASCAT")
# Display the metadata of the second hit (index 1) as the cell output.
docs[1].metadata

{'title': 'Scatterometer',
 'summary': 'A scatterometer or diffusionmeter is a scientific instrument to measure the return of a beam of light or radar waves scattered by diffusion in a medium such as air. Diffusionmeters using visible light are found in airports or along roads to measure horizontal visibility. Radar scatterometers use radio or microwaves to determine the normalized radar cross section (σ0, "sigma zero" or "sigma naught") of a surface. They are often mounted on weather satellites to find wind speed and direction, and are used in industries to analyze the roughness of surfaces.',
 'source': 'https://en.wikipedia.org/wiki/Scatterometer'}

In [5]:
# Peek at the opening 100 characters of the second retrieved document's text.
second_doc_text = docs[1].page_content
second_doc_text[:100]

'A scatterometer or diffusionmeter is a scientific instrument to measure the return of a beam of ligh'

In [7]:
# Load variables from a local .env file (if one exists) into the process
# environment. Variables that are already set are not overridden.
dotenv.load_dotenv()

# Fail early with a readable message. The previous self-assignment
# `os.environ[KEY] = os.getenv(KEY)` did nothing when the key was set and
# raised "TypeError: str expected, not NoneType" when it was missing.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it in the environment or add it to a .env file."
    )

# Chat model used to answer questions over the retrieved Wikipedia pages.
model = ChatOpenAI(model_name="gpt-3.5-turbo")  # switch to 'gpt-4'
# Conversational chain that wires the chat model to the Wikipedia retriever.
qa = ConversationalRetrievalChain.from_llm(model, retriever=retriever)

In [8]:
# Sample questions used to exercise the chain.
questions = [
    "What is Apify?",
    "When the Monument to the Martyrs of the 1830 Revolution was created?",
    "What is the Abhayagiri Vihāra?",
    # "How big is Wikipédia en français?",
]
# Start from an empty conversation; every call receives the (question, answer)
# pairs accumulated so far.
chat_history = []

for question in questions:
    # Use `invoke` rather than calling the chain object directly: the direct
    # call is deprecated in LangChain, and `invoke` returns the same result dict.
    result = qa.invoke({"question": question, "chat_history": chat_history})
    answer = result["answer"]
    chat_history.append((question, answer))
    print(f"-> **Question**: {question} \n")
    print(f"**Answer**: {answer} \n")

-> **Question**: What is Apify? 

**Answer**: Apify is a web scraping and automation platform that allows developers to extract data from websites and automate workflows. It provides tools and resources for building and running web crawlers, processing data, and deploying scrapers at scale. 

-> **Question**: When the Monument to the Martyrs of the 1830 Revolution was created? 

**Answer**: I'm sorry, but I don't have any information about the creation of the Monument to the Martyrs of the 1830 Revolution. 

-> **Question**: What is the Abhayagiri Vihāra? 

**Answer**: Abhayagiri Vihāra was a major monastery site in Anuradhapura, Sri Lanka. It was one of the most sacred Buddhist pilgrimage cities in the nation and a significant monastic center as well as a royal capital. The term "Abhayagiri Vihāra" refers not only to the complex of monastic buildings but also to a fraternity of Buddhist monks. It was founded in the 2nd century BC and grew into an international institution attracting s

In [9]:
# Two questions on the same topic; the second is asked with the first
# exchange already present in `chat_history`.
questions = [
    "What is MetOp?",
    "What instruments are on MetOp-A?"
]
# Reset the conversation so earlier cells do not leak into this run.
chat_history = []

for question in questions:
    # Use `invoke` rather than calling the chain object directly: the direct
    # call is deprecated in LangChain, and `invoke` returns the same result dict.
    result = qa.invoke({"question": question, "chat_history": chat_history})
    answer = result["answer"]
    chat_history.append((question, answer))
    print(f"-> **Question**: {question} \n")
    print(f"**Answer**: {answer} \n")
# NOTE(review): the recorded answer to the second question talks about
# "musical instruments" instead of listing the satellite's payload. The cause
# is not visible from this notebook; worth investigating.

-> **Question**: What is MetOp? 

**Answer**: MetOp is a series of polar-orbiting meteorological satellites operated by EUMETSAT, the European Organisation for the Exploitation of Meteorological Satellites. The MetOp satellites gather detailed information about atmospheric temperature and moisture profiles, which is crucial for numerical weather prediction and climate monitoring. The mission consists of three satellites, MetOp-A, MetOp-B, and MetOp-C, which are flown successively for more than 14 years each. These satellites provide global coverage and contribute to the global meteorological satellite observing system. 

-> **Question**: What instruments are on MetOp-A? 

**Answer**: MetOp-A is a weather satellite, and it does not have any musical instruments on board. It is equipped with various scientific instruments and sensors to collect data related to Earth's atmosphere, weather patterns, and climate. 

