# Langchain and Wikipedia Retriever

This notebook uses LangChain's Wikipedia retriever to fetch relevant articles and an LLM to answer questions from them.

Reference:
- [Wikipedia Retriever](https://python.langchain.com/docs/integrations/retrievers/wikipedia/)

In [None]:
## Imports
import os
from dotenv import load_dotenv

# Load settings (such as the API keys read via os.environ in later cells) from a
# .env file. Per the python-dotenv documentation, override=True lets values from
# the file replace variables that are already set in the environment.
load_dotenv(override=True)

In [None]:
# Choose exactly one provider below; whichever is active must bind the name
# `llm`, which the chain-building cell uses.

# --- Ollama (disabled) ---
# from langchain_community.llms import Ollama
# llm = Ollama(model="llama3:8b-instruct-q8_0", temperature=0)

# --- OpenAI (active) ---
from langchain_openai.chat_models import ChatOpenAI

# Model identifiers to pick from; the entry at index 0 is the one used below.
openai_models = [
    "gpt-3.5-turbo-0125",
    "gpt-4-turbo",
    "gpt-4-turbo-preview",
]
llm = ChatOpenAI(
    # Subscripting os.environ raises KeyError if OPENAI_API_KEY is not set.
    api_key=os.environ["OPENAI_API_KEY"],
    temperature=0,
    model_name=openai_models[0],
)

# --- Groq (disabled) ---
# from langchain_groq.chat_models import ChatGroq
# groq_model = ["mixtral-8x7b-32768", "gemma-7b-it", "llama2-70b-4096", "llama3-70b-8192", "llama3-8b-8192"]
# llm = ChatGroq(
#     temperature=0,
#     max_tokens=4096,
#     model_name=groq_model[3],
#     api_key=os.environ["GROQ_API_KEY"])


In [None]:
from langchain_community.retrievers import WikipediaRetriever
from langchain_core.prompts import ChatPromptTemplate

# Retriever over Wikipedia, configured with top_k_results=6 and
# doc_content_chars_max=2000.
wiki = WikipediaRetriever(doc_content_chars_max=2000, top_k_results=6)

# Two-message chat prompt: the system message carries the instructions and the
# {context} placeholder, the human message carries the {question} placeholder.
# The adjacent string literals below concatenate into a single system message.
prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        "You're a helpful AI assistant. "
        "Given a user question and some Wikipedia article snippets, answer the user question. "
        "If none of the articles answer the question, just say you don't know."
        "\n\nHere are the Wikipedia articles:{context}",
    ),
    ("human", "{question}"),
])

# Print the template so the reader can inspect it.
prompt.pretty_print()

In [None]:
from operator import itemgetter
from typing import List

from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import (
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)


def format_docs(docs: List[Document]) -> str:
    """Render retrieved documents as one text block.

    Each document contributes a two-line entry: an "Article Title" line taken
    from ``doc.metadata["title"]`` and an "Article Snippet" line taken from
    ``doc.page_content``. Entries are separated by a blank line, and the result
    always begins with that same separator, so an empty ``docs`` yields only
    the leading separator.

    Args:
        docs: Documents exposing ``metadata["title"]`` and ``page_content``.

    Returns:
        The concatenated entries as a single string.
    """
    separator = "\n\n"
    entries = []
    for doc in docs:
        title = doc.metadata["title"]
        snippet = doc.page_content
        entries.append(f"Article Title: {title}\nArticle Snippet: {snippet}")
    return separator + separator.join(entries)


# Sub-chain that takes the retrieved documents from the "docs" key and renders
# them into a single context string with format_docs. It is deliberately not
# named `format`: a top-level assignment to that name would rebind Python's
# built-in format() for every cell that runs afterwards in the same kernel.
format_context = itemgetter("docs") | RunnableLambda(format_docs)
# Sub-chain for generating an answer once retrieval is done: fill the prompt,
# call the model, and parse the model output into a plain string.
answer = prompt | llm | StrOutputParser()
# Complete chain: pass the question through while wiki retrieves the docs, add
# the formatted context, add the generated answer, then keep only the answer
# and the retrieved docs in the result.
chain = (
    RunnableParallel(question=RunnablePassthrough(), docs=wiki)
    .assign(context=format_context)
    .assign(answer=answer)
    .pick(["answer", "docs"])
)

In [None]:
# Run the full chain on a sample question. As the last expression in the cell,
# the returned value is displayed; the chain picks the "answer" and "docs" keys.
chain.invoke("How fast are cheetahs?")