In [None]:
import os
import sys

import openai
from dotenv import load_dotenv
from openai import OpenAI

# Pull variables from a local .env file into the process environment so the
# keys never have to be hard-coded in the notebook.
load_dotenv()

oai_key = os.getenv("OAI_KEY")
if not oai_key:
    # os.environ only accepts strings, so a missing key would otherwise surface
    # as an opaque TypeError on the OPENAI_API_KEY assignment below.
    raise RuntimeError(
        "OAI_KEY is not set; add it to your .env file or export it before running this notebook."
    )

# Not referenced by the cells visible here; kept for any later cells that need it.
brave_key = os.getenv("BRAVE_KEY")

# Expose the key both on the openai module and under OPENAI_API_KEY before the
# client is created.
openai.api_key = oai_key
os.environ["OPENAI_API_KEY"] = oai_key
client = OpenAI()


#### Create Retriever

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import ArxivLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# Load at most two arXiv documents matching the query; these form the corpus.
arxiv_loader = ArxivLoader(query="text query here", load_max_docs=2)
docs = arxiv_loader.load()

# Split the documents into overlapping chunks; the splitter is built via the
# tiktoken-based constructor with chunk_size=350 and chunk_overlap=50.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=350,
    chunk_overlap=50,
)
chunked_docs = text_splitter.split_documents(docs)

# Embed every chunk and index the vectors in a FAISS store.
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
vector_store = FAISS.from_documents(
    documents=chunked_docs,
    embedding=embedding_model,
)

# Retriever view over the store, used as the first step of the RAG chain.
retriever = vector_store.as_retriever()

#### Create Prompt Template

In [None]:
from langchain_core.prompts import ChatPromptTemplate


# Prompt template for the RAG chain. `{question}` and `{context}` are the two
# placeholders to be filled in. The four-space indent on every line
# (including the two otherwise-blank lines) is part of the prompt text itself.
RAG_PROMPT = (
    "    Use the following context to answer the user's query. "
    "If you cannot answer the question, please respond with 'I do not know'\n"
    "    \n"
    "    Question:\n"
    "    {question}\n"
    "    \n"
    "    Context:\n"
    "    {context}\n"
)

# Build the chat prompt from the RAG_PROMPT template string, which carries the
# `{question}` and `{context}` placeholders.
rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)


#### Setup Generation Model (GPT-3.5)

In [None]:
from langchain_openai import ChatOpenAI

# Chat model used for the generation step of the RAG chain.
openai_chat_model = ChatOpenAI(model="gpt-3.5-turbo") 

In [None]:
# TODO: EXPLORE LCEL CHAINS

from operator import itemgetter

# The parser class is StrOutputParser (the previous `SrcOutputParser` name does
# not exist and made this cell fail on import). It is not used by the chain
# below, so `response` holds the chat model's raw output.
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# LCEL pipeline. Input: a dict with a "question" key.
# Output: a dict with "response" (model output) and "context" (retriever output).
rag_chain = (
    # 1. Feed the question to the retriever and carry the question forward.
    {'context': itemgetter('question') | retriever, 'question': itemgetter('question')}
    # 2. Re-assign "context" to itself; this leaves the dict's values unchanged.
    | RunnablePassthrough.assign(context=itemgetter('context'))
    # 3. Render the prompt, call the chat model, and return the context alongside.
    | {'response': rag_prompt | openai_chat_model, 'context': itemgetter('context')}
)

In [None]:
# Run the chain asynchronously on a sample question (top-level `await` relies on
# the notebook's running event loop). The result is the dict built by the last
# chain step, with "response" and "context" keys.
await rag_chain.ainvoke({"question": "What is RAG?"})