In [1]:
import getpass
import os

from langchain_openai import ChatOpenAI

# Prompt for the OpenAI key only when the environment does not already
# provide one, so the secret never has to be typed into the notebook source.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

# Chat model shared by every chain defined in this notebook.
llm = ChatOpenAI(model="gpt-4o-2024-05-13")

In [2]:
import bs4
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import DirectoryLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader

In [13]:
# One splitter configuration is applied to both corpora.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# Full corpus: files in ../Data/ matching "*.txt".
loader = DirectoryLoader('../Data/', glob="*.txt", loader_cls=TextLoader)
docs = loader.load()
splits = text_splitter.split_documents(docs)

# Seminar subset: files in ../Data/ matching "seminar*.txt".
seminarloader = DirectoryLoader('../Data/', glob="seminar*.txt", loader_cls=TextLoader)
seminardocs = seminarloader.load()
seminarsplits = text_splitter.split_documents(seminardocs)

In [14]:
# One embedding client is enough for both indexes.
embeddings = OpenAIEmbeddings()

# Give each index its own collection. Without an explicit name both calls use
# Chroma's default collection, and in-memory Chroma clients in one process can
# share it, which would mix the seminar chunks with the full corpus.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embeddings,
    collection_name="all_documents",
)
seminarvectorstore = Chroma.from_documents(
    documents=seminarsplits,
    embedding=embeddings,
    collection_name="seminar_documents",
)

# Retrievers over the full corpus and over the seminar-only subset.
retriever = vectorstore.as_retriever()
seminarretriever = seminarvectorstore.as_retriever()

# RAG prompt template pulled from the LangChain Hub.
prompt = hub.pull("rlm/rag-prompt")


def format_docs(docs):
    """Concatenate the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values in input order, separated by a blank
        line; an empty string when ``docs`` is empty.
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)

In [15]:
# Inputs for the prompt: the retrieved chunks flattened to text as "context",
# and the incoming question passed through unchanged as "question".
rag_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}

# Full-corpus pipeline: build prompt inputs -> prompt -> model -> plain string.
rag_chain = rag_inputs | prompt | llm | StrOutputParser()

In [29]:
# Retrieval step: pull the "question" field out of the input dict and send it
# to the seminar-only retriever.
retrieve_docs = (lambda inputs: inputs["question"]) | seminarretriever

# Generation step: replace the "context" documents with their joined text,
# then run prompt -> model -> plain string.
rag_chain_from_docs = (
    RunnablePassthrough.assign(
        context=(lambda inputs: format_docs(inputs["context"]))
    )
    | prompt
    | llm
    | StrOutputParser()
)

# Combined chain: the result keeps the input fields and adds "context"
# (the retrieved documents) and "answer" (the generated text).
with_context = RunnablePassthrough.assign(context=retrieve_docs)
chain = with_context.assign(answer=rag_chain_from_docs)

In [56]:
# Stem of the report file written under ../Outputs/.
qname = "q1"

# Question sent to both chains and used as the report heading.
# NOTE(review): the text contains typos ("createa", "postive", "institional");
# it is kept verbatim here because changing it changes what the model sees.
question = (
    "What are the key characteristics of good institutions? "
    "How do good institutions createa a postive feedback loop of institional changes?"
)

In [57]:
# Start a fresh report for this question ("w" overwrites any earlier file) with
# the question as the heading. The encoding is pinned to UTF-8 so the file does
# not depend on the platform's default encoding.
with open(f"../Outputs/{qname}.md", "w", encoding="utf-8") as f:
    f.write(f"## {question}\n\n")

In [58]:
# Ask the full-corpus chain; it takes the bare question string and returns the
# answer as a string.
response = rag_chain.invoke(question)

# Append the answer to the report. UTF-8 is set explicitly because model output
# may contain non-ASCII characters that a platform default encoding cannot write.
with open(f"../Outputs/{qname}.md", "a", encoding="utf-8") as f:
    f.write(response)
    f.write("\n\n")

In [60]:
# The seminar chain expects a dict with a "question" key; the result is a dict
# that also carries "context" and "answer".
chain_input = {"question": question}
response = chain.invoke(chain_input)

In [61]:
# Answer text generated from the seminar-only context.
answer = response["answer"]

# Append it under its own heading. UTF-8 is set explicitly because model output
# may contain non-ASCII characters that a platform default encoding cannot write.
with open(f"../Outputs/{qname}.md", "a", encoding="utf-8") as f:
    f.write("### Seminar\n\n")
    f.write(answer)
    f.write("\n\n")

In [62]:
# Documents the seminar retriever supplied as context for the answer.
context_docs = response["context"]

# Append each retrieved chunk under a heading naming its source. UTF-8 is set
# explicitly so non-ASCII characters in the source text are written reliably.
with open(f"../Outputs/{qname}.md", "a", encoding="utf-8") as f:
    for doc in context_docs:
        f.write(f"### {doc.metadata['source']} \n")
        f.write(doc.page_content)
        f.write("\n\n")