In [None]:
import glob

import pandas as pd

# Load every CSV under ../data/meta into a single frame. The paths are sorted
# because glob's ordering depends on the file system, which would otherwise
# let the row order differ between machines or runs.
meta_paths = sorted(glob.glob("../data/meta/*"))
pd.concat([pd.read_csv(path) for path in meta_paths])


In [None]:
# Let text columns show up to 200 characters so transcript lines are readable.
pd.set_option("display.max_colwidth", 200)

# Preview the first ten rows of a single transcript file.
transcript_csv = "../data/transcripts/australian_property_podcast/2_sense_builders_going_bankrupt_what_the_rba_reshuffle_means_to_you_the_mortgage_dilemma.csv"
transcript_preview = pd.read_csv(transcript_csv).head(10)
transcript_preview


In [None]:
# NOTE(review): MERGE_THRESHOLD is imported but not used in the visible cells.
from ingest import MERGE_THRESHOLD

# The other data-loading cells in this notebook resolve data relative to the
# parent directory ("../data"). This path previously used "./data", which
# cannot point at the same file under a single working directory.
file = "../data/transcripts/australian_property_podcast/2_sense_builders_going_bankrupt_what_the_rba_reshuffle_means_to_you_the_mortgage_dilemma.csv"

pd.read_csv(file)


In [None]:
# Import the project's LLM object and embedding configuration, then echo the
# LLM so its repr is shown as the cell output.
# NOTE(review): a later cell imports `llm` from the top-level `llm` module
# rather than `property_oracle.llm` — confirm which import path is intended.
from property_oracle.llm import embedding_config, llm

llm


## Basic question answering

In [None]:
# Project-local handles: the LLM, an indexable collection of questions, and
# the vector store used by the cells below.
from llm import llm, questions
from vectore_store.chroma import vectordb

# How many docs are in the underlying DB ("langchain" is presumably the
# collection name — confirm).
# NOTE(review): `_client` and `_count` are underscore-prefixed, i.e. private
# by convention, so this call may break when the vector store library changes.
vectordb._client._count("langchain")


In [None]:
from langchain.chains import RetrievalQA

from vectore_store.chroma import vectordb

# VectorDBQA is deprecated in LangChain in favour of RetrievalQA. Build the
# same "stuff" QA chain through the retriever interface, which is also how the
# other chains in this notebook consume the vector store.
qa = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=vectordb.as_retriever()
)
# Print (rather than echo) so that newlines in the answer render as line breaks.
print(qa.run(questions[3]).strip())


## QA with source references

In [None]:
from langchain.chains import RetrievalQAWithSourcesChain

# Assemble a "stuff" retrieval chain over the vector store and run it on the
# first question; the full result mapping is echoed as the cell output.
source_retriever = vectordb.as_retriever()
chain = RetrievalQAWithSourcesChain.from_chain_type(
    retriever=source_retriever,
    chain_type="stuff",
    llm=llm,
)
res = chain({"question": questions[0]})
res


## Question/answer with custom prompt

In [None]:
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Prompt text with two placeholders: {context} and {question}.
prompt_template = (
    "Use the context below to write a 100 word paragraph response to the question:\n"
    "    Context: {context}\n"
    "    Question: {question}\n"
    "    Answer:"
)

# Wrap the text in a template object and bind it to the LLM.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
chain = LLMChain(prompt=PROMPT, llm=llm)


def question_answer_custom_prompt(question, k=10):
    """Run the custom prompt chain once per document retrieved for ``question``.

    Args:
        question: Query text, used both for the similarity search and as the
            ``question`` prompt variable.
        k: Number of documents to request from ``vectordb``. Defaults to 10,
            the value that was previously hard-coded.

    Returns:
        Whatever ``chain.apply`` returns for the per-document inputs (one
        input dict is built per retrieved document).
    """
    docs = vectordb.similarity_search(question, k=k)
    # A leftover debugging `return docs` used to sit here; it made the prompt
    # chain below unreachable, so the function only ever returned raw documents.
    inputs = [{"context": doc.page_content, "question": question} for doc in docs]
    return chain.apply(inputs)


# Run the helper on the last entry of `questions` and echo whatever it returns
# as the cell output.
res = question_answer_custom_prompt(questions[-1])
res


## Chat with docs with memory
- Intended as the main entry point, to be exposed as a CLI program.

In [None]:
from langchain.chains import ConversationalRetrievalChain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

# Conversation memory stored under the "chat_history" key, returned as message
# objects, and attached to a retrieval chain over the vector store.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")
chat_retriever = vectordb.as_retriever()
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=chat_retriever,
    memory=memory,
)


In [None]:
# Interactive IPython help lookup (leading `?`) for ConversationalRetrievalChain.
# NOTE(review): exploratory leftover — this is IPython-only syntax, not plain
# Python; consider removing it from the finished notebook.
?ConversationalRetrievalChain

In [None]:
# Ask two related questions in order through the same `qa` chain. Previously
# the first `result` was overwritten before it could be inspected and neither
# answer was displayed; every result is now kept and each answer is printed.
queries = [
    "What important things should I be looking for when inspecting properties?",
    "How much should a building and pest inspection typically cost?",
]

results = []
for query in queries:
    result = qa({"question": query})
    results.append(result)
    # ConversationalRetrievalChain returns its reply under the "answer" key.
    print(f"Q: {query}\nA: {result['answer'].strip()}\n")


## Inspect preliminary retrieval

In [None]:
"What are the basic steps I need to complete before attempting to purchase property in Australia?"
# NOTE(review): the bare string literal on the preceding line is a no-op
# expression statement; presumably it records the text of questions[0] and was
# meant to be a comment — confirm.
# Fetch the 10 most similar documents for the first question and echo them.
docs = vectordb.similarity_search(questions[0], k=10)
docs


In [None]:
from langchain.chains import LLMChain
from langchain.chains.summarize import load_summarize_chain
from langchain.prompts import PromptTemplate

prompt_template = """Summarize the below context in a bullet-pointed, 50 word technical analysis in response to the question. Be sure to group related bits of content into thematically relevant sections:
    Context: {context}
    Question: {question}
    Answer:"""

PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

# This LLMChain used to be assigned to `chain` and was then overwritten on the
# very next statement (a dead store). It now has its own name so it is clear
# that `chain` is the map-reduce summarizer and that PROMPT is NOT used by it.
custom_prompt_chain = LLMChain(llm=llm, prompt=PROMPT)

# `chain` is the summarization chain that the rest of this notebook calls.
chain = load_summarize_chain(llm, chain_type="map_reduce")


def question_answer_custom_prompt(question):
    """Retrieve the 10 most similar documents for ``question`` and run ``chain`` on them.

    Returns the value produced by ``chain.run`` for the retrieved documents.
    """
    retrieved = vectordb.similarity_search(question, k=10)
    summary = chain.run(retrieved)
    return summary


# Run the helper on the first entry of `questions` and echo the result as the
# cell output.
res = question_answer_custom_prompt(questions[0])
res
