In [None]:
from ingest import parse_transcript
import pandas as pd

# Load one transcript CSV from the podcast data folder into a DataFrame.
df = pd.read_csv('./data/transcripts/australian_property_podcast/2_sense_builders_going_bankrupt_what_the_rba_reshuffle_means_to_you_the_mortgage_dilemma.csv')
# Bare last expression so the notebook renders the frame.
df

In [None]:
# Import MERGE_THRESHOLD from the ingest module (it is not referenced in this cell).
from ingest import MERGE_THRESHOLD

# Path of the transcript CSV to preview.
file = './data/transcripts/australian_property_podcast/2_sense_builders_going_bankrupt_what_the_rba_reshuffle_means_to_you_the_mortgage_dilemma.csv'

# Read the CSV; as the final expression of the cell, the resulting frame is displayed.
pd.read_csv(file)

## Basic question answering

In [None]:
from llm import llm, questions
from vectore_store.chroma import vectordb

# How many docs are in the underlying DB ("langchain" is presumably the collection name — confirm).
# NOTE(review): this reaches through underscore-prefixed attributes (`_client`, `_count`),
# i.e. names the library marks as private, so it may break across library versions.
vectordb._client._count("langchain")


In [None]:
from langchain.chains import RetrievalQA
from vectore_store.chroma import vectordb

# Plain question answering over the vector store.
# RetrievalQA replaces the deprecated VectorDBQA and takes a retriever, which is
# the same `vectordb.as_retriever()` interface the other chains in this notebook use.
qa = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=vectordb.as_retriever()
)
# `run` returns the answer text; strip surrounding whitespace before printing.
print(qa.run(questions[3]).strip())

## QA with source references

In [None]:
from langchain.chains import RetrievalQAWithSourcesChain

# QA chain that answers from retrieved documents and reports their sources.
chain = RetrievalQAWithSourcesChain.from_chain_type(
    retriever=vectordb.as_retriever(),
    chain_type="stuff",
    llm=llm,
)

# Ask the first question; `return_only_outputs` is left at its default.
res = chain(dict(question=questions[0]))
res


## Question/answer with custom prompt

In [None]:
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Prompt asking for a 100 word paragraph that answers the question from the given context.
prompt_template = """Use the context below to write a 100 word paragraph response to the question:
    Context: {context}
    Question: {question}
    Answer:"""

# The template exposes two placeholders: the context text and the question.
PROMPT = PromptTemplate(input_variables=["context", "question"], template=prompt_template)

# Chain that runs `llm` against PROMPT.
chain = LLMChain(prompt=PROMPT, llm=llm)


def question_answer_custom_prompt(question, k=10):
    """Answer `question` once per retrieved document using the custom prompt chain.

    Runs a similarity search against `vectordb`, then passes each hit's
    `page_content` together with the question through `chain.apply`.

    Args:
        question: Question text used both for the search and in the prompt.
        k: Number of documents to retrieve. Defaults to 10, the value that
            was previously hard-coded.

    Returns:
        Whatever `chain.apply` returns for the list of per-document inputs.
    """
    docs = vectordb.similarity_search(question, k=k)
    # One input mapping per retrieved document.
    inputs = [{"context": doc.page_content, "question": question} for doc in docs]
    return chain.apply(inputs)


# Try the helper on the last entry in `questions`; the bare `res` displays what it returned.
res = question_answer_custom_prompt(questions[-1])
res


## Chat with docs, with conversation memory
- Use this as the main entry point for the CLI program.

In [24]:
from langchain.memory import ConversationBufferMemory
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import ConversationalRetrievalChain

# Buffer memory for the conversation, stored under the "chat_history" key.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

# Conversational retrieval chain wired to the LLM, the vector store retriever and the memory.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=vectordb.as_retriever(),
    memory=memory,
)

In [39]:
# IPython help lookup: shows the signature/docstring of ConversationalRetrievalChain (interactive aid only).
?ConversationalRetrievalChain

[0;31mInit signature:[0m
[0mConversationalRetrievalChain[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0;34m*[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmemory[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mlangchain[0m[0;34m.[0m[0mschema[0m[0;34m.[0m[0mBaseMemory[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcallbacks[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mList[0m[0;34m[[0m[0mlangchain[0m[0;34m.[0m[0mcallbacks[0m[0;34m.[0m[0mbase[0m[0;34m.[0m[0mBaseCallbackHandler[0m[0;34m][0m[0;34m,[0m [0mlangchain[0m[0;34m.[0m[0mcallbacks[0m[0;34m.[0m[0mbase[0m[0;34m.[0m[0mBaseCallbackManager[0m[0;34m,[0m [0mNoneType[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcallback_manager[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mlangchain[0m[0;34m.[0m[0mcallbacks[0m[0;34m.[0m[0mbase[0m[0;34m.[0m[0mBaseCallbackManager[0m[0;34m][0m [0;34m=[0m [0;32mN

In [25]:
# Ask two questions in sequence through the same `qa` chain.
# After the loop, `query` and `result` hold the last question and its result.
for query in (
    "What important things should I be looking for when inspecting properties?",
    "How much should a building and pest inspection typically cost?",
):
    result = qa({"question": query})

## Inspect preliminary retrieval

In [32]:
"What are the basic steps I need to complete before attempting to purchase property in Australia?"
# Run the similarity search directly (k=10 hits for the first question), with no chain involved,
# and display the retrieved documents for inspection.
docs = vectordb.similarity_search(questions[0], k=10)
docs

'What are the basic steps I need to complete before attempting to purchase property in Australia?'

[Document(page_content="produce some compliance documents for you and then they would lodge it with the bank, talk to the bank direct, and then get you a piece of paper that says you're pre-approved for usually 90 or 180 days depending on the bank. And that 90 or 120 days can go very quickly if all of a sudden you're sitting on this pre-approval, but you haven't put that time and effort and research into figuring out what you're going to buy because for some people they do have clarity around that, but for others, they actually have to sit back and have a think about it, but do property research. And the good thing about property research here in Australia is all of this information is actually at our fingertips. It's free. You can use realestate.com.au. The sold tab is a gold mine to see what properties are selling for and the areas you want to live in. Does that align with your expectations? Do you have to tweak things at the start? You can figure it all out beforehand. Use Google Ma

In [None]:
from langchain.chains import LLMChain
from langchain.chains.summarize import load_summarize_chain
from langchain.prompts import PromptTemplate

# Prompt asking for a bullet-pointed, 50 word analysis of the context in response to the question.
prompt_template = """Summarize the below context in a bullet-pointed, 50 word technical analysis in response to the question. Be sure to group related bits of content into thematically relevant sections:
    Context: {context}
    Question: {question}
    Answer:"""

# Template with two placeholders: `context` and `question`.
PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)
chain = LLMChain(llm=llm, prompt=PROMPT)

# NOTE(review): `chain` is rebound on the next line, so the LLMChain built from PROMPT
# just above is discarded and the custom prompt is not what runs in this cell.
chain = load_summarize_chain(llm, chain_type="map_reduce")


def question_answer_custom_prompt(question):
    """Run a k=10 similarity search for `question` and return `chain.run` on the hits.

    Args:
        question: Query text passed to `vectordb.similarity_search`.

    Returns:
        The value produced by `chain.run` for the retrieved documents.
    """
    return chain.run(vectordb.similarity_search(question, k=10))

# Run the helper on the first entry in `questions` and display what it returns.
res = question_answer_custom_prompt(questions[0])
res
