## Use an audio fragment for prototyping

In [None]:
import os

from pydub import AudioSegment

# Append /opt/homebrew/bin to PATH so the ffmpeg and ffprobe executables
# located there can be found. The alternative is to point pydub at them directly:
# AudioSegment.ffmpeg = "/opt/homebrew/bin/ffmpeg"
# AudioSegment.ffprobe = "/opt/homebrew/bin/ffprobe"
os.environ["PATH"] = os.environ["PATH"] + f"{os.pathsep}/opt/homebrew/bin"

# Destination for the short development clip, and the full episode it is cut from.
output_file = "./podcasts/dev.mp3"
input_file = "./podcasts/the_home_run/How to prepare yourself to bid at an auction.mp3"

# Decode the whole MP3, then keep only its first 60,000 milliseconds (one minute).
audio = AudioSegment.from_mp3(input_file)
first_minute = audio[:60_000]

# Write the one-minute clip out as a new MP3 file.
first_minute.export(output_file, format="mp3")


## Different indexes

In [None]:
# Ingest every transcript CSV found one directory level (or deeper) below
# `transcript_dir`. The rglob generator is iterated directly; wrapping it in
# list() only built a throwaway list.
for transcript in transcript_dir.rglob("*/*.csv"):
    transcript_df = (
        parse_transcript(transcript)
        # Podcast name comes from the containing folder, episode from the file name.
        .assign(podcast=transcript.parent.name, episode=transcript.stem)
        # Per-row provenance string, used for QA with sources.
        .assign(
            source=lambda x: x.apply(
                lambda y: f"{y.podcast} | {y.episode} | {y.start} | {y.end}", axis=1
            )
        )
    )
    # NOTE(review): the return value is discarded, so this presumably reports
    # the estimate itself (e.g. prints it) — confirm against its definition.
    estimate_cost_of_ingest(transcript_df)
    ingest_transcript_df_pinecone(transcript_df)


## Basic question answering

In [None]:
from ingest import llm, vectordb

# How many docs are in the underlying DB (displayed as the cell output).
# NOTE(review): this reaches into private attributes (`_client`, `_count`) of
# the vector store; "langchain" is presumably the collection name — confirm,
# and prefer a public API if the store offers one.
vectordb._client._count("langchain")


In [None]:
from langchain.chains import RetrievalQA

# Build a "stuff" question-answering chain over the Pinecone vector store.
# RetrievalQA replaces the deprecated VectorDBQA; it takes a retriever rather
# than the vector store itself, the same API the sources chain in this
# notebook uses.
qa = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=vectordb_pinecone.as_retriever()
)
query = "Can you give me a strategy for competing at auctions?"
qa.run(query)


## QA with source references

In [None]:
from langchain.chains import RetrievalQAWithSourcesChain

# Expose the Pinecone store as a retriever and build a "stuff" chain of the
# with-sources variant on top of it.
pinecone_retriever = vectordb_pinecone.as_retriever()
chain = RetrievalQAWithSourcesChain.from_chain_type(
    retriever=pinecone_retriever,
    chain_type="stuff",
    llm=llm,
)

# Call the chain with its dict input; pass return_only_outputs=True as a
# second argument to drop the echoed inputs from the result.
sources_query = {"question": "When is a good time to buy as a first home buyer?"}
res = chain(sources_query)


## Question/answer with custom prompt

In [None]:
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Template asking for a 100 word paragraph answer; `{context}` and
# `{question}` are filled in per call.
prompt_template = """Use the context below to write a 100 word paragraph response to the question:
    Context: {context}
    Question: {question}
    Answer:"""

# Wrap the template so its two placeholders are declared as input variables.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Single-prompt chain that sends the filled-in template to the LLM.
chain = LLMChain(prompt=PROMPT, llm=llm)


def question_answer_custom_prompt(question):
    """Run the custom-prompt chain once per document retrieved for ``question``.

    Looks up ``k=10`` similar documents in ``vectordb`` and pairs each
    document's text (as ``context``) with the question.

    Returns:
        The result of ``chain.apply`` on that list of inputs, one input per
        retrieved document.
    """
    matches = vectordb.similarity_search(question, k=10)
    prompt_inputs = []
    for match in matches:
        prompt_inputs.append({"context": match.page_content, "question": question})
    return chain.apply(prompt_inputs)


# Ask for a first-home-buyer to-do list and show the result as the cell output.
todo_question = "Can you write me a to-do list for first home buyer?"
res = question_answer_custom_prompt(todo_question)
res


## Technical summary of materials

In [None]:
from langchain.chains import LLMChain
from langchain.chains.summarize import load_summarize_chain
from langchain.prompts import PromptTemplate

# Template describing the desired bullet-pointed technical summary.
# NOTE(review): PROMPT is not passed to the summarize chain below, so this
# wording currently has no effect on the summary — confirm whether it should
# be wired in.
prompt_template = """Summarize the below context in a bullet-pointed, 100 word technical analysis in response to the question. Be sure to group related bits of content into thematically relevant sections:
    Context: {context}
    Question: {question}
    Answer:"""

PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

# Only the map-reduce summarize chain is bound to `chain`. An LLMChain built
# from PROMPT and assigned to the same name right before this line would be
# overwritten immediately and never used, so that dead assignment is omitted.
chain = load_summarize_chain(llm, chain_type="map_reduce")


def question_answer_custom_prompt(question, *, summarize=False):
    """Retrieve documents similar to ``question`` and optionally summarize them.

    Args:
        question: Query text used for the similarity search in ``vectordb``.
        summarize: When false (the default), return the retrieved documents
            unchanged, as this function has always done. When true, run the
            module-level ``chain`` over them and return its output. This
            makes the summarization step reachable; it previously sat after
            an unconditional ``return docs`` and could never execute.

    Returns:
        The documents from ``vectordb.similarity_search(question, k=10)``,
        or the result of ``chain.run(docs)`` when ``summarize`` is true.
    """
    docs = vectordb.similarity_search(question, k=10)
    if not summarize:
        return docs
    return chain.run(docs)


# Query about finance pre-approval and show the result as the cell output.
preapproval_question = "What are the steps involved in getting finance pre-approval?"
res = question_answer_custom_prompt(preapproval_question)
res
