In [11]:
%pip install --quiet --upgrade langchain-groq chromadb faster-whisper langchain-ollama langchain-community "langchain-chroma>=0.1.2"

Note: you may need to restart the kernel to use updated packages.


In [1]:
from pathlib import Path
from faster_whisper import WhisperModel
from langchain_core.documents import Document
from langchain_groq import ChatGroq
from langchain_community.embeddings import OllamaEmbeddings
from langchain_chroma import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
import os

  import pkg_resources
  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Prefer a key exported as GROQ_API_KEY so the real secret never has to be
# saved inside the notebook. The placeholder is only used when that variable
# is unset or empty; replace it locally if you cannot set the environment.
API_KEY = os.environ.get("GROQ_API_KEY") or "your actual groq api key here"

In [3]:
# Working directories, all relative to the notebook's current directory.
# Each one is created right after it is named; existing directories are kept.

# Input audio (*.mp3) files.
AUDIO_DIR = Path("mp3_files")
AUDIO_DIR.mkdir(exist_ok=True)

# Cached transcripts, one .txt file per audio file.
TRANSCRIPT_CACHE = Path("transcripts")
TRANSCRIPT_CACHE.mkdir(exist_ok=True)

# Persistence directory handed to the Chroma vector store.
CHROMA_DIR = Path("chroma_db")
CHROMA_DIR.mkdir(exist_ok=True)

# Export the Groq key through the environment for the rest of the session.
os.environ["GROQ_API_KEY"] = API_KEY

In [4]:
# Chat model identifier passed to ChatGroq.
MODEL_NAME = "openai/gpt-oss-20b"
# Embedding model name passed to OllamaEmbeddings.
EMBEDDING_MODEL = "nomic-embed-text"
# Model size passed to faster-whisper's WhisperModel.
WHISPER_MODEL_SIZE = "base"

In [5]:
# Name of the recording to index. If it is sitting in the current working
# directory, move it into AUDIO_DIR under the same file name.
audio_file = "Attempting to Explain All of Kirby Lore in a Single Video.mp3"

downloaded_audio = Path(audio_file)
if downloaded_audio.exists():
    downloaded_audio.rename(AUDIO_DIR / audio_file)

In [6]:
def transcribe_audio(audio_file, whisper_model):
    """Transcribe one audio file and return its text as a single string.

    Args:
        audio_file: Path (or path string) of the audio file; it is handed to
            the model as ``str(audio_file)``.
        whisper_model: Object exposing ``transcribe(path, beam_size=...)`` that
            returns ``(segments, info)``, where each segment has a ``.text``
            attribute (a ``faster_whisper.WhisperModel`` in this notebook).

    Returns:
        The non-blank segment texts, each stripped of surrounding whitespace,
        joined with single spaces.
    """
    segments, _ = whisper_model.transcribe(str(audio_file), beam_size=5)
    # Strip each segment before joining: a segment's text may carry its own
    # leading/trailing whitespace, which would otherwise produce doubled
    # spaces between segments and stray spaces at the ends of the transcript.
    stripped_texts = (segment.text.strip() for segment in segments)
    return " ".join(text for text in stripped_texts if text)

In [7]:
def transcribe_episode(audio_path, whisper_model):
    """Return the transcript for ``audio_path``, using the on-disk cache.

    The cache file is ``TRANSCRIPT_CACHE / (audio_path.stem + ".txt")``. If it
    exists its contents are returned and nothing is transcribed; otherwise the
    audio is transcribed with ``transcribe_audio`` and the result is written
    to the cache before being returned.

    Args:
        audio_path: ``pathlib.Path`` of the audio file (its ``.stem`` names
            the cache file).
        whisper_model: Model object forwarded to ``transcribe_audio``.

    Returns:
        The transcript text.
    """
    cache_path = TRANSCRIPT_CACHE / (audio_path.stem + ".txt")
    # Read and write the cache as UTF-8 explicitly. The platform default
    # encoding is locale-dependent and may be unable to encode (or may
    # mis-decode) non-ASCII characters in a transcript.
    if cache_path.exists():
        return cache_path.read_text(encoding="utf-8")
    transcript = transcribe_audio(audio_path, whisper_model)
    cache_path.write_text(transcript, encoding="utf-8")
    return transcript

In [9]:
# Speech-to-text model (CPU inference) and the audio files to index.
whisper_model = WhisperModel(WHISPER_MODEL_SIZE, device="cpu")
audio_files = sorted(AUDIO_DIR.glob("*.mp3"))

# Transcripts are split into chunks (chunk_size=1000, chunk_overlap=100)
# so each piece can be embedded and retrieved on its own.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

documents = []
document_ids = []
for fp in audio_files:
    transcript = transcribe_episode(fp, whisper_model)
    chunks = splitter.split_text(transcript)
    for chunk_index, chunk in enumerate(chunks):
        documents.append(Document(page_content=chunk, metadata={"source": fp.name}))
        # Stable id per (file, chunk position). Without explicit ids every
        # re-run of this cell would add another copy of each chunk to the
        # persisted collection, and retrieval would return duplicates.
        document_ids.append(f"{fp.name}:{chunk_index}")

embeddings = OllamaEmbeddings(model=EMBEDDING_MODEL)
# NOTE: a store populated by an earlier run without ids still contains those
# older copies; delete CHROMA_DIR once to start from a clean collection.
vectordb = Chroma.from_documents(
    documents,
    embeddings,
    ids=document_ids,
    persist_directory=str(CHROMA_DIR),
)

In [13]:
# Deterministic (temperature 0) Groq chat model, plus a retriever that asks
# the vector store for k=5 results per query.
llm = ChatGroq(model_name=MODEL_NAME, temperature=0)
retriever = vectordb.as_retriever(search_kwargs={"k": 5})

question = "Tell me about Void"

# Fetch the matching chunks and glue them together, separated by blank lines.
docs = retriever.invoke(question)
content = "\n\n".join(doc.page_content for doc in docs)

# Instruction, retrieved context, question, then the answer cue. The trailing
# spaces before each newline are part of the prompt text.
prompt = (
    "You are a helpful assistant that answers questions based on the context below. \n"
    f"{content} \n"
    f"Question: {question} \n"
    "Answer:"
)

response = llm.invoke(prompt)
print(response.content)

**Void (also known as Void Terminus)** is the central antagonist of the *Kirby: Dark Matter* trilogy.  In the game it is portrayed as a massive, dark‑matter‑based entity that has the power to warp reality, summon otherworldly minions, and absorb or manipulate energy.  Below is a quick rundown of what the game and its lore reveal about Void:

| Aspect | What the game tells us |
|--------|------------------------|
| **Identity** | Void is the embodiment of *dark matter*—the mysterious substance that exists in all dimensions.  It is the source of the “dark” forces that appear in the series (e.g., Dark Mind, Dark Crafter). |
| **Origin** | The game suggests that Void was created by the accumulation of negative or “dark” energy.  However, the lore also hints that if the same entity were born from *positive* energy, it could take on a more benevolent form (hence the idea that it might resemble Kirby). |
| **Weakness** | In the earlier *Dark Matter* games, the only real weakness of the Dark M