In [1]:
from faster_whisper import WhisperModel
from autogen_ext.models.ollama import OllamaChatCompletionClient
from autogen_agentchat.agents import AssistantAgent
from pydantic import BaseModel
from typing import List, Optional
import torch
import time
import json


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Ask PyTorch to release cached GPU memory that it is not currently using.
# NOTE(review): the Whisper model in this notebook is created with device="cpu",
# so this call may be unnecessary here — confirm whether anything in this kernel uses CUDA.
torch.cuda.empty_cache()

In [3]:
def transcribe_audio_with_timestamps(
    audio_path: str,
    model_size: str = "tiny",
    beam_size: int = 5,
) -> str:
    """
    Transcribe an audio file with Whisper and print every timestamped segment.

    Args:
        audio_path: Path of the audio file to transcribe.
        model_size: Model size/name handed to ``WhisperModel``
            (e.g. tiny/small/medium/large). Defaults to ``"tiny"``.
        beam_size: Beam size handed to ``WhisperModel.transcribe``. Defaults to 5.

    Returns:
        The text of all segments concatenated, each segment followed by one space.
    """
    # The model is created for CPU execution with int8 compute type.
    model = WhisperModel(model_size, device="cpu", compute_type="int8")

    # Transcribe exactly once. The previous version called transcribe() twice and
    # never used the first result.
    segments, _info = model.transcribe(audio_path, beam_size=beam_size)

    segment_texts = []
    for segment in segments:
        # Echo progress as "[start -> end] text" while the segments are consumed.
        print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
        segment_texts.append(segment.text)

    # Keep the original output shape: every segment text is followed by a space.
    return "".join(text + " " for text in segment_texts)

In [4]:
# Audio file to transcribe, kept in one named constant so it is easy to find and change.
AUDIO_PATH = r"C:\Users\divya\OneDrive\Desktop\Dungeon\dd.mp3"

# perf_counter() is a monotonic clock meant for measuring elapsed time;
# time.time() follows the wall clock and can jump if the system time is adjusted.
start_time = time.perf_counter()
transcription_text = transcribe_audio_with_timestamps(AUDIO_PATH)
end_time = time.perf_counter()
print(f"⏱️ Transcription completed in {end_time - start_time:.2f} seconds")

[0.00s -> 5.28s]  Dungeons & Dragons is like a board game without the board and you play using your imagination.
[5.28s -> 8.00s]  If you think of something, it can just happen in game.
[8.00s -> 12.32s]  D&D is very popular, almost everyone I've ever met absolutely loves it,
[12.32s -> 16.08s]  and I have never played it before, even though I always thought I would really like it.
[16.08s -> 18.48s]  People would tell me stories of their games, like,
[18.48s -> 22.56s]  so this one time me and my party were entering a cave, and there was a big ogre,
[22.56s -> 26.16s]  guarding some treasure, but I used my charisma stat to do some,
[26.16s -> 27.84s]  and we ended up aloping together.
[27.84s -> 32.16s]  Dude, if you don't think that sounds crazy fun, then I don't think we would get along,
[32.16s -> 33.44s]  and you should probably answer to crime.
[34.56s -> 37.04s]  But if you do think that sounds fun, then hey!
[37.84s -> 39.92s]  Sorry, I've never done that before, it felt weird.

In [40]:
from pydantic import BaseModel, Field
from typing import List, Optional

# --- Section Schemas (per chunk) ---

class WorldStateUpdate(BaseModel):
    """Tracks changes to the environment, locations, factions, and overall world."""

    # Required: the thing that changed.
    location: str = Field(
        ...,
        description="e.g., 'The Blacksmith's Attitude', 'The Weather', 'Town of Greenest', 'The King's Decree'",
    )
    # Required: how it changed.
    update: str = Field(
        ...,
        description="e.g., 'is now friendly', 'has turned to a storm', 'is now on high alert', 'has offered a 1000gp reward'",
    )

class CharacterEvent(BaseModel):
    """Merges player actions, character tracking, and their outcomes into a single record."""

    # Required: who acted.
    character: str = Field(
        ...,
        description="The name of the PC or important NPC",
    )
    # Required: what they did.
    action: str = Field(
        ...,
        description="What the character did (e.g., 'attacked the ogre', 'persuaded the guard', 'drank a potion')",
    )
    # Required: what came of it.
    outcome: str = Field(
        ...,
        description="The direct result of the action (e.g., 'dealt 12 damage', 'convinced him to stand down', 'regained 10 HP')",
    )

class QuestUpdate(BaseModel):
    """Tracks progress, discoveries, and completions related to the party's active goals."""

    # Required: which quest the update belongs to.
    quest: str = Field(
        ...,
        description="The name or description of the quest (e.g., 'Find the Lost Mine', 'Stop the Cult Ritual')",
    )
    # Required: the progress that was made.
    update: str = Field(
        ...,
        description="The progress made (e.g., 'discovered the cave entrance', 'obtained the Sacred Gem', 'defeated the cult leader')",
    )
    # Optional: defaults to None when no status is given.
    status: Optional[str] = Field(
        default=None,
        description="Current status of the quest (e.g., 'Started', 'In Progress', 'Completed', 'Failed')",
    )

# --- Chunk-Level Structured Output ---
class ChunkStructuredOutput(BaseModel):
    """Structured output for a chunk of D&D session transcript."""

    # Each list starts out as a fresh empty list when the field is omitted.
    world_state_updates: List[WorldStateUpdate] = Field(
        default_factory=list,
        description="List of world state updates in this chunk",
    )
    character_events: List[CharacterEvent] = Field(
        default_factory=list,
        description="List of character events in this chunk",
    )
    quest_updates: List[QuestUpdate] = Field(
        default_factory=list,
        description="List of quest updates in this chunk",
    )

In [41]:
# Model client for the summarizer: gemma3:12B, with replies constrained to the
# ChunkStructuredOutput schema through `response_format`.
model_client = OllamaChatCompletionClient(
    model="gemma3:12B",
    name="D&D Summarizer",
    response_format=ChunkStructuredOutput,
    # `model_info` only describes model capabilities.
    model_info={
        "vision": False,
        "function_calling": True,
        "json_output": True,
    },
    # Sampling parameters are passed through `options`; temperature was previously
    # placed inside `model_info`, which holds capability flags.
    options={"temperature": 0.0},
    # NOTE(review): `name` and `system_message` do not appear among this client's documented
    # configuration keys, so they are presumably ignored here. The prompt most likely has to
    # be given to the AssistantAgent via its `system_message` argument — confirm against the
    # installed autogen-ext version.
    # The example below uses the same keys as ChunkStructuredOutput
    # (world_state_updates / character_events / quest_updates).
    system_message="""
# ROLE
You are a highly organized and meticulous Dungeon Master's Assistant. Your task is to analyze a raw text transcript from a Dungeons & Dragons gameplay session and extract structured information into a specific JSON format.

# INSTRUCTIONS
1.  **Read the provided transcript chunk carefully.**
2.  **Categorize every relevant event, action, and detail** into the three lists defined below.
3.  **Be concise and factual.** Summarize the events clearly without adding flavor text or your own commentary.
4.  **Only extract information that is explicitly stated or clearly implied in the text.** Do not invent or assume details.
5.  **If a category has no relevant information for the chunk, leave its list empty.**

# OUTPUT FORMAT
You MUST output a valid JSON object that matches this schema:

```json
{
  "world_state_updates": [
    {
      "location": "Name of the location or general setting",
      "update": "A factual statement about a change in the world (e.g., 'The innkeeper is now hostile', 'The bridge is destroyed', 'The king offered a 500gp reward')."
    }
  ],
  "character_events": [
    {
      "character": "Character Name",
      "action": "The specific action they took (e.g., 'attacked the ogre', 'persuaded the guard', 'searched the desk').",
      "outcome": "The direct result of their action (e.g., 'dealt 12 damage', 'convinced him to lower his weapon', 'found a hidden letter')."
    }
  ],
  "quest_updates": [
    {
      "quest": "The name or description of the quest (e.g., 'Find the Lost Mine', 'Stop the Cult Ritual')",
      "update": "The progress made (e.g., 'discovered the cave entrance', 'obtained the Sacred Gem', 'defeated the cult leader')",
      "status": "The new status if it changed (e.g., 'Started', 'Completed')."
    }
  ]
}
```
""",
)

In [42]:
# Model client for the filter agent (gemma3:latest).
model_client2 = OllamaChatCompletionClient(
    model="gemma3:latest",
    name="D&D Filter",
    # `model_info` only describes model capabilities.
    model_info={
        "vision": False,
        "function_calling": False,
        "json_output": True,
    },
    # Sampling parameters are passed through `options`; temperature was previously
    # placed inside `model_info`, which holds capability flags.
    options={"temperature": 0.0},
    # NOTE(review): `name` and `system_message` do not appear among this client's documented
    # configuration keys, so they are presumably ignored here. The prompt most likely has to
    # be given to the AssistantAgent via its `system_message` argument — confirm against the
    # installed autogen-ext version.
    system_message="""
You are a Dungeon Master's session filter AI.  

ONLY log relevant lines into these categories:
1. Player Actions → [Player, Action, Outcome]
2. Story Updates → [Event, Details]
3. Character Status → [Character, Condition/Notes]
4. Quests/Hooks → [Quest, Status]

RULES:
- Ignore all irrelevant text.
- Output MUST be valid JSON with EXACT category keys.
- No summaries, explanations, or extra text.
- Replace real names with character names.
- Entries must be short and concise.

Example output:
{
  "Player Actions": [{"Player": "Aria", "Action": "attacks", "Outcome": "hit for 5 damage"}],
  "Story Updates": [{"Event": "Entered dungeon", "Details": "The room is dark and cold"}],
  "Character Status": [{"Character": "Borin", "Condition/Notes": "Low HP"}],
  "Quests/Hooks": [{"Quest": "Rescue the prince", "Status": "Started"}]
}
""",
)


In [43]:
# System prompt for the summarizer agent. The schema shown to the model is generated
# from ChunkStructuredOutput, so the prompt cannot drift away from the structure that
# `response_format` enforces.
SUMMARIZER_SYSTEM_MESSAGE = f"""
# ROLE
You are a highly organized and meticulous Dungeon Master's Assistant. Your task is to analyze a raw text transcript from a Dungeons & Dragons gameplay session and extract structured information into a specific JSON format.

# INSTRUCTIONS
1.  **Read the provided transcript chunk carefully.**
2.  **Categorize every relevant event, action, and detail** into the lists defined by the schema below.
3.  **Be concise and factual.** Summarize the events clearly without adding flavor text or your own commentary.
4.  **Only extract information that is explicitly stated or clearly implied in the text.** Do not invent or assume details.
5.  **If a category has no relevant information for the chunk, leave its list empty.**

# OUTPUT FORMAT
You MUST output a valid JSON object that conforms to this JSON schema:

```json
{json.dumps(ChunkStructuredOutput.model_json_schema(), indent=2)}
```
"""

# The prompt is passed to the agent itself: an AssistantAgent built without
# `system_message` falls back to its generic default instructions.
summarizer = AssistantAgent(
    name="Dungeon_Scribe_Summarizer",
    model_client=model_client,
    description="An AI assistant that summarizes tabletop RPG session transcripts and tracks story developments.",
    system_message=SUMMARIZER_SYSTEM_MESSAGE,
)

In [44]:
# System prompt for the filter agent: keep only game-relevant lines, grouped into four
# fixed categories, and reply with JSON only.
FILTER_SYSTEM_MESSAGE = """
You are a Dungeon Master's session filter AI.

ONLY log relevant lines into these categories:
1. Player Actions → [Player, Action, Outcome]
2. Story Updates → [Event, Details]
3. Character Status → [Character, Condition/Notes]
4. Quests/Hooks → [Quest, Status]

RULES:
- Ignore all irrelevant text.
- Output MUST be valid JSON with EXACT category keys.
- No summaries, explanations, or extra text.
- Replace real names with character names.
- Entries must be short and concise.

Example output:
{
  "Player Actions": [{"Player": "Aria", "Action": "attacks", "Outcome": "hit for 5 damage"}],
  "Story Updates": [{"Event": "Entered dungeon", "Details": "The room is dark and cold"}],
  "Character Status": [{"Character": "Borin", "Condition/Notes": "Low HP"}],
  "Quests/Hooks": [{"Quest": "Rescue the prince", "Status": "Started"}]
}
"""

# The prompt is passed to the agent itself: an AssistantAgent built without
# `system_message` falls back to its generic default instructions.
filter_agent = AssistantAgent(
    name="Dungeon_Scribe_Filter",
    model_client=model_client2,
    description="Filters unnecessary details out of the summarized session logs.",
    system_message=FILTER_SYSTEM_MESSAGE,
)

In [45]:
# Split the transcript into sentence-like fragments on "." and drop fragments that are
# empty or whitespace-only (for example the piece after the final period), so that no
# blank text is sent to the summarizer.
transcript_lines = [
    fragment.strip()
    for fragment in transcription_text.split(".")
    if fragment.strip()
]
# Number of fragments sent to the summarizer per request.
chunk_size = 10

In [46]:
# ---------------------------
# Step 2: Chunked logging with filtering
# ---------------------------

for i in range(0, len(transcript_lines), chunk_size):
    chunk = "\n".join(transcript_lines[i:i + chunk_size])
    start_chunk_time = time.time()
    
    # 1️⃣ Summarize the chunk into structured logs
    chunk_result = await summarizer.run(task=f"Log this chunk in 4 sections:\n{chunk}")
    summarized_text = chunk_result.messages[-1].content.strip()
    
    # Optionally parse JSON if your summarizer outputs JSON
    # import json
    # summarized_logs = json.loads(summarized_text)
    
    # 2️⃣ Filter summarized logs for relevance & categories
    # filter_result = await filter_agent.run(task=f"Classify these logs:\n{summarized_text}")
    # filtered_text = filter_result.messages[-1].content.strip()
    
    # Append filtered & classified logs to CSV
    with open("dnd_log17.csv", "a", encoding="utf-8") as f:
        f.write(summarized_text + "\n")
    
    end_chunk_time = time.time()
    print(f"✅ Processed & logged chunk {i // chunk_size + 1} in {end_chunk_time - start_chunk_time:.2f}s")


✅ Processed & logged chunk 1 in 85.94s
✅ Processed & logged chunk 2 in 82.78s
✅ Processed & logged chunk 3 in 72.26s
✅ Processed & logged chunk 4 in 91.59s
✅ Processed & logged chunk 5 in 80.85s
✅ Processed & logged chunk 6 in 87.21s
✅ Processed & logged chunk 7 in 81.02s
✅ Processed & logged chunk 8 in 80.14s


ReadError: 

In [None]:
# Takes the text content of the last message in `result`, with surrounding whitespace removed.
# NOTE(review): no visible top-level cell assigns `result` (the chunk loop stores its output
# in `chunk_result`), so this presumably relies on a variable from an earlier or deleted cell
# and would fail on a fresh kernel — confirm which result is meant to be saved.
csv_output = result.messages[-1].content.strip()

In [None]:
# Write the summary text to disk. The path is held in one variable so the confirmation
# message always names the file that was actually written (it previously printed
# "dnd_summary.json" while writing "dnd_summary4.json").
summary_path = "dnd_summary4.json"
with open(summary_path, "w", encoding="utf-8") as f:
    f.write(csv_output)
print(f"✅ Summary saved to {summary_path}")