In [1]:
import librosa
from dotenv import load_dotenv
import assemblyai as aai
import os

# Load environment variables (including ASSEMBLYAI_API_KEY) from a local .env file.
load_dotenv()

# Fail fast with an actionable message: without this check a missing key is
# stored as None and only surfaces later, when the transcription request is made.
_assemblyai_api_key = os.getenv("ASSEMBLYAI_API_KEY", "").strip()
if not _assemblyai_api_key:
    raise RuntimeError("Missing ASSEMBLYAI_API_KEY (.env)")

aai.settings.api_key = _assemblyai_api_key


RECORDS CONVERSATION, SAVES TO WAV

In [13]:
import sounddevice as sd
from scipy.io.wavfile import write
import numpy as np
import speech_recognition as sr

fs = 44100  # Sample rate passed to the input stream and written into the WAV header
channels = 1  # Number of input channels requested from the device

print("Press ENTER to start recording...")
input()

print("Recording... Press ENTER again to stop.")
recorded_chunks = []


def callback(indata, frames, time, status):
    """Store one block of captured audio.

    Registered as the ``callback`` of the ``sd.InputStream`` created below.

    Args:
        indata: Block of samples handed over by the stream.
        frames: Unused here.
        time: Unused here.
        status: Stream status flags; printed when truthy so that capture
            problems are not silently dropped.
    """
    if status:
        print(status)
    # Copy the block before storing it: only the copy is kept, so later reuse
    # of the stream's buffer cannot change what was recorded.
    recorded_chunks.append(indata.copy())


stream = sd.InputStream(samplerate=fs, channels=channels, callback=callback)
with stream:
    input()  # Wait until user presses Enter
    # Exiting the 'with' block stops the stream

# np.concatenate raises an opaque ValueError on an empty list, so report the
# "nothing recorded" case explicitly instead.
if not recorded_chunks:
    raise RuntimeError("No audio was captured; nothing to save.")

# Combine all chunks along the frame axis and write them out as one WAV file.
audio_np = np.concatenate(recorded_chunks, axis=0)
write("input_audio.wav", fs, audio_np)

print("Saved recording to input_audio.wav")


Press ENTER to start recording...
Recording... Press ENTER again to stop.
Saved recording to input_audio.wav


WAV TO TXT TRANSCRIPT, END AS SPEAKER A/B...

In [3]:
# Path to the audio file to transcribe.
audio_file = "input_audio.wav"

# speaker_labels=True asks for per-speaker utterances, which are written out below.
config = aai.TranscriptionConfig(
    speech_model=aai.SpeechModel.universal,
    speaker_labels=True,
)

transcript = aai.Transcriber(config=config).transcribe(audio_file)

if transcript.status == "error":
    raise RuntimeError(f"Transcription failed: {transcript.error}")

# NOTE(review): `basename` is not used to build the output name in this cell.
basename = os.path.splitext(os.path.basename(audio_file))[0]
out_path = "REALTIME_transcript.txt"

# Guard against a transcript without utterances (e.g. no speech detected):
# iterating None would raise TypeError, so fall back to an empty transcript file.
utterances = transcript.utterances or []

with open(out_path, "w", encoding="utf-8") as f:
    for u in utterances:
        # Presumably millisecond offsets converted to seconds - confirm against the SDK.
        start = u.start / 1000
        end = u.end / 1000
        speaker = u.speaker
        # Keep every utterance on a single line of the transcript.
        text = u.text.replace("\n", " ")
        f.write(f"[ Start Time:{start} End Time:{end} ]\nSpeaker {speaker}: {text}\n")


print(f"Wrote transcript to {out_path}")


Wrote transcript to REALTIME_transcript.txt


ADD SPEAKER LABELS

In [5]:
from Named_Transcript import rename_speakers_in_transcript

# Hand the raw transcript to the renaming helper and keep its return value
# (presumably the path of the labelled transcript - confirm in Named_Transcript).
labeled_transcript_path = rename_speakers_in_transcript(
    out_path
)


```json
{
"Speaker B": "Speaker B initiates the conversation by asking Speaker A about their favorite sport, NBA team, and opinions on OKC's draft picks. Speaker B then defends a player's performance in Dallas, citing their average points and where they saw the information.",
"Speaker A": "Speaker A states basketball is their favorite sport and OKC is their favorite NBA team, expressing a lack of enthusiasm for OKC's 25 draft picks. Speaker A strongly disagrees with Speaker B regarding a player's performance, disputing the stats and accusing Speaker B of propaganda."
}
```
{'Speaker B': "Speaker B initiates the conversation by asking Speaker A about their favorite sport, NBA team, and opinions on OKC's draft picks. Speaker B then defends a player's performance in Dallas, citing their average points and where they saw the information.", 'Speaker A': "Speaker A states basketball is their favorite sport and OKC is their favorite NBA team, expressing a lack of enthusiasm for OKC's 25 draft

CONVERSATION JSON (FOR MINDMAP)


In [25]:
from LLM_json_generator import generate_conversation_mindmap_json
import json

# CHANGE THIS TO LABELLED TRANSCRIPT FILE
transcript_path = "namedREALTIME_transcript.txt"

# Read the whole labelled transcript; it is passed to the generator as one string.
with open(transcript_path, "r", encoding="utf-8") as f:
    transcript_text = f.read()

mindmap_data = generate_conversation_mindmap_json(transcript_text, source_file=transcript_path)

# Persist the generated structure so later cells can load it from disk.
output_path = "mindmap.json"
with open(output_path, "w", encoding="utf-8") as f:
    json.dump(mindmap_data, f, indent=2)

# The status glyph was mis-encoded (UTF-8 bytes shown as MacRoman); restored here.
print(f"✅ Mind map JSON generated and saved to {output_path}")

✅ Mind map JSON generated and saved to mindmap.json


In [None]:
# import json
# import networkx as nx
# from pyvis.network import Network

# # ------------------- Build Graph from Conversation JSON -------------------

# def build_mindmap_graph(conversation_json):
#     """
#     Build a graph where:
#     - Main topics are nodes
#     - Subtopics are nodes
#     - Edges represent introduction or discussion
#     - Relationships are edges with types
#     """
#     G = nx.DiGraph()
    
#     # Add main topics
#     for topic in conversation_json.get("main_topics", []):
#         topic_name = topic["topic"]
#         G.add_node(topic_name, label=topic_name, type="topic", introduced_by=topic["introduced_by"], sentiment=topic["sentiment"])
        
#         # Add subtopics
#         for sub in topic.get("subtopics", []):
#             sub_name = sub["subtopic"]
#             G.add_node(sub_name, label=sub_name, type="subtopic", introduced_by=sub["introduced_by"], sentiment=sub["sentiment"])
            
#             # Edge from main topic to subtopic
#             G.add_edge(topic_name, sub_name, label=sub["stance"], introduced_by=sub["introduced_by"])
    
#     # Add explicit relationships
#     for rel in conversation_json.get("relationships", []):
#         G.add_edge(rel["from"], rel["to"], label=rel["type"], introduced_by=rel["initiated_by"])
    
#     return G

# # ------------------- Visualize Graph with PyVis -------------------

# def visualize_graph(G, output_html="mindmap.html"):
#     """
#     Create an interactive visualization of the graph using PyVis
#     """
#     net = Network(height="750px", width="100%", directed=True, notebook=False)
    
#     # Add nodes
#     for node, data in G.nodes(data=True):
#         color_map = {"topic": "#97C2FC", "subtopic": "#FFD700"}  # Blue for main topic, yellow for subtopics
#         sentiment_color = {"positive": "#8BC34A", "neutral": "#FFC107", "negative": "#F44336"}
#         color = color_map.get(data.get("type"), "#D3D3D3")
#         color = sentiment_color.get(data.get("sentiment"), color)
        
#         title = f"{data.get('label')}<br>Introduced by: {data.get('introduced_by')}<br>Sentiment: {data.get('sentiment')}"
#         net.add_node(node, label=data.get("label"), title=title, color=color)
    
#     # Add edges
#     for u, v, data in G.edges(data=True):
#         label = data.get("label", "")
#         net.add_edge(u, v, label=label, title=f"{label} (by {data.get('introduced_by', 'unknown')})")
    
#     # Generate interactive HTML
#     #net.show(output_html)
#     net.write_html(output_html)
#     print(f"✅ Mindmap saved to {output_html}")

# # ------------------- Example Usage -------------------

# if __name__ == "__main__":
#     # Load your JSON
#     with open("mindmap.json", "r", encoding="utf-8") as f:
#         conversation_json = json.load(f)
    
#     # Build graph
#     G = build_mindmap_graph(conversation_json)
    
#     # Visualize and save HTML
#     visualize_graph(G, "digital_transformation_mindmap.html")


✅ Mindmap saved to digital_transformation_mindmap.html


In [26]:
# write_mindmap_html_with_edge_attribution.py
# Standalone HTML with NotebookLM-style layout and SPEAKER ATTRIBUTION ON EDGES (no participant nodes)

import json
from pathlib import Path

DATA_PATH = "mindmap.json"            # mind-map JSON read by this cell
OUT_HTML  = "Final_mindmap_app.html"  # standalone page written by this cell

data = json.loads(Path(DATA_PATH).read_text(encoding="utf-8"))

# The JSON text is pasted verbatim inside a <script> element. Escape every "<"
# so that a "</script>" or "<!--" inside the conversation data cannot terminate
# or corrupt the script block. "<" can only occur inside JSON strings, where
# \u003c is an equivalent escape, so the parsed data is unchanged.
data_js = json.dumps(data, ensure_ascii=False).replace("<", "\\u003c")

# Standalone page template. This is an f-string: the only interpolated value is
# {data_js}; every literal CSS/JS brace is doubled, and a JS backslash escape
# must be written with a doubled backslash (e.g. \\n) so that it reaches the
# browser as an escape sequence rather than as a raw character.
html_doc = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<title>Conversation Mind Map</title>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<script src="https://unpkg.com/vis-network@9.1.7/dist/vis-network.min.js"></script>
<link href="https://unpkg.com/vis-network@9.1.7/styles/vis-network.min.css" rel="stylesheet"/>

<style>
  :root {{
    --bg: #ffffff;
    --fg: #263238;
    --edge: #CFD8DC;
    --root: #B2DFDB;
    --topic: #C5CAE9;
    --subtopic: #BBDEFB;
    --leaf: #E1F5FE;
    --highlight: #FFE082;
  }}
  body {{ margin:0; background:var(--bg); color:var(--fg); font-family: Inter, system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial; }}
  .toolbar {{
    display:flex; gap:.75rem; flex-wrap:wrap; align-items:center;
    padding:.75rem 1rem; border-bottom:1px solid #ECEFF1; position:sticky; top:0; background:rgba(255,255,255,.97); z-index:10;
  }}
  .toolbar label {{ font-size:.9rem; opacity:.9; }}
  .toolbar input[type="range"] {{ width:160px; vertical-align:middle; }}
  .toolbar input[type="text"] {{ width:220px; padding:.4rem .55rem; border:1px solid #ECEFF1; border-radius:8px; }}
  .toolbar select {{ padding:.35rem .5rem; border:1px solid #ECEFF1; border-radius:8px; }}
  .pill {{ display:inline-block; padding:.15rem .45rem; font-size:.75rem; border-radius:999px; background:#EEF2F7; margin-left:.35rem; }}
  #graph {{ height: calc(100vh - 64px); }}
  .note {{ padding:.35rem 1rem; font-size:.85rem; color:#607D8B; border-top:1px dashed #ECEFF1; }}
</style>
</head>

<body>
  <div class="toolbar">
    <label>Branch
      <select id="topic"></select>
    </label>

    <label>Depth
      <input id="depth" type="range" min="1" max="8" step="1" value="8"/>
      <span id="depthVal" class="pill">8</span>
    </label>

    <label>Wrap
      <input id="wrap" type="range" min="14" max="36" step="1" value="36"/>
      <span id="wrapVal" class="pill">36</span>
    </label>

    <label>Layout
      <!-- default direction changed from LR → UD -->
      <select id="direction">
        <option value="UD" selected>Top → Down</option>
        <option value="LR">Left → Right</option>
      </select>
    </label>

    <label><input id="hideX" type="checkbox" checked/> Hide cross-links</label>
    <label><input id="edgeLabels" type="checkbox" checked/> Show speaker labels on edges</label>

    <label>Highlight
      <input id="search" type="text" placeholder="type to highlight…"/>
    </label>

    <span id="stats" class="pill"></span>
  </div>

  <div id="graph"></div>
  <div class="note">Tip: Choose one branch (topic) and keep depth at 2–3 for clarity. Toggle “Show speaker labels” to see who introduced/discussed each node.</div>

<script>
  // -------- Embedded Data --------
  const DATA = {data_js};

  // -------- UI refs --------
  const topicSel   = document.getElementById('topic');
  const depthEl    = document.getElementById('depth');
  const depthVal   = document.getElementById('depthVal');
  const wrapEl     = document.getElementById('wrap');
  const wrapVal    = document.getElementById('wrapVal');
  const dirEl      = document.getElementById('direction');
  const hideX      = document.getElementById('hideX');
  const edgeLabels = document.getElementById('edgeLabels');
  const searchEl   = document.getElementById('search');
  const statsEl    = document.getElementById('stats');

  // Populate topics
  const TOPICS = ["All Topics", ...(DATA.main_topics||[]).map(t => t.topic).filter(Boolean)];
  TOPICS.forEach(t => {{
    const opt = document.createElement('option');
    opt.value = t; opt.textContent = t;
    topicSel.appendChild(opt);
  }});

  depthEl.addEventListener('input', () => depthVal.textContent = depthEl.value);
  wrapEl .addEventListener('input', () => wrapVal.textContent  = wrapEl.value);

  // -------- Utils --------
  // Greedy word wrap: collapse whitespace, then break into lines of at most
  // `width` characters. Lines are joined with a newline character because
  // vis-network labels break on "\\n"; a "<br>" tag would be drawn literally.
  function wrapLabel(s, width) {{
    s = (s||"").trim().replace(/\\s+/g, " ");
    if (s.length <= width) return s;
    const out = []; let line = [], ln = 0;
    for (const w of s.split(" ")) {{
      const extra = line.length ? 1 : 0;
      if (ln + w.length + extra > width) {{
        out.push(line.join(" ")); line=[w]; ln = w.length;
      }} else {{ line.push(w); ln += w.length + extra; }}
    }}
    if (line.length) out.push(line.join(" "));
    return out.join("\\n");
  }}

  // Lighten (positive) or darken (negative) a #rrggbb colour; neutral/missing
  // sentiment returns the colour unchanged.
  function tint(hex, sentiment) {{
    if (!sentiment || sentiment === "neutral") return hex;
    const r = parseInt(hex.slice(1,3),16), g=parseInt(hex.slice(3,5),16), b=parseInt(hex.slice(5,7),16);
    let R=r,G=g,B=b;
    if (sentiment === "positive") {{ R=Math.min(255, Math.round(r*1.10)); G=Math.min(255, Math.round(g*1.10)); B=Math.min(255, Math.round(b*1.10)); }}
    if (sentiment === "negative") {{ R=Math.round(r*0.80); G=Math.round(g*0.80); B=Math.round(b*0.80); }}
    return "#" + [R,G,B].map(v => v.toString(16).padStart(2,"0")).join("");
  }}

  // Join up to `maxItems` truthy entries; append " +N" for the remainder.
  function truncateList(list, maxItems=3) {{
    if (!Array.isArray(list)) return "";
    const items = list.filter(Boolean);
    if (items.length <= maxItems) return items.join(", ");
    return items.slice(0, maxItems).join(", ") + " +" + (items.length - maxItems);
  }}

  // -------- Build graph data (edge attribution) --------
  // Returns {{ nodes, edges }} DataSets for the selected branch, pruned to
  // `maxDepth` levels below the root. Edge tooltips are plain text with one
  // field per line: vis-network shows string titles as text, not as HTML.
  function buildData(opts) {{
    const {{
      selectedTopic = "All Topics",
      maxDepth = 10,
      hideCrosslinks = false,
      wrap = 36,
      showEdgeLabels = true
    }} = opts;

    const nodes = new vis.DataSet();
    const edges = new vis.DataSet();

    const palette = {{
      root:     getComputedStyle(document.documentElement).getPropertyValue('--root').trim(),
      topic:    getComputedStyle(document.documentElement).getPropertyValue('--topic').trim(),
      subtopic: getComputedStyle(document.documentElement).getPropertyValue('--subtopic').trim(),
      leaf:     getComputedStyle(document.documentElement).getPropertyValue('--leaf').trim()
    }};

    // Node ids are the display names, so a name that appears twice maps to one node.
    const addNode = (id, role, label, sentiment) => {{
      if (!id || nodes.get(id)) return;
      const color = tint(palette[role] || palette.leaf, sentiment);
      nodes.add({{
        id,
        label: wrapLabel(label||id, wrap),
        title: label||id,
        color,
        shape: "box",
        margin: 10,
        font: {{ multi: "html", size: 14 }}
      }});
    }};

    const addEdge = (from, to, label, title) => {{
      if (!from || !to) return;
      const e = {{ from, to }};
      if (showEdgeLabels && label) e.label = label;
      if (title) e.title = title;
      edges.add(e);
    }};

    const root = DATA.root || DATA.title || "Mind Map";
    addNode(root, "root", root);

    // Edge attribution for topics: "introduced by ..."
    function addTopicBranch(topicObj) {{
      const tname = topicObj.topic;
      addNode(tname, "topic", tname, topicObj.sentiment);

      let topicEdgeLabel = "";
      let topicEdgeTitle = "";
      const introBy = topicObj.introduced_by;
      const introAt = topicObj.introduced_at;
      if (introBy) {{
        topicEdgeLabel = "introduced by " + introBy;
        topicEdgeTitle = "introduced by: " + introBy + (introAt ? "\\nat: " + introAt : "");
      }}
      addEdge(root, tname, topicEdgeLabel, topicEdgeTitle);

      // Subtopics: attribution from discussed_by / stance goes on the edge Topic→Subtopic
      for (const s of (topicObj.subtopics || [])) {{
        const sname = s.subtopic;
        addNode(sname, "subtopic", sname, s.sentiment);

        // Tolerate a missing or non-array discussed_by instead of throwing on .filter.
        const whoList = (Array.isArray(s.discussed_by) ? s.discussed_by : []).filter(Boolean);
        const stance  = s.stance;
        const labelWho = truncateList(whoList, 2);
        let subEdgeLabel = "";
        let subEdgeTitle = "";
        if (labelWho || stance) {{
          const parts = [];
          if (labelWho) parts.push("discussed by " + labelWho);
          if (stance)   parts.push("stance: " + stance);
          subEdgeLabel = parts.join(" · ");
          // The tooltip lists every speaker (the label is truncated to two).
          const titleLines = [];
          if (whoList.length) titleLines.push("discussed by: " + whoList.join(", "));
          if (stance)         titleLines.push("stance: " + stance);
          subEdgeTitle = titleLines.join("\\n");
        }}
        addEdge(tname, sname, subEdgeLabel, subEdgeTitle);

        // Optional leaves (entities/notes) under Subtopic (no attribution here to keep it clean)
        const entities = Array.isArray(s.entities) ? s.entities : [];
        const notes    = Array.isArray(s.notes) ? s.notes : [];
        const leaves   = [];
        for (const x of entities) {{
          if (typeof x === 'string') leaves.push(x);
          else if (x && (x.name || x.text)) leaves.push(x.name || x.text);
        }}
        for (const y of notes) {{
          if (typeof y === 'string') leaves.push(y);
          else if (y && (y.name || y.text)) leaves.push(y.name || y.text);
        }}
        for (const leaf of leaves) {{
          addNode(leaf, "leaf", leaf, null);
          addEdge(sname, leaf, "", "");
        }}
      }}
    }}

    if (selectedTopic === "All Topics") {{
      for (const t of (DATA.main_topics || [])) addTopicBranch(t);
    }} else {{
      const t = (DATA.main_topics || []).find(x => x.topic === selectedTopic);
      if (t) addTopicBranch(t);
    }}

    // Cross-link relationships (attribution on those edges too), unless hidden
    if (!hideCrosslinks) {{
      for (const r of (DATA.relationships || [])) {{
        const frm = r.from, to = r.to; if (!frm || !to) continue;
        // Ensure nodes exist (type inference is minimal here)
        addNode(frm, "leaf", frm, null);
        addNode(to,  "leaf", to,  null);
        let lbl = "";
        if (r.type) lbl = r.type;
        const by = r.initiated_by, at = r.initiated_at;
        const extras = [];
        if (by) extras.push("by " + by);
        if (at) extras.push("at " + at);
        if (extras.length) lbl = (lbl ? lbl + " · " : "") + extras.join(" ");
        const ttlLines = [];
        if (r.type) ttlLines.push("type: " + r.type);
        if (by)     ttlLines.push("by: " + by);
        if (at)     ttlLines.push("at: " + at);
        addEdge(frm, to, lbl, ttlLines.join("\\n"));
      }}
    }}

    // Depth prune (root-out): breadth-first walk along edge direction, keeping
    // only nodes reachable from the root within maxDepth steps.
    const adj = new Map(); nodes.forEach(n => adj.set(n.id, []));
    edges.forEach(e => {{ if (adj.has(e.from)) adj.get(e.from).push(e.to); }});
    const keep = new Set([root]); let frontier = [root], d=0;
    while (frontier.length && d < maxDepth) {{
      const nxt = [];
      for (const u of frontier) {{
        const kids = adj.get(u) || [];
        for (const v of kids) if (!keep.has(v)) {{ keep.add(v); nxt.push(v); }}
      }}
      frontier = nxt; d += 1;
    }}
    const n2 = new vis.DataSet(nodes.get().filter(n => keep.has(n.id)));
    const kept = new Set(n2.getIds());
    const e2 = new vis.DataSet(edges.get().filter(e => kept.has(e.from) && kept.has(e.to)));

    return {{ nodes: n2, edges: e2 }};
  }}

  // -------- Render --------
  const container = document.getElementById('graph');
  let network = null;

  // Rebuild the graph from the current toolbar state and draw it.
  function render() {{
    const opts = {{
      selectedTopic: topicSel.value || "All Topics",
      maxDepth: parseInt(depthEl.value, 10),
      hideCrosslinks: hideX.checked,
      wrap: parseInt(wrapEl.value, 10),
      showEdgeLabels: edgeLabels.checked
    }};
    const data = buildData(opts);

    const options = {{
      layout: {{
        hierarchical: {{
          enabled: true,
          direction: dirEl.value,   // "LR" or "UD"
          sortMethod: "hubsize",
          levelSeparation: 230,
          nodeSpacing: 210,
          treeSpacing: 280
        }}
      }},
      physics: {{ enabled: false }},
      nodes: {{
        shape: "box",
        color: {{
          border: "#ECEFF1",
          highlight: {{ border: "#90CAF9", background: "#E3F2FD" }}
        }},
        widthConstraint: {{ maximum: 260 }}
      }},
      edges: {{
        smooth: {{ type: "continuous" }},
        color: {{ color: getComputedStyle(document.documentElement).getPropertyValue('--edge').trim() }},
        arrows: {{ to: {{ enabled: false }} }},
        font: {{ align: "top", size: 11, color: "#546E7A", background: "#FAFAFA" }}
      }},
      interaction: {{ hover: true, tooltipDelay: 80 }}
    }};

    // Release the previous network (canvas, listeners) before drawing a new
    // one; render() runs on every toolbar input event.
    if (network) network.destroy();
    container.innerHTML = "";
    network = new vis.Network(container, data, options);

    // Client-side highlight
    const q = (searchEl.value || "").trim().toLowerCase();
    if (q) {{
      const ids = data.nodes.getIds();
      for (const id of ids) {{
        const n = data.nodes.get(id);
        const lbl = (n.title || "").toLowerCase();
        if (lbl.includes(q)) {{
          data.nodes.update({{ id, color: getComputedStyle(document.documentElement).getPropertyValue('--highlight').trim() }});
        }}
      }}
    }}

    statsEl.textContent = data.nodes.length + " nodes · " + data.edges.length + " edges";
  }}

  // Events
  [topicSel, depthEl, wrapEl, dirEl, hideX, edgeLabels].forEach(el => el.addEventListener('input', render));
  searchEl.addEventListener('input', render);

  // Init
  topicSel.value = "All Topics";
  render();
</script>
</body>
</html>
"""

# Written as UTF-8 to match the <meta charset="utf-8"/> declared in the page.
Path(OUT_HTML).write_text(html_doc, encoding="utf-8")
print(f"✅ Wrote {OUT_HTML}. Open it in your browser.")


✅ Wrote Final_mindmap_app.html. Open it in your browser.


FOR SPEAKER IDENTIFICATION AND CONTEXTUAL INFORMATION

In [6]:
from speaker_identify import extract_speakers, normalize_header_to_name, unique_preserve_order
import os
import re
import json
import time
from pathlib import Path
from dotenv import load_dotenv
from speaker_identify import google_search_person

load_dotenv()

# Google Custom Search credentials; empty strings when the variables are unset.
API_KEY = os.getenv("CUSTOM_SEARCH_API_KEY", "").strip()
CX = os.getenv("CUSTOM_SEARCH_ENGINE_ID", "").strip()

### REPLACE WITH LABELLED TRANSCRIPT FILE
input_path = "namedREALTIME_transcript.txt"  # transcript file
out_dir = "out_speakers"  # where to save outputs
pages = 1  # Google pages per speaker (10 results/page)
pause = 1.5  # seconds between API calls
query_template = '"{name}"'  # search pattern
dry_run = False  # True = skip Google API calls (for testing)

text = Path(input_path).read_text(encoding="utf-8", errors="ignore")

# Speaker headers -> normalized names -> de-duplicated list in first-seen order.
raw_headers = extract_speakers(text)
names = [normalize_header_to_name(h) for h in raw_headers if h.strip()]
speakers = unique_preserve_order(names)

out_dir = Path(out_dir)
profiles_dir = out_dir / "profiles"
summary_path = out_dir / "speakers_summary.json"
profiles_dir.mkdir(parents=True, exist_ok=True)

print(f"✅ Found {len(speakers)} unique speakers:\n")
for i, s in enumerate(speakers, 1):
    print(f"{i:>2}. {s}")

summary = {
    "input": input_path,
    "total_unique_speakers": len(speakers),
    "query_template": query_template,
    "pages": pages,
    "pause": pause,
    "speakers": [],
    "generated_at": time.time(),
}

if not dry_run:
    if not API_KEY or not CX:
        raise SystemExit("❌ Missing CUSTOM_SEARCH_API_KEY or CUSTOM_SEARCH_ENGINE_ID (.env)")

    used_slugs = set()
    for index, name in enumerate(speakers, 1):
        print(f"\n[+] Fetching: {name}")
        try:
            info = google_search_person(
                query_template=query_template,
                person_name=name,
                api_key=API_KEY,
                cx=CX,
                num_pages=pages,
                pause=pause
            )
        except Exception as e:
            # Best effort: record the failure in the profile and continue with
            # the remaining speakers instead of aborting the whole run.
            print(f"   ✗ Error for {name}: {e}")
            info = {
                "query": name,
                "rendered_query": query_template.format(name=name),
                "total_results": 0,
                "texts": [],
                "links": [],
                "error": str(e),
            }

        # File-system-safe name. A name made only of characters outside
        # [0-9A-Za-z_-] collapses to "", and distinct names can collapse to the
        # same slug; either case would silently overwrite another profile, so
        # disambiguate with the speaker's position in the list.
        slug = re.sub(r"[^0-9A-Za-z\-_]+", "_", name).strip("_")
        if not slug or slug in used_slugs:
            slug = f"{slug or 'speaker'}_{index}"
        used_slugs.add(slug)

        out_file = profiles_dir / f"{slug}_info.json"
        out_file.write_text(json.dumps(info, ensure_ascii=False, indent=2), encoding="utf-8")

        summary["speakers"].append({
            "name": name,
            "file": str(out_file),
            "total_results": info.get("total_results", 0),
        })

# The summary is written in both modes; in a dry run its "speakers" list stays empty.
summary_path.write_text(
    json.dumps(summary, ensure_ascii=False, indent=2), encoding="utf-8"
)

if dry_run:
    print(f"\n--dry-run=True: wrote only {summary_path}")
else:
    print(f"\n✅ Done.\nProfiles saved under: {profiles_dir}\nSummary: {summary_path}")


✅ Found 2 unique speakers:

 1. Pranshul Bhatnagar
 2. Mobasserul Haque

[+] Fetching: Pranshul Bhatnagar

[+] Fetching: Mobasserul Haque

✅ Done.
Profiles saved under: out_speakers/profiles
Summary: out_speakers/speakers_summary.json


SEND TO FAISS

In [None]:
from RAG_FRAMEWORK import (
    PersonDatabase,
    prepare_mindmap_chunks,
    build_mindmap_index,
    query_both_indexes,
    make_rag_make_sense,
)
# Load person JSON from scraper
person_db = PersonDatabase()
json_folder = "out_speakers/profiles"

# Load every profile file. Sorted so the load order does not depend on the
# file system, and read as UTF-8 because the profiles are written as UTF-8
# with non-ASCII characters left unescaped.
for filename in sorted(os.listdir(json_folder)):
    if not filename.endswith(".json"):
        continue
    json_path = os.path.join(json_folder, filename)
    print(f"Processing {json_path}...")
    with open(json_path, "r", encoding="utf-8") as f:
        scraper_json = json.load(f)
    person_db.load_from_scraper_json(scraper_json)


# Build FAISS
person_db.build_person_chunks()
person_db.create_faiss_index()

# Load mindmap
with open("mindmap.json", "r", encoding="utf-8") as f:
    mindmap_json = json.load(f)
mindmap_chunks = prepare_mindmap_chunks(mindmap_json, person_db)
print("________________")
print(mindmap_chunks)
mindmap_index = build_mindmap_index(mindmap_chunks)

print("\nEnter your questions about the conversation (type 'exit' to quit):")
# Maps each asked query to its answer; asking the same query again replaces the entry.
history = {}

while True:

    query = input("\nYour query: ").strip()
    if query.lower() in ["exit", "quit"]:
        print("Exiting interactive session.")
        break
    if not query:
        # Nothing to search for; prompt again instead of querying with "".
        continue

    results = query_both_indexes(mindmap_index, mindmap_chunks, person_db, query)
    for r in results:
        print(f"{r['source']} | {r.get('person_id')} | {r['text'][:100]}...")

    # RAG answer; the history passed in holds the earlier turns only.
    answer = make_rag_make_sense(query, results, history)
    print("\n💡 Answer:\n", answer)

    history[query] = answer

    print("\n✅ Chat History:")
    print("="*20)
    for q, a in history.items():
        print(f"Q: {q}\nA: {a}\n")
    print("="*20)




Processing out_speakers/profiles/Adil_K_Gazder_info.json...
Processing out_speakers/profiles/Adil_Gazder_info.json...
Processing out_speakers/profiles/Adil_Keku_Gazder_info.json...
Processing out_speakers/profiles/Lewis_Hamilton_info.json...
________________
[{'id': '1ef613bc-6cf6-4bf8-a4c3-9603d54e2e93', 'text': 'Topic: Favorite Sport & NBA Team introduced by Adil Gazder at 00:00:01. Sentiment: neutral.', 'metadata': {'type': 'topic', 'introduced_by': 'Adil Gazder', 'person_id': 'adil_gazder'}}, {'id': '032bde06-df3e-4123-8fca-63516a6ddd9f', 'text': "Subtopic: Lewis's Favorite Sport (Basketball) introduced by Lewis Hamilton (neutral toward Favorite Sport & NBA Team). Discussed by Adil Gazder, Lewis Hamilton. Sentiment: neutral", 'metadata': {'type': 'subtopic', 'introduced_by': 'Lewis Hamilton', 'person_id': 'lewis_hamilton'}}, {'id': 'df947eeb-14db-4c27-8e3d-12d0cf4c7ec2', 'text': "Subtopic: Lewis's Favorite NBA Team (OKC) introduced by Lewis Hamilton (neutral toward Favorite Sport &