In [1]:
# Cell 1: Setup and Imports
import os
import sys
from pathlib import Path
from dotenv import load_dotenv

# Add project root to Python path
project_root = Path.cwd().parent
sys.path.append(str(project_root))

# Load environment variables
load_dotenv(project_root / '.env')

# Test API key
assemblyai_key = os.getenv('ASSEMBLYAI_API_KEY')
print(f"AssemblyAI API Key loaded: {'✅' if assemblyai_key else '❌'}")
print(f"Key starts with: {assemblyai_key[:10] if assemblyai_key else 'None'}...")


AssemblyAI API Key loaded: ✅
Key starts with: 972365f41d...


In [2]:
import sqlite3

conn = sqlite3.connect("data/app.db")
cursor = conn.cursor()

# Get all table names
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
tables = cursor.fetchall()

print("📊 Tables in app.db:")
for table in tables:
    table_name = table[0]
    
    # Get column info for each table
    cursor.execute(f"PRAGMA table_info({table_name})")
    columns = cursor.fetchall()
    
    print(f"\n🔧 {table_name}:")
    for col in columns:
        print(f"   - {col[1]} ({col[2]})")  # column_name (type)

conn.close()

📊 Tables in app.db:

🔧 conversations:
   - id (INTEGER)
   - title (TEXT)
   - raw_text (TEXT)
   - source (TEXT)
   - word_count (INTEGER)
   - created_at (DATETIME)
   - status (TEXT)

🔧 sqlite_sequence:
   - name ()
   - seq ()

🔧 blog_post_ideas:
   - id (INTEGER)
   - conversation_id (INTEGER)
   - title (TEXT)
   - description (TEXT)
   - usefulness_potential (INTEGER)
   - fitwith_seo_strategy (INTEGER)
   - fitwith_content_strategy (INTEGER)
   - inspiration_potential (INTEGER)
   - collaboration_potential (INTEGER)
   - innovation (INTEGER)
   - difficulty (INTEGER)
   - total_score (INTEGER)
   - sent_to_prod (BOOLEAN)
   - raw_llm_response (TEXT)
   - created_at (DATETIME)

🔧 processing_status:
   - id (INTEGER)
   - conversation_id (INTEGER)
   - stage (TEXT)
   - status (TEXT)
   - error_message (TEXT)
   - started_at (DATETIME)
   - completed_at (DATETIME)


In [3]:
# Cell 2: Import Dependencies
import assemblyai as aai
from langgraph.graph import StateGraph
from typing import TypedDict, Optional, List  # ← Added List here
import glob
import time
from pathlib import Path  # ← Also added Path here

# Import our database
from database.db_operations import db
from database.models import ConversationCreate

print("✅ All imports successful")

✅ All imports successful


In [4]:
## 1. Pydantic Model for Structured Output

from pydantic import BaseModel, Field
from typing import List, Optional
from enum import Enum

class SpeakerRole(str, Enum):
    """Possible speaker roles in the conversation"""
    CLIENT = "client"
    INTERVIEWER = "interviewer"

class Speaker(BaseModel):
    """Information about a person speaking in the conversation"""
    name: Optional[str] = Field(default=None, description="Name of the speaker if mentioned")
    role: Optional[SpeakerRole] = Field(default=None, description="Role of the speaker in the conversation")
    company: Optional[str] = Field(default=None, description="Company they work for if mentioned")

class Challenge(BaseModel):
    """A challenge or problem mentioned in the conversation"""
    description: Optional[str] = Field(default=None, description="Description of the challenge")
    impact: Optional[str] = Field(default=None, description="How this challenge affects them")
    urgency: Optional[str] = Field(default=None, description="Low, Medium, or High urgency")

class CurrentSolution(BaseModel):
    """How they currently solve their problems"""
    solution: Optional[str] = Field(default=None, description="What they're currently doing")
    satisfaction_level: Optional[str] = Field(default=None, description="How satisfied they are: Very Satisfied, Satisfied, Neutral, Unsatisfied, Very Unsatisfied")
    limitations: Optional[List[str]] = Field(default=[], description="Limitations of current solution")

class Need(BaseModel):
    """A need identified using psychology frameworks like NVC"""
    need_category: Optional[str] = Field(default=None, description="Category of need (e.g., autonomy, efficiency, security, connection)")
    description: Optional[str] = Field(default=None, description="Specific need description")
    intensity: Optional[str] = Field(default=None, description="Low, Medium, or High intensity")

class ExtractedInsights(BaseModel):
    """Complete structured output from conversation analysis"""
    
    # Speakers
    speakers: Optional[List[Speaker]] = Field(default=[], description="People identified in the conversation")
    
    # What they care about
    core_values: Optional[List[str]] = Field(default=[], description="What this person/company cares about most")
    priorities: Optional[List[str]] = Field(default=[], description="Their current priorities and focus areas")
    
    # Challenges
    primary_challenges: Optional[List[Challenge]] = Field(default=[], description="Main problems they're facing")
    secondary_challenges: Optional[List[Challenge]] = Field(default=[], description="Secondary or related problems")
    
    # Current solutions
    current_solutions: Optional[List[CurrentSolution]] = Field(default=[], description="How they solve problems today")
    
    # Needs analysis
    psychological_needs: Optional[List[Need]] = Field(default=[], description="Underlying needs using NVC or similar frameworks")
 


In [5]:
# Cell 3: Define LangGraph State
class AudioPipelineState(TypedDict):
    file_path: str
    filename: str
    transcript_text: Optional[str]
    conversation_id: Optional[int]
    extracted_insights: Optional[ExtractedInsights]  # ← NEW: Using our Pydantic model
    error: Optional[str]
    status: str

print("✅ State defined")

✅ State defined


In [6]:
# Cell 4: Test AssemblyAI Connection
# Configure AssemblyAI
aai.settings.api_key = os.getenv('ASSEMBLYAI_API_KEY')

# Test with a simple transcription (we'll use a file from temp folder)
def test_assemblyai_connection():
    """Test if AssemblyAI is working"""
    try:
        # Just test the API key is valid
        transcriber = aai.Transcriber()
        print("✅ AssemblyAI connection successful")
        return True
    except Exception as e:
        print(f"❌ AssemblyAI connection failed: {e}")
        return False

test_assemblyai_connection()

✅ AssemblyAI connection successful


True

In [18]:
# Cell 5: Batch File Discovery and Management
def find_audio_files(temp_folder: Path) -> List[Path]:
    """Find all audio files in temp folder"""
    audio_extensions = ['*.wav', '*.mp3', '*.m4a']
    audio_files = []
    
    for ext in audio_extensions:
        audio_files.extend(temp_folder.glob(ext))
    
    return sorted(audio_files)

def display_batch_info(audio_files: List[Path]):
    """Display information about the batch of files"""
    if not audio_files:
        print("❌ No audio files found in temp folder!")
        return False
    
    total_size_mb = sum(f.stat().st_size for f in audio_files) / (1024 * 1024)
    
    print(f"📊 BATCH PROCESSING INFO:")
    print(f"   Files to process: {len(audio_files)}")
    print(f"   Total size: {total_size_mb:.1f} MB")
    print(f"\n📁 Files found:")
    
    for i, file_path in enumerate(audio_files, 1):
        size_mb = file_path.stat().st_size / (1024 * 1024)
        print(f"   {i}. {file_path.name} ({size_mb:.1f} MB)")
    
    return True

def cleanup_processed_files(processed_files: List[Path]):
    """Delete all successfully processed files"""
    print(f"\n🗑️ CLEANUP: Deleting {len(processed_files)} processed files...")
    deleted_count = 0
    
    for file_path in processed_files:
        try:
            file_path.unlink()  # Delete file
            print(f"   ✅ Deleted: {file_path.name}")
            deleted_count += 1
        except Exception as e:
            print(f"   ❌ Failed to delete {file_path.name}: {e}")
    
    print(f"🗑️ Cleanup complete: {deleted_count}/{len(processed_files)} files deleted")

# Discover files in temp folder
temp_folder = project_root / 'data' / 'temp'
temp_folder.mkdir(parents=True, exist_ok=True)  # Ensure folder exists

audio_files = find_audio_files(temp_folder)
files_available = display_batch_info(audio_files)

if files_available:
    print(f"\n🚀 Ready to process {len(audio_files)} files!")
else:
    print("\n💡 TIP: Add .wav files to data/temp/ folder for testing")

📊 BATCH PROCESSING INFO:
   Files to process: 1
   Total size: 24.2 MB

📁 Files found:
   1. blog_record (2025-10-26 11_58_14).wav (24.2 MB)

🚀 Ready to process 1 files!


In [None]:
# Batch Processing Function (Updated with Full Insights Display)
def process_audio_batch(audio_files: List[Path], pipeline) -> dict:
    """Process all audio files in batch with detailed insights display"""
    
    if not audio_files:
        print("❌ No files to process")
        return {"processed": [], "failed": [], "total": 0}
    
    print(f"\n🚀 STARTING BATCH PROCESSING - {len(audio_files)} files")
    print("=" * 60)
    
    processed_files = []
    failed_files = []
    results = []
    
    for i, file_path in enumerate(audio_files, 1):
        print(f"\n📂 Processing {i}/{len(audio_files)}: {file_path.name}")
        print("-" * 40)
        
        # Create initial state
        initial_state = {
            "file_path": str(file_path),
            "filename": file_path.name,
            "transcript_text": None,
            "conversation_id": None,
            "extracted_insights": None,  
            "error": None,
            "status": "processing"
        }
        
        try:
            # Run through pipeline
            result = pipeline.invoke(initial_state)
            
            if result["status"] in ["completed", "insights_extracted"]:
                print(f"✅ SUCCESS: {file_path.name}")
                print(f"   Conversation ID: {result['conversation_id']}")
                print(f"   Transcript preview: {result['transcript_text'][:100]}...")
                
                # FULL INSIGHTS DISPLAY
                if result.get('extracted_insights'):
                    insights = result['extracted_insights']
                    print(f"\n🧠 === EXTRACTED INSIGHTS FOR: {file_path.name} ===")
                    print("=" * 50)
                    
                    # Speakers
                    if insights.speakers:
                        print("👥 SPEAKERS:")
                        for speaker in insights.speakers:
                            print(f"   • Name: {speaker.name or 'Unknown'}")
                            print(f"     Role: {speaker.role or 'Unknown'}")  
                            print(f"     Company: {speaker.company or 'Unknown'}")
                    
                    # Core Values
                    if insights.core_values:
                        print("💎 CORE VALUES:")
                        for value in insights.core_values:
                            print(f"   • {value}")
                    
                    # Priorities
                    if insights.priorities:
                        print("🎯 PRIORITIES:")
                        for priority in insights.priorities:
                            print(f"   • {priority}")
                    
                    # Primary Challenges
                    if insights.primary_challenges:
                        print("🔥 PRIMARY CHALLENGES:")
                        for challenge in insights.primary_challenges:
                            print(f"   • Challenge: {challenge.description}")
                            print(f"     Impact: {challenge.impact}")
                            print(f"     Urgency: {challenge.urgency}")
                    
                    # Secondary Challenges
                    if insights.secondary_challenges:
                        print("⚠️  SECONDARY CHALLENGES:")
                        for challenge in insights.secondary_challenges:
                            print(f"   • Challenge: {challenge.description}")
                            print(f"     Impact: {challenge.impact}")
                            print(f"     Urgency: {challenge.urgency}")
                    
                    # Current Solutions
                    if insights.current_solutions:
                        print("🔧 CURRENT SOLUTIONS:")
                        for solution in insights.current_solutions:
                            print(f"   • Solution: {solution.solution}")
                            print(f"     Satisfaction: {solution.satisfaction_level}")
                            if solution.limitations:
                                print(f"     Limitations: {', '.join(solution.limitations)}")
                    
                    # Psychological Needs
                    if insights.psychological_needs:
                        print("🧘 PSYCHOLOGICAL NEEDS:")
                        for need in insights.psychological_needs:
                            print(f"   • {need.description}")
                            print(f"     Category: {need.need_category}")
                            print(f"     Intensity: {need.intensity}")
                    
                    print("🧠 === END INSIGHTS ===")
                    print("-" * 50)
                
                processed_files.append(file_path)
            else:
                print(f"❌ FAILED: {file_path.name}")
                print(f"   Status: {result.get('status', 'Unknown')}")
                print(f"   Error: {result.get('error', 'Unknown error')}")
                failed_files.append(file_path)
            
            results.append(result)
            
        except Exception as e:
            print(f"❌ PIPELINE ERROR: {file_path.name}")
            print(f"   Exception: {str(e)}")
            failed_files.append(file_path)
            
            results.append({
                **initial_state,
                "error": str(e),
                "status": "pipeline_error"
            })
    
    # Final Summary
    print(f"\n📊 BATCH PROCESSING COMPLETE!")
    print("=" * 60)
    print(f"✅ Successfully processed: {len(processed_files)}")
    print(f"❌ Failed: {len(failed_files)}")
    print(f"📁 Total files: {len(audio_files)}")
    
    if failed_files:
        print(f"\n❌ Failed files:")
        for failed_file in failed_files:
            print(f"   - {failed_file.name}")
    
    return {
        "processed": processed_files,
        "failed": failed_files,
        "total": len(audio_files),
        "results": results
    }

print("✅ Batch processing function ready with full insights display")

✅ Batch processing function ready


In [8]:
# Cell 6: Define LangGraph Nodes
def transcription_node(state: AudioPipelineState) -> AudioPipelineState:
    """Node 1: Transcribe audio file with AssemblyAI"""
    try:
        print(f"🎙️ Transcribing: {state['filename']}")
        
        # Configure transcriber
        transcriber = aai.Transcriber()
        
        # Transcribe the file
        transcript = transcriber.transcribe(state['file_path'])
        
        if transcript.status == aai.TranscriptStatus.error:
            return {
                **state,
                "error": f"AssemblyAI error: {transcript.error}",
                "status": "transcription_failed"
            }
        
        return {
            **state,
            "transcript_text": transcript.text,
            "status": "transcribed"
        }
        
    except Exception as e:
        return {
            **state,
            "error": f"Transcription error: {str(e)}",
            "status": "transcription_failed"
        }

def database_saver_node(state: AudioPipelineState) -> AudioPipelineState:
    """Node 2: Save transcript to database"""
    try:
        print(f"💾 Saving to database: {state['filename']}")
        
        # Create conversation object
        conversation = ConversationCreate(
            title=f"Audio: {state['filename']}",
            raw_text=state['transcript_text'],
            source="transcribed"
        )
        
        # Save to database
        conversation_id = db.create_conversation(conversation)
        
        return {
            **state,
            "conversation_id": conversation_id,
            "status": "completed"
        }
        
    except Exception as e:
        return {
            **state,
            "error": f"Database error: {str(e)}",
            "status": "database_failed"
        }

print("✅ LangGraph nodes defined")

✅ LangGraph nodes defined


In [14]:
# Cell: Build Current Pipeline (3 Nodes)
def build_pipeline():
    """Build the current LangGraph workflow with transcription, save, and insights"""
    workflow = StateGraph(AudioPipelineState)
    
    # Add current nodes
    workflow.add_node("transcribe", transcription_node)
    workflow.add_node("save_to_db", database_saver_node)  
    workflow.add_node("extract_insights", pain_extractor_node)
    
    # Chain them together
    workflow.add_edge("transcribe", "save_to_db")
    workflow.add_edge("save_to_db", "extract_insights")
    
    workflow.set_entry_point("transcribe")
    workflow.set_finish_point("extract_insights")
    
    return workflow.compile()

# Build the pipeline
pipeline = build_pipeline()
print("✅ LangGraph pipeline compiled (3 nodes: transcribe → save_to_db → extract_insights)")

✅ LangGraph pipeline compiled (3 nodes: transcribe → save_to_db → extract_insights)


In [26]:
# Cell: Clean Conversations Table
def clean_conversations_table():
    """Delete all records from conversations table"""
    
    # First show what will be deleted
    conversations = db.get_all_conversations()
    print(f"📊 Found {len(conversations)} conversations to delete:")
    for conv in conversations[:5]:  # Show first 5
        print(f"  - ID {conv.id}: {conv.title}")
    if len(conversations) > 5:
        print(f"  ... and {len(conversations) - 5} more")
    
    # Ask for confirmation
    response = input(f"\n❓ Delete all {len(conversations)} conversations? (y/N): ")
    
    if response.lower() in ['y', 'yes']:
        conn = db.get_connection()
        try:
            cursor = conn.cursor()
            
            # Delete all conversations (this will also delete related blog_post_ideas due to foreign key)
            cursor.execute("DELETE FROM blog_post_ideas")
            cursor.execute("DELETE FROM processing_status") 
            cursor.execute("DELETE FROM conversations")
            conn.commit()
            
            print("✅ All conversations deleted!")
            print("✅ Related blog ideas deleted!")
            print("✅ Processing status cleared!")
            
        finally:
            conn.close()
    else:
        print("❌ Deletion cancelled")

# Run the cleaner
clean_conversations_table()

📊 Found 4 conversations to delete:
  - ID 8: Audio: blog_record (2025-10-26 11_58_14).wav
  - ID 7: Audio: blog_record (2025-10-26 11_58_14).wav
  - ID 6: Audio: blog_record (2025-10-26 11_58_14).wav
  - ID 5: Audio: blog_record (2025-10-26 11_58_14).wav
✅ All conversations deleted!
✅ Related blog ideas deleted!
✅ Processing status cleared!


In [19]:
# Cell 9: Execute Batch Processing with Cleanup
if files_available:
    print("🎯 Starting batch processing...")
    
    # Process all files
    batch_results = process_audio_batch(audio_files, pipeline)
    
    # Display summary
    print(f"\n📊 BATCH PROCESSING COMPLETE!")
    print("=" * 60)
    print(f"✅ Successfully processed: {len(batch_results['processed'])}")
    print(f"❌ Failed: {len(batch_results['failed'])}")
    print(f"📁 Total files: {batch_results['total']}")
    
    # Show failed files
    if batch_results['failed']:
        print(f"\n❌ Failed files:")
        for file_path in batch_results['failed']:
            print(f"   - {file_path.name}")
    
    # Cleanup successfully processed files
    if batch_results['processed']:
        confirm = input(f"\n🗑️ Delete {len(batch_results['processed'])} processed files? (y/N): ")
        if confirm.lower() in ['y', 'yes']:
            cleanup_processed_files(batch_results['processed'])
        else:
            print("🔧 Files kept in temp folder for inspection")
    
    print("\n🎉 Batch processing complete!")
    
else:
    print("💡 Add audio files to data/temp/ folder and rerun this cell")

🎯 Starting batch processing...

🚀 STARTING BATCH PROCESSING - 1 files

📂 Processing 1/1: blog_record (2025-10-26 11_58_14).wav
----------------------------------------
🎙️ Transcribing: blog_record (2025-10-26 11_58_14).wav
💾 Saving to database: blog_record (2025-10-26 11_58_14).wav
🧠 Starting pain extraction...
✅ Extracted insights: 1 primary challenges, 1 speakers
✅ SUCCESS: blog_record (2025-10-26 11_58_14).wav
   Conversation ID: 8
   Transcript preview: My name is Hugo and I am the CTO of Drone flytech. At Drone flightek, one of our primary challenges ...
   🧠 Insights extracted: 1 challenges found
   💎 Core values: efficiency, transparency...

📊 BATCH PROCESSING COMPLETE!
✅ Successfully processed: 1
❌ Failed: 0
📁 Total files: 1
🔧 Files kept in temp folder for inspection

🎉 Batch processing complete!


In [11]:
# Cell 7: Setup Anthropic LLM for Insights Extraction (FIXED)
from langchain_anthropic import ChatAnthropic
import json

# Initialize Anthropic with correct model name
anthropic_key = os.getenv('ANTHROPIC_API_KEY')
if not anthropic_key:
    print("⚠️  ANTHROPIC_API_KEY not found in .env file")
    print("Please add: ANTHROPIC_API_KEY=your_key_here")
else:
    llm = ChatAnthropic(
        model="claude-3-5-sonnet-20241022",  # ← Updated model name
        api_key=anthropic_key,
        temperature=0.1
    )
    print("✅ Anthropic LLM initialized with Claude 3.5 Sonnet")

✅ Anthropic LLM initialized with Claude 3.5 Sonnet


In [12]:
## 3. PainExtractor Node Implementation


import openai
import json
from typing import Dict, Any

# System prompt
PAIN_EXTRACTOR_SYSTEM_PROMPT = """
You are a UX researcher and business analyst for BigKids Automation. Your job is listening to transcripts from interviews with users and potential clients. 

You pay special attention to problems that users have regarding how their company is automating, using web apps and AI to save time and move towards a more ethical and sovereign tech infrastructure.

You will be given the transcript of an interview with a user or potential client.

Your task is to extract structured information about:
- Who is speaking and their role
- What this person cares about (values, priorities)
- Their main primary and secondary challenges
- How they are solving problems today
- Are there AI agents that can assist them?
- Their underlying psychological needs (using frameworks like NVC - Non-Violent Communication)

Focus on automation, web apps, AI, time-saving, ethical tech, and sovereign infrastructure themes.

Be thorough but concise. 

IMPORTANT: Only extract information that is explicitly mentioned in the transcript. 
If information is not clearly stated, leave the field empty/null rather than guessing or inferring.
Do not hallucinate or make assumptions about missing information.
"""

In [13]:
def extract_insights_from_transcript(transcript: str) -> ExtractedInsights:
    """Extract structured insights using Anthropic Claude with proper JSON structure"""
    
    prompt = f"""
    Analyze this conversation transcript and extract structured insights in the exact JSON format below:

    Transcript: {transcript}

    Return ONLY valid JSON in this exact structure:
    {{
        "speakers": [
            {{
                "name": "Hugo",
                "role": "client", 
                "company": "Drone flytech"
            }}
        ],
        "core_values": ["efficiency", "transparency"],
        "priorities": ["improving financial processes"],
        "primary_challenges": [
            {{
                "description": "Tracking who paid which invoice",
                "impact": "Creates confusion in financial processes",
                "urgency": "High"
            }}
        ],
        "secondary_challenges": [],
        "current_solutions": [
            {{
                "solution": "Using MoneyOak software",
                "satisfaction_level": "Unsatisfied", 
                "limitations": ["inadequate functionality", "limited visibility"]
            }}
        ],
        "psychological_needs": [
            {{
                "need_category": "security",
                "description": "Confidence in financial operations",
                "intensity": "High"
            }}
        ]
    }}

    Important: 
    - Return ONLY the JSON, no other text
    - Use exact field names as shown
    - For urgency use: "Low", "Medium", or "High"
    - For satisfaction_level use: "Very Satisfied", "Satisfied", "Neutral", "Unsatisfied", "Very Unsatisfied"
    - For intensity use: "Low", "Medium", or "High"
    - For speaker role use: "client" or "interviewer"
    """
    
    try:
        # Use the Claude LLM
        response = llm.invoke(prompt)
        
        # Parse the JSON response
        insights_data = json.loads(response.content)
        
        # Convert to Pydantic model
        return ExtractedInsights(**insights_data)
        
    except json.JSONDecodeError as e:
        print(f"❌ JSON parsing error: {e}")
        print(f"📝 Raw response: {response.content[:500]}...")
        raise
    except Exception as e:
        print(f"❌ Error in LLM call: {e}")
        raise
def pain_extractor_node(state: AudioPipelineState) -> AudioPipelineState:
    """
    LangGraph node: Extract structured insights from conversation transcript
    """
    print("🧠 Starting pain extraction...")
    
    try:
        # Extract insights using OpenAI structured output
        insights = extract_insights_from_transcript(state['transcript_text'])
        
        if insights:
            print(f"✅ Extracted insights: {len(insights.primary_challenges)} primary challenges, {len(insights.speakers)} speakers")
            
            return {
                **state,
                "extracted_insights": insights,
                "status": "insights_extracted"
            }
        else:
            return {
                **state,
                "error": "Failed to extract insights from transcript",
                "status": "error"
            }
            
    except Exception as e:
        print(f"❌ Pain extraction failed: {e}")
        return {
            **state,
            "error": f"Pain extraction error: {str(e)}",
            "status": "error"
        }



In [21]:
## 4. Usage in Jupyter Notebook


# Cell: Test Pain Extractor Node
from openai import OpenAI

# Initialize OpenAI client
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

# Test with sample transcript
test_state = {
    "file_path": "data/temp/test.wav",
    "filename": "test.wav",
    "transcript_text": "Hi, I'm John from TechCorp. We're struggling with our manual processes. We spend hours every day on data entry and it's killing our productivity. We've tried some automation tools but they don't integrate well with our existing systems. What we really need is something that respects our data sovereignty and doesn't lock us into big tech platforms.",
    "conversation_id": 1,
    "extracted_insights": None,
    "blog_ideas": None,
    "error": None,
    "status": "transcribed"
}

# Run the pain extractor node
result_state = pain_extractor_node(test_state)

# Display results
if result_state.get('extracted_insights'):
    insights = result_state['extracted_insights']
    print("🎯 Extracted Insights:")
    print(f"Speakers: {[s.name for s in insights.speakers]}")
    print(f"Primary challenges: {len(insights.primary_challenges)}")



🧠 Starting pain extraction...
✅ Extracted insights: 1 primary challenges, 1 speakers
🎯 Extracted Insights:
Speakers: ['John']
Primary challenges: 1


In [None]:
# Cell 8A: Test extract_insights_from_transcript function
def test_extract_insights_standalone():
    """Test the insight extraction function with sample transcript"""
    
    # Sample transcript for testing
    sample_transcript = """
    my name is olivier masset. i work at Lean and Agile Department from Elia (company). 

my goal here in elia is to help my team to adopt a mindset of continuous improvement. whatever the techniques. it does not matter if it's lean agile or something else they just come into the office and we have a chat about what they would like to improve. we analyse the "as is" situation why they would like to apply lean or agile techniques or whatever, what they want to do and we support them in the journey to get there. 
##do you work with teams? 
half of my job is IT teams, half is business teams. for the IT has been a top down approach. for the other is a success story approach. they can see that it works in other teams and they come to us to do ask to do the same. 



<img src="IMG_20190509_165555.jpg" width="400" height="400" />
__Lean and Agile office supports elia teams in their cultural change towards a continuous improvement mindset.__ Introducing, implementing, sustaining. 
a)awareness: communication, training and networking withing Eila
b)new initiatives
c)support: coaching & training , try to raise their maturity, regular follow up for example on the rituals, obeya room,logistic around that
d)innovation new concepts, and techniques to try ex: if i read a new book (reinventing orgs) i try to implement it here. we try to be on top of the cultural change project. there is a program called "make a difference" we can have a sort of top down apporach. they have an impact in all the company. so is good we are together with them. when we create a workshop to provide feedback we support them with workshop design. if they like a technique they call us again for help on other meeting. 
f) networking outside elia
-events training: journee agile. other companies. meetups 
-suppliers
-companies

that is how we are doing the transition here. little by little. step by step. based on word of mouth. t

##team management
my boss is sparring partner. we do try to brainstorm together.  i have my own coach. stephan levaque.he is coming here 1 o 2 times a month. help me with mentoring. help me unblock some issues. sometimes we cocreate workshops together.i have also 2 other coaches. one in schaarbeek to help the team there and another one 100 of the time but only since 1 month. we coach around 300 people. 150 in IT and about the same in the business. for us we need 1 coach for 30 people who start and 1 coach for 
100 people on sustain mode not a new initiative they have been doing this for a while and you help them to go further. i used to be a alone for 300 people. i started 3 years ago. i started in 2017 and the lean project started in 2016. they did the begining of the journey with 2 external coaches from cap gemini.i am employee of elia now. 

##main difficulties
first one on top of everything, because we dont have a top down approach. you can have high scaling with the top down. some companies havea fulltop down approach. here is something you have to rise from th bottom. for me bottom up support is more important but you also need support from the top.   i miss somtimes some buy in from the management.the are convince is useful for the team but they say is for the team and not for us. you dont have "the lead by example" here. to help them to understand that this is very valuable for them this is my main challenge. help them to understand, if htey take the time to do it they will earn a lot behind, it will be worth it. this is a very big challenge. that is why I go to a lot of meetups where i can meet people like me. 
i feel alone. also pain. that is why i have an exernal coach. i want to discuss with someone. i need som eiother ideas that we hear on the books . release time for continuous improvement. to be a coach you need tthem to want to be coached. if you have as i to be. coach . help them to find the solution in  them. to do so you need them to come up i need to change something. get somt out of you confort zone. and thaat is difficult that they find this time. sometimes the come for the bad reason. they hear what others have done. we are not able to cut the work. the main issue is to release some real time to achieve their coaching objecitves. they are not able to manage the priorities. they don't allocate real time. they put it as something that their people need to do on top or their work. this creates risk of burnout. i think agile is good for that.
version one report agile. what is the main value for you. as a result. the first one is always. a better ability to manage the priorities. do a choice between priorities. we are not always good at that. we want everyhing. they understood that and they started to release sometime.s this is problably the main difficulty i have. i prefer to have 3 people 100% agile or lean than 10 people 10%of their time.

##final impact i want to have 
innovation mindset here at the end what i really try to do is to bring a bit of wellbeing for people. a bit better in their work everyday. ia m happy to be here. i am a bit better than yesterday. atmosphere of collab is improving. initiative spirit is here. you can do mistakes. you are not affraid to come to work. you like to come to work. that is why i want to work with the cultural change team as well. 
this diagram i like 


if you tell them you need to start to use a new tool, is feasible, they dont like it at the beginning but they have to . there is no choice. for example: use microsoft word.

if you have in the agile manifesto people over tools and processes. mindset, is the less visible but the most powerful. when you hire someone, they focus on the hard skills. and not the soft skills. but the soft skills is one of the most important if you want to work in a team. as a painter you may do better work alone. here is about team . you need the righ tmindset. 


agile radar. how menaced do they feel. here they are quite confident: the company is going well. lots of companies started agile in a crisis. previsible vs exploratoire. innovation vs efficiency. better to be in an innovation mindset that to have always efficiency. 

it is a challenge becasue the context is not the best to to that. they don't want to go there. want to stay safe, we wont be predictble anymore. and this is a difficult. 

check and see how they are doing they are not so good at that.this is the reality here. 

lot of trouble with waiting time. lot of trouble with non utilized talent. using people to do thing they should not. 

##human aspect in the work environment
the most important part . i am more interested by people than the content of their job. behaviour and mindset is my job. the good thing is they want to do it. they put it all over the walls. more collab. more feedback. one voice. they want to go there
but most of the time in the wall is where you want to go but is what you don't have. some times this is what you see in the walls of the companies. this allow them to do it. some others feels dangerous and the prefer not to go there. you can do it if you want
le croyance limitantes. there are a lot of those here. release them from their limitation of the mind. for me is amazing. sometimes i am doin ga very hard planning. at the end is one conversation at the coffee corner will have higher impact. we need time for that. we as coaches we need to live that style. take time and not always to focus on the operational goal. go to events. take time like talking to you. we need to keep that mind opening. all the success stories . in front of the stage. put it under the spot light. good behaviour. good team spirit. tell stories in our comm  tooo. i have a blog in the intranet. display screens. also simply i offer individual coaching. one on one. it has to come from them. +make a different project. for example one voice behaviour. fist feedbac. 2 one voice. you take decision as a team. once you tak decision you are all behind. team spirit approach. to do so we provide them some tools. check in check out. speech ball. everyone can share and is included in the conversation. ice breakers. getting to know eachother in the team. bring more humanity brings more empathy helping eachoter. i try to do a self assesment. ask them to evaluate themslelves in 100 questions. do you have time? is the cmm good? is electicity in the aair? gentle dissagreement. they have to take a decission as a team about that. if people 
have fun and others don't they ask during the self assessment workshope i invite them to express why in a group setting. and sometimes there are debates. and somethimes is the first time they share this info in a team. and this brings empathy collab. and at they end 
they wills start to have more fun. soft skills, start to help eachother and havea little more interest in the job of the other. values in common. i love one icebreaker who combine with energizer. the best thing that happen in their life in the last 5 days. with this one you can use it each week and repeat. at the end , with that ...last week i went with vacation with my kid. ah you have a kid. now environment. i am a fan of sunst dev. you are agile coach at the end with thos little exercises you have impact because they find things in common. this little things are sometimes more important than the big report.

is more about connecting to the people. dont feel alone in the troubles. we can help eachother. the best meetups i go scrum clinic stuff. all the events in the network. 

    """
    
    print("🧪 Testing extract_insights_from_transcript...")
    print(f"📝 Sample transcript: {sample_transcript[:100]}...")
    
    try:
        # Call your function
        insights = extract_insights_from_transcript(sample_transcript)
        
        print("✅ Function executed successfully!")
        print("\n📊 Extracted Insights:")
        print(f"Speakers found: {len(insights.speakers)}")
        print(f"Primary challenges: {len(insights.primary_challenges)}")
        print(f"Core values: {insights.core_values}")
        
        # Print first challenge as example
        if insights.primary_challenges:
            challenge = insights.primary_challenges[0]
            print(f"\n🔥 First challenge:")
            print(f"   Description: {challenge.description}")
            print(f"   Impact: {challenge.impact}")
            print(f"   Urgency: {challenge.urgency}")
            
        return True, insights
        
    except Exception as e:
        print(f"❌ Error in extract_insights_from_transcript: {e}")
        return False, None

# Run the test
success, test_insights = test_extract_insights_standalone()

🧪 Testing extract_insights_from_transcript...
📝 Sample transcript: 
    my name is olivier masset from elia. 

my goal here in elia is to help my team to adopt a minds...
✅ Function executed successfully!

📊 Extracted Insights:
Speakers found: 1
Primary challenges: 2
Core values: ['continuous improvement', 'collaboration', 'innovation', 'well-being', 'human connection']

🔥 First challenge:
   Description: Lack of top-down support and buy-in from management for continuous improvement initiatives.
   Impact: Difficulty in achieving high scaling and full integration of continuous improvement practices.
   Urgency: High


In [None]:
# Cell 8B: Test pain_extractor_node with mock state
def test_pain_extractor_node():
    """Test the LangGraph node with mock state"""
    
    # Create mock state (as if it came from previous nodes)
    mock_state = {
        "file_path": "data/temp/test_file.wav",
        "filename": "test_file.wav", 
        "transcript_text": """
my name is olivier masset. i work at Lean and Agile Department from Elia (company). 


my goal here in elia is to help my team to adopt a mindset of continuous improvement. whatever the techniques. it does not matter if it's lean agile or something else they just come into the office and we have a chat about what they would like to improve. we analyse the "as is" situation why they would like to apply lean or agile techniques or whatever, what they want to do and we support them in the journey to get there. 
##do you work with teams? 
half of my job is IT teams, half is business teams. for the IT has been a top down approach. for the other is a success story approach. they can see that it works in other teams and they come to us to do ask to do the same. 



<img src="IMG_20190509_165555.jpg" width="400" height="400" />
__Lean and Agile office supports elia teams in their cultural change towards a continuous improvement mindset.__ Introducing, implementing, sustaining. 
a)awareness: communication, training and networking withing Eila
b)new initiatives
c)support: coaching & training , try to raise their maturity, regular follow up for example on the rituals, obeya room,logistic around that
d)innovation new concepts, and techniques to try ex: if i read a new book (reinventing orgs) i try to implement it here. we try to be on top of the cultural change project. there is a program called "make a difference" we can have a sort of top down apporach. they have an impact in all the company. so is good we are together with them. when we create a workshop to provide feedback we support them with workshop design. if they like a technique they call us again for help on other meeting. 
f) networking outside elia
-events training: journee agile. other companies. meetups 
-suppliers
-companies

that is how we are doing the transition here. little by little. step by step. based on word of mouth. t

##team management
my boss is sparring partner. we do try to brainstorm together.  i have my own coach. stephan levaque.he is coming here 1 o 2 times a month. help me with mentoring. help me unblock some issues. sometimes we cocreate workshops together.i have also 2 other coaches. one in schaarbeek to help the team there and another one 100 of the time but only since 1 month. we coach around 300 people. 150 in IT and about the same in the business. for us we need 1 coach for 30 people who start and 1 coach for 
100 people on sustain mode not a new initiative they have been doing this for a while and you help them to go further. i used to be a alone for 300 people. i started 3 years ago. i started in 2017 and the lean project started in 2016. they did the begining of the journey with 2 external coaches from cap gemini.i am employee of elia now. 

##main difficulties
first one on top of everything, because we dont have a top down approach. you can have high scaling with the top down. some companies havea fulltop down approach. here is something you have to rise from th bottom. for me bottom up support is more important but you also need support from the top.   i miss somtimes some buy in from the management.the are convince is useful for the team but they say is for the team and not for us. you dont have "the lead by example" here. to help them to understand that this is very valuable for them this is my main challenge. help them to understand, if htey take the time to do it they will earn a lot behind, it will be worth it. this is a very big challenge. that is why I go to a lot of meetups where i can meet people like me. 
i feel alone. also pain. that is why i have an exernal coach. i want to discuss with someone. i need som eiother ideas that we hear on the books . release time for continuous improvement. to be a coach you need tthem to want to be coached. if you have as i to be. coach . help them to find the solution in  them. to do so you need them to come up i need to change something. get somt out of you confort zone. and thaat is difficult that they find this time. sometimes the come for the bad reason. they hear what others have done. we are not able to cut the work. the main issue is to release some real time to achieve their coaching objecitves. they are not able to manage the priorities. they don't allocate real time. they put it as something that their people need to do on top or their work. this creates risk of burnout. i think agile is good for that.
version one report agile. what is the main value for you. as a result. the first one is always. a better ability to manage the priorities. do a choice between priorities. we are not always good at that. we want everyhing. they understood that and they started to release sometime.s this is problably the main difficulty i have. i prefer to have 3 people 100% agile or lean than 10 people 10%of their time.

##final impact i want to have 
innovation mindset here at the end what i really try to do is to bring a bit of wellbeing for people. a bit better in their work everyday. ia m happy to be here. i am a bit better than yesterday. atmosphere of collab is improving. initiative spirit is here. you can do mistakes. you are not affraid to come to work. you like to come to work. that is why i want to work with the cultural change team as well. 
this diagram i like 


if you tell them you need to start to use a new tool, is feasible, they dont like it at the beginning but they have to . there is no choice. for example: use microsoft word.

if you have in the agile manifesto people over tools and processes. mindset, is the less visible but the most powerful. when you hire someone, they focus on the hard skills. and not the soft skills. but the soft skills is one of the most important if you want to work in a team. as a painter you may do better work alone. here is about team . you need the righ tmindset. 


agile radar. how menaced do they feel. here they are quite confident: the company is going well. lots of companies started agile in a crisis. previsible vs exploratoire. innovation vs efficiency. better to be in an innovation mindset that to have always efficiency. 

it is a challenge becasue the context is not the best to to that. they don't want to go there. want to stay safe, we wont be predictble anymore. and this is a difficult. 

check and see how they are doing they are not so good at that.this is the reality here. 

lot of trouble with waiting time. lot of trouble with non utilized talent. using people to do thing they should not. 

##human aspect in the work environment
the most important part . i am more interested by people than the content of their job. behaviour and mindset is my job. the good thing is they want to do it. they put it all over the walls. more collab. more feedback. one voice. they want to go there
but most of the time in the wall is where you want to go but is what you don't have. some times this is what you see in the walls of the companies. this allow them to do it. some others feels dangerous and the prefer not to go there. you can do it if you want
le croyance limitantes. there are a lot of those here. release them from their limitation of the mind. for me is amazing. sometimes i am doin ga very hard planning. at the end is one conversation at the coffee corner will have higher impact. we need time for that. we as coaches we need to live that style. take time and not always to focus on the operational goal. go to events. take time like talking to you. we need to keep that mind opening. all the success stories . in front of the stage. put it under the spot light. good behaviour. good team spirit. tell stories in our comm  tooo. i have a blog in the intranet. display screens. also simply i offer individual coaching. one on one. it has to come from them. +make a different project. for example one voice behaviour. fist feedbac. 2 one voice. you take decision as a team. once you tak decision you are all behind. team spirit approach. to do so we provide them some tools. check in check out. speech ball. everyone can share and is included in the conversation. ice breakers. getting to know eachother in the team. bring more humanity brings more empathy helping eachoter. i try to do a self assesment. ask them to evaluate themslelves in 100 questions. do you have time? is the cmm good? is electicity in the aair? gentle dissagreement. they have to take a decission as a team about that. if people 
have fun and others don't they ask during the self assessment workshope i invite them to express why in a group setting. and sometimes there are debates. and somethimes is the first time they share this info in a team. and this brings empathy collab. and at they end 
they wills start to have more fun. soft skills, start to help eachother and havea little more interest in the job of the other. values in common. i love one icebreaker who combine with energizer. the best thing that happen in their life in the last 5 days. with this one you can use it each week and repeat. at the end , with that ...last week i went with vacation with my kid. ah you have a kid. now environment. i am a fan of sunst dev. you are agile coach at the end with thos little exercises you have impact because they find things in common. this little things are sometimes more important than the big report.

is more about connecting to the people. dont feel alone in the troubles. we can help eachother. the best meetups i go scrum clinic stuff. all the events in the network. 

        """,
        "conversation_id": 100,  # Mock ID
        "extracted_insights": None,
        "status": "saved_to_db",
        "error": None
    }
    
    print("🧪 Testing pain_extractor_node...")
    print(f"📝 Mock state status: {mock_state['status']}")
    
    try:
        # Call your node function
        updated_state = pain_extractor_node(mock_state)
        
        print("✅ Node executed successfully!")
        print(f"📊 Updated status: {updated_state['status']}")
        print(f"🔍 Has insights: {'extracted_insights' in updated_state and updated_state['extracted_insights'] is not None}")
        
        # Show extracted insights
        if updated_state.get('extracted_insights'):
            insights = updated_state['extracted_insights']
            print(f"\n📋 Insights Summary:")
            print(f"   Speakers: {len(insights.speakers) if insights.speakers else 0}")
            print(f"   Primary challenges: {len(insights.primary_challenges) if insights.primary_challenges else 0}")
            print(f"   Current solutions: {len(insights.current_solutions) if insights.current_solutions else 0}")
        
        return True, updated_state
        
    except Exception as e:
        print(f"❌ Error in pain_extractor_node: {e}")
        import traceback
        traceback.print_exc()
        return False, None

# Run the test
node_success, updated_state = test_pain_extractor_node()

🧪 Testing pain_extractor_node...
📝 Mock state status: saved_to_db
🧠 Starting pain extraction...
✅ Extracted insights: 2 primary challenges, 1 speakers
✅ Node executed successfully!
📊 Updated status: insights_extracted
🔍 Has insights: True

📋 Insights Summary:
   Speakers: 1
   Primary challenges: 2
   Current solutions: 3


In [19]:
# Cell 8C: Test integration (only run if previous tests pass)
if success and node_success:
    print("🎉 Both functions work! Ready for integration.")
    
    # Test with real conversation from database
    conversations = db.get_all_conversations()
    if conversations:
        real_conv = conversations[0]
        print(f"\n🔄 Testing with real conversation: {real_conv.title}")
        
        real_state = {
            "conversation_id": real_conv.id,
            "transcript_text": real_conv.raw_text,
            "status": "saved_to_db"
        }
        
        final_result = pain_extractor_node(real_state)
        print(f"✅ Real data test: {final_result['status']}")
    else:
        print("ℹ️ No conversations in database for real data test")
else:
    print("⚠️ Fix the failing tests before integration")

🎉 Both functions work! Ready for integration.

🔄 Testing with real conversation: Audio: blog_batxhtwo.wav
🧠 Starting pain extraction...
❌ Pain extraction failed: object of type 'NoneType' has no len()
✅ Real data test: error
