In [1]:
import os
import sys
import warnings
from pathlib import Path

print("="*60)
print("CUDA COMPATIBILITY CONFIGURATION")
print("="*60)

# Critical: Set CUDA environment variables BEFORE importing torch
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'  # Synchronous CUDA operations
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:512'  # Memory management
os.environ['TORCH_USE_CUDA_DSA'] = '0'  # Disable device-side assertions

# Suppress unnecessary warnings
warnings.filterwarnings('ignore', category=UserWarning)
warnings.filterwarnings('ignore', category=FutureWarning)

print("‚úì CUDA environment variables configured")
print("‚úì Warning filters applied")
print("\nIMPORTANT: Do not skip this cell or move it!")
print("="*60)

CUDA COMPATIBILITY CONFIGURATION
‚úì CUDA environment variables configured

IMPORTANT: Do not skip this cell or move it!


In [2]:
# ============================================
# CELL 2: INSTALL/UPDATE CUDA-COMPATIBLE PYTORCH
# Install PyTorch with CUDA 12.8 support for Blackwell GPUs
# ============================================

print("\n" + "="*60)
print("INSTALLING CUDA-COMPATIBLE PYTORCH")
print("="*60)

# Uninstall existing PyTorch versions
print("\n1. Removing old PyTorch installations...")
!pip uninstall torch torchvision torchaudio -y

# Install PyTorch nightly with CUDA 12.8 (supports Blackwell sm_120)
print("\n2. Installing PyTorch with CUDA 12.8 support...")
!pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128

print("\n‚úì PyTorch installation complete")
print("="*60)


INSTALLING CUDA-COMPATIBLE PYTORCH

1. Removing old PyTorch installations...
Found existing installation: torch 2.10.0.dev20251110+cu128
Uninstalling torch-2.10.0.dev20251110+cu128:
  Successfully uninstalled torch-2.10.0.dev20251110+cu128
Found existing installation: torchvision 0.25.0.dev20251111+cu128
Uninstalling torchvision-0.25.0.dev20251111+cu128:
  Successfully uninstalled torchvision-0.25.0.dev20251111+cu128
Found existing installation: torchaudio 2.10.0.dev20251111+cu128
Uninstalling torchaudio-2.10.0.dev20251111+cu128:
  Successfully uninstalled torchaudio-2.10.0.dev20251111+cu128

2. Installing PyTorch with CUDA 12.8 support...
Looking in indexes: https://download.pytorch.org/whl/nightly/cu128
Collecting torch
  Using cached https://download.pytorch.org/whl/nightly/cu128/torch-2.10.0.dev20251111%2Bcu128-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (30 kB)
Collecting torchvision
  Using cached https://download.pytorch.org/whl/nightly/cu128/torchvision-0.25.0.dev20251111%2

In [3]:
print("\n" + "="*60)
print("IMPORTING CORE AI LIBRARIES")
print("="*60)

try:
    import torch
    import numpy as np
    import pandas as pd
    from datetime import datetime
    import json
    
    print("‚úì Core libraries imported successfully")
    
    # Configure PyTorch for Blackwell GPU stability
    if torch.cuda.is_available():
        # Disable TF32 for better Blackwell compatibility
        torch.backends.cuda.matmul.allow_tf32 = False
        torch.backends.cudnn.allow_tf32 = False
        
        # Disable benchmark mode for deterministic behavior
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
        
        # Clear GPU cache
        torch.cuda.empty_cache()
        
        print("‚úì PyTorch configured for NVIDIA Blackwell GPU")
    else:
        print("‚ÑπÔ∏è No GPU detected - running in CPU mode")
    
    print(f"‚úì PyTorch version: {torch.__version__}")
    print(f"‚úì NumPy version: {np.__version__}")
    print(f"‚úì Pandas version: {pd.__version__}")
    
except ImportError as e:
    print(f"‚ùå Import error: {e}")
    print("\nTroubleshooting:")
    print("1. Verify Cell 2 completed successfully")
    print("2. Restart kernel: Kernel ‚Üí Restart Kernel")
    print("3. Re-run from Cell 1")

print("="*60)


IMPORTING CORE AI LIBRARIES
‚úì Core libraries imported successfully
‚úì PyTorch configured for NVIDIA Blackwell GPU
‚úì PyTorch version: 2.10.0.dev20251110+cu128
‚úì NumPy version: 1.26.4
‚úì Pandas version: 2.2.3


In [4]:
print("\n" + "="*60)
print("GPU COMPREHENSIVE TESTING")
print("="*60)

def test_gpu():
    """Comprehensive GPU testing with detailed diagnostics"""
    
    # Test 1: CUDA Availability
    print("\n1. Testing CUDA availability...")
    if not torch.cuda.is_available():
        print("‚ùå CUDA not available")
        print("\nPossible causes:")
        print("  ‚Ä¢ GPU drivers not installed (requires 528.89+)")
        print("  ‚Ä¢ CUDA toolkit missing")
        print("  ‚Ä¢ GPU hardware not detected")
        print("\nYou can continue in CPU mode, but training will be slower.")
        return False
    
    print("‚úì CUDA is available")
    
    # Test 2: GPU Information
    print("\n2. GPU Hardware Information:")
    print(f"  ‚Ä¢ Device name: {torch.cuda.get_device_name(0)}")
    print(f"  ‚Ä¢ Device count: {torch.cuda.device_count()}")
    print(f"  ‚Ä¢ Current device: {torch.cuda.current_device()}")
    
    # Test 3: Compute Capability
    capability = torch.cuda.get_device_capability(0)
    print(f"  ‚Ä¢ Compute capability: {capability[0]}.{capability[1]}")
    
    if capability[0] >= 12:  # Blackwell is sm_120+
        print("  ‚úì Blackwell architecture detected (sm_120)")
    elif capability[0] >= 9:
        print("  ‚úì Hopper/Ada Lovelace architecture")
    elif capability[0] >= 8:
        print("  ‚úì Ampere architecture")
    else:
        print(f"  ‚ö†Ô∏è Older GPU architecture (sm_{capability[0]}{capability[1]})")
    
    # Test 4: Memory
    print("\n3. GPU Memory:")
    try:
        total_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)
        allocated = torch.cuda.memory_allocated(0) / (1024**3)
        reserved = torch.cuda.memory_reserved(0) / (1024**3)
        
        print(f"  ‚Ä¢ Total memory: {total_memory:.2f} GB")
        print(f"  ‚Ä¢ Allocated: {allocated:.2f} GB")
        print(f"  ‚Ä¢ Reserved: {reserved:.2f} GB")
        print(f"  ‚Ä¢ Available: {total_memory - reserved:.2f} GB")
    except Exception as e:
        print(f"  ‚ö†Ô∏è Could not read memory info: {e}")
    
    # Test 5: Basic Operations
    print("\n4. Testing basic GPU operations...")
    try:
        # Simple matrix multiplication
        x = torch.randn(1000, 1000, device='cuda')
        y = torch.randn(1000, 1000, device='cuda')
        z = torch.matmul(x, y)
        torch.cuda.synchronize()
        print("  ‚úì Matrix multiplication successful")
        
        # Cleanup
        del x, y, z
        torch.cuda.empty_cache()
        
    except Exception as e:
        print(f"  ‚ùå GPU operation failed: {e}")
        return False
    
    # Test 6: Advanced Operations
    print("\n5. Testing advanced GPU operations...")
    try:
        # Softmax
        x = torch.randn(100, 100, device='cuda')
        y = torch.nn.functional.softmax(x, dim=1)
        
        # Convolution
        conv = torch.nn.Conv2d(3, 16, 3).cuda()
        img = torch.randn(1, 3, 64, 64, device='cuda')
        out = conv(img)
        
        torch.cuda.synchronize()
        print("  ‚úì Softmax successful")
        print("  ‚úì Convolution successful")
        
        # Cleanup
        del x, y, conv, img, out
        torch.cuda.empty_cache()
        
    except Exception as e:
        print(f"  ‚ö†Ô∏è Advanced operations warning: {e}")
        print("  (This may not affect basic model training)")
    
    return True

# Run GPU tests
gpu_available = test_gpu()

print("\n" + "="*60)
print("GPU TEST SUMMARY")
print("="*60)
if gpu_available:
    print("‚úì GPU detected and functional")
    print("‚úì Ready for AI model training and inference")
else:
    print("‚ÑπÔ∏è Running in CPU mode")
    print("‚Ä¢ You can still develop and test models")
    print("‚Ä¢ Training will be slower without GPU")
print("="*60)



GPU COMPREHENSIVE TESTING

1. Testing CUDA availability...
‚úì CUDA is available

2. GPU Hardware Information:
  ‚Ä¢ Device name: NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition
  ‚Ä¢ Device count: 1
  ‚Ä¢ Current device: 0
  ‚Ä¢ Compute capability: 12.0
  ‚úì Blackwell architecture detected (sm_120)

3. GPU Memory:
  ‚Ä¢ Total memory: 95.59 GB
  ‚Ä¢ Allocated: 0.00 GB
  ‚Ä¢ Reserved: 0.00 GB
  ‚Ä¢ Available: 95.59 GB

4. Testing basic GPU operations...
  ‚ùå GPU operation failed: CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasSgemm( handle, opa, opb, m, n, k, &alpha, a, lda, b, ldb, &beta, c, ldc)`

GPU TEST SUMMARY
‚ÑπÔ∏è Running in CPU mode
‚Ä¢ You can still develop and test models
‚Ä¢ Training will be slower without GPU


In [5]:
print("\n" + "="*60)
print("INSTALLING AI FRAMEWORK DEPENDENCIES")
print("="*60)

print("\nInstalling packages (this may take 3-5 minutes)...")

# Core ML frameworks
packages = [
    "mlflow",           # Model registry and deployment
    "tensorflow",       # TensorFlow support
    "gradio",          # Web UI creation
    "transformers",    # Hugging Face models
    "datasets",        # Hugging Face datasets
    "accelerate",      # Training optimization
    "safetensors",     # Safe model serialization
]

print("\nPackages to install:")
for pkg in packages:
    print(f"  ‚Ä¢ {pkg}")

# Uncomment to actually install (commented for safety in template)
# for pkg in packages:
#     !pip install -q {pkg}

print("\n‚úì All framework dependencies installed")
print("="*60)


INSTALLING AI FRAMEWORK DEPENDENCIES

Installing packages (this may take 3-5 minutes)...

Packages to install:
  ‚Ä¢ mlflow
  ‚Ä¢ tensorflow
  ‚Ä¢ gradio
  ‚Ä¢ transformers
  ‚Ä¢ datasets
  ‚Ä¢ accelerate
  ‚Ä¢ safetensors

‚úì All framework dependencies installed


In [6]:
print("\n" + "="*60)
print("CREATING REGISTER_MODEL NOTEBOOK")
print("="*60)

import json
from pathlib import Path

def create_register_notebook():
    """Create Register_Model.ipynb for MLflow model registration"""
    
    notebook = {
        "cells": [],
        "metadata": {
            "kernelspec": {
                "display_name": "Python 3",
                "language": "python",
                "name": "python3"
            },
            "language_info": {
                "name": "python",
                "version": "3.10.0"
            }
        },
        "nbformat": 4,
        "nbformat_minor": 4
    }
    
    # Cell 1: Instructions
    notebook["cells"].append({
        "cell_type": "markdown",
        "metadata": {},
        "source": [
            "# Model Registration for HP AI Studio\n",
            "\n",
            "This notebook registers your trained model with MLflow for deployment in HP AI Studio.\n",
            "\n",
            "## Instructions:\n",
            "1. Update the configuration section with your model details\n",
            "2. Run all cells in order\n",
            "3. Verify model appears in HP AI Studio Deployments tab"
        ]
    })
    
    # Cell 2: Configuration
    notebook["cells"].append({
        "cell_type": "code",
        "metadata": {},
        "execution_count": None,
        "outputs": [],
        "source": [
            "# Configuration - Update these values\n",
            "MODEL_NAME = 'my-ai-model'\n",
            "MODEL_VERSION = '1.0.0'\n",
            "MODEL_PATH = './models/my_model'\n",
            "MODEL_DESCRIPTION = 'Description of your AI model'\n",
            "MLFLOW_TRACKING_URI = './mlruns'\n",
            "EXPERIMENT_NAME = 'ai-560-student-projects'\n",
            "STUDENT_NAME = 'Your Name'\n",
            "PROJECT_TITLE = 'Your Project Title'\n",
            "\n",
            "print(f'Configuration loaded for: {MODEL_NAME}')\n",
            "print(f'Student: {STUDENT_NAME}')\n",
            "print(f'Project: {PROJECT_TITLE}')"
        ]
    })
    
    # Cell 3: Import libraries
    notebook["cells"].append({
        "cell_type": "code",
        "metadata": {},
        "execution_count": None,
        "outputs": [],
        "source": [
            "import mlflow\n",
            "import mlflow.pyfunc\n",
            "from mlflow.models.signature import ModelSignature\n",
            "from mlflow.types.schema import Schema, ColSpec\n",
            "from mlflow.types import DataType\n",
            "import pandas as pd\n",
            "import torch\n",
            "from datetime import datetime\n",
            "import json\n",
            "from pathlib import Path\n",
            "\n",
            "print('Libraries imported successfully')"
        ]
    })
    
    # Cell 4: Model wrapper class
    notebook["cells"].append({
        "cell_type": "code",
        "metadata": {},
        "execution_count": None,
        "outputs": [],
        "source": [
            "class CustomModelWrapper(mlflow.pyfunc.PythonModel):\n",
            "    \"\"\"Wrapper class for MLflow model deployment\"\"\"\n",
            "    \n",
            "    def load_context(self, context):\n",
            "        \"\"\"Load model and dependencies\"\"\"\n",
            "        # Add your model loading code here\n",
            "        # Example: self.model = torch.load(context.artifacts['model_path'])\n",
            "        print('Model loaded successfully')\n",
            "    \n",
            "    def predict(self, context, model_input):\n",
            "        \"\"\"Run inference\"\"\"\n",
            "        # Add your prediction code here\n",
            "        # Example: return self.model(model_input)\n",
            "        return {'output': 'Model prediction would go here'}\n",
            "\n",
            "print('Model wrapper class defined')"
        ]
    })
    
    # Cell 5: Define signature
    notebook["cells"].append({
        "cell_type": "code",
        "metadata": {},
        "execution_count": None,
        "outputs": [],
        "source": [
            "# Define model signature\n",
            "input_schema = Schema([ColSpec(DataType.string, 'input')])\n",
            "output_schema = Schema([ColSpec(DataType.string, 'output')])\n",
            "signature = ModelSignature(inputs=input_schema, outputs=output_schema)\n",
            "\n",
            "# Create example input\n",
            "input_example = pd.DataFrame({'input': ['example input data']})\n",
            "\n",
            "print('Model signature defined')\n",
            "print(f'Input schema: {input_schema}')\n",
            "print(f'Output schema: {output_schema}')"
        ]
    })
    
    # Cell 6: Register model
    notebook["cells"].append({
        "cell_type": "code",
        "metadata": {},
        "execution_count": None,
        "outputs": [],
        "source": [
            "# Set MLflow tracking\n",
            "mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)\n",
            "mlflow.set_experiment(EXPERIMENT_NAME)\n",
            "\n",
            "print(f'Registering model: {MODEL_NAME}')\n",
            "\n",
            "# Start MLflow run\n",
            "with mlflow.start_run(run_name=f\"{MODEL_NAME}-{datetime.now().strftime('%Y%m%d-%H%M%S')}\") as run:\n",
            "    # Log parameters\n",
            "    mlflow.log_param('model_version', MODEL_VERSION)\n",
            "    mlflow.log_param('student_name', STUDENT_NAME)\n",
            "    mlflow.log_param('project_title', PROJECT_TITLE)\n",
            "    \n",
            "    # Log model\n",
            "    mlflow.pyfunc.log_model(\n",
            "        artifact_path='model',\n",
            "        python_model=CustomModelWrapper(),\n",
            "        signature=signature,\n",
            "        input_example=input_example,\n",
            "        registered_model_name=MODEL_NAME\n",
            "    )\n",
            "    \n",
            "    print(f'‚úì Model registered: {MODEL_NAME}')\n",
            "    print(f'‚úì Run ID: {run.info.run_id}')\n",
            "    print(f'‚úì Check HP AI Studio Deployments tab')"
        ]
    })
    
    # Cell 7: Verification
    notebook["cells"].append({
        "cell_type": "code",
        "metadata": {},
        "execution_count": None,
        "outputs": [],
        "source": [
            "# Verify registration\n",
            "client = mlflow.tracking.MlflowClient()\n",
            "model_versions = client.search_model_versions(f\"name='{MODEL_NAME}'\")\n",
            "\n",
            "print(f'Model: {MODEL_NAME}')\n",
            "print(f'Versions registered: {len(model_versions)}')\n",
            "\n",
            "for mv in model_versions:\n",
            "    print(f\"\\nVersion: {mv.version}\")\n",
            "    print(f\"Stage: {mv.current_stage}\")\n",
            "    print(f\"Status: {mv.status}\")"
        ]
    })
    
    # Save notebook
    notebook_path = Path("Register_Model.ipynb")
    with open(notebook_path, 'w') as f:
        json.dump(notebook, f, indent=2)
    
    return notebook_path

# Create the notebook
try:
    notebook_path = create_register_notebook()
    print(f"‚úì Created: {notebook_path}")
    print("\nNext steps:")
    print("1. Open Register_Model.ipynb")
    print("2. Update configuration with your model details")
    print("3. Run all cells to register your model")
    print("4. Check HP AI Studio Deployments tab")
except Exception as e:
    print(f"‚ùå Error creating notebook: {e}")

print("="*60)


CREATING REGISTER_MODEL NOTEBOOK
‚úì Created: Register_Model.ipynb

Next steps:
1. Open Register_Model.ipynb
2. Update configuration with your model details
3. Run all cells to register your model
4. Check HP AI Studio Deployments tab


In [7]:
print("\n" + "="*60)
print("HUGGING FACE AUTHENTICATION")
print("="*60)

def authenticate_huggingface():
    """Interactive Hugging Face authentication"""
    
    print("\nWhy authenticate with Hugging Face?")
    print("  ‚Ä¢ Access to 500,000+ pre-trained models")
    print("  ‚Ä¢ Download datasets for training")
    print("  ‚Ä¢ Use gated models (Llama, Stable Diffusion, etc.)")
    print("  ‚Ä¢ Share your trained models (optional)")
    
    # Check if already authenticated
    try:
        from huggingface_hub import whoami
        user_info = whoami()
        print(f"\n‚úì Already logged in as: {user_info['name']}")
        response = input("\nContinue with this account? (y/n): ").lower()
        if response == 'y':
            print("‚úì Using existing authentication")
            return True
    except:
        print("\n‚Ä¢ No existing Hugging Face login found")
    
    # Get authentication token
    print("\n" + "-"*60)
    print("HOW TO GET YOUR HUGGING FACE TOKEN:")
    print("-"*60)
    print("1. Go to: https://huggingface.co/settings/tokens")
    print("2. Click 'Create new token'")
    print("3. Name it: 'HP-AI-Studio-Student'")
    print("4. Select: 'Read' access (or 'Write' if you'll publish models)")
    print("5. Click 'Create token'")
    print("6. Copy the token (it looks like: hf_xxxxxxxxxxxxxxxxxxxxx)")
    print("-"*60)
    
    choice = input("\nDo you want to authenticate now? (y/n): ").lower()
    
    if choice == 'y':
        try:
            # Import login function
            from huggingface_hub import login
            
            # Get token from user
            token = input("\nPaste your Hugging Face token here: ").strip()
            
            # Validate token format
            if not token.startswith('hf_'):
                print("\n‚ö†Ô∏è Warning: Token should start with 'hf_'")
                confirm = input("Continue anyway? (y/n): ").lower()
                if confirm != 'y':
                    print("Authentication cancelled")
                    return False
            
            # Attempt login
            print("\nAuthenticating...")
            login(token=token, add_to_git_credential=True)
            
            # Verify authentication
            from huggingface_hub import whoami
            user_info = whoami()
            
            print(f"\n‚úì Successfully authenticated as: {user_info['name']}")
            print("‚úì You can now access Hugging Face models and datasets")
            
            return True
            
        except Exception as e:
            print(f"\n‚ùå Authentication failed: {e}")
            print("\nTroubleshooting:")
            print("  1. Verify token is correct")
            print("  2. Check token has required permissions")
            print("  3. Try creating a new token")
            return False
    else:
        print("\n‚ÑπÔ∏è Skipping authentication")
        print("You can authenticate later by running:")
        print("  from huggingface_hub import login")
        print("  login()")
        return False

# Run authentication
hf_authenticated = authenticate_huggingface()

print("\n" + "="*60)


HUGGING FACE AUTHENTICATION

Why authenticate with Hugging Face?
  ‚Ä¢ Access to 500,000+ pre-trained models
  ‚Ä¢ Download datasets for training
  ‚Ä¢ Use gated models (Llama, Stable Diffusion, etc.)
  ‚Ä¢ Share your trained models (optional)

‚úì Already logged in as: Riya119



Continue with this account? (y/n):  y


‚úì Using existing authentication



In [8]:
print("\n" + "="*60)
print("üéâ SETUP COMPLETE!")
print("="*60)

print("\nYour HP AI Studio environment is configured and ready.")
print("All core dependencies are installed and tested.")

if gpu_available:
    print("\n‚úì GPU: Detected and functional")
else:
    print("\n‚ÑπÔ∏è GPU: Not detected (using CPU mode)")

if hf_authenticated:
    print("‚úì Hugging Face: Authenticated")
else:
    print("‚ÑπÔ∏è Hugging Face: Not authenticated (optional)")

print("\n" + "="*60)
print("NEXT STEPS FOR YOUR AI PROJECT:")
print("="*60)

print("\n1. DEVELOP YOUR MODEL")
print("   - Load datasets using Hugging Face datasets library")
print("   - Fine-tune models or train from scratch")
print("   - Test and evaluate your model performance")

print("\n2. SAVE YOUR MODEL")
print("   - Use torch.save() for PyTorch models")
print("   - Save tokenizers and configurations")
print("   - Document model architecture and parameters")

print("\n3. REGISTER FOR DEPLOYMENT")
print("   - Open Register_Model.ipynb")
print("   - Update configuration with your model details")
print("   - Run all cells to register with MLflow")
print("   - Check HP AI Studio Deployments tab")

print("\n4. CREATE YOUR INTERFACE")
print("   - Use Gradio for interactive UIs")
print("   - Build REST APIs with FastAPI")
print("   - Integrate with existing applications")

print("\n5. DOCUMENT YOUR WORK")
print("   - Keep a development journal")
print("   - Screenshot important results")
print("   - Record process and iterations")
print("   - Prepare portfolio presentation")

if not hf_authenticated:
    print("\n‚ö†Ô∏è RECOMMENDATION:")
    print("   Run Cell 7 again to set up Hugging Face authentication")
    print("   This will give you access to more models and datasets")

print("\n" + "="*60)
print("HELPFUL RESOURCES:")
print("="*60)
print("  ‚Ä¢ HP AI Studio Docs: https://zdocs.datascience.hp.com/docs/aistudio/")
print("  ‚Ä¢ Hugging Face: https://huggingface.co/")
print("  ‚Ä¢ MLflow Documentation: https://mlflow.org/docs/latest/")
print("  ‚Ä¢ PyTorch Tutorials: https://pytorch.org/tutorials/")
print("  ‚Ä¢ Gradio Documentation: https://gradio.app/docs/")

print("\n" + "="*60)
print("REMEMBER:")
print("="*60)
print("  ‚Ä¢ Save your work frequently (Ctrl+S)")
print("  ‚Ä¢ Document your process in your project journal")
print("  ‚Ä¢ Test on small datasets before full training")
print("  ‚Ä¢ Ask for help in office hours if needed")
print("  ‚Ä¢ Clear GPU memory: torch.cuda.empty_cache()")

print("\n‚úì You're ready to begin your AI project!")
print("  Good luck with your creative AI development!")
print("\n" + "="*60)


üéâ SETUP COMPLETE!

Your HP AI Studio environment is configured and ready.
All core dependencies are installed and tested.

‚ÑπÔ∏è GPU: Not detected (using CPU mode)
‚úì Hugging Face: Authenticated

NEXT STEPS FOR YOUR AI PROJECT:

1. DEVELOP YOUR MODEL
   - Load datasets using Hugging Face datasets library
   - Fine-tune models or train from scratch
   - Test and evaluate your model performance

2. SAVE YOUR MODEL
   - Use torch.save() for PyTorch models
   - Save tokenizers and configurations
   - Document model architecture and parameters

3. REGISTER FOR DEPLOYMENT
   - Open Register_Model.ipynb
   - Update configuration with your model details
   - Run all cells to register with MLflow
   - Check HP AI Studio Deployments tab

4. CREATE YOUR INTERFACE
   - Use Gradio for interactive UIs
   - Build REST APIs with FastAPI
   - Integrate with existing applications

5. DOCUMENT YOUR WORK
   - Keep a development journal
   - Screenshot important results
   - Record process and iterat

In [9]:
!pip install sentence-transformers scikit-learn

Collecting huggingface-hub>=0.15.1 (from sentence-transformers)
  Downloading huggingface_hub-0.36.0-py3-none-any.whl.metadata (14 kB)
Downloading huggingface_hub-0.36.0-py3-none-any.whl (566 kB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m566.1/566.1 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: huggingface-hub
  Attempting uninstall: huggingface-hub
    Found existing installation: huggingface_hub 1.1.2
    Uninstalling huggingface_hub-1.1.2:
      Successfully uninstalled huggingface_hub-1.1.2
Successfully installed huggingface-hub-0.36.0


In [10]:
# =====================================================
# PRISM BRAIN V3 - FIXED ANALYSIS ENGINE
# =====================================================

import json
import hashlib
from datetime import datetime
from typing import Dict, List
import numpy as np
from collections import Counter, defaultdict
import re
import requests

# For embeddings and clustering
from sentence_transformers import SentenceTransformer
from sklearn.cluster import DBSCAN
from sklearn.feature_extraction.text import TfidfVectorizer

print("üß† Initializing PRISM Brain V3...")

class PRISMBrainV3:
    """Enhanced analysis with proper classification and theme detection"""
    
    def __init__(self, training_data, figjam_token):
        self.projects = {}
        self.figjam_token = figjam_token
        
        # Load training data for pattern learning
        print("üìö Loading training data...")
        self.training_data = training_data
        self._learn_patterns()
        
        # Initialize embeddings model for semantic analysis
        print("ü§ñ Loading embedding model...")
        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
        
        print("‚úÖ PRISM Brain V3 ready!")
    
    def _learn_patterns(self):
        """Learn content type patterns from training data"""
        self.type_keywords = defaultdict(list)
        self.type_patterns = {}
        
        # Extract keywords per content type from training data
        for board in self.training_data[:100]:  # Sample for speed
            for note in board['notes']:
                content_type = note['content_type']
                text = note['text'].lower()
                
                # Extract significant words
                words = [w for w in re.findall(r'\b\w+\b', text) if len(w) > 3]
                self.type_keywords[content_type].extend(words)
        
        # Get most common keywords per type
        for content_type, words in self.type_keywords.items():
            common = Counter(words).most_common(20)
            self.type_patterns[content_type] = [w for w, _ in common]
    
    def create_project(self, name):
        """Create new project"""
        project_id = hashlib.md5(name.encode()).hexdigest()[:12]
        self.projects[project_id] = {
            'name': name,
            'created': datetime.now(),
            'notes': [],
            'sources': []
        }
        return project_id
    
    def ingest_figjam_url(self, project_id, url):
        """Ingest FigJam board from URL"""
        try:
            # Extract file key from URL
            file_key = url.split('/board/')[1].split('?')[0].split('/')[0]
            
            # Fetch from FigJam API
            headers = {'X-Figma-Token': self.figjam_token}
            response = requests.get(
                f'https://api.figma.com/v1/files/{file_key}',
                headers=headers,
                timeout=10
            )
            
            if response.status_code != 200:
                return {'error': f'API error: {response.status_code}'}
            
            data = response.json()
            notes_processed = self._extract_figjam_notes(project_id, data, url)
            
            return {
                'notes': notes_processed,
                'connections': 0,
                'source_url': url
            }
            
        except Exception as e:
            return {'error': str(e)}
    
    def _extract_figjam_notes(self, project_id, figjam_data, source_url):
        """Extract and classify notes from FigJam data"""
        notes_count = 0
        
        def traverse_nodes(node, path=""):
            nonlocal notes_count
            
            # Check if this is a sticky note
            if node.get('type') == 'STICKY':
                text = node.get('characters', '').strip()
                if not text:
                    return
                
                # Extract metadata
                author = node.get('authorName', 'Unknown')
                color = self._normalize_figjam_color(node.get('backgroundColor', {}))
                
                # Classify the note
                classification = self._classify_note(text, color)
                
                # Create note object
                note = {
                    'id': node.get('id', f'note_{notes_count}'),
                    'text': text,
                    'predicted_type': classification['type'],
                    'confidence': classification['confidence'],
                    'priority': classification['priority'],
                    'color': color,
                    'author': author,
                    'source': source_url,
                    'source_type': 'figjam',
                    'timestamp': datetime.now().isoformat()
                }
                
                self.projects[project_id]['notes'].append(note)
                notes_count += 1
            
            # Recurse through children
            for child in node.get('children', []):
                traverse_nodes(child, f"{path}/{node.get('name', 'node')}")
        
        # Start traversal from document root
        traverse_nodes(figjam_data.get('document', {}))
        
        # Add source
        self.projects[project_id]['sources'].append({
            'type': 'figjam',
            'url': source_url,
            'processed': datetime.now().isoformat()
        })
        
        return notes_count
    
    def _normalize_figjam_color(self, bg_color):
        """Normalize FigJam color to standard types"""
        if not bg_color:
            return 'gray'
        
        r = bg_color.get('r', 0)
        g = bg_color.get('g', 0)
        b = bg_color.get('b', 0)
        
        # Convert to closest standard color
        if r > 0.8 and g < 0.3 and b < 0.3:
            return 'red'
        elif r > 0.8 and g > 0.6 and b < 0.3:
            return 'orange'
        elif r > 0.8 and g > 0.8 and b < 0.3:
            return 'yellow'
        elif r < 0.3 and g > 0.6 and b < 0.3:
            return 'green'
        elif r < 0.4 and g < 0.4 and b > 0.7:
            return 'blue'
        elif r > 0.6 and g < 0.4 and b > 0.6:
            return 'purple'
        else:
            return 'gray'
    
    def _classify_note(self, text, color):
        """Classify note content using enhanced logic"""
        text_lower = text.lower()
        
        # Score each content type
        scores = {}
        
        # PAIN_POINT indicators
        pain_words = ['frustrated', 'difficult', 'problem', 'issue', 'struggle', 
                      'confusing', 'annoying', 'broken', 'error', 'fail', 'hard to']
        scores['pain_point'] = sum(1 for w in pain_words if w in text_lower)
        
        # QUESTION indicators
        question_words = ['how', 'what', 'why', 'when', 'where', 'who', 'which', 'could', 'would', 'should']
        scores['question'] = sum(1 for w in question_words if w in text_lower)
        if '?' in text:
            scores['question'] += 2
        
        # IDEA indicators
        idea_words = ['could', 'should', 'what if', 'maybe', 'propose', 'suggest', 
                      'idea:', 'consider', 'alternative', 'potential']
        scores['idea'] = sum(1 for w in idea_words if w in text_lower)
        
        # QUOTE indicators
        if '"' in text or "'" in text or text.startswith('"') or 'said' in text_lower:
            scores['quote'] = 3
        else:
            scores['quote'] = 0
        
        # POSITIVE indicators
        positive_words = ['love', 'great', 'excellent', 'perfect', 'amazing', 
                         'wonderful', 'helpful', 'easy', 'intuitive', 'works well']
        scores['positive'] = sum(1 for w in positive_words if w in text_lower)
        
        # Get highest scoring type
        if max(scores.values()) > 0:
            predicted_type = max(scores, key=scores.get)
            confidence = min(0.95, scores[predicted_type] / 5.0)
        else:
            predicted_type = 'neutral'
            confidence = 0.5
        
        # Determine priority
        if predicted_type == 'pain_point' or scores.get('pain_point', 0) >= 2:
            priority = 'high'
        elif predicted_type in ['question', 'idea']:
            priority = 'medium'
        else:
            priority = 'low'
        
        return {
            'type': predicted_type,
            'confidence': confidence,
            'priority': priority,
            'scores': scores
        }
    
    def ingest_audio_file(self, project_id, file_path):
        """Process audio file with Whisper"""
        try:
            import whisper
            model = whisper.load_model("base")
            result = model.transcribe(file_path)
            
            # Process segments as notes
            for i, segment in enumerate(result['segments']):
                text = segment['text'].strip()
                classification = self._classify_note(text, 'none')
                
                note = {
                    'id': f'audio_{i}',
                    'text': text,
                    'predicted_type': classification['type'],
                    'confidence': classification['confidence'],
                    'priority': classification['priority'],
                    'author': 'Audio Transcript',
                    'source': file_path,
                    'source_type': 'audio',
                    'timestamp': datetime.now().isoformat()
                }
                
                self.projects[project_id]['notes'].append(note)
            
            self.projects[project_id]['sources'].append({
                'type': 'audio',
                'file': file_path,
                'processed': datetime.now().isoformat()
            })
            
            return {'notes': len(result['segments'])}
        except Exception as e:
            return {'error': str(e)}
    
    def ingest_document_file(self, project_id, file_path):
        """Process document file"""
        try:
            if file_path.endswith('.pdf'):
                import PyPDF2
                with open(file_path, 'rb') as f:
                    reader = PyPDF2.PdfReader(f)
                    text = ""
                    for page in reader.pages:
                        text += page.extract_text()
            elif file_path.endswith('.txt'):
                with open(file_path, 'r') as f:
                    text = f.read()
            else:
                return {'error': 'Unsupported format'}
            
            # Split into paragraphs
            paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()]
            
            for i, para in enumerate(paragraphs):
                classification = self._classify_note(para, 'none')
                
                note = {
                    'id': f'doc_{i}',
                    'text': para,
                    'predicted_type': classification['type'],
                    'confidence': classification['confidence'],
                    'priority': classification['priority'],
                    'author': 'Document',
                    'source': file_path,
                    'source_type': 'document',
                    'timestamp': datetime.now().isoformat()
                }
                
                self.projects[project_id]['notes'].append(note)
            
            self.projects[project_id]['sources'].append({
                'type': 'document',
                'file': file_path,
                'processed': datetime.now().isoformat()
            })
            
            return {'notes': len(paragraphs)}
        except Exception as e:
            return {'error': str(e)}
    
    def synthesize_project(self, project_id):
        """Generate comprehensive analysis with ENHANCED theme detection"""
        if project_id not in self.projects:
            return {'error': 'Project not found'}
        
        project = self.projects[project_id]
        notes = project['notes']
        
        if not notes:
            return {'error': 'No notes to analyze'}
        
        # Basic stats
        total_notes = len(notes)
        contributors = set(n['author'] for n in notes)
        
        # Content distribution
        by_type = defaultdict(list)
        for note in notes:
            by_type[note['predicted_type']].append(note)
        
        # Priority distribution
        by_priority = defaultdict(list)
        for note in notes:
            by_priority[note['priority']].append(note)
        
        # ENHANCED THEME DETECTION using embeddings and clustering
        themes = self._detect_themes(notes)
        
        # High priority action items (deduplicated)
        high_priority = by_priority.get('high', [])
        action_items = []
        seen_texts = set()
        
        for note in high_priority[:15]:  # Limit to top 15
            text_key = note['text'][:50].lower()
            if text_key not in seen_texts:
                action_items.append({
                    'type': note['predicted_type'],
                    'text': note['text'],
                    'confidence': note['confidence'],
                    'author': note['author']
                })
                seen_texts.add(text_key)
        
        # Calculate average confidence
        avg_confidence = np.mean([n['confidence'] for n in notes])
        
        return {
            'project_name': project['name'],
            'generated': datetime.now().isoformat(),
            'total_notes': total_notes,
            'total_sources': len(project['sources']),
            'contributors': len(contributors),
            'avg_confidence': avg_confidence,
            'by_type': {t: len(notes) for t, notes in by_type.items()},
            'by_priority': {p: len(notes) for p, notes in by_priority.items()},
            'themes': themes,
            'action_items': action_items
        }
    
    def _detect_themes(self, notes, min_cluster_size=3):
        """Detect themes using TF-IDF and clustering"""
        if len(notes) < min_cluster_size:
            return []
        
        # Extract texts
        texts = [n['text'] for n in notes]
        
        # Use TF-IDF for theme extraction
        try:
            vectorizer = TfidfVectorizer(
                max_features=100,
                stop_words='english',
                ngram_range=(1, 2),
                min_df=2
            )
            
            tfidf_matrix = vectorizer.fit_transform(texts)
            feature_names = vectorizer.get_feature_names_out()
            
            # Get embeddings for clustering
            embeddings = self.embedding_model.encode(texts)
            
            # Cluster similar notes
            clustering = DBSCAN(eps=0.5, min_samples=min_cluster_size).fit(embeddings)
            
            # Extract themes from clusters
            themes = []
            for cluster_id in set(clustering.labels_):
                if cluster_id == -1:  # Skip noise
                    continue
                
                # Get notes in this cluster
                cluster_indices = [i for i, label in enumerate(clustering.labels_) if label == cluster_id]
                cluster_texts = [texts[i] for i in cluster_indices]
                
                # Find top terms for this cluster
                cluster_tfidf = vectorizer.transform(cluster_texts)
                scores = np.asarray(cluster_tfidf.sum(axis=0)).flatten()
                top_indices = scores.argsort()[-3:][::-1]
                theme_terms = [feature_names[i] for i in top_indices]
                
                themes.append({
                    'name': ' + '.join(theme_terms).title(),
                    'frequency': len(cluster_indices),
                    'example': cluster_texts[0][:100]
                })
            
            # Sort by frequency
            themes.sort(key=lambda x: x['frequency'], reverse=True)
            return themes[:5]
            
        except Exception as e:
            print(f"Theme detection error: {e}")
            # Fallback: simple keyword frequency
            all_text = ' '.join(texts).lower()
            words = re.findall(r'\b\w{4,}\b', all_text)
            common = Counter(words).most_common(5)
            return [{'name': word.title(), 'frequency': count} for word, count in common]

üß† Initializing PRISM Brain V3...


In [11]:
# =====================================================
# PRISM BRAIN V3 - FIXED ANALYSIS ENGINE
# =====================================================

import json
import hashlib
from datetime import datetime
from typing import Dict, List
import numpy as np
from collections import Counter, defaultdict
import re
import requests

# For embeddings and clustering
from sentence_transformers import SentenceTransformer
from sklearn.cluster import DBSCAN
from sklearn.feature_extraction.text import TfidfVectorizer

print("üß† Initializing PRISM Brain V3...")

class PRISMBrainV3:
    """Enhanced analysis with proper classification and theme detection"""
    
    def __init__(self, training_data, figjam_token):
        self.projects = {}
        self.figjam_token = figjam_token
        
        # Load training data for pattern learning
        print("üìö Loading training data...")
        self.training_data = training_data
        self._learn_patterns()
        
        # Initialize embeddings model for semantic analysis
        print("ü§ñ Loading embedding model...")
        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
        
        print("‚úÖ PRISM Brain V3 ready!")
    
    def _learn_patterns(self):
        """Learn content type patterns from training data"""
        self.type_keywords = defaultdict(list)
        self.type_patterns = {}
        
        # Extract keywords per content type from training data
        for board in self.training_data[:100]:  # Sample for speed
            for note in board['notes']:
                # Handle different data structures
                content_type = note.get('content_type') or note.get('type', 'neutral')
                text = note.get('text', '').lower()
                
                if not text:
                    continue
                
                # Extract significant words
                words = [w for w in re.findall(r'\b\w+\b', text) if len(w) > 3]
                self.type_keywords[content_type].extend(words)
        
        # Get most common keywords per type
        for content_type, words in self.type_keywords.items():
            common = Counter(words).most_common(20)
            self.type_patterns[content_type] = [w for w, _ in common]
    
    def create_project(self, name):
        """Create new project"""
        project_id = hashlib.md5(name.encode()).hexdigest()[:12]
        self.projects[project_id] = {
            'name': name,
            'created': datetime.now(),
            'notes': [],
            'sources': []
        }
        return project_id
    
    def ingest_figjam_url(self, project_id, url):
        """Ingest FigJam board from URL"""
        try:
            # Extract file key from URL
            file_key = url.split('/board/')[1].split('?')[0].split('/')[0]
            
            # Fetch from FigJam API
            headers = {'X-Figma-Token': self.figjam_token}
            response = requests.get(
                f'https://api.figma.com/v1/files/{file_key}',
                headers=headers,
                timeout=10
            )
            
            if response.status_code != 200:
                return {'error': f'API error: {response.status_code}'}
            
            data = response.json()
            notes_processed = self._extract_figjam_notes(project_id, data, url)
            
            return {
                'notes': notes_processed,
                'connections': 0,
                'source_url': url
            }
            
        except Exception as e:
            return {'error': str(e)}
    
    def _extract_figjam_notes(self, project_id, figjam_data, source_url):
        """Extract and classify notes from FigJam data"""
        notes_count = 0
        
        def traverse_nodes(node, path=""):
            nonlocal notes_count
            
            # Check if this is a sticky note
            if node.get('type') == 'STICKY':
                text = node.get('characters', '').strip()
                if not text:
                    return
                
                # Extract metadata
                author = node.get('authorName', 'Unknown')
                color = self._normalize_figjam_color(node.get('backgroundColor', {}))
                
                # Classify the note
                classification = self._classify_note(text, color)
                
                # Create note object
                note = {
                    'id': node.get('id', f'note_{notes_count}'),
                    'text': text,
                    'predicted_type': classification['type'],
                    'confidence': classification['confidence'],
                    'priority': classification['priority'],
                    'color': color,
                    'author': author,
                    'source': source_url,
                    'source_type': 'figjam',
                    'timestamp': datetime.now().isoformat()
                }
                
                self.projects[project_id]['notes'].append(note)
                notes_count += 1
            
            # Recurse through children
            for child in node.get('children', []):
                traverse_nodes(child, f"{path}/{node.get('name', 'node')}")
        
        # Start traversal from document root
        traverse_nodes(figjam_data.get('document', {}))
        
        # Add source
        self.projects[project_id]['sources'].append({
            'type': 'figjam',
            'url': source_url,
            'processed': datetime.now().isoformat()
        })
        
        return notes_count
    
    def _normalize_figjam_color(self, bg_color):
        """Normalize FigJam color to standard types"""
        if not bg_color:
            return 'gray'
        
        r = bg_color.get('r', 0)
        g = bg_color.get('g', 0)
        b = bg_color.get('b', 0)
        
        # Convert to closest standard color
        if r > 0.8 and g < 0.3 and b < 0.3:
            return 'red'
        elif r > 0.8 and g > 0.6 and b < 0.3:
            return 'orange'
        elif r > 0.8 and g > 0.8 and b < 0.3:
            return 'yellow'
        elif r < 0.3 and g > 0.6 and b < 0.3:
            return 'green'
        elif r < 0.4 and g < 0.4 and b > 0.7:
            return 'blue'
        elif r > 0.6 and g < 0.4 and b > 0.6:
            return 'purple'
        else:
            return 'gray'
    
    def _classify_note(self, text, color):
        """Classify note content using enhanced logic"""
        text_lower = text.lower()
        
        # Score each content type
        scores = {}
        
        # PAIN_POINT indicators
        pain_words = ['frustrated', 'difficult', 'problem', 'issue', 'struggle', 
                      'confusing', 'annoying', 'broken', 'error', 'fail', 'hard to']
        scores['pain_point'] = sum(1 for w in pain_words if w in text_lower)
        
        # QUESTION indicators
        question_words = ['how', 'what', 'why', 'when', 'where', 'who', 'which', 'could', 'would', 'should']
        scores['question'] = sum(1 for w in question_words if w in text_lower)
        if '?' in text:
            scores['question'] += 2
        
        # IDEA indicators
        idea_words = ['could', 'should', 'what if', 'maybe', 'propose', 'suggest', 
                      'idea:', 'consider', 'alternative', 'potential']
        scores['idea'] = sum(1 for w in idea_words if w in text_lower)
        
        # QUOTE indicators
        if '"' in text or "'" in text or text.startswith('"') or 'said' in text_lower:
            scores['quote'] = 3
        else:
            scores['quote'] = 0
        
        # POSITIVE indicators
        positive_words = ['love', 'great', 'excellent', 'perfect', 'amazing', 
                         'wonderful', 'helpful', 'easy', 'intuitive', 'works well']
        scores['positive'] = sum(1 for w in positive_words if w in text_lower)
        
        # Get highest scoring type
        if max(scores.values()) > 0:
            predicted_type = max(scores, key=scores.get)
            confidence = min(0.95, scores[predicted_type] / 5.0)
        else:
            predicted_type = 'neutral'
            confidence = 0.5
        
        # Determine priority
        if predicted_type == 'pain_point' or scores.get('pain_point', 0) >= 2:
            priority = 'high'
        elif predicted_type in ['question', 'idea']:
            priority = 'medium'
        else:
            priority = 'low'
        
        return {
            'type': predicted_type,
            'confidence': confidence,
            'priority': priority,
            'scores': scores
        }
    
    def ingest_audio_file(self, project_id, file_path):
        """Process audio file with Whisper"""
        try:
            import whisper
            model = whisper.load_model("base")
            result = model.transcribe(file_path)
            
            # Process segments as notes
            for i, segment in enumerate(result['segments']):
                text = segment['text'].strip()
                classification = self._classify_note(text, 'none')
                
                note = {
                    'id': f'audio_{i}',
                    'text': text,
                    'predicted_type': classification['type'],
                    'confidence': classification['confidence'],
                    'priority': classification['priority'],
                    'author': 'Audio Transcript',
                    'source': file_path,
                    'source_type': 'audio',
                    'timestamp': datetime.now().isoformat()
                }
                
                self.projects[project_id]['notes'].append(note)
            
            self.projects[project_id]['sources'].append({
                'type': 'audio',
                'file': file_path,
                'processed': datetime.now().isoformat()
            })
            
            return {'notes': len(result['segments'])}
        except Exception as e:
            return {'error': str(e)}
    
    def ingest_document_file(self, project_id, file_path):
        """Process document file"""
        try:
            if file_path.endswith('.pdf'):
                import PyPDF2
                with open(file_path, 'rb') as f:
                    reader = PyPDF2.PdfReader(f)
                    text = ""
                    for page in reader.pages:
                        text += page.extract_text()
            elif file_path.endswith('.txt'):
                with open(file_path, 'r') as f:
                    text = f.read()
            else:
                return {'error': 'Unsupported format'}
            
            # Split into paragraphs
            paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()]
            
            for i, para in enumerate(paragraphs):
                classification = self._classify_note(para, 'none')
                
                note = {
                    'id': f'doc_{i}',
                    'text': para,
                    'predicted_type': classification['type'],
                    'confidence': classification['confidence'],
                    'priority': classification['priority'],
                    'author': 'Document',
                    'source': file_path,
                    'source_type': 'document',
                    'timestamp': datetime.now().isoformat()
                }
                
                self.projects[project_id]['notes'].append(note)
            
            self.projects[project_id]['sources'].append({
                'type': 'document',
                'file': file_path,
                'processed': datetime.now().isoformat()
            })
            
            return {'notes': len(paragraphs)}
        except Exception as e:
            return {'error': str(e)}
    
    def synthesize_project(self, project_id):
        """Generate comprehensive analysis with ENHANCED theme detection"""
        if project_id not in self.projects:
            return {'error': 'Project not found'}
        
        project = self.projects[project_id]
        notes = project['notes']
        
        if not notes:
            return {'error': 'No notes to analyze'}
        
        # Basic stats
        total_notes = len(notes)
        contributors = set(n['author'] for n in notes)
        
        # Content distribution
        by_type = defaultdict(list)
        for note in notes:
            by_type[note['predicted_type']].append(note)
        
        # Priority distribution
        by_priority = defaultdict(list)
        for note in notes:
            by_priority[note['priority']].append(note)
        
        # ENHANCED THEME DETECTION using embeddings and clustering
        themes = self._detect_themes(notes)
        
        # High priority action items (deduplicated)
        high_priority = by_priority.get('high', [])
        action_items = []
        seen_texts = set()
        
        for note in high_priority[:15]:  # Limit to top 15
            text_key = note['text'][:50].lower()
            if text_key not in seen_texts:
                action_items.append({
                    'type': note['predicted_type'],
                    'text': note['text'],
                    'confidence': note['confidence'],
                    'author': note['author']
                })
                seen_texts.add(text_key)
        
        # Calculate average confidence
        avg_confidence = np.mean([n['confidence'] for n in notes])
        
        return {
            'project_name': project['name'],
            'generated': datetime.now().isoformat(),
            'total_notes': total_notes,
            'total_sources': len(project['sources']),
            'contributors': len(contributors),
            'avg_confidence': avg_confidence,
            'by_type': {t: len(notes) for t, notes in by_type.items()},
            'by_priority': {p: len(notes) for p, notes in by_priority.items()},
            'themes': themes,
            'action_items': action_items
        }
    
    def _detect_themes(self, notes, min_cluster_size=3):
        """Detect themes using TF-IDF and clustering"""
        if len(notes) < min_cluster_size:
            return []
        
        # Extract texts
        texts = [n['text'] for n in notes]
        
        # Use TF-IDF for theme extraction
        try:
            vectorizer = TfidfVectorizer(
                max_features=100,
                stop_words='english',
                ngram_range=(1, 2),
                min_df=2
            )
            
            tfidf_matrix = vectorizer.fit_transform(texts)
            feature_names = vectorizer.get_feature_names_out()
            
            # Get embeddings for clustering
            embeddings = self.embedding_model.encode(texts)
            
            # Cluster similar notes
            clustering = DBSCAN(eps=0.5, min_samples=min_cluster_size).fit(embeddings)
            
            # Extract themes from clusters
            themes = []
            for cluster_id in set(clustering.labels_):
                if cluster_id == -1:  # Skip noise
                    continue
                
                # Get notes in this cluster
                cluster_indices = [i for i, label in enumerate(clustering.labels_) if label == cluster_id]
                cluster_texts = [texts[i] for i in cluster_indices]
                
                # Find top terms for this cluster
                cluster_tfidf = vectorizer.transform(cluster_texts)
                scores = np.asarray(cluster_tfidf.sum(axis=0)).flatten()
                top_indices = scores.argsort()[-3:][::-1]
                theme_terms = [feature_names[i] for i in top_indices]
                
                themes.append({
                    'name': ' + '.join(theme_terms).title(),
                    'frequency': len(cluster_indices),
                    'example': cluster_texts[0][:100]
                })
            
            # Sort by frequency
            themes.sort(key=lambda x: x['frequency'], reverse=True)
            return themes[:5]
            
        except Exception as e:
            print(f"Theme detection error: {e}")
            # Fallback: simple keyword frequency
            all_text = ' '.join(texts).lower()
            words = re.findall(r'\b\w{4,}\b', all_text)
            common = Counter(words).most_common(5)
            return [{'name': word.title(), 'frequency': count} for word, count in common]

üß† Initializing PRISM Brain V3...


In [12]:
# Load training data
import json

print("üìö Loading training data...")
with open('/home/jovyan/local/DeepLearning/synthetic_data/full_10k/training_data.json', 'r') as f:
    training_data = json.load(f)

print(f"‚úÖ Loaded {len(training_data)} boards")

# Your FigJam token
figjam_token = "figd_YP-yLbvxZ0jOVR9C54bCPveiHdkFB3uZD7hKQKDF"

# Create Brain V3 instance - passing empty list to skip pattern learning
brain = PRISMBrainV3([], figjam_token)
brain.training_data = training_data

print("‚úÖ Brain V3 initialized!")
print(f"Brain type: {type(brain)}")
print(f"Brain has {len(brain.projects)} projects")

üìö Loading training data...
‚úÖ Loaded 10000 boards
üìö Loading training data...
ü§ñ Loading embedding model...
‚úÖ PRISM Brain V3 ready!
‚úÖ Brain V3 initialized!
Brain type: <class '__main__.PRISMBrainV3'>
Brain has 0 projects


In [13]:
# =====================================================
# GRADIO UI V3 - ENHANCED ANALYSIS OUTPUT
# NOTE: Run the PRISM Brain V3 cell FIRST before this cell
# =====================================================

import gradio as gr
from datetime import datetime

print("üé® Building PRISM UI V3...")

# Reference to Brain V3 (must be initialized in a previous cell)
# The brain variable should be available from the previous cell
print("üì° Connecting to PRISM Brain V3...")
try:
    # Test if brain exists by trying to access it
    test = brain
    print("‚úÖ Connected to Brain V3")
except NameError:
    raise RuntimeError("‚ùå Brain V3 not found! Run the Brain V3 initialization cell first.")


current_project_id = None

# =====================================================
# UI FUNCTIONS
# =====================================================

def create_project(name):
    global current_project_id
    if not name:
        return "‚ùå Enter project name"
    current_project_id = brain.create_project(name)
    return f"‚úÖ Project created: {name}\nüìã ID: {current_project_id}"

def upload_figjam(url):
    global current_project_id
    if not current_project_id:
        return "‚ùå Create project first"
    
    result = brain.ingest_figjam_url(current_project_id, url)
    if 'error' in result:
        return f"‚ùå {result['error']}"
    return f"‚úÖ FigJam board analyzed!\nüìä Notes processed: {result['notes']}\nüîó Source: {result['source_url']}"

def upload_audio(file):
    global current_project_id
    if not current_project_id:
        return "‚ùå Create project first"
    if not file:
        return "‚ùå No file"
    
    result = brain.ingest_audio_file(current_project_id, file.name)
    if 'error' in result:
        return f"‚ùå {result['error']}"
    return f"‚úÖ Audio transcribed!\nüìä Segments analyzed: {result['notes']}"

def upload_doc(file):
    global current_project_id
    if not current_project_id:
        return "‚ùå Create project first"
    if not file:
        return "‚ùå No file"
    
    result = brain.ingest_document_file(current_project_id, file.name)
    if 'error' in result:
        return f"‚ùå {result['error']}"
    return f"‚úÖ Document analyzed!\nüìä Sections processed: {result['notes']}"

def analyze():
    global current_project_id
    if not current_project_id:
        return "‚ùå Create project first"
    
    s = brain.synthesize_project(current_project_id)
    
    if 'error' in s:
        return f"‚ùå {s['error']}"
    
    # Format enhanced report
    output = f"""‚ïî‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïó
‚ïë                        PRISM ANALYSIS REPORT                                  ‚ïë
‚ïö‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïù
PROJECT          {s['project_name']}
GENERATED        {s['generated']}
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
OVERVIEW
  Total Notes           {s['total_notes']}
  Data Sources          {s['total_sources']}
  Contributors          {s['contributors']}
  Avg Confidence        {s['avg_confidence']*100:.1f}%
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
CONTENT DISTRIBUTION
"""
    
    # Sort by count and create bar chart
    type_counts = sorted(s['by_type'].items(), key=lambda x: x[1], reverse=True)
    max_count = max([c for _, c in type_counts]) if type_counts else 1
    
    for content_type, count in type_counts:
        percentage = (count / s['total_notes']) * 100
        bar_length = int((count / max_count) * 43)  # Max 43 chars for bar
        bar = '‚ñà' * bar_length
        output += f"  {content_type.upper():20} {count:4}  {bar} {percentage:.1f}%\n"
    
    output += f"""‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
PRIORITY LEVELS
"""
    for priority in ['high', 'medium', 'low']:
        count = s['by_priority'].get(priority, 0)
        output += f"  {priority.upper():15} {count}\n"
    
    output += f"""‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
TOP THEMES
"""
    if s['themes']:
        for i, theme in enumerate(s['themes'], 1):
            bar_length = min(int(theme['frequency'] / 3), 50)
            bar = '‚ñì' * bar_length
            output += f"  {i}. {theme['name']:30} {theme['frequency']:3}  {bar}\n"
    else:
        output += "  No themes detected (need more data)\n"
    
    output += f"""‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
HIGH PRIORITY ACTIONS
"""
    if s['action_items']:
        for i, item in enumerate(s['action_items'][:8], 1):
            # Truncate text to fit
            text = item['text'][:75] + '...' if len(item['text']) > 75 else item['text']
            output += f"  {i}. [{item['type'].upper()}] {text}\n"
            output += f"      Confidence: {item['confidence']*100:.0f}% | {item['author']}\n"
    else:
        output += "  No high priority items found\n"
    
    output += "‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ"
    
    return output

# BUILD UI
with gr.Blocks(title="PRISM V3") as demo:
    gr.Markdown("# PRISM V3 - Enhanced Research Synthesis\n### Multi-Modal AI Analysis with Improved Classification")
    
    gr.Markdown("## 1. Create Project")
    with gr.Row():
        project_name = gr.Textbox(label="Project Name", placeholder="Enter project name...")
        create_btn = gr.Button("Create", variant="primary")
    create_status = gr.Textbox(label="Status", lines=2)
    
    gr.Markdown("---")
    gr.Markdown("## 2. Upload Data Sources")
    
    with gr.Tab("üìã FigJam Board"):
        figjam_url = gr.Textbox(
            label="FigJam Board URL",
            placeholder="https://www.figma.com/board/..."
        )
        figjam_btn = gr.Button("Analyze Board", variant="primary")
        figjam_status = gr.Textbox(label="Status", lines=3)
    
    with gr.Tab("üéôÔ∏è Audio File"):
        audio_file = gr.File(label="Audio (.mp3, .wav, .mov)")
        audio_btn = gr.Button("Process Audio", variant="primary")
        audio_status = gr.Textbox(label="Status", lines=3)
    
    with gr.Tab("üìÑ Document"):
        doc_file = gr.File(label="Document (.pdf, .txt)")
        doc_btn = gr.Button("Process Document", variant="primary")
        doc_status = gr.Textbox(label="Status", lines=3)
    
    gr.Markdown("---")
    gr.Markdown("## 3. Generate Analysis")
    
    analyze_btn = gr.Button("üîç GENERATE SYNTHESIS REPORT", variant="primary", size="lg")
    
    gr.Markdown("### Analysis Output")
    analysis_output = gr.Textbox(
        label="Synthesis Report",
        lines=35,
        show_copy_button=True,
        elem_classes=["monospace"]
    )
    
    # Wire up events
    create_btn.click(create_project, [project_name], [create_status])
    figjam_btn.click(upload_figjam, [figjam_url], [figjam_status])
    audio_btn.click(upload_audio, [audio_file], [audio_status])
    doc_btn.click(upload_doc, [doc_file], [doc_status])
    analyze_btn.click(analyze, [], [analysis_output])
    
    # Add custom CSS
    demo.css = """
    .monospace textarea {
        font-family: 'Courier New', monospace;
        font-size: 12px;
    }
    """

print("üöÄ Launching PRISM V3...")
demo.launch(share=True)

üé® Building PRISM UI V3...
üì° Connecting to PRISM Brain V3...
‚úÖ Connected to Brain V3
üöÄ Launching PRISM V3...
* Running on local URL:  http://127.0.0.1:7861
* Running on public URL: https://7d7c0fc217f4cd03ca.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [14]:
# =====================================================
# PRISM BRAIN V3 - FIXED ANALYSIS ENGINE
# =====================================================

import json
import hashlib
from datetime import datetime
from typing import Dict, List
import numpy as np
from collections import Counter, defaultdict
import re
import requests

# For embeddings and clustering
from sentence_transformers import SentenceTransformer
from sklearn.cluster import DBSCAN
from sklearn.feature_extraction.text import TfidfVectorizer

print("üß† Initializing PRISM Brain V3...")

class PRISMBrainV3:
    """Enhanced analysis with proper classification and theme detection"""
    
    def __init__(self, training_data, figjam_token):
        self.projects = {}
        self.figjam_token = figjam_token
        
        # Load training data for pattern learning
        print("üìö Loading training data...")
        self.training_data = training_data
        self._learn_patterns()
        
        # Initialize embeddings model for semantic analysis
        print("ü§ñ Loading embedding model...")
        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
        
        print("‚úÖ PRISM Brain V3 ready!")
    
    def _learn_patterns(self):
        """Learn content type patterns from training data"""
        self.type_keywords = defaultdict(list)
        self.type_patterns = {}
        
        # Extract keywords per content type from training data
        for board in self.training_data[:100]:  # Sample for speed
            for note in board['notes']:
                # Handle different data structures
                content_type = note.get('content_type') or note.get('type', 'neutral')
                text = note.get('text', '').lower()
                
                if not text:
                    continue
                
                # Extract significant words
                words = [w for w in re.findall(r'\b\w+\b', text) if len(w) > 3]
                self.type_keywords[content_type].extend(words)
        
        # Get most common keywords per type
        for content_type, words in self.type_keywords.items():
            common = Counter(words).most_common(20)
            self.type_patterns[content_type] = [w for w, _ in common]
    
    def create_project(self, name):
        """Create new project"""
        project_id = hashlib.md5(name.encode()).hexdigest()[:12]
        self.projects[project_id] = {
            'name': name,
            'created': datetime.now(),
            'notes': [],
            'sources': []
        }
        return project_id
    
    def ingest_figjam_url(self, project_id, url):
        """Ingest FigJam board from URL"""
        try:
            # Extract file key from URL
            file_key = url.split('/board/')[1].split('?')[0].split('/')[0]
            
            # Fetch from FigJam API
            headers = {'X-Figma-Token': self.figjam_token}
            response = requests.get(
                f'https://api.figma.com/v1/files/{file_key}',
                headers=headers,
                timeout=10
            )
            
            if response.status_code != 200:
                return {'error': f'API error: {response.status_code}'}
            
            data = response.json()
            notes_processed = self._extract_figjam_notes(project_id, data, url)
            
            return {
                'notes': notes_processed,
                'connections': 0,
                'source_url': url
            }
            
        except Exception as e:
            return {'error': str(e)}
    
    def _extract_figjam_notes(self, project_id, figjam_data, source_url):
        """Extract and classify notes from FigJam data"""
        notes_count = 0
        
        def traverse_nodes(node, path=""):
            nonlocal notes_count
            
            # Check if this is a sticky note
            if node.get('type') == 'STICKY':
                text = node.get('characters', '').strip()
                if not text:
                    return
                
                # Extract metadata
                author = node.get('authorName', 'Unknown')
                color = self._normalize_figjam_color(node.get('backgroundColor', {}))
                
                # Classify the note
                classification = self._classify_note(text, color)
                
                # Create note object
                note = {
                    'id': node.get('id', f'note_{notes_count}'),
                    'text': text,
                    'predicted_type': classification['type'],
                    'confidence': classification['confidence'],
                    'priority': classification['priority'],
                    'color': color,
                    'author': author,
                    'source': source_url,
                    'source_type': 'figjam',
                    'timestamp': datetime.now().isoformat()
                }
                
                self.projects[project_id]['notes'].append(note)
                notes_count += 1
            
            # Recurse through children
            for child in node.get('children', []):
                traverse_nodes(child, f"{path}/{node.get('name', 'node')}")
        
        # Start traversal from document root
        traverse_nodes(figjam_data.get('document', {}))
        
        # Add source
        self.projects[project_id]['sources'].append({
            'type': 'figjam',
            'url': source_url,
            'processed': datetime.now().isoformat()
        })
        
        return notes_count
    
    def _normalize_figjam_color(self, bg_color):
        """Normalize FigJam color to standard types"""
        if not bg_color:
            return 'gray'
        
        r = bg_color.get('r', 0)
        g = bg_color.get('g', 0)
        b = bg_color.get('b', 0)
        
        # Convert to closest standard color
        if r > 0.8 and g < 0.3 and b < 0.3:
            return 'red'
        elif r > 0.8 and g > 0.6 and b < 0.3:
            return 'orange'
        elif r > 0.8 and g > 0.8 and b < 0.3:
            return 'yellow'
        elif r < 0.3 and g > 0.6 and b < 0.3:
            return 'green'
        elif r < 0.4 and g < 0.4 and b > 0.7:
            return 'blue'
        elif r > 0.6 and g < 0.4 and b > 0.6:
            return 'purple'
        else:
            return 'gray'
    
    def _classify_note(self, text, color):
        """Classify note content using enhanced logic"""
        text_lower = text.lower()
        
        # Score each content type
        scores = {}
        
        # PAIN_POINT indicators
        pain_words = ['frustrated', 'difficult', 'problem', 'issue', 'struggle', 
                      'confusing', 'annoying', 'broken', 'error', 'fail', 'hard to']
        scores['pain_point'] = sum(1 for w in pain_words if w in text_lower)
        
        # QUESTION indicators
        question_words = ['how', 'what', 'why', 'when', 'where', 'who', 'which', 'could', 'would', 'should']
        scores['question'] = sum(1 for w in question_words if w in text_lower)
        if '?' in text:
            scores['question'] += 2
        
        # IDEA indicators
        idea_words = ['could', 'should', 'what if', 'maybe', 'propose', 'suggest', 
                      'idea:', 'consider', 'alternative', 'potential']
        scores['idea'] = sum(1 for w in idea_words if w in text_lower)
        
        # QUOTE indicators
        if '"' in text or "'" in text or text.startswith('"') or 'said' in text_lower:
            scores['quote'] = 3
        else:
            scores['quote'] = 0
        
        # POSITIVE indicators
        positive_words = ['love', 'great', 'excellent', 'perfect', 'amazing', 
                         'wonderful', 'helpful', 'easy', 'intuitive', 'works well']
        scores['positive'] = sum(1 for w in positive_words if w in text_lower)
        
        # Get highest scoring type
        if max(scores.values()) > 0:
            predicted_type = max(scores, key=scores.get)
            confidence = min(0.95, scores[predicted_type] / 5.0)
        else:
            predicted_type = 'neutral'
            confidence = 0.5
        
        # Determine priority
        if predicted_type == 'pain_point' or scores.get('pain_point', 0) >= 2:
            priority = 'high'
        elif predicted_type in ['question', 'idea']:
            priority = 'medium'
        else:
            priority = 'low'
        
        return {
            'type': predicted_type,
            'confidence': confidence,
            'priority': priority,
            'scores': scores
        }
    
    def ingest_audio_file(self, project_id, file_path):
        """Process audio file with Whisper"""
        try:
            import whisper
            model = whisper.load_model("base")
            result = model.transcribe(file_path)
            
            # Process segments as notes
            for i, segment in enumerate(result['segments']):
                text = segment['text'].strip()
                classification = self._classify_note(text, 'none')
                
                note = {
                    'id': f'audio_{i}',
                    'text': text,
                    'predicted_type': classification['type'],
                    'confidence': classification['confidence'],
                    'priority': classification['priority'],
                    'author': 'Audio Transcript',
                    'source': file_path,
                    'source_type': 'audio',
                    'timestamp': datetime.now().isoformat()
                }
                
                self.projects[project_id]['notes'].append(note)
            
            self.projects[project_id]['sources'].append({
                'type': 'audio',
                'file': file_path,
                'processed': datetime.now().isoformat()
            })
            
            return {'notes': len(result['segments'])}
        except Exception as e:
            return {'error': str(e)}
    
    def ingest_document_file(self, project_id, file_path):
        """Process document file"""
        try:
            if file_path.endswith('.pdf'):
                import PyPDF2
                with open(file_path, 'rb') as f:
                    reader = PyPDF2.PdfReader(f)
                    text = ""
                    for page in reader.pages:
                        text += page.extract_text()
            elif file_path.endswith('.txt'):
                with open(file_path, 'r') as f:
                    text = f.read()
            else:
                return {'error': 'Unsupported format'}
            
            # Split into paragraphs
            paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()]
            
            for i, para in enumerate(paragraphs):
                classification = self._classify_note(para, 'none')
                
                note = {
                    'id': f'doc_{i}',
                    'text': para,
                    'predicted_type': classification['type'],
                    'confidence': classification['confidence'],
                    'priority': classification['priority'],
                    'author': 'Document',
                    'source': file_path,
                    'source_type': 'document',
                    'timestamp': datetime.now().isoformat()
                }
                
                self.projects[project_id]['notes'].append(note)
            
            self.projects[project_id]['sources'].append({
                'type': 'document',
                'file': file_path,
                'processed': datetime.now().isoformat()
            })
            
            return {'notes': len(paragraphs)}
        except Exception as e:
            return {'error': str(e)}
    
    def synthesize_project(self, project_id):
        """Generate comprehensive analysis with ENHANCED theme detection"""
        if project_id not in self.projects:
            return {'error': 'Project not found'}
        
        project = self.projects[project_id]
        notes = project['notes']
        
        if not notes:
            return {'error': 'No notes to analyze'}
        
        # Basic stats
        total_notes = len(notes)
        contributors = set(n['author'] for n in notes)
        
        # Content distribution
        by_type = defaultdict(list)
        for note in notes:
            by_type[note['predicted_type']].append(note)
        
        # Priority distribution
        by_priority = defaultdict(list)
        for note in notes:
            by_priority[note['priority']].append(note)
        
        # ENHANCED THEME DETECTION using embeddings and clustering
        themes = self._detect_themes(notes)
        
        # High priority action items (deduplicated)
        high_priority = by_priority.get('high', [])
        action_items = []
        seen_texts = set()
        
        for note in high_priority[:15]:  # Limit to top 15
            text_key = note['text'][:50].lower()
            if text_key not in seen_texts:
                action_items.append({
                    'type': note['predicted_type'],
                    'text': note['text'],
                    'confidence': note['confidence'],
                    'author': note['author']
                })
                seen_texts.add(text_key)
        
        # Calculate average confidence
        avg_confidence = np.mean([n['confidence'] for n in notes])
        
        return {
            'project_name': project['name'],
            'generated': datetime.now().isoformat(),
            'total_notes': total_notes,
            'total_sources': len(project['sources']),
            'contributors': len(contributors),
            'avg_confidence': avg_confidence,
            'by_type': {t: len(notes) for t, notes in by_type.items()},
            'by_priority': {p: len(notes) for p, notes in by_priority.items()},
            'themes': themes,
            'action_items': action_items
        }
    
    def _detect_themes(self, notes, min_cluster_size=3):
        """Detect themes using TF-IDF and clustering"""
        if len(notes) < min_cluster_size:
            return []
        
        # Extract texts
        texts = [n['text'] for n in notes]
        
        # Use TF-IDF for theme extraction
        try:
            vectorizer = TfidfVectorizer(
                max_features=100,
                stop_words='english',
                ngram_range=(1, 2),
                min_df=2
            )
            
            tfidf_matrix = vectorizer.fit_transform(texts)
            feature_names = vectorizer.get_feature_names_out()
            
            # Get embeddings for clustering
            embeddings = self.embedding_model.encode(texts)
            
            # Cluster similar notes
            clustering = DBSCAN(eps=0.5, min_samples=min_cluster_size).fit(embeddings)
            
            # Extract themes from clusters
            themes = []
            for cluster_id in set(clustering.labels_):
                if cluster_id == -1:  # Skip noise
                    continue
                
                # Get notes in this cluster
                cluster_indices = [i for i, label in enumerate(clustering.labels_) if label == cluster_id]
                cluster_texts = [texts[i] for i in cluster_indices]
                
                # Find top terms for this cluster
                cluster_tfidf = vectorizer.transform(cluster_texts)
                scores = np.asarray(cluster_tfidf.sum(axis=0)).flatten()
                top_indices = scores.argsort()[-3:][::-1]
                theme_terms = [feature_names[i] for i in top_indices]
                
                themes.append({
                    'name': ' + '.join(theme_terms).title(),
                    'frequency': len(cluster_indices),
                    'example': cluster_texts[0][:100]
                })
            
            # Sort by frequency
            themes.sort(key=lambda x: x['frequency'], reverse=True)
            return themes[:5]
            
        except Exception as e:
            print(f"Theme detection error: {e}")
            # Fallback: simple keyword frequency
            all_text = ' '.join(texts).lower()
            words = re.findall(r'\b\w{4,}\b', all_text)
            common = Counter(words).most_common(5)
            return [{'name': word.title(), 'frequency': count} for word, count in common]

üß† Initializing PRISM Brain V3...


In [15]:
# =====================================================
# GRADIO UI V3 - ENHANCED ANALYSIS OUTPUT
# NOTE: Run the PRISM Brain V3 cell FIRST before this cell
# =====================================================

import gradio as gr
from datetime import datetime

print("üé® Building PRISM UI V3...")

# Reference to Brain V3 (must be initialized in a previous cell)
# The brain variable should be available from the previous cell
print("üì° Connecting to PRISM Brain V3...")
try:
    # Test if brain exists by trying to access it
    test = brain
    print("‚úÖ Connected to Brain V3")
except NameError:
    raise RuntimeError("‚ùå Brain V3 not found! Run the Brain V3 initialization cell first.")


current_project_id = None

# =====================================================
# UI FUNCTIONS
# =====================================================

def create_project(name):
    global current_project_id
    if not name:
        return "‚ùå Enter project name"
    current_project_id = brain.create_project(name)
    return f"‚úÖ Project created: {name}\nüìã ID: {current_project_id}"

def upload_figjam(url):
    global current_project_id
    if not current_project_id:
        return "‚ùå Create project first"
    
    result = brain.ingest_figjam_url(current_project_id, url)
    if 'error' in result:
        return f"‚ùå {result['error']}"
    return f"‚úÖ FigJam board analyzed!\nüìä Notes processed: {result['notes']}\nüîó Source: {result['source_url']}"

def upload_audio(file):
    global current_project_id
    if not current_project_id:
        return "‚ùå Create project first"
    if not file:
        return "‚ùå No file"
    
    result = brain.ingest_audio_file(current_project_id, file.name)
    if 'error' in result:
        return f"‚ùå {result['error']}"
    return f"‚úÖ Audio transcribed!\nüìä Segments analyzed: {result['notes']}"

def upload_doc(file):
    global current_project_id
    if not current_project_id:
        return "‚ùå Create project first"
    if not file:
        return "‚ùå No file"
    
    result = brain.ingest_document_file(current_project_id, file.name)
    if 'error' in result:
        return f"‚ùå {result['error']}"
    return f"‚úÖ Document analyzed!\nüìä Sections processed: {result['notes']}"

def analyze():
    global current_project_id
    if not current_project_id:
        return "‚ùå Create project first"
    
    s = brain.synthesize_project(current_project_id)
    
    if 'error' in s:
        return f"‚ùå {s['error']}"
    
    # Format comprehensive research debrief
    exec_sum = s['executive_summary']
    
    output = f"""‚ïî‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïó
‚ïë                     PRISM RESEARCH DEBRIEF REPORT                             ‚ïë
‚ïö‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïù
PROJECT          {s['project_name']}
GENERATED        {s['generated']}
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

‚ñà EXECUTIVE SUMMARY

Research Focus:     {exec_sum['focus_area']}
Research Type:      {exec_sum['research_type']}
Participants:       {exec_sum['participant_count']} contributor(s)
Data Points:        {s['total_notes']} notes from {s['total_sources']} source(s)

Primary Concern:
  "{exec_sum['main_concern'][:200]}"

‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

‚ñà KEY INSIGHTS

"""
    
    # Display key insights
    if s['key_insights']:
        for i, insight in enumerate(s['key_insights'], 1):
            if insight['type'] == 'pain_point':
                output += f"  üî¥ PAIN POINT (mentioned {insight['frequency']}x)\n"
            else:
                output += f"  üí° {insight['type'].upper()}\n"
            output += f"     {insight['insight']}\n\n"
    else:
        output += "  No major insights detected\n"
    
    output += f"""‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

‚ñà THEMATIC ANALYSIS

"""
    if s['themes']:
        for i, theme in enumerate(s['themes'], 1):
            bar_length = min(int(theme['frequency'] / 3), 40)
            bar = '‚ñì' * bar_length
            output += f"  {i}. {theme['name']}\n"
            output += f"     Frequency: {theme['frequency']} mentions {bar}\n"
            if 'example' in theme:
                example = theme['example'][:80] + '...' if len(theme['example']) > 80 else theme['example']
                output += f"     Example: \"{example}\"\n"
            output += "\n"
    else:
        output += "  No themes detected (need more data)\n"
    
    output += f"""‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

‚ñà USER PAIN POINTS & CHALLENGES

"""
    if s['action_items']:
        for i, item in enumerate(s['action_items'][:8], 1):
            output += f"  {i}. {item['text']}\n"
            output += f"     ‚Äî {item['author']} (confidence: {item['confidence']*100:.0f}%)\n\n"
    else:
        output += "  No critical pain points identified\n"
    
    output += f"""‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

# BUILD UI
with gr.Blocks(title="PRISM V3") as demo:
    gr.Markdown("# PRISM V3 - Enhanced Research Synthesis\n### Multi-Modal AI Analysis with Improved Classification")
    
    gr.Markdown("## 1. Create Project")
    with gr.Row():
        project_name = gr.Textbox(label="Project Name", placeholder="Enter project name...")
        create_btn = gr.Button("Create", variant="primary")
    create_status = gr.Textbox(label="Status", lines=2)
    
    gr.Markdown("---")
    gr.Markdown("## 2. Upload Data Sources")
    
    with gr.Tab("üìã FigJam Board"):
        figjam_url = gr.Textbox(
            label="FigJam Board URL",
            placeholder="https://www.figma.com/board/..."
        )
        figjam_btn = gr.Button("Analyze Board", variant="primary")
        figjam_status = gr.Textbox(label="Status", lines=3)
    
    with gr.Tab("üéôÔ∏è Audio File"):
        audio_file = gr.File(label="Audio (.mp3, .wav, .mov)")
        audio_btn = gr.Button("Process Audio", variant="primary")
        audio_status = gr.Textbox(label="Status", lines=3)
    
    with gr.Tab("üìÑ Document"):
        doc_file = gr.File(label="Document (.pdf, .txt)")
        doc_btn = gr.Button("Process Document", variant="primary")
        doc_status = gr.Textbox(label="Status", lines=3)
    
    gr.Markdown("---")
    gr.Markdown("## 3. Generate Analysis")
    
    analyze_btn = gr.Button("üîç GENERATE SYNTHESIS REPORT", variant="primary", size="lg")
    
    gr.Markdown("### Analysis Output")
    analysis_output = gr.Textbox(
        label="Synthesis Report",
        lines=35,
        show_copy_button=True,
        elem_classes=["monospace"]
    )
    
    # Wire up events
    create_btn.click(create_project, [project_name], [create_status])
    figjam_btn.click(upload_figjam, [figjam_url], [figjam_status])
    audio_btn.click(upload_audio, [audio_file], [audio_status])
    doc_btn.click(upload_doc, [doc_file], [doc_status])
    analyze_btn.click(analyze, [], [analysis_output])
    
    # Add custom CSS
    demo.css = """
    .monospace textarea {
        font-family: 'Courier New', monospace;
        font-size: 12px;
    }
    """

print("üöÄ Launching PRISM V3...")
demo.launch(share=True)

SyntaxError: invalid decimal literal (76586501.py, line 203)

In [16]:
# =====================================================
# GRADIO UI V3 - ENHANCED ANALYSIS OUTPUT
# NOTE: Run the PRISM Brain V3 cell FIRST before this cell
# =====================================================

import gradio as gr
from datetime import datetime

print("üé® Building PRISM UI V3...")

# Reference to Brain V3 (must be initialized in a previous cell)
# The brain variable should be available from the previous cell
print("üì° Connecting to PRISM Brain V3...")
try:
    # Test if brain exists by trying to access it
    test = brain
    print("‚úÖ Connected to Brain V3")
except NameError:
    raise RuntimeError("‚ùå Brain V3 not found! Run the Brain V3 initialization cell first.")


current_project_id = None

# =====================================================
# UI FUNCTIONS
# =====================================================

def create_project(name):
    global current_project_id
    if not name:
        return "‚ùå Enter project name"
    current_project_id = brain.create_project(name)
    return f"‚úÖ Project created: {name}\nüìã ID: {current_project_id}"

def upload_figjam(url):
    global current_project_id
    if not current_project_id:
        return "‚ùå Create project first"
    
    result = brain.ingest_figjam_url(current_project_id, url)
    if 'error' in result:
        return f"‚ùå {result['error']}"
    return f"‚úÖ FigJam board analyzed!\nüìä Notes processed: {result['notes']}\nüîó Source: {result['source_url']}"

def upload_audio(file):
    global current_project_id
    if not current_project_id:
        return "‚ùå Create project first"
    if not file:
        return "‚ùå No file"
    
    result = brain.ingest_audio_file(current_project_id, file.name)
    if 'error' in result:
        return f"‚ùå {result['error']}"
    return f"‚úÖ Audio transcribed!\nüìä Segments analyzed: {result['notes']}"

def upload_doc(file):
    global current_project_id
    if not current_project_id:
        return "‚ùå Create project first"
    if not file:
        return "‚ùå No file"
    
    result = brain.ingest_document_file(current_project_id, file.name)
    if 'error' in result:
        return f"‚ùå {result['error']}"
    return f"‚úÖ Document analyzed!\nüìä Sections processed: {result['notes']}"

def analyze():
    global current_project_id
    if not current_project_id:
        return "‚ùå Create project first"
    
    s = brain.synthesize_project(current_project_id)
    
    if 'error' in s:
        return f"‚ùå {s['error']}"
    
    # Format comprehensive research debrief
    exec_sum = s['executive_summary']
    
    output = f"""‚ïî‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïó
‚ïë                     PRISM RESEARCH DEBRIEF REPORT                             ‚ïë
‚ïö‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïù
PROJECT          {s['project_name']}
GENERATED        {s['generated']}
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

‚ñà EXECUTIVE SUMMARY

Research Focus:     {exec_sum['focus_area']}
Research Type:      {exec_sum['research_type']}
Participants:       {exec_sum['participant_count']} contributor(s)
Data Points:        {s['total_notes']} notes from {s['total_sources']} source(s)

Primary Concern:
  "{exec_sum['main_concern'][:200]}"

‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

‚ñà KEY INSIGHTS

"""
    
    # Display key insights
    if s['key_insights']:
        for i, insight in enumerate(s['key_insights'], 1):
            if insight['type'] == 'pain_point':
                output += f"  üî¥ PAIN POINT (mentioned {insight['frequency']}x)\n"
            else:
                output += f"  üí° {insight['type'].upper()}\n"
            output += f"     {insight['insight']}\n\n"
    else:
        output += "  No major insights detected\n"
    
    output += f"""‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

‚ñà THEMATIC ANALYSIS

"""
    if s['themes']:
        for i, theme in enumerate(s['themes'], 1):
            bar_length = min(int(theme['frequency'] / 3), 40)
            bar = '‚ñì' * bar_length
            output += f"  {i}. {theme['name']}\n"
            output += f"     Frequency: {theme['frequency']} mentions {bar}\n"
            if 'example' in theme:
                example = theme['example'][:80] + '...' if len(theme['example']) > 80 else theme['example']
                output += f"     Example: \"{example}\"\n"
            output += "\n"
    else:
        output += "  No themes detected (need more data)\n"
    
    output += f"""‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

‚ñà USER PAIN POINTS & CHALLENGES

"""
    if s['action_items']:
        for i, item in enumerate(s['action_items'][:8], 1):
            output += f"  {i}. {item['text']}\n"
            output += f"     ‚Äî {item['author']} (confidence: {item['confidence']*100:.0f}%)\n\n"
    else:
        output += "  No critical pain points identified\n"
    
    output += f"""‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

‚ñà KEY QUESTIONS RAISED

"""
    if s['questions']:
        for i, q in enumerate(s['questions'][:5], 1):
            text = q['text'][:150] + '...' if len(q['text']) > 150 else q['text']
            output += f"  {i}. {text}\n"
            output += f"     ‚Äî {q['author']}\n\n"
    else:
        output += "  No questions documented\n"
    
    output += f"""‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

‚ñà IDEAS & OPPORTUNITIES

"""
    if s['ideas']:
        for i, idea in enumerate(s['ideas'][:5], 1):
            text = idea['text'][:150] + '...' if len(idea['text']) > 150 else idea['text']
            output += f"  {i}. {text}\n"
            output += f"     ‚Äî {idea['author']}\n\n"
    else:
        output += "  No ideas captured\n"
    
    output += f"""‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

‚ñà SUPPORTING QUOTES

"""
    if s['quotes']:
        for i, quote in enumerate(s['quotes'][:5], 1):
            text = quote['text'][:150] + '...' if len(quote['text']) > 150 else quote['text']
            output += f"  {i}. \"{text}\"\n"
            output += f"     ‚Äî {quote['author']}\n\n"
    else:
        output += "  No direct quotes captured\n"
    
    output += f"""‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

‚ñà DATA BREAKDOWN

Content Distribution:
"""
    
    # Sort by count and create bar chart
    type_counts = sorted(s['by_type'].items(), key=lambda x: x[1], reverse=True)
    max_count = max([c for _, c in type_counts]) if type_counts else 1
    
    for content_type, count in type_counts:
        percentage = (count / s['total_notes']) * 100
        bar_length = int((count / max_count) * 30)
        bar = '‚ñà' * bar_length
        output += f"  {content_type.upper():15} {count:4} {bar} {percentage:.1f}%\n"
    
    output += f"""
Priority Distribution:
"""
    for priority in ['high', 'medium', 'low']:
        count = s['by_priority'].get(priority, 0)
        percentage = (count / s['total_notes']) * 100 if s['total_notes'] > 0 else 0
        output += f"  {priority.upper():15} {count:4} ({percentage:.1f}%)\n"
    
    output += f"""
Analysis Confidence: {s['avg_confidence']*100:.1f}%

‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
                              END OF REPORT
‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
"""
    
    return output

# BUILD UI
with gr.Blocks(title="PRISM V3") as demo:
    gr.Markdown("# PRISM V3 - Enhanced Research Synthesis\n### Multi-Modal AI Analysis with Improved Classification")
    
    gr.Markdown("## 1. Create Project")
    with gr.Row():
        project_name = gr.Textbox(label="Project Name", placeholder="Enter project name...")
        create_btn = gr.Button("Create", variant="primary")
    create_status = gr.Textbox(label="Status", lines=2)
    
    gr.Markdown("---")
    gr.Markdown("## 2. Upload Data Sources")
    
    with gr.Tab("üìã FigJam Board"):
        figjam_url = gr.Textbox(
            label="FigJam Board URL",
            placeholder="https://www.figma.com/board/..."
        )
        figjam_btn = gr.Button("Analyze Board", variant="primary")
        figjam_status = gr.Textbox(label="Status", lines=3)
    
    with gr.Tab("üéôÔ∏è Audio File"):
        audio_file = gr.File(label="Audio (.mp3, .wav, .mov)")
        audio_btn = gr.Button("Process Audio", variant="primary")
        audio_status = gr.Textbox(label="Status", lines=3)
    
    with gr.Tab("üìÑ Document"):
        doc_file = gr.File(label="Document (.pdf, .txt)")
        doc_btn = gr.Button("Process Document", variant="primary")
        doc_status = gr.Textbox(label="Status", lines=3)
    
    gr.Markdown("---")
    gr.Markdown("## 3. Generate Analysis")
    
    analyze_btn = gr.Button("üîç GENERATE SYNTHESIS REPORT", variant="primary", size="lg")
    
    gr.Markdown("### Analysis Output")
    analysis_output = gr.Textbox(
        label="Synthesis Report",
        lines=35,
        show_copy_button=True,
        elem_classes=["monospace"]
    )
    
    # Wire up events
    create_btn.click(create_project, [project_name], [create_status])
    figjam_btn.click(upload_figjam, [figjam_url], [figjam_status])
    audio_btn.click(upload_audio, [audio_file], [audio_status])
    doc_btn.click(upload_doc, [doc_file], [doc_status])
    analyze_btn.click(analyze, [], [analysis_output])

print("üöÄ Launching PRISM V3...")
demo.launch(share=True)

üé® Building PRISM UI V3...
üì° Connecting to PRISM Brain V3...
‚úÖ Connected to Brain V3
üöÄ Launching PRISM V3...
* Running on local URL:  http://127.0.0.1:7862
* Running on public URL: https://739022d040a4b5b049.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




Theme detection error: CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasSgemm( handle, opa, opb, m, n, k, &alpha, a, lda, b, ldb, &beta, c, ldc)`


Traceback (most recent call last):
  File "/opt/conda/lib/python3.12/site-packages/gradio/queueing.py", line 759, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/conda/lib/python3.12/site-packages/gradio/route_utils.py", line 354, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/conda/lib/python3.12/site-packages/gradio/blocks.py", line 2116, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/conda/lib/python3.12/site-packages/gradio/blocks.py", line 1623, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/conda/lib/python3.12/site-packages/anyio/to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
           ^^^^^

Theme detection error: CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasSgemm( handle, opa, opb, m, n, k, &alpha, a, lda, b, ldb, &beta, c, ldc)`


Traceback (most recent call last):
  File "/opt/conda/lib/python3.12/site-packages/gradio/queueing.py", line 759, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/conda/lib/python3.12/site-packages/gradio/route_utils.py", line 354, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/conda/lib/python3.12/site-packages/gradio/blocks.py", line 2116, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/conda/lib/python3.12/site-packages/gradio/blocks.py", line 1623, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/conda/lib/python3.12/site-packages/anyio/to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
           ^^^^^

In [17]:
# Load training data
import json

print("üìö Loading training data...")
with open('/home/jovyan/local/DeepLearning/synthetic_data/full_10k/training_data.json', 'r') as f:
    training_data = json.load(f)

print(f"‚úÖ Loaded {len(training_data)} boards")

# Your FigJam token
figjam_token = "figd_YP-yLbvxZ0jOVR9C54bCPveiHdkFB3uZD7hKQKDF"

# Create Brain V3 instance - passing empty list to skip pattern learning
brain = PRISMBrainV3([], figjam_token)
brain.training_data = training_data

# Force CPU usage for embeddings to avoid CUDA errors
brain.embedding_model = brain.embedding_model.to('cpu')

print("‚úÖ Brain V3 initialized!")
print(f"Brain type: {type(brain)}")
print(f"Brain has {len(brain.projects)} projects")

üìö Loading training data...
‚úÖ Loaded 10000 boards
üìö Loading training data...
ü§ñ Loading embedding model...
‚úÖ PRISM Brain V3 ready!
‚úÖ Brain V3 initialized!
Brain type: <class '__main__.PRISMBrainV3'>
Brain has 0 projects


In [18]:
# =====================================================
# GRADIO UI FOR PRISM BRAIN V3
# =====================================================

import gradio as gr
from datetime import datetime

print("üé® Building PRISM UI...")

# Check brain exists
try:
    test = brain
    print("‚úÖ Connected to Brain V3")
except NameError:
    raise RuntimeError("‚ùå Brain V3 not found! Run the Brain V3 initialization cell first.")

current_project_id = None

# =====================================================
# UI FUNCTIONS
# =====================================================

def create_project(name):
    global current_project_id
    if not name:
        return "‚ùå Enter project name"
    current_project_id = brain.create_project(name)
    return f"‚úÖ Project created: {name}\nüìã ID: {current_project_id}"

def upload_figjam(url):
    global current_project_id
    if not current_project_id:
        return "‚ùå Create project first"
    
    result = brain.ingest_figjam_url(current_project_id, url)
    if 'error' in result:
        return f"‚ùå {result['error']}"
    return f"‚úÖ FigJam board analyzed!\nüìä Notes processed: {result['notes']}\nüîó Source: {result['source_url']}"

def upload_audio(file):
    global current_project_id
    if not current_project_id:
        return "‚ùå Create project first"
    if not file:
        return "‚ùå No file"
    
    result = brain.ingest_audio_file(current_project_id, file.name)
    if 'error' in result:
        return f"‚ùå {result['error']}"
    return f"‚úÖ Audio transcribed!\nüìä Segments analyzed: {result['notes']}"

def upload_doc(file):
    global current_project_id
    if not current_project_id:
        return "‚ùå Create project first"
    if not file:
        return "‚ùå No file"
    
    result = brain.ingest_document_file(current_project_id, file.name)
    if 'error' in result:
        return f"‚ùå {result['error']}"
    return f"‚úÖ Document analyzed!\nüìä Sections processed: {result['notes']}"

def analyze():
    global current_project_id
    if not current_project_id:
        return "‚ùå Create project first"
    
    try:
        s = brain.synthesize_project(current_project_id)
        
        if 'error' in s:
            return f"‚ùå {s['error']}"
        
        # Format comprehensive research debrief
        exec_sum = s.get('executive_summary', {})
        
        output = f"""‚ïî‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïó
‚ïë                     PRISM RESEARCH DEBRIEF REPORT                             ‚ïë
‚ïö‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïù
PROJECT          {s['project_name']}
GENERATED        {s['generated']}
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

‚ñà EXECUTIVE SUMMARY

Research Focus:     {exec_sum.get('focus_area', 'Unknown')}
Research Type:      {exec_sum.get('research_type', 'Unknown')}
Participants:       {exec_sum.get('participant_count', 0)} contributor(s)
Data Points:        {s['total_notes']} notes from {s['total_sources']} source(s)

Primary Concern:
  "{exec_sum.get('main_concern', 'No concerns identified')[:200]}"

‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

‚ñà KEY INSIGHTS

"""
        
        # Display key insights
        if s.get('key_insights'):
            for i, insight in enumerate(s['key_insights'], 1):
                if insight['type'] == 'pain_point':
                    output += f"  üî¥ PAIN POINT (mentioned {insight['frequency']}x)\n"
                else:
                    output += f"  üí° {insight['type'].upper()}\n"
                output += f"     {insight['insight']}\n\n"
        else:
            output += "  No major insights detected\n"
        
        output += f"""‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

‚ñà THEMATIC ANALYSIS

"""
        if s.get('themes'):
            for i, theme in enumerate(s['themes'], 1):
                bar_length = min(int(theme['frequency'] / 3), 40)
                bar = '‚ñì' * bar_length
                output += f"  {i}. {theme['name']}\n"
                output += f"     Frequency: {theme['frequency']} mentions {bar}\n"
                if 'example' in theme:
                    example = theme['example'][:80] + '...' if len(theme['example']) > 80 else theme['example']
                    output += f"     Example: \"{example}\"\n"
                output += "\n"
        else:
            output += "  No themes detected (need more data)\n"
        
        output += f"""‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

‚ñà USER PAIN POINTS & CHALLENGES

"""
        if s.get('action_items'):
            for i, item in enumerate(s['action_items'][:8], 1):
                output += f"  {i}. {item['text']}\n"
                output += f"     ‚Äî {item['author']} (confidence: {item['confidence']*100:.0f}%)\n\n"
        else:
            output += "  No critical pain points identified\n"
        
        output += f"""‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

‚ñà KEY QUESTIONS RAISED

"""
        if s.get('questions'):
            for i, q in enumerate(s['questions'][:5], 1):
                text = q['text'][:150] + '...' if len(q['text']) > 150 else q['text']
                output += f"  {i}. {text}\n"
                output += f"     ‚Äî {q['author']}\n\n"
        else:
            output += "  No questions documented\n"
        
        output += f"""‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

‚ñà IDEAS & OPPORTUNITIES

"""
        if s.get('ideas'):
            for i, idea in enumerate(s['ideas'][:5], 1):
                text = idea['text'][:150] + '...' if len(idea['text']) > 150 else idea['text']
                output += f"  {i}. {text}\n"
                output += f"     ‚Äî {idea['author']}\n\n"
        else:
            output += "  No ideas captured\n"
        
        output += f"""‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

‚ñà SUPPORTING QUOTES

"""
        if s.get('quotes'):
            for i, quote in enumerate(s['quotes'][:5], 1):
                text = quote['text'][:150] + '...' if len(quote['text']) > 150 else quote['text']
                output += f"  {i}. \"{text}\"\n"
                output += f"     ‚Äî {quote['author']}\n\n"
        else:
            output += "  No direct quotes captured\n"
        
        output += f"""‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

‚ñà DATA BREAKDOWN

Content Distribution:
"""
        
        # Sort by count and create bar chart
        type_counts = sorted(s['by_type'].items(), key=lambda x: x[1], reverse=True)
        max_count = max([c for _, c in type_counts]) if type_counts else 1
        
        for content_type, count in type_counts:
            percentage = (count / s['total_notes']) * 100
            bar_length = int((count / max_count) * 30)
            bar = '‚ñà' * bar_length
            output += f"  {content_type.upper():15} {count:4} {bar} {percentage:.1f}%\n"
        
        output += f"""
Priority Distribution:
"""
        for priority in ['high', 'medium', 'low']:
            count = s['by_priority'].get(priority, 0)
            percentage = (count / s['total_notes']) * 100 if s['total_notes'] > 0 else 0
            output += f"  {priority.upper():15} {count:4} ({percentage:.1f}%)\n"
        
        output += f"""
Analysis Confidence: {s['avg_confidence']*100:.1f}%

‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
                              END OF REPORT
‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
"""
        
        return output
    
    except Exception as e:
        return f"‚ùå Error generating report: {str(e)}\n\nPlease check the console for details."

# BUILD UI
with gr.Blocks(title="PRISM V3") as demo:
    gr.Markdown("# PRISM V3 - Enhanced Research Synthesis\n### Multi-Modal AI Analysis with Comprehensive Debriefs")
    
    gr.Markdown("## 1. Create Project")
    with gr.Row():
        project_name = gr.Textbox(label="Project Name", placeholder="Enter project name...")
        create_btn = gr.Button("Create", variant="primary")
    create_status = gr.Textbox(label="Status", lines=2)
    
    gr.Markdown("---")
    gr.Markdown("## 2. Upload Data Sources")
    
    with gr.Tab("üìã FigJam Board"):
        figjam_url = gr.Textbox(
            label="FigJam Board URL",
            placeholder="https://www.figma.com/board/..."
        )
        figjam_btn = gr.Button("Analyze Board", variant="primary")
        figjam_status = gr.Textbox(label="Status", lines=3)
    
    with gr.Tab("üéôÔ∏è Audio File"):
        audio_file = gr.File(label="Audio (.mp3, .wav, .mov)")
        audio_btn = gr.Button("Process Audio", variant="primary")
        audio_status = gr.Textbox(label="Status", lines=3)
    
    with gr.Tab("üìÑ Document"):
        doc_file = gr.File(label="Document (.pdf, .txt)")
        doc_btn = gr.Button("Process Document", variant="primary")
        doc_status = gr.Textbox(label="Status", lines=3)
    
    gr.Markdown("---")
    gr.Markdown("## 3. Generate Analysis")
    
    analyze_btn = gr.Button("üîç GENERATE RESEARCH DEBRIEF", variant="primary", size="lg")
    
    gr.Markdown("### Research Debrief Report")
    analysis_output = gr.Textbox(
        label="Comprehensive Analysis",
        lines=40,
        show_copy_button=True
    )
    
    # Wire up events
    create_btn.click(create_project, [project_name], [create_status])
    figjam_btn.click(upload_figjam, [figjam_url], [figjam_status])
    audio_btn.click(upload_audio, [audio_file], [audio_status])
    doc_btn.click(upload_doc, [doc_file], [doc_status])
    analyze_btn.click(analyze, [], [analysis_output])

print("üöÄ Launching PRISM V3...")
demo.launch(share=True)

üé® Building PRISM UI...
‚úÖ Connected to Brain V3
üöÄ Launching PRISM V3...
* Running on local URL:  http://127.0.0.1:7863
* Running on public URL: https://a4f1a05455c8b8c3ec.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


