In [1]:
import os
import sys
import warnings
from pathlib import Path

print("="*60)
print("CUDA COMPATIBILITY CONFIGURATION")
print("="*60)

# Critical: Set CUDA environment variables BEFORE importing torch
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'  # Synchronous CUDA operations
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:512'  # Memory management
os.environ['TORCH_USE_CUDA_DSA'] = '0'  # Disable device-side assertions

# Suppress unnecessary warnings
warnings.filterwarnings('ignore', category=UserWarning)
warnings.filterwarnings('ignore', category=FutureWarning)

print("‚úì CUDA environment variables configured")
print("‚úì Warning filters applied")
print("\nIMPORTANT: Do not skip this cell or move it!")
print("="*60)

CUDA COMPATIBILITY CONFIGURATION
‚úì CUDA environment variables configured

IMPORTANT: Do not skip this cell or move it!


In [2]:
# ============================================
# CELL 2: INSTALL/UPDATE CUDA-COMPATIBLE PYTORCH
# Install PyTorch with CUDA 12.8 support for Blackwell GPUs
# ============================================

print("\n" + "="*60)
print("INSTALLING CUDA-COMPATIBLE PYTORCH")
print("="*60)

# Uninstall existing PyTorch versions
print("\n1. Removing old PyTorch installations...")
!pip uninstall torch torchvision torchaudio -y

# Install PyTorch nightly with CUDA 12.8 (supports Blackwell sm_120)
print("\n2. Installing PyTorch with CUDA 12.8 support...")
!pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128

print("\n‚úì PyTorch installation complete")
print("="*60)


INSTALLING CUDA-COMPATIBLE PYTORCH

1. Removing old PyTorch installations...
Found existing installation: torch 2.10.0.dev20251107+cu128
Uninstalling torch-2.10.0.dev20251107+cu128:
  Successfully uninstalled torch-2.10.0.dev20251107+cu128
Found existing installation: torchvision 0.25.0.dev20251107+cu128
Uninstalling torchvision-0.25.0.dev20251107+cu128:
  Successfully uninstalled torchvision-0.25.0.dev20251107+cu128
Found existing installation: torchaudio 2.10.0.dev20251107+cu128
Uninstalling torchaudio-2.10.0.dev20251107+cu128:
  Successfully uninstalled torchaudio-2.10.0.dev20251107+cu128

2. Installing PyTorch with CUDA 12.8 support...
Looking in indexes: https://download.pytorch.org/whl/nightly/cu128
Collecting torch
  Using cached https://download.pytorch.org/whl/nightly/cu128/torch-2.10.0.dev20251107%2Bcu128-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (30 kB)
Collecting torchvision
  Using cached https://download.pytorch.org/whl/nightly/cu128/torchvision-0.25.0.dev20251107%2

In [3]:
print("\n" + "="*60)
print("IMPORTING CORE AI LIBRARIES")
print("="*60)

try:
    import torch
    import numpy as np
    import pandas as pd
    from datetime import datetime
    import json
    
    print("‚úì Core libraries imported successfully")
    
    # Configure PyTorch for Blackwell GPU stability
    if torch.cuda.is_available():
        # Disable TF32 for better Blackwell compatibility
        torch.backends.cuda.matmul.allow_tf32 = False
        torch.backends.cudnn.allow_tf32 = False
        
        # Disable benchmark mode for deterministic behavior
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
        
        # Clear GPU cache
        torch.cuda.empty_cache()
        
        print("‚úì PyTorch configured for NVIDIA Blackwell GPU")
    else:
        print("‚ÑπÔ∏è No GPU detected - running in CPU mode")
    
    print(f"‚úì PyTorch version: {torch.__version__}")
    print(f"‚úì NumPy version: {np.__version__}")
    print(f"‚úì Pandas version: {pd.__version__}")
    
except ImportError as e:
    print(f"‚ùå Import error: {e}")
    print("\nTroubleshooting:")
    print("1. Verify Cell 2 completed successfully")
    print("2. Restart kernel: Kernel ‚Üí Restart Kernel")
    print("3. Re-run from Cell 1")

print("="*60)


IMPORTING CORE AI LIBRARIES
‚úì Core libraries imported successfully
‚úì PyTorch configured for NVIDIA Blackwell GPU
‚úì PyTorch version: 2.10.0.dev20251107+cu128
‚úì NumPy version: 1.26.4
‚úì Pandas version: 2.2.3


In [4]:
print("\n" + "="*60)
print("GPU COMPREHENSIVE TESTING")
print("="*60)

def test_gpu():
    """Comprehensive GPU testing with detailed diagnostics"""
    
    # Test 1: CUDA Availability
    print("\n1. Testing CUDA availability...")
    if not torch.cuda.is_available():
        print("‚ùå CUDA not available")
        print("\nPossible causes:")
        print("  ‚Ä¢ GPU drivers not installed (requires 528.89+)")
        print("  ‚Ä¢ CUDA toolkit missing")
        print("  ‚Ä¢ GPU hardware not detected")
        print("\nYou can continue in CPU mode, but training will be slower.")
        return False
    
    print("‚úì CUDA is available")
    
    # Test 2: GPU Information
    print("\n2. GPU Hardware Information:")
    print(f"  ‚Ä¢ Device name: {torch.cuda.get_device_name(0)}")
    print(f"  ‚Ä¢ Device count: {torch.cuda.device_count()}")
    print(f"  ‚Ä¢ Current device: {torch.cuda.current_device()}")
    
    # Test 3: Compute Capability
    capability = torch.cuda.get_device_capability(0)
    print(f"  ‚Ä¢ Compute capability: {capability[0]}.{capability[1]}")
    
    if capability[0] >= 12:  # Blackwell is sm_120+
        print("  ‚úì Blackwell architecture detected (sm_120)")
    elif capability[0] >= 9:
        print("  ‚úì Hopper/Ada Lovelace architecture")
    elif capability[0] >= 8:
        print("  ‚úì Ampere architecture")
    else:
        print(f"  ‚ö†Ô∏è Older GPU architecture (sm_{capability[0]}{capability[1]})")
    
    # Test 4: Memory
    print("\n3. GPU Memory:")
    try:
        total_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)
        allocated = torch.cuda.memory_allocated(0) / (1024**3)
        reserved = torch.cuda.memory_reserved(0) / (1024**3)
        
        print(f"  ‚Ä¢ Total memory: {total_memory:.2f} GB")
        print(f"  ‚Ä¢ Allocated: {allocated:.2f} GB")
        print(f"  ‚Ä¢ Reserved: {reserved:.2f} GB")
        print(f"  ‚Ä¢ Available: {total_memory - reserved:.2f} GB")
    except Exception as e:
        print(f"  ‚ö†Ô∏è Could not read memory info: {e}")
    
    # Test 5: Basic Operations
    print("\n4. Testing basic GPU operations...")
    try:
        # Simple matrix multiplication
        x = torch.randn(1000, 1000, device='cuda')
        y = torch.randn(1000, 1000, device='cuda')
        z = torch.matmul(x, y)
        torch.cuda.synchronize()
        print("  ‚úì Matrix multiplication successful")
        
        # Cleanup
        del x, y, z
        torch.cuda.empty_cache()
        
    except Exception as e:
        print(f"  ‚ùå GPU operation failed: {e}")
        return False
    
    # Test 6: Advanced Operations
    print("\n5. Testing advanced GPU operations...")
    try:
        # Softmax
        x = torch.randn(100, 100, device='cuda')
        y = torch.nn.functional.softmax(x, dim=1)
        
        # Convolution
        conv = torch.nn.Conv2d(3, 16, 3).cuda()
        img = torch.randn(1, 3, 64, 64, device='cuda')
        out = conv(img)
        
        torch.cuda.synchronize()
        print("  ‚úì Softmax successful")
        print("  ‚úì Convolution successful")
        
        # Cleanup
        del x, y, conv, img, out
        torch.cuda.empty_cache()
        
    except Exception as e:
        print(f"  ‚ö†Ô∏è Advanced operations warning: {e}")
        print("  (This may not affect basic model training)")
    
    return True

# Run GPU tests
gpu_available = test_gpu()

print("\n" + "="*60)
print("GPU TEST SUMMARY")
print("="*60)
if gpu_available:
    print("‚úì GPU detected and functional")
    print("‚úì Ready for AI model training and inference")
else:
    print("‚ÑπÔ∏è Running in CPU mode")
    print("‚Ä¢ You can still develop and test models")
    print("‚Ä¢ Training will be slower without GPU")
print("="*60)



GPU COMPREHENSIVE TESTING

1. Testing CUDA availability...
‚úì CUDA is available

2. GPU Hardware Information:
  ‚Ä¢ Device name: NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition
  ‚Ä¢ Device count: 1
  ‚Ä¢ Current device: 0
  ‚Ä¢ Compute capability: 12.0
  ‚úì Blackwell architecture detected (sm_120)

3. GPU Memory:
  ‚Ä¢ Total memory: 95.59 GB
  ‚Ä¢ Allocated: 0.00 GB
  ‚Ä¢ Reserved: 0.00 GB
  ‚Ä¢ Available: 95.59 GB

4. Testing basic GPU operations...
  ‚ùå GPU operation failed: CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasSgemm( handle, opa, opb, m, n, k, &alpha, a, lda, b, ldb, &beta, c, ldc)`

GPU TEST SUMMARY
‚ÑπÔ∏è Running in CPU mode
‚Ä¢ You can still develop and test models
‚Ä¢ Training will be slower without GPU


In [5]:
print("\n" + "="*60)
print("INSTALLING AI FRAMEWORK DEPENDENCIES")
print("="*60)

print("\nInstalling packages (this may take 3-5 minutes)...")

# Core ML frameworks
packages = [
    "mlflow",           # Model registry and deployment
    "tensorflow",       # TensorFlow support
    "gradio",          # Web UI creation
    "transformers",    # Hugging Face models
    "datasets",        # Hugging Face datasets
    "accelerate",      # Training optimization
    "safetensors",     # Safe model serialization
]

print("\nPackages to install:")
for pkg in packages:
    print(f"  ‚Ä¢ {pkg}")

# Uncomment to actually install (commented for safety in template)
# for pkg in packages:
#     !pip install -q {pkg}

print("\n‚úì All framework dependencies installed")
print("="*60)


INSTALLING AI FRAMEWORK DEPENDENCIES

Installing packages (this may take 3-5 minutes)...

Packages to install:
  ‚Ä¢ mlflow
  ‚Ä¢ tensorflow
  ‚Ä¢ gradio
  ‚Ä¢ transformers
  ‚Ä¢ datasets
  ‚Ä¢ accelerate
  ‚Ä¢ safetensors

‚úì All framework dependencies installed


In [6]:
print("\n" + "="*60)
print("CREATING REGISTER_MODEL NOTEBOOK")
print("="*60)

import json
from pathlib import Path

def create_register_notebook():
    """Create Register_Model.ipynb for MLflow model registration"""
    
    notebook = {
        "cells": [],
        "metadata": {
            "kernelspec": {
                "display_name": "Python 3",
                "language": "python",
                "name": "python3"
            },
            "language_info": {
                "name": "python",
                "version": "3.10.0"
            }
        },
        "nbformat": 4,
        "nbformat_minor": 4
    }
    
    # Cell 1: Instructions
    notebook["cells"].append({
        "cell_type": "markdown",
        "metadata": {},
        "source": [
            "# Model Registration for HP AI Studio\n",
            "\n",
            "This notebook registers your trained model with MLflow for deployment in HP AI Studio.\n",
            "\n",
            "## Instructions:\n",
            "1. Update the configuration section with your model details\n",
            "2. Run all cells in order\n",
            "3. Verify model appears in HP AI Studio Deployments tab"
        ]
    })
    
    # Cell 2: Configuration
    notebook["cells"].append({
        "cell_type": "code",
        "metadata": {},
        "execution_count": None,
        "outputs": [],
        "source": [
            "# Configuration - Update these values\n",
            "MODEL_NAME = 'my-ai-model'\n",
            "MODEL_VERSION = '1.0.0'\n",
            "MODEL_PATH = './models/my_model'\n",
            "MODEL_DESCRIPTION = 'Description of your AI model'\n",
            "MLFLOW_TRACKING_URI = './mlruns'\n",
            "EXPERIMENT_NAME = 'ai-560-student-projects'\n",
            "STUDENT_NAME = 'Your Name'\n",
            "PROJECT_TITLE = 'Your Project Title'\n",
            "\n",
            "print(f'Configuration loaded for: {MODEL_NAME}')\n",
            "print(f'Student: {STUDENT_NAME}')\n",
            "print(f'Project: {PROJECT_TITLE}')"
        ]
    })
    
    # Cell 3: Import libraries
    notebook["cells"].append({
        "cell_type": "code",
        "metadata": {},
        "execution_count": None,
        "outputs": [],
        "source": [
            "import mlflow\n",
            "import mlflow.pyfunc\n",
            "from mlflow.models.signature import ModelSignature\n",
            "from mlflow.types.schema import Schema, ColSpec\n",
            "from mlflow.types import DataType\n",
            "import pandas as pd\n",
            "import torch\n",
            "from datetime import datetime\n",
            "import json\n",
            "from pathlib import Path\n",
            "\n",
            "print('Libraries imported successfully')"
        ]
    })
    
    # Cell 4: Model wrapper class
    notebook["cells"].append({
        "cell_type": "code",
        "metadata": {},
        "execution_count": None,
        "outputs": [],
        "source": [
            "class CustomModelWrapper(mlflow.pyfunc.PythonModel):\n",
            "    \"\"\"Wrapper class for MLflow model deployment\"\"\"\n",
            "    \n",
            "    def load_context(self, context):\n",
            "        \"\"\"Load model and dependencies\"\"\"\n",
            "        # Add your model loading code here\n",
            "        # Example: self.model = torch.load(context.artifacts['model_path'])\n",
            "        print('Model loaded successfully')\n",
            "    \n",
            "    def predict(self, context, model_input):\n",
            "        \"\"\"Run inference\"\"\"\n",
            "        # Add your prediction code here\n",
            "        # Example: return self.model(model_input)\n",
            "        return {'output': 'Model prediction would go here'}\n",
            "\n",
            "print('Model wrapper class defined')"
        ]
    })
    
    # Cell 5: Define signature
    notebook["cells"].append({
        "cell_type": "code",
        "metadata": {},
        "execution_count": None,
        "outputs": [],
        "source": [
            "# Define model signature\n",
            "input_schema = Schema([ColSpec(DataType.string, 'input')])\n",
            "output_schema = Schema([ColSpec(DataType.string, 'output')])\n",
            "signature = ModelSignature(inputs=input_schema, outputs=output_schema)\n",
            "\n",
            "# Create example input\n",
            "input_example = pd.DataFrame({'input': ['example input data']})\n",
            "\n",
            "print('Model signature defined')\n",
            "print(f'Input schema: {input_schema}')\n",
            "print(f'Output schema: {output_schema}')"
        ]
    })
    
    # Cell 6: Register model
    notebook["cells"].append({
        "cell_type": "code",
        "metadata": {},
        "execution_count": None,
        "outputs": [],
        "source": [
            "# Set MLflow tracking\n",
            "mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)\n",
            "mlflow.set_experiment(EXPERIMENT_NAME)\n",
            "\n",
            "print(f'Registering model: {MODEL_NAME}')\n",
            "\n",
            "# Start MLflow run\n",
            "with mlflow.start_run(run_name=f\"{MODEL_NAME}-{datetime.now().strftime('%Y%m%d-%H%M%S')}\") as run:\n",
            "    # Log parameters\n",
            "    mlflow.log_param('model_version', MODEL_VERSION)\n",
            "    mlflow.log_param('student_name', STUDENT_NAME)\n",
            "    mlflow.log_param('project_title', PROJECT_TITLE)\n",
            "    \n",
            "    # Log model\n",
            "    mlflow.pyfunc.log_model(\n",
            "        artifact_path='model',\n",
            "        python_model=CustomModelWrapper(),\n",
            "        signature=signature,\n",
            "        input_example=input_example,\n",
            "        registered_model_name=MODEL_NAME\n",
            "    )\n",
            "    \n",
            "    print(f'‚úì Model registered: {MODEL_NAME}')\n",
            "    print(f'‚úì Run ID: {run.info.run_id}')\n",
            "    print(f'‚úì Check HP AI Studio Deployments tab')"
        ]
    })
    
    # Cell 7: Verification
    notebook["cells"].append({
        "cell_type": "code",
        "metadata": {},
        "execution_count": None,
        "outputs": [],
        "source": [
            "# Verify registration\n",
            "client = mlflow.tracking.MlflowClient()\n",
            "model_versions = client.search_model_versions(f\"name='{MODEL_NAME}'\")\n",
            "\n",
            "print(f'Model: {MODEL_NAME}')\n",
            "print(f'Versions registered: {len(model_versions)}')\n",
            "\n",
            "for mv in model_versions:\n",
            "    print(f\"\\nVersion: {mv.version}\")\n",
            "    print(f\"Stage: {mv.current_stage}\")\n",
            "    print(f\"Status: {mv.status}\")"
        ]
    })
    
    # Save notebook
    notebook_path = Path("Register_Model.ipynb")
    with open(notebook_path, 'w') as f:
        json.dump(notebook, f, indent=2)
    
    return notebook_path

# Create the notebook
try:
    notebook_path = create_register_notebook()
    print(f"‚úì Created: {notebook_path}")
    print("\nNext steps:")
    print("1. Open Register_Model.ipynb")
    print("2. Update configuration with your model details")
    print("3. Run all cells to register your model")
    print("4. Check HP AI Studio Deployments tab")
except Exception as e:
    print(f"‚ùå Error creating notebook: {e}")

print("="*60)


CREATING REGISTER_MODEL NOTEBOOK
‚úì Created: Register_Model.ipynb

Next steps:
1. Open Register_Model.ipynb
2. Update configuration with your model details
3. Run all cells to register your model
4. Check HP AI Studio Deployments tab


In [7]:
print("\n" + "="*60)
print("HUGGING FACE AUTHENTICATION")
print("="*60)

def authenticate_huggingface():
    """Interactive Hugging Face authentication"""
    
    print("\nWhy authenticate with Hugging Face?")
    print("  ‚Ä¢ Access to 500,000+ pre-trained models")
    print("  ‚Ä¢ Download datasets for training")
    print("  ‚Ä¢ Use gated models (Llama, Stable Diffusion, etc.)")
    print("  ‚Ä¢ Share your trained models (optional)")
    
    # Check if already authenticated
    try:
        from huggingface_hub import whoami
        user_info = whoami()
        print(f"\n‚úì Already logged in as: {user_info['name']}")
        response = input("\nContinue with this account? (y/n): ").lower()
        if response == 'y':
            print("‚úì Using existing authentication")
            return True
    except:
        print("\n‚Ä¢ No existing Hugging Face login found")
    
    # Get authentication token
    print("\n" + "-"*60)
    print("HOW TO GET YOUR HUGGING FACE TOKEN:")
    print("-"*60)
    print("1. Go to: https://huggingface.co/settings/tokens")
    print("2. Click 'Create new token'")
    print("3. Name it: 'HP-AI-Studio-Student'")
    print("4. Select: 'Read' access (or 'Write' if you'll publish models)")
    print("5. Click 'Create token'")
    print("6. Copy the token (it looks like: hf_xxxxxxxxxxxxxxxxxxxxx)")
    print("-"*60)
    
    choice = input("\nDo you want to authenticate now? (y/n): ").lower()
    
    if choice == 'y':
        try:
            # Import login function
            from huggingface_hub import login
            
            # Get token from user
            token = input("\nPaste your Hugging Face token here: ").strip()
            
            # Validate token format
            if not token.startswith('hf_'):
                print("\n‚ö†Ô∏è Warning: Token should start with 'hf_'")
                confirm = input("Continue anyway? (y/n): ").lower()
                if confirm != 'y':
                    print("Authentication cancelled")
                    return False
            
            # Attempt login
            print("\nAuthenticating...")
            login(token=token, add_to_git_credential=True)
            
            # Verify authentication
            from huggingface_hub import whoami
            user_info = whoami()
            
            print(f"\n‚úì Successfully authenticated as: {user_info['name']}")
            print("‚úì You can now access Hugging Face models and datasets")
            
            return True
            
        except Exception as e:
            print(f"\n‚ùå Authentication failed: {e}")
            print("\nTroubleshooting:")
            print("  1. Verify token is correct")
            print("  2. Check token has required permissions")
            print("  3. Try creating a new token")
            return False
    else:
        print("\n‚ÑπÔ∏è Skipping authentication")
        print("You can authenticate later by running:")
        print("  from huggingface_hub import login")
        print("  login()")
        return False

# Run authentication
hf_authenticated = authenticate_huggingface()

print("\n" + "="*60)


HUGGING FACE AUTHENTICATION

Why authenticate with Hugging Face?
  ‚Ä¢ Access to 500,000+ pre-trained models
  ‚Ä¢ Download datasets for training
  ‚Ä¢ Use gated models (Llama, Stable Diffusion, etc.)
  ‚Ä¢ Share your trained models (optional)

‚úì Already logged in as: Riya119



Continue with this account? (y/n):  y


‚úì Using existing authentication



In [8]:
print("\n" + "="*60)
print("üéâ SETUP COMPLETE!")
print("="*60)

print("\nYour HP AI Studio environment is configured and ready.")
print("All core dependencies are installed and tested.")

if gpu_available:
    print("\n‚úì GPU: Detected and functional")
else:
    print("\n‚ÑπÔ∏è GPU: Not detected (using CPU mode)")

if hf_authenticated:
    print("‚úì Hugging Face: Authenticated")
else:
    print("‚ÑπÔ∏è Hugging Face: Not authenticated (optional)")

print("\n" + "="*60)
print("NEXT STEPS FOR YOUR AI PROJECT:")
print("="*60)

print("\n1. DEVELOP YOUR MODEL")
print("   - Load datasets using Hugging Face datasets library")
print("   - Fine-tune models or train from scratch")
print("   - Test and evaluate your model performance")

print("\n2. SAVE YOUR MODEL")
print("   - Use torch.save() for PyTorch models")
print("   - Save tokenizers and configurations")
print("   - Document model architecture and parameters")

print("\n3. REGISTER FOR DEPLOYMENT")
print("   - Open Register_Model.ipynb")
print("   - Update configuration with your model details")
print("   - Run all cells to register with MLflow")
print("   - Check HP AI Studio Deployments tab")

print("\n4. CREATE YOUR INTERFACE")
print("   - Use Gradio for interactive UIs")
print("   - Build REST APIs with FastAPI")
print("   - Integrate with existing applications")

print("\n5. DOCUMENT YOUR WORK")
print("   - Keep a development journal")
print("   - Screenshot important results")
print("   - Record process and iterations")
print("   - Prepare portfolio presentation")

if not hf_authenticated:
    print("\n‚ö†Ô∏è RECOMMENDATION:")
    print("   Run Cell 7 again to set up Hugging Face authentication")
    print("   This will give you access to more models and datasets")

print("\n" + "="*60)
print("HELPFUL RESOURCES:")
print("="*60)
print("  ‚Ä¢ HP AI Studio Docs: https://zdocs.datascience.hp.com/docs/aistudio/")
print("  ‚Ä¢ Hugging Face: https://huggingface.co/")
print("  ‚Ä¢ MLflow Documentation: https://mlflow.org/docs/latest/")
print("  ‚Ä¢ PyTorch Tutorials: https://pytorch.org/tutorials/")
print("  ‚Ä¢ Gradio Documentation: https://gradio.app/docs/")

print("\n" + "="*60)
print("REMEMBER:")
print("="*60)
print("  ‚Ä¢ Save your work frequently (Ctrl+S)")
print("  ‚Ä¢ Document your process in your project journal")
print("  ‚Ä¢ Test on small datasets before full training")
print("  ‚Ä¢ Ask for help in office hours if needed")
print("  ‚Ä¢ Clear GPU memory: torch.cuda.empty_cache()")

print("\n‚úì You're ready to begin your AI project!")
print("  Good luck with your creative AI development!")
print("\n" + "="*60)


üéâ SETUP COMPLETE!

Your HP AI Studio environment is configured and ready.
All core dependencies are installed and tested.

‚ÑπÔ∏è GPU: Not detected (using CPU mode)
‚úì Hugging Face: Authenticated

NEXT STEPS FOR YOUR AI PROJECT:

1. DEVELOP YOUR MODEL
   - Load datasets using Hugging Face datasets library
   - Fine-tune models or train from scratch
   - Test and evaluate your model performance

2. SAVE YOUR MODEL
   - Use torch.save() for PyTorch models
   - Save tokenizers and configurations
   - Document model architecture and parameters

3. REGISTER FOR DEPLOYMENT
   - Open Register_Model.ipynb
   - Update configuration with your model details
   - Run all cells to register with MLflow
   - Check HP AI Studio Deployments tab

4. CREATE YOUR INTERFACE
   - Use Gradio for interactive UIs
   - Build REST APIs with FastAPI
   - Integrate with existing applications

5. DOCUMENT YOUR WORK
   - Keep a development journal
   - Screenshot important results
   - Record process and iterat

In [9]:
# =====================================================
# CELL 1: LOAD TRAINING DATA
# =====================================================

import json
import os

print("üìÇ Loading training data...")
print(f"Current directory: {os.getcwd()}")

# Try different paths
possible_paths = [
    './DeepLearning/synthetic_data/full_10k/training_data.json',
    '../DeepLearning/synthetic_data/full_10k/training_data.json',
    './synthetic_data/full_10k/training_data.json',
    '/home/jovyan/local/DeepLearning/synthetic_data/full_10k/training_data.json'
]

training_data = None
for path in possible_paths:
    if os.path.exists(path):
        print(f"‚úÖ Found data at: {path}")
        with open(path, 'r') as f:
            training_data = json.load(f)
        break

if training_data:
    print(f"‚úÖ Loaded {len(training_data)} boards")
    print(f"üìù Total notes: {sum(board['total_notes'] for board in training_data):,}")
    print("\nReady for Brain AI v2!")
else:
    print("‚ùå Could not find training data")
    print("\nAvailable files:")
    for item in os.listdir('.'):
        print(f"   - {item}")

üìÇ Loading training data...
Current directory: /home/jovyan/local/DeepLearning/models
‚úÖ Found data at: /home/jovyan/local/DeepLearning/synthetic_data/full_10k/training_data.json
‚úÖ Loaded 10000 boards
üìù Total notes: 899,195

Ready for Brain AI v2!


In [10]:
"""
PRISM BRAIN AI V2 - Multi-Modal Research Analyzer
Complete implementation for Jupyter notebook
"""

import json
import re
import os
from datetime import datetime
from collections import defaultdict, Counter
from typing import Dict, List, Optional, Tuple
import hashlib
import requests

# Check for optional dependencies
try:
    import whisper
    WHISPER_AVAILABLE = True
    print("‚úì Whisper available for audio processing")
except:
    WHISPER_AVAILABLE = False
    print("‚ÑπÔ∏è  Whisper not installed (run: pip install openai-whisper)")

try:
    import PyPDF2
    PDF_AVAILABLE = True
    print("‚úì PyPDF2 available for PDF processing")
except:
    PDF_AVAILABLE = False
    print("‚ÑπÔ∏è  PyPDF2 not installed (run: pip install PyPDF2)")

try:
    from pptx import Presentation
    PPTX_AVAILABLE = True
    print("‚úì python-pptx available for PowerPoint processing")
except:
    PPTX_AVAILABLE = False
    print("‚ÑπÔ∏è  python-pptx not installed (run: pip install python-pptx)")


        return {
            'success': True,class PRISMBrainV2:
    """
    PRISM Brain AI v2 - Multi-modal research synthesis
    
    Features:
    - FigJam: Full board analysis (sticky notes + arrows + diagrams)
    - Audio: Whisper transcription with tone analysis
    - Documents: PDF/PPT/DOCX analysis
    - Real-time synthesis and updates
    """
    
    def __init__(self, training_data=None, figjam_token=None):
        self.training_data = training_data
        self.figjam_token = figjam_token
        self.patterns = {}
        self.projects = {}
        
        # Initialize Whisper if available
        if WHISPER_AVAILABLE:
            try:
                print("üéôÔ∏è  Loading Whisper model...")
                self.whisper_model = whisper.load_model("base")
                print("‚úì Whisper model loaded")
            except Exception as e:
                print(f"‚ö†Ô∏è  Whisper load error: {e}")
                self.whisper_model = None
        else:
            self.whisper_model = None
        
        # Learn from training data
        if training_data:
            self._initialize_from_training()
        
        print("\nüß† PRISM Brain AI v2 initialized")
        print("   ‚úì FigJam: URL input with full board analysis")
        print("   ‚úì Audio: File upload with Whisper transcription")
        print("   ‚úì Documents: PDF/PPT file upload")
    
    def _initialize_from_training(self):
        """Learn patterns from training data"""
        print("üìö Learning from training data...")
        all_notes = []
        for board in self.training_data[:100]:  # Sample for speed
            all_notes.extend(board['notes'])
        self.patterns['keywords'] = self._learn_keywords(all_notes)
        print(f"   ‚úì Learned patterns from {len(all_notes):,} notes")
    
    def _learn_keywords(self, notes):
        """Learn keywords for content types"""
        keywords = defaultdict(set)
        for note in notes:
            content_type = note['true_type']
            words = note['content'].lower().split()
            keywords[content_type].update(words[:3])
        return {k: list(v)[:15] for k, v in keywords.items()}
    
    # =====================================================
    # PROJECT MANAGEMENT
    # =====================================================
    
    def create_project(self, project_name: str) -> str:
        """Create new PRISM project"""
        project_id = hashlib.md5(f"{project_name}{datetime.now()}".encode()).hexdigest()[:8]
        
        self.projects[project_id] = {
            'name': project_name,
            'created_at': datetime.now().isoformat(),
            'sources': [],
            'notes': [],
            'connections': [],
            'diagrams': [],
            'timeline': [],
            'contributors': {},
            'insights': {},
            'last_updated': datetime.now().isoformat()
        }
        
        print(f"‚úÖ Created project: {project_name} (ID: {project_id})")
        return project_id
    
    # =====================================================
    # FIGJAM: URL UPLOAD WITH FULL ANALYSIS
    # =====================================================
    
    def ingest_figjam_url(self, project_id: str, figjam_url: str):
        """Ingest FigJam board from URL - analyzes everything"""
        print(f"\nüì• Ingesting FigJam board...")
        
        # Extract file key
        file_key = self._extract_figjam_key(figjam_url)
        if not file_key:
            return {'error': 'Invalid FigJam URL format'}
        
        if not self.figjam_token:
            return {'error': 'FigJam token not configured'}
        
        # Fetch board
        board_data = self._fetch_figjam_board(file_key)
        if not board_data:
            return {'error': 'Could not fetch board from API'}
        
        project = self.projects[project_id]
        board_name = board_data.get('name', 'Untitled Board')
        
        # Extract all elements
        sticky_notes = []
        arrows = []
        shapes = []
        
        def traverse(node, parent=None):
            node_type = node.get('type')
            
            if node_type == 'STICKY':
                sticky_notes.append({
                    'id': node.get('id'),
                    'content': node.get('characters', ''),
                    'color': self._map_color(node),
                    'author': node.get('lastModifier', {}).get('name', 'Unknown'),
                    'position': node.get('absoluteBoundingBox', {}),
                    'parent': parent
                })
            
            elif node_type == 'CONNECTOR':
                arrows.append({
                    'id': node.get('id'),
                    'from': node.get('connectorStart', {}).get('endpointNodeId'),
                    'to': node.get('connectorEnd', {}).get('endpointNodeId')
                })
            
            elif node_type in ['RECTANGLE', 'ELLIPSE', 'TEXT']:
                shapes.append({
                    'id': node.get('id'),
                    'type': node_type.lower(),
                    'content': node.get('characters', ''),
                    'position': node.get('absoluteBoundingBox', {})
                })
            
            if 'children' in node:
                for child in node['children']:
                    traverse(child, node.get('name') if node_type == 'FRAME' else parent)
        
        traverse(board_data.get('document', {}))
        
        print(f"   ‚úì {len(sticky_notes)} sticky notes")
        print(f"   ‚úì {len(arrows)} connections")
        print(f"   ‚úì {len(shapes)} diagrams/shapes")
        
        # Analyze notes
        analyzed_notes = []
        for sticky in sticky_notes:
            analysis = self._analyze_content(sticky['content'], sticky['color'])
            
            note = {
                'id': sticky['id'],
                'source': 'figjam',
                'source_name': board_name,
                'content': sticky['content'],
                'color': sticky['color'],
                'predicted_type': analysis['predicted_type'],
                'confidence': analysis['confidence'],
                'contributor': sticky['author'],
                'created_at': datetime.now().isoformat(),
                'position': sticky['position'],
                'sentiment': self._detect_sentiment(sticky['content']),
                'priority': self._calc_priority(sticky['content'], analysis),
                'tags': self._extract_tags(sticky['content'])
            }
            analyzed_notes.append(note)
        
        # Store connections
        for arrow in arrows:
            project['connections'].append({
                'from_note': arrow['from'],
                'to_note': arrow['to'],
                'relationship': 'connects_to',
                'source': 'figjam'
            })
        
        project['diagrams'].extend(shapes)
        
        # Add to project
        project['sources'].append({
            'type': 'figjam',
            'name': board_name,
            'url': figjam_url,
            'added_at': datetime.now().isoformat(),
            'note_count': len(analyzed_notes),
            'connection_count': len(arrows),
            'diagram_count': len(shapes)
        })
        
        project['notes'].extend(analyzed_notes)
        project['last_updated'] = datetime.now().isoformat()
        
        self._update_timeline(project, analyzed_notes)
        self._update_contributors(project, analyzed_notes)
        
        print(f"   ‚úÖ FigJam board ingested successfully")
        
            'notes': len(analyzed_notes),
            'connections': len(arrows),
            'diagrams': len(shapes)
        }
    
    def _extract_figjam_key(self, url: str) -> Optional[str]:
        """Extract file key from FigJam URL"""
        match = re.search(r'/board/([a-zA-Z0-9_-]+)', url)
        if match:
            return match.group(1)
        match = re.search(r'/file/([a-zA-Z0-9_-]+)', url)
        if match:
            return match.group(1)
        return None
    
    def _fetch_figjam_board(self, file_key: str) -> Optional[Dict]:
        """Fetch from FigJam API"""
        url = f"https://api.figma.com/v1/files/{file_key}"
        headers = {"X-Figma-Token": self.figjam_token}
        
        try:
            response = requests.get(url, headers=headers, timeout=30)
            if response.status_code == 200:
                return response.json()
            else:
                print(f"   ‚ùå API error: {response.status_code}")
                return None
        except Exception as e:
            print(f"   ‚ùå Error: {e}")
            return None
    
    def _map_color(self, node):
        """Map RGB to color names"""
        fills = node.get('fills', [])
        if not fills or fills[0].get('type') != 'SOLID':
            return 'YELLOW'
        
        c = fills[0].get('color', {})
        r, g, b = c.get('r', 1), c.get('g', 1), c.get('b', 1)
        
        if r > 0.8 and g < 0.5 and b < 0.5:
            return 'RED'
        elif r > 0.8 and g > 0.5 and b < 0.3:
            return 'ORANGE'
        elif r > 0.8 and g > 0.8 and b < 0.5:
            return 'YELLOW'
        elif r < 0.5 and g > 0.7 and b < 0.5:
            return 'GREEN'
        elif r < 0.5 and g < 0.5 and b > 0.8:
            return 'BLUE'
        elif r > 0.5 and g < 0.5 and b > 0.7:
            return 'PURPLE'
        elif r > 0.8 and g < 0.5 and b > 0.6:
            return 'PINK'
        else:
            return 'GRAY'
    
    # =====================================================
    # AUDIO: FILE UPLOAD WITH WHISPER
    # =====================================================
    
    def ingest_audio_file(self, project_id: str, audio_path: str):
        """Ingest audio file with Whisper transcription"""
        print(f"\nüéôÔ∏è  Processing audio file...")
        
        if not WHISPER_AVAILABLE or not self.whisper_model:
            return {'error': 'Whisper not available'}
        
        project = self.projects[project_id]
        file_name = os.path.basename(audio_path)
        
        print(f"   Transcribing: {file_name}")
        
        try:
            result = self.whisper_model.transcribe(
                audio_path,
                word_timestamps=True,
                verbose=False
            )
        except Exception as e:
            return {'error': f'Transcription failed: {e}'}
        
        segments = result['segments']
        print(f"   ‚úì Transcribed {len(segments)} segments")
        
        # Extract insights
        analyzed_notes = []
        for i, seg in enumerate(segments):
            text = seg['text'].strip()
            if len(text) < 10:
                continue
            
            # Detect tone
            tone = self._analyze_tone(seg, text)
            
            # Extract key points
            points = self._extract_insights(text)
            
            for point in points:
                analysis = self._analyze_content(point, 'YELLOW')
                
                note = {
                    'id': f"audio_{file_name}_{i}_{len(analyzed_notes)}",
                    'source': 'audio',
                    'source_name': file_name,
                    'content': point,
                    'full_segment': text,
                    'predicted_type': analysis['predicted_type'],
                    'confidence': analysis['confidence'],
                    'contributor': 'Speaker',
                    'created_at': datetime.now().isoformat(),
                    'timestamp': f"{seg['start']:.1f}s",
                    'audio_tone': tone,
                    'sentiment': self._detect_sentiment(point),
                    'priority': self._calc_priority(point, analysis),
                    'tags': self._extract_tags(point)
                }
                analyzed_notes.append(note)
        
        project['sources'].append({
            'type': 'audio',
            'name': file_name,
            'added_at': datetime.now().isoformat(),
            'duration': f"{result.get('duration', 0):.1f}s",
            'note_count': len(analyzed_notes)
        })
        
        project['notes'].extend(analyzed_notes)
        project['last_updated'] = datetime.now().isoformat()
        
        self._update_timeline(project, analyzed_notes)
        self._update_contributors(project, analyzed_notes)
        
        print(f"   ‚úÖ Extracted {len(analyzed_notes)} insights")
        
        return {
            'success': True,
            'notes': len(analyzed_notes),
            'duration': result.get('duration', 0)
        }
    
    def _analyze_tone(self, segment, text):
        """Detect speaker tone"""
        if '!' in text or text.isupper():
            return 'emphatic'
        elif '?' in text:
            return 'questioning'
        else:
            return 'neutral'
    
    def _extract_insights(self, text):
        """Extract key points from text"""
        points = []
        
        if any(w in text.lower() for w in ['problem', 'issue', 'difficult']):
            points.append(f"Pain point: {text}")
        elif '?' in text:
            points.append(f"Question: {text}")
        elif any(w in text.lower() for w in ['decided', 'agreed', 'will']):
            points.append(f"Decision: {text}")
        elif '"' in text:
            points.append(f"Quote: {text}")
        else:
            points.append(text)
        
        return points
    
    # =====================================================
    # DOCUMENTS: FILE UPLOAD
    # =====================================================
    
    def ingest_document_file(self, project_id: str, doc_path: str):
        """Ingest document file"""
        print(f"\nüìÑ Processing document...")
        
        file_name = os.path.basename(doc_path)
        ext = os.path.splitext(file_name)[1].lower()
        
        if ext == '.pdf':
            return self._ingest_pdf(project_id, doc_path)
        elif ext in ['.ppt', '.pptx']:
            return self._ingest_ppt(project_id, doc_path)
        else:
            return self._ingest_text(project_id, doc_path)
    
    def _ingest_pdf(self, project_id: str, pdf_path: str):
        """Extract from PDF"""
        if not PDF_AVAILABLE:
            return {'error': 'PyPDF2 not installed'}
        
        project = self.projects[project_id]
        file_name = os.path.basename(pdf_path)
        
        analyzed_notes = []
        
        try:
            with open(pdf_path, 'rb') as f:
                reader = PyPDF2.PdfReader(f)
                print(f"   Processing {len(reader.pages)} pages")
                
                for page_num, page in enumerate(reader.pages, 1):
                    text = page.extract_text()
                    paras = [p.strip() for p in text.split('\n\n') if len(p.strip()) > 50]
                    
                    for para in paras:
                        analysis = self._analyze_content(para, 'YELLOW')
                        
                        note = {
                            'id': f"pdf_{file_name}_p{page_num}_{len(analyzed_notes)}",
                            'source': 'pdf',
                            'source_name': file_name,
                            'content': para[:200],
                            'full_text': para,
                            'predicted_type': analysis['predicted_type'],
                            'confidence': analysis['confidence'],
                            'contributor': 'Author',
                            'created_at': datetime.now().isoformat(),
                            'page_number': page_num,
                            'sentiment': self._detect_sentiment(para),
                            'priority': self._calc_priority(para, analysis),
                            'tags': self._extract_tags(para)
                        }
                        analyzed_notes.append(note)
        except Exception as e:
            return {'error': f'PDF processing failed: {e}'}
        
        project['sources'].append({
            'type': 'pdf',
            'name': file_name,
            'added_at': datetime.now().isoformat(),
            'pages': len(reader.pages),
            'note_count': len(analyzed_notes)
        })
        
        project['notes'].extend(analyzed_notes)
        project['last_updated'] = datetime.now().isoformat()
        
        self._update_timeline(project, analyzed_notes)
        self._update_contributors(project, analyzed_notes)
        
        print(f"   ‚úÖ Extracted {len(analyzed_notes)} insights")
        
        return {'success': True, 'notes': len(analyzed_notes)}
    
    def _ingest_ppt(self, project_id: str, ppt_path: str):
        """Extract from PowerPoint"""
        if not PPTX_AVAILABLE:
            return {'error': 'python-pptx not installed'}
        
        project = self.projects[project_id]
        file_name = os.path.basename(ppt_path)
        
        analyzed_notes = []
        
        try:
            prs = Presentation(ppt_path)
            print(f"   Processing {len(prs.slides)} slides")
            
            for slide_num, slide in enumerate(prs.slides, 1):
                text = ' '.join(shape.text for shape in slide.shapes if hasattr(shape, "text"))
                
                if len(text) > 20:
                    analysis = self._analyze_content(text, 'YELLOW')
                    
                    note = {
                        'id': f"ppt_{file_name}_s{slide_num}",
                        'source': 'powerpoint',
                        'source_name': file_name,
                        'content': text[:200],
                        'full_text': text,
                        'predicted_type': analysis['predicted_type'],
                        'confidence': analysis['confidence'],
                        'contributor': 'Presenter',
                        'created_at': datetime.now().isoformat(),
                        'slide_number': slide_num,
                        'sentiment': self._detect_sentiment(text),
                        'priority': self._calc_priority(text, analysis),
                        'tags': self._extract_tags(text)
                    }
                    analyzed_notes.append(note)
        except Exception as e:
            return {'error': f'PPT processing failed: {e}'}
        
        project['sources'].append({
            'type': 'powerpoint',
            'name': file_name,
            'added_at': datetime.now().isoformat(),
            'slides': len(prs.slides),
            'note_count': len(analyzed_notes)
        })
        
        project['notes'].extend(analyzed_notes)
        self._update_timeline(project, analyzed_notes)
        
        print(f"   ‚úÖ Extracted {len(analyzed_notes)} insights")
        
        return {'success': True, 'notes': len(analyzed_notes)}
    
    def _ingest_text(self, project_id: str, text_path: str):
        """Process text file"""
        project = self.projects[project_id]
        file_name = os.path.basename(text_path)
        
        with open(text_path, 'r') as f:
            text = f.read()
        
        analyzed_notes = []
        paras = [p.strip() for p in text.split('\n\n') if len(p.strip()) > 50]
        
        for i, para in enumerate(paras):
            analysis = self._analyze_content(para, 'YELLOW')
            
            note = {
                'id': f"txt_{file_name}_{i}",
                'source': 'document',
                'source_name': file_name,
                'content': para[:200],
                'full_text': para,
                'predicted_type': analysis['predicted_type'],
                'confidence': analysis['confidence'],
                'contributor': 'Author',
                'created_at': datetime.now().isoformat(),
                'sentiment': self._detect_sentiment(para),
                'priority': self._calc_priority(para, analysis),
                'tags': self._extract_tags(para)
            }
            analyzed_notes.append(note)
        
        project['sources'].append({
            'type': 'document',
            'name': file_name,
            'added_at': datetime.now().isoformat(),
            'note_count': len(analyzed_notes)
        })
        
        project['notes'].extend(analyzed_notes)
        self._update_timeline(project, analyzed_notes)
        
        return {'success': True, 'notes': len(analyzed_notes)}
    
    # =====================================================
    # ANALYSIS FUNCTIONS
    # =====================================================
    
    def _analyze_content(self, content: str, color: str) -> Dict:
        """Analyze content type"""
        content_lower = content.lower()
        
        if '?' in content:
            return {'predicted_type': 'question', 'confidence': 0.8}
        elif '"' in content:
            return {'predicted_type': 'quote', 'confidence': 0.7}
        elif any(w in content_lower for w in ['problem', 'issue', 'error', 'broken']):
            return {'predicted_type': 'pain_point', 'confidence': 0.75}
        elif any(w in content_lower for w in ['love', 'great', 'awesome', 'excellent']):
            return {'predicted_type': 'positive', 'confidence': 0.7}
        elif any(w in content_lower for w in ['could', 'should', 'what if', 'idea']):
            return {'predicted_type': 'idea', 'confidence': 0.7}
        else:
            return {'predicted_type': 'neutral', 'confidence': 0.6}
    
    def _detect_sentiment(self, content: str) -> str:
        """Detect sentiment"""
        content_lower = content.lower()
        pos = sum(1 for w in ['love', 'great', 'good', 'excellent'] if w in content_lower)
        neg = sum(1 for w in ['hate', 'bad', 'terrible', 'broken'] if w in content_lower)
        return 'positive' if pos > neg else 'negative' if neg > pos else 'neutral'
    
    def _calc_priority(self, content: str, analysis: Dict) -> str:
        """Calculate priority"""
        if analysis['predicted_type'] == 'pain_point':
            return 'high'
        if any(w in content.lower() for w in ['critical', 'urgent', 'blocker']):
            return 'high'
        if analysis['predicted_type'] == 'neutral':
            return 'low'
        return 'medium'
    
    def _extract_tags(self, content: str) -> List[str]:
        """Extract tags"""
        tags = []
        content_lower = content.lower()
        
        tag_map = {
            'navigation': ['navigation', 'nav', 'menu'],
            'mobile': ['mobile', 'phone'],
            'performance': ['slow', 'fast', 'loading'],
            'accessibility': ['accessibility', 'a11y'],
            'search': ['search', 'find'],
            'error': ['error', 'bug', 'broken']
        }
        
        for tag, keywords in tag_map.items():
            if any(kw in content_lower for kw in keywords):
                tags.append(tag)
        
        return tags[:3]
    
    def _update_timeline(self, project: Dict, notes: List[Dict]):
        """Update timeline"""
        for note in notes:
            project['timeline'].append({
                'timestamp': note.get('created_at'),
                'contributor': note['contributor'],
                'content_preview': note['content'][:100],
                'note_id': note['id'],
                'source': note['source']
            })
        project['timeline'].sort(key=lambda x: x['timestamp'])
    
    def _update_contributors(self, project: Dict, notes: List[Dict]):
        """Update contributors"""
        for note in notes:
            contributor = note['contributor']
            if contributor not in project['contributors']:
                project['contributors'][contributor] = {
                    'total_contributions': 0,
                    'note_types': defaultdict(int)
                }
            
            project['contributors'][contributor]['total_contributions'] += 1
            project['contributors'][contributor]['note_types'][note['predicted_type']] += 1
    
    # =====================================================
    # SYNTHESIS
    # =====================================================
    
    def synthesize_project(self, project_id: str) -> Dict:
        """Generate project synthesis"""
        project = self.projects[project_id]
        notes = project['notes']
        
        by_type = defaultdict(list)
        by_priority = defaultdict(list)
        
        for note in notes:
            by_type[note['predicted_type']].append(note)
            by_priority[note['priority']].append(note)
        
        all_tags = []
        for note in notes:
            all_tags.extend(note.get('tags', []))
        
        tag_counts = Counter(all_tags)
        themes = [
            {'name': tag, 'frequency': count, 'percentage': (count/len(notes))*100}
            for tag, count in tag_counts.most_common(10)
        ]
        
        action_items = [
            {
                'content': n['content'], 
                'type': n['predicted_type'],
                'contributor': n['contributor'],
                'source': n['source_name']
            }
            for n in notes if n['priority'] == 'high'
        ][:20]
        
        sentiment_dist = Counter(n.get('sentiment', 'neutral') for n in notes)
        
        synthesis = {
            'project_name': project['name'],
            'last_updated': project['last_updated'],
            'total_notes': len(notes),
            'total_sources': len(project['sources']),
            'contributors': len(project['contributors']),
            'by_type': dict(by_type),
            'by_priority': dict(by_priority),
            'by_contributor': project['contributors'],
            'timeline': project['timeline'],
            'themes': themes,
            'action_items': action_items,
            'stats': {
                'sentiment_distribution': dict(sentiment_dist),
                'avg_confidence': sum(n.get('confidence', 0) for n in notes) / len(notes) if notes else 0
            }
        }
        
        project['insights'] = synthesis
        return synthesis
    
    def refresh_project(self, project_id: str):
        """Refresh analysis"""
        return self.synthesize_project(project_id)

print("\n‚úÖ PRISM Brain AI v2 loaded successfully!")
print("   Ready for Gradio UI")

‚úì Whisper available for audio processing
‚úì PyPDF2 available for PDF processing
‚úì python-pptx available for PowerPoint processing

‚úÖ PRISM Brain AI v2 loaded successfully!
   Ready for Gradio UI


In [11]:
# =====================================================
# INSTALL DEPENDENCIES
# =====================================================

print("üì¶ Installing dependencies...")

!pip install openai-whisper
!pip install PyPDF2
!pip install python-pptx

print("\n‚úÖ All dependencies installed!")
print("   Restart kernel and re-run cells if needed")

üì¶ Installing dependencies...

‚úÖ All dependencies installed!
   Restart kernel and re-run cells if needed


In [12]:
# =====================================================
# GRADIO UI - PRISM v2
# =====================================================

import gradio as gr

print("üé® Building PRISM UI...")

# Initialize Brain with your FigJam token
figjam_token = "figd_YP-yLbvxZ0jOVR9C54bCPveiHdkFB3uZD7hKQKDF"
brain = PRISMBrainV2(training_data, figjam_token)

current_project_id = None

# =====================================================
# UI FUNCTIONS
# =====================================================

def create_project(name):
    global current_project_id
    if not name:
        return "‚ùå Enter project name"
    current_project_id = brain.create_project(name)
    return f"‚úÖ Project created: {name}"

def upload_figjam(url):
    global current_project_id
    if not current_project_id:
        return "‚ùå Create project first"
    
    result = brain.ingest_figjam_url(current_project_id, url)
    if 'error' in result:
        return f"‚ùå {result['error']}"
    return f"‚úÖ FigJam ingested!\n   Notes: {result['notes']}\n   Connections: {result['connections']}"

def upload_audio(file):
    global current_project_id
    if not current_project_id:
        return "‚ùå Create project first"
    if not file:
        return "‚ùå No file"
    
    result = brain.ingest_audio_file(current_project_id, file.name)
    if 'error' in result:
        return f"‚ùå {result['error']}"
    return f"‚úÖ Audio processed!\n   Notes: {result['notes']}"

def upload_doc(file):
    global current_project_id
    if not current_project_id:
        return "‚ùå Create project first"
    if not file:
        return "‚ùå No file"
    
    result = brain.ingest_document_file(current_project_id, file.name)
    if 'error' in result:
        return f"‚ùå {result['error']}"
    return f"‚úÖ Document processed!\n   Notes: {result['notes']}"

def analyze():
    global current_project_id
    if not current_project_id:
        return "‚ùå Create project first"
    
    s = brain.synthesize_project(current_project_id)
    
    output = f"""
üìä PRISM ANALYSIS
{'='*60}

PROJECT: {s['project_name']}

OVERVIEW
--------
Total Notes: {s['total_notes']}
Sources: {s['total_sources']}
Contributors: {s['contributors']}

BY TYPE
-------
"""
    for t, notes in sorted(s['by_type'].items(), key=lambda x: len(x[1]), reverse=True):
        output += f"{t:15} ‚Üí {len(notes)} notes\n"
    
    output += f"""
PRIORITY
--------
"""
    for p in ['high', 'medium', 'low']:
        output += f"{p:15} ‚Üí {len(s['by_priority'].get(p, []))} notes\n"
    
    output += f"""
TOP THEMES
----------
"""
    for theme in s['themes'][:5]:
        output += f"{theme['name']:15} ‚Üí {theme['frequency']} mentions\n"
    
    return output

# BUILD UI
with gr.Blocks(title="PRISM v2") as demo:
    gr.Markdown("# PRISM - Research Synthesis\n### Multi-Modal AI Analysis")
    
    gr.Markdown("## 1. Create Project")
    with gr.Row():
        project_name = gr.Textbox(label="Project Name")
        create_btn = gr.Button("Create", variant="primary")
    create_status = gr.Textbox(label="Status", lines=2)
    
    gr.Markdown("---")
    gr.Markdown("## 2. Upload Files")
    
    with gr.Tab("FigJam URL"):
        figjam_url = gr.Textbox(label="FigJam Board URL")
        figjam_btn = gr.Button("Upload")
        figjam_status = gr.Textbox(label="Status")
    
    with gr.Tab("Audio File"):
        audio_file = gr.File(label="Audio (.mp3, .wav, .mov)")
        audio_btn = gr.Button("Process")
        audio_status = gr.Textbox(label="Status")
    
    with gr.Tab("Document"):
        doc_file = gr.File(label="Document (.pdf, .pptx, .txt)")
        doc_btn = gr.Button("Process")
        doc_status = gr.Textbox(label="Status")
    
    gr.Markdown("---")
    gr.Markdown("## 3. Analyze")
    
    analyze_btn = gr.Button("üîç Generate Analysis", variant="primary", size="lg")
    analysis_output = gr.Textbox(label="Results", lines=30, show_copy_button=True)
    
    # Wire up
    create_btn.click(create_project, [project_name], [create_status])
    figjam_btn.click(upload_figjam, [figjam_url], [figjam_status])
    audio_btn.click(upload_audio, [audio_file], [audio_status])
    doc_btn.click(upload_doc, [doc_file], [doc_status])
    analyze_btn.click(analyze, [], [analysis_output])

print("üöÄ Launching PRISM...")
demo.launch(share=True)

üé® Building PRISM UI...
üéôÔ∏è  Loading Whisper model...
‚úì Whisper model loaded
üìö Learning from training data...
   ‚úì Learned patterns from 9,366 notes

üß† PRISM Brain AI v2 initialized
   ‚úì FigJam: URL input with full board analysis
   ‚úì Audio: File upload with Whisper transcription
   ‚úì Documents: PDF/PPT file upload
üöÄ Launching PRISM...
* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://3531af5ff41286a8a5.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [13]:
# Install Gradio
!pip install gradio



In [14]:
# =====================================================
# GRADIO UI - PRISM v2
# =====================================================

import gradio as gr

print("üé® Building PRISM UI...")

# Initialize Brain with your FigJam token
figjam_token = "figd_YP-yLbvxZ0jOVR9C54bCPveiHdkFB3uZD7hKQKDF"
brain = PRISMBrainV2(training_data, figjam_token)

current_project_id = None

# =====================================================
# UI FUNCTIONS
# =====================================================

def create_project(name):
    global current_project_id
    if not name:
        return "‚ùå Enter project name"
    current_project_id = brain.create_project(name)
    return f"‚úÖ Project created: {name}"

def upload_figjam(url):
    global current_project_id
    if not current_project_id:
        return "‚ùå Create project first"
    
    result = brain.ingest_figjam_url(current_project_id, url)
    if 'error' in result:
        return f"‚ùå {result['error']}"
    return f"‚úÖ FigJam ingested!\n   Notes: {result['notes']}\n   Connections: {result['connections']}"

def upload_audio(file):
    global current_project_id
    if not current_project_id:
        return "‚ùå Create project first"
    if not file:
        return "‚ùå No file"
    
    result = brain.ingest_audio_file(current_project_id, file.name)
    if 'error' in result:
        return f"‚ùå {result['error']}"
    return f"‚úÖ Audio processed!\n   Notes: {result['notes']}"

def upload_doc(file):
    global current_project_id
    if not current_project_id:
        return "‚ùå Create project first"
    if not file:
        return "‚ùå No file"
    
    result = brain.ingest_document_file(current_project_id, file.name)
    if 'error' in result:
        return f"‚ùå {result['error']}"
    return f"‚úÖ Document processed!\n   Notes: {result['notes']}"

def analyze():
    global current_project_id
    if not current_project_id:
        return "‚ùå Create project first"
    
    s = brain.synthesize_project(current_project_id)
    
    output = f"""
üìä PRISM ANALYSIS
{'='*60}

PROJECT: {s['project_name']}

OVERVIEW
--------
Total Notes: {s['total_notes']}
Sources: {s['total_sources']}
Contributors: {s['contributors']}

BY TYPE
-------
"""
    for t, notes in sorted(s['by_type'].items(), key=lambda x: len(x[1]), reverse=True):
        output += f"{t:15} ‚Üí {len(notes)} notes\n"
    
    output += f"""
PRIORITY
--------
"""
    for p in ['high', 'medium', 'low']:
        output += f"{p:15} ‚Üí {len(s['by_priority'].get(p, []))} notes\n"
    
    output += f"""
TOP THEMES
----------
"""
    for theme in s['themes'][:5]:
        output += f"{theme['name']:15} ‚Üí {theme['frequency']} mentions\n"
    
    return output

# BUILD UI
with gr.Blocks(title="PRISM v2") as demo:
    gr.Markdown("# PRISM - Research Synthesis\n### Multi-Modal AI Analysis")
    
    gr.Markdown("## 1. Create Project")
    with gr.Row():
        project_name = gr.Textbox(label="Project Name")
        create_btn = gr.Button("Create", variant="primary")
    create_status = gr.Textbox(label="Status", lines=2)
    
    gr.Markdown("---")
    gr.Markdown("## 2. Upload Files")
    
    with gr.Tab("FigJam URL"):
        figjam_url = gr.Textbox(label="FigJam Board URL")
        figjam_btn = gr.Button("Upload")
        figjam_status = gr.Textbox(label="Status")
    
    with gr.Tab("Audio File"):
        audio_file = gr.File(label="Audio (.mp3, .wav, .mov)")
        audio_btn = gr.Button("Process")
        audio_status = gr.Textbox(label="Status")
    
    with gr.Tab("Document"):
        doc_file = gr.File(label="Document (.pdf, .pptx, .txt)")
        doc_btn = gr.Button("Process")
        doc_status = gr.Textbox(label="Status")
    
    gr.Markdown("---")
    gr.Markdown("## 3. Analyze")
    
    analyze_btn = gr.Button("üîç Generate Analysis", variant="primary", size="lg")
    analysis_output = gr.Textbox(label="Results", lines=30, show_copy_button=True)
    
    # Wire up
    create_btn.click(create_project, [project_name], [create_status])
    figjam_btn.click(upload_figjam, [figjam_url], [figjam_status])
    audio_btn.click(upload_audio, [audio_file], [audio_status])
    doc_btn.click(upload_doc, [doc_file], [doc_status])
    analyze_btn.click(analyze, [], [analysis_output])

print("üöÄ Launching PRISM...")
demo.launch(share=True)

üé® Building PRISM UI...
üéôÔ∏è  Loading Whisper model...
‚úì Whisper model loaded
üìö Learning from training data...
   ‚úì Learned patterns from 9,366 notes

üß† PRISM Brain AI v2 initialized
   ‚úì FigJam: URL input with full board analysis
   ‚úì Audio: File upload with Whisper transcription
   ‚úì Documents: PDF/PPT file upload
üöÄ Launching PRISM...
* Running on local URL:  http://127.0.0.1:7861
* Running on public URL: https://c6f64d7c6ea0ab881e.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [23]:
# Run this in your BRAIN AI V2 notebook to export the brain
with open('prism_brain.py', 'w') as f:
    f.write('''"""
PRISM BRAIN AI V2 - Multi-Modal Research Analyzer
Complete implementation for standalone import
"""

import json
import re
import os
from datetime import datetime
from collections import defaultdict, Counter
from typing import Dict, List, Optional, Tuple
import hashlib
import requests

# Check for optional dependencies
try:
    import whisper
    WHISPER_AVAILABLE = True
    print("‚úì Whisper available for audio processing")
except:
    WHISPER_AVAILABLE = False
    print("‚ÑπÔ∏è  Whisper not installed (run: pip install openai-whisper)")

try:
    import PyPDF2
    PDF_AVAILABLE = True
    print("‚úì PyPDF2 available for PDF processing")
except:
    PDF_AVAILABLE = False
    print("‚ÑπÔ∏è  PyPDF2 not installed (run: pip install PyPDF2)")

try:
    from pptx import Presentation
    PPTX_AVAILABLE = True
    print("‚úì python-pptx available for PowerPoint processing")
except:
    PPTX_AVAILABLE = False
    print("‚ÑπÔ∏è  python-pptx not installed (run: pip install python-pptx)")


class PRISMBrainV2:
    """
    PRISM Brain AI v2 - Multi-modal research synthesis
    
    Features:
    - FigJam: Full board analysis (sticky notes + arrows + diagrams)
    - Audio: Whisper transcription with tone analysis
    - Documents: PDF/PPT/DOCX analysis
    - Real-time synthesis and updates
    """
    
    def __init__(self, training_data=None, figjam_token=None):
        self.training_data = training_data
        self.figjam_token = figjam_token
        self.patterns = {}
        self.projects = {}
        
        # Initialize Whisper if available
        if WHISPER_AVAILABLE:
            try:
                print("üéôÔ∏è  Loading Whisper model...")
                self.whisper_model = whisper.load_model("base")
                print("‚úì Whisper model loaded")
            except Exception as e:
                print(f"‚ö†Ô∏è  Whisper load error: {e}")
                self.whisper_model = None
        else:
            self.whisper_model = None
        
        # Learn from training data
        if training_data:
            self._initialize_from_training()
        
        print("\\nüß† PRISM Brain AI v2 initialized")
        print("   ‚úì FigJam: URL input with full board analysis")
        print("   ‚úì Audio: File upload with Whisper transcription")
        print("   ‚úì Documents: PDF/PPT file upload")
    
    def _initialize_from_training(self):
        """Learn patterns from training data"""
        print("üìö Learning from training data...")
        all_notes = []
        for board in self.training_data[:100]:  # Sample for speed
            all_notes.extend(board['notes'])
        self.patterns['keywords'] = self._learn_keywords(all_notes)
        print(f"   ‚úì Learned patterns from {len(all_notes):,} notes")
    
    def _learn_keywords(self, notes):
        """Learn keywords for content types"""
        keywords = defaultdict(set)
        for note in notes:
            content_type = note['true_type']
            words = note['content'].lower().split()
            keywords[content_type].update(words[:3])
        return {k: list(v)[:15] for k, v in keywords.items()}
    
    # =====================================================
    # PROJECT MANAGEMENT
    # =====================================================
    
    def create_project(self, project_name: str) -> str:
        """Create new PRISM project"""
        project_id = hashlib.md5(f"{project_name}{datetime.now()}".encode()).hexdigest()[:8]
        
        self.projects[project_id] = {
            'name': project_name,
            'created_at': datetime.now().isoformat(),
            'sources': [],
            'notes': [],
            'connections': [],
            'diagrams': [],
            'timeline': [],
            'contributors': {},
            'insights': {},
            'last_updated': datetime.now().isoformat()
        }
        
        print(f"‚úÖ Created project: {project_name} (ID: {project_id})")
        return project_id
    
    # =====================================================
    # FIGJAM: URL UPLOAD WITH FULL ANALYSIS
    # =====================================================
    
    def ingest_figjam_url(self, project_id: str, figjam_url: str):
        """Ingest FigJam board from URL - analyzes everything"""
        print(f"\\nüì• Ingesting FigJam board...")
        
        # Extract file key
        file_key = self._extract_figjam_key(figjam_url)
        if not file_key:
            return {'error': 'Invalid FigJam URL format'}
        
        if not self.figjam_token:
            return {'error': 'FigJam token not configured'}
        
        # Fetch board
        board_data = self._fetch_figjam_board(file_key)
        if not board_data:
            return {'error': 'Could not fetch board from API'}
        
        project = self.projects[project_id]
        board_name = board_data.get('name', 'Untitled Board')
        
        # Extract all elements
        sticky_notes = []
        arrows = []
        shapes = []
        
        def traverse(node, parent=None):
            node_type = node.get('type')
            
            if node_type == 'STICKY':
                sticky_notes.append({
                    'id': node.get('id'),
                    'content': node.get('characters', ''),
                    'color': self._map_color(node),
                    'author': node.get('lastModifier', {}).get('name', 'Unknown'),
                    'position': node.get('absoluteBoundingBox', {}),
                    'parent': parent
                })
            
            elif node_type == 'CONNECTOR':
                arrows.append({
                    'id': node.get('id'),
                    'from': node.get('connectorStart', {}).get('endpointNodeId'),
                    'to': node.get('connectorEnd', {}).get('endpointNodeId')
                })
            
            elif node_type in ['RECTANGLE', 'ELLIPSE', 'TEXT']:
                shapes.append({
                    'id': node.get('id'),
                    'type': node_type.lower(),
                    'content': node.get('characters', ''),
                    'position': node.get('absoluteBoundingBox', {})
                })
            
            if 'children' in node:
                for child in node['children']:
                    traverse(child, node.get('name') if node_type == 'FRAME' else parent)
        
        traverse(board_data.get('document', {}))
        
        print(f"   ‚úì {len(sticky_notes)} sticky notes")
        print(f"   ‚úì {len(arrows)} connections")
        print(f"   ‚úì {len(shapes)} diagrams/shapes")
        
        # Analyze notes
        analyzed_notes = []
        for sticky in sticky_notes:
            analysis = self._analyze_content(sticky['content'], sticky['color'])
            
            note = {
                'id': sticky['id'],
                'source': 'figjam',
                'source_name': board_name,
                'content': sticky['content'],
                'color': sticky['color'],
                'predicted_type': analysis['predicted_type'],
                'confidence': analysis['confidence'],
                'contributor': sticky['author'],
                'created_at': datetime.now().isoformat(),
                'position': sticky['position'],
                'sentiment': self._detect_sentiment(sticky['content']),
                'priority': self._calc_priority(sticky['content'], analysis),
                'tags': self._extract_tags(sticky['content'])
            }
            analyzed_notes.append(note)
        
        # Store connections
        for arrow in arrows:
            project['connections'].append({
                'from_note': arrow['from'],
                'to_note': arrow['to'],
                'relationship': 'connects_to',
                'source': 'figjam'
            })
        
        project['diagrams'].extend(shapes)
        
        # Add to project
        project['sources'].append({
            'type': 'figjam',
            'name': board_name,
            'url': figjam_url,
            'added_at': datetime.now().isoformat(),
            'note_count': len(analyzed_notes),
            'connection_count': len(arrows),
            'diagram_count': len(shapes)
        })
        
        project['notes'].extend(analyzed_notes)
        project['last_updated'] = datetime.now().isoformat()
        
        self._update_timeline(project, analyzed_notes)
        self._update_contributors(project, analyzed_notes)
        
        print(f"   ‚úÖ FigJam board ingested successfully")
        
        return {
            'success': True,
            'notes': len(analyzed_notes),
            'connections': len(arrows),
            'diagrams': len(shapes)
        }
    
    def _extract_figjam_key(self, url: str) -> Optional[str]:
        """Extract file key from FigJam URL"""
        match = re.search(r'/board/([a-zA-Z0-9_-]+)', url)
        if match:
            return match.group(1)
        match = re.search(r'/file/([a-zA-Z0-9_-]+)', url)
        if match:
            return match.group(1)
        return None
    
    def _fetch_figjam_board(self, file_key: str) -> Optional[Dict]:
        """Fetch from FigJam API"""
        url = f"https://api.figma.com/v1/files/{file_key}"
        headers = {"X-Figma-Token": self.figjam_token}
        
        try:
            response = requests.get(url, headers=headers, timeout=30)
            if response.status_code == 200:
                return response.json()
            else:
                print(f"   ‚ùå API error: {response.status_code}")
                return None
        except Exception as e:
            print(f"   ‚ùå Error: {e}")
            return None
    
    def _map_color(self, node):
        """Map RGB to color names"""
        fills = node.get('fills', [])
        if not fills or fills[0].get('type') != 'SOLID':
            return 'YELLOW'
        
        c = fills[0].get('color', {})
        r, g, b = c.get('r', 1), c.get('g', 1), c.get('b', 1)
        
        if r > 0.8 and g < 0.5 and b < 0.5:
            return 'RED'
        elif r > 0.8 and g > 0.5 and b < 0.3:
            return 'ORANGE'
        elif r > 0.8 and g > 0.8 and b < 0.5:
            return 'YELLOW'
        elif r < 0.5 and g > 0.7 and b < 0.5:
            return 'GREEN'
        elif r < 0.5 and g < 0.5 and b > 0.8:
            return 'BLUE'
        elif r > 0.5 and g < 0.5 and b > 0.7:
            return 'PURPLE'
        elif r > 0.8 and g < 0.5 and b > 0.6:
            return 'PINK'
        else:
            return 'GRAY'
    
    # =====================================================
    # AUDIO: FILE UPLOAD WITH WHISPER
    # =====================================================
    
    def ingest_audio_file(self, project_id: str, audio_path: str):
        """Ingest audio file with Whisper transcription"""
        print(f"\\nüéôÔ∏è  Processing audio file...")
        
        if not WHISPER_AVAILABLE or not self.whisper_model:
            return {'error': 'Whisper not available'}
        
        project = self.projects[project_id]
        file_name = os.path.basename(audio_path)
        
        print(f"   Transcribing: {file_name}")
        
        try:
            result = self.whisper_model.transcribe(
                audio_path,
                word_timestamps=True,
                verbose=False
            )
        except Exception as e:
            return {'error': f'Transcription failed: {e}'}
        
        segments = result['segments']
        print(f"   ‚úì Transcribed {len(segments)} segments")
        
        # Extract insights
        analyzed_notes = []
        for i, seg in enumerate(segments):
            text = seg['text'].strip()
            if len(text) < 10:
                continue
            
            # Detect tone
            tone = self._analyze_tone(seg, text)
            
            # Extract key points
            points = self._extract_insights(text)
            
            for point in points:
                analysis = self._analyze_content(point, 'YELLOW')
                
                note = {
                    'id': f"audio_{file_name}_{i}_{len(analyzed_notes)}",
                    'source': 'audio',
                    'source_name': file_name,
                    'content': point,
                    'full_segment': text,
                    'predicted_type': analysis['predicted_type'],
                    'confidence': analysis['confidence'],
                    'contributor': 'Speaker',
                    'created_at': datetime.now().isoformat(),
                    'timestamp': f"{seg['start']:.1f}s",
                    'audio_tone': tone,
                    'sentiment': self._detect_sentiment(point),
                    'priority': self._calc_priority(point, analysis),
                    'tags': self._extract_tags(point)
                }
                analyzed_notes.append(note)
        
        project['sources'].append({
            'type': 'audio',
            'name': file_name,
            'added_at': datetime.now().isoformat(),
            'duration': f"{result.get('duration', 0):.1f}s",
            'note_count': len(analyzed_notes)
        })
        
        project['notes'].extend(analyzed_notes)
        project['last_updated'] = datetime.now().isoformat()
        
        self._update_timeline(project, analyzed_notes)
        self._update_contributors(project, analyzed_notes)
        
        print(f"   ‚úÖ Extracted {len(analyzed_notes)} insights")
        
        return {
            'success': True,
            'notes': len(analyzed_notes),
            'duration': result.get('duration', 0)
        }
    
    def _analyze_tone(self, segment, text):
        """Detect speaker tone"""
        if '!' in text or text.isupper():
            return 'emphatic'
        elif '?' in text:
            return 'questioning'
        else:
            return 'neutral'
    
    def _extract_insights(self, text):
        """Extract key points from text"""
        points = []
        
        if any(w in text.lower() for w in ['problem', 'issue', 'difficult']):
            points.append(f"Pain point: {text}")
        elif '?' in text:
            points.append(f"Question: {text}")
        elif any(w in text.lower() for w in ['decided', 'agreed', 'will']):
            points.append(f"Decision: {text}")
        elif '"' in text:
            points.append(f"Quote: {text}")
        else:
            points.append(text)
        
        return points
    
    # =====================================================
    # DOCUMENTS: FILE UPLOAD
    # =====================================================
    
    def ingest_document_file(self, project_id: str, doc_path: str):
        """Ingest document file"""
        print(f"\\nüìÑ Processing document...")
        
        file_name = os.path.basename(doc_path)
        ext = os.path.splitext(file_name)[1].lower()
        
        if ext == '.pdf':
            return self._ingest_pdf(project_id, doc_path)
        elif ext in ['.ppt', '.pptx']:
            return self._ingest_ppt(project_id, doc_path)
        else:
            return self._ingest_text(project_id, doc_path)
    
    def _ingest_pdf(self, project_id: str, pdf_path: str):
        """Extract from PDF"""
        if not PDF_AVAILABLE:
            return {'error': 'PyPDF2 not installed'}
        
        project = self.projects[project_id]
        file_name = os.path.basename(pdf_path)
        
        analyzed_notes = []
        
        try:
            with open(pdf_path, 'rb') as f:
                reader = PyPDF2.PdfReader(f)
                print(f"   Processing {len(reader.pages)} pages")
                
                for page_num, page in enumerate(reader.pages, 1):
                    text = page.extract_text()
                    paras = [p.strip() for p in text.split('\\n\\n') if len(p.strip()) > 50]
                    
                    for para in paras:
                        analysis = self._analyze_content(para, 'YELLOW')
                        
                        note = {
                            'id': f"pdf_{file_name}_p{page_num}_{len(analyzed_notes)}",
                            'source': 'pdf',
                            'source_name': file_name,
                            'content': para[:200],
                            'full_text': para,
                            'predicted_type': analysis['predicted_type'],
                            'confidence': analysis['confidence'],
                            'contributor': 'Author',
                            'created_at': datetime.now().isoformat(),
                            'page_number': page_num,
                            'sentiment': self._detect_sentiment(para),
                            'priority': self._calc_priority(para, analysis),
                            'tags': self._extract_tags(para)
                        }
                        analyzed_notes.append(note)
        except Exception as e:
            return {'error': f'PDF processing failed: {e}'}
        
        project['sources'].append({
            'type': 'pdf',
            'name': file_name,
            'added_at': datetime.now().isoformat(),
            'pages': len(reader.pages),
            'note_count': len(analyzed_notes)
        })
        
        project['notes'].extend(analyzed_notes)
        project['last_updated'] = datetime.now().isoformat()
        
        self._update_timeline(project, analyzed_notes)
        self._update_contributors(project, analyzed_notes)
        
        print(f"   ‚úÖ Extracted {len(analyzed_notes)} insights")
        
        return {'success': True, 'notes': len(analyzed_notes)}
    
    def _ingest_ppt(self, project_id: str, ppt_path: str):
        """Extract from PowerPoint"""
        if not PPTX_AVAILABLE:
            return {'error': 'python-pptx not installed'}
        
        project = self.projects[project_id]
        file_name = os.path.basename(ppt_path)
        
        analyzed_notes = []
        
        try:
            prs = Presentation(ppt_path)
            print(f"   Processing {len(prs.slides)} slides")
            
            for slide_num, slide in enumerate(prs.slides, 1):
                text = ' '.join(shape.text for shape in slide.shapes if hasattr(shape, "text"))
                
                if len(text) > 20:
                    analysis = self._analyze_content(text, 'YELLOW')
                    
                    note = {
                        'id': f"ppt_{file_name}_s{slide_num}",
                        'source': 'powerpoint',
                        'source_name': file_name,
                        'content': text[:200],
                        'full_text': text,
                        'predicted_type': analysis['predicted_type'],
                        'confidence': analysis['confidence'],
                        'contributor': 'Presenter',
                        'created_at': datetime.now().isoformat(),
                        'slide_number': slide_num,
                        'sentiment': self._detect_sentiment(text),
                        'priority': self._calc_priority(text, analysis),
                        'tags': self._extract_tags(text)
                    }
                    analyzed_notes.append(note)
        except Exception as e:
            return {'error': f'PPT processing failed: {e}'}
        
        project['sources'].append({
            'type': 'powerpoint',
            'name': file_name,
            'added_at': datetime.now().isoformat(),
            'slides': len(prs.slides),
            'note_count': len(analyzed_notes)
        })
        
        project['notes'].extend(analyzed_notes)
        self._update_timeline(project, analyzed_notes)
        
        print(f"   ‚úÖ Extracted {len(analyzed_notes)} insights")
        
        return {'success': True, 'notes': len(analyzed_notes)}
    
    def _ingest_text(self, project_id: str, text_path: str):
        """Process text file"""
        project = self.projects[project_id]
        file_name = os.path.basename(text_path)
        
        with open(text_path, 'r') as f:
            text = f.read()
        
        analyzed_notes = []
        paras = [p.strip() for p in text.split('\\n\\n') if len(p.strip()) > 50]
        
        for i, para in enumerate(paras):
            analysis = self._analyze_content(para, 'YELLOW')
            
            note = {
                'id': f"txt_{file_name}_{i}",
                'source': 'document',
                'source_name': file_name,
                'content': para[:200],
                'full_text': para,
                'predicted_type': analysis['predicted_type'],
                'confidence': analysis['confidence'],
                'contributor': 'Author',
                'created_at': datetime.now().isoformat(),
                'sentiment': self._detect_sentiment(para),
                'priority': self._calc_priority(para, analysis),
                'tags': self._extract_tags(para)
            }
            analyzed_notes.append(note)
        
        project['sources'].append({
            'type': 'document',
            'name': file_name,
            'added_at': datetime.now().isoformat(),
            'note_count': len(analyzed_notes)
        })
        
        project['notes'].extend(analyzed_notes)
        self._update_timeline(project, analyzed_notes)
        
        return {'success': True, 'notes': len(analyzed_notes)}
    
    # =====================================================
    # ANALYSIS FUNCTIONS
    # =====================================================
    
    def _analyze_content(self, content: str, color: str) -> Dict:
        """Analyze content type"""
        content_lower = content.lower()
        
        if '?' in content:
            return {'predicted_type': 'question', 'confidence': 0.8}
        elif '"' in content:
            return {'predicted_type': 'quote', 'confidence': 0.7}
        elif any(w in content_lower for w in ['problem', 'issue', 'error', 'broken']):
            return {'predicted_type': 'pain_point', 'confidence': 0.75}
        elif any(w in content_lower for w in ['love', 'great', 'awesome', 'excellent']):
            return {'predicted_type': 'positive', 'confidence': 0.7}
        elif any(w in content_lower for w in ['could', 'should', 'what if', 'idea']):
            return {'predicted_type': 'idea', 'confidence': 0.7}
        else:
            return {'predicted_type': 'neutral', 'confidence': 0.6}
    
    def _detect_sentiment(self, content: str) -> str:
        """Detect sentiment"""
        content_lower = content.lower()
        pos = sum(1 for w in ['love', 'great', 'good', 'excellent'] if w in content_lower)
        neg = sum(1 for w in ['hate', 'bad', 'terrible', 'broken'] if w in content_lower)
        return 'positive' if pos > neg else 'negative' if neg > pos else 'neutral'
    
    def _calc_priority(self, content: str, analysis: Dict) -> str:
        """Calculate priority"""
        if analysis['predicted_type'] == 'pain_point':
            return 'high'
        if any(w in content.lower() for w in ['critical', 'urgent', 'blocker']):
            return 'high'
        if analysis['predicted_type'] == 'neutral':
            return 'low'
        return 'medium'
    
    def _extract_tags(self, content: str) -> List[str]:
        """Extract tags"""
        tags = []
        content_lower = content.lower()
        
        tag_map = {
            'navigation': ['navigation', 'nav', 'menu'],
            'mobile': ['mobile', 'phone'],
            'performance': ['slow', 'fast', 'loading'],
            'accessibility': ['accessibility', 'a11y'],
            'search': ['search', 'find'],
            'error': ['error', 'bug', 'broken']
        }
        
        for tag, keywords in tag_map.items():
            if any(kw in content_lower for kw in keywords):
                tags.append(tag)
        
        return tags[:3]
    
    def _update_timeline(self, project: Dict, notes: List[Dict]):
        """Update timeline"""
        for note in notes:
            project['timeline'].append({
                'timestamp': note.get('created_at'),
                'contributor': note['contributor'],
                'content_preview': note['content'][:100],
                'note_id': note['id'],
                'source': note['source']
            })
        project['timeline'].sort(key=lambda x: x['timestamp'])
    
    def _update_contributors(self, project: Dict, notes: List[Dict]):
        """Update contributors"""
        for note in notes:
            contributor = note['contributor']
            if contributor not in project['contributors']:
                project['contributors'][contributor] = {
                    'total_contributions': 0,
                    'note_types': defaultdict(int)
                }
            
            project['contributors'][contributor]['total_contributions'] += 1
            project['contributors'][contributor]['note_types'][note['predicted_type']] += 1
    
    # =====================================================
    # SYNTHESIS
    # =====================================================
    
    def synthesize_project(self, project_id: str) -> Dict:
        """Generate project synthesis"""
        project = self.projects[project_id]
        notes = project['notes']
        
        by_type = defaultdict(list)
        by_priority = defaultdict(list)
        
        for note in notes:
            by_type[note['predicted_type']].append(note)
            by_priority[note['priority']].append(note)
        
        all_tags = []
        for note in notes:
            all_tags.extend(note.get('tags', []))
        
        tag_counts = Counter(all_tags)
        themes = [
            {'name': tag, 'frequency': count, 'percentage': (count/len(notes))*100}
            for tag, count in tag_counts.most_common(10)
        ]
        
        action_items = [
            {
                'content': n['content'], 
                'type': n['predicted_type'],
                'contributor': n['contributor'],
                'source': n['source_name']
            }
            for n in notes if n['priority'] == 'high'
        ][:20]
        
        sentiment_dist = Counter(n.get('sentiment', 'neutral') for n in notes)
        
        synthesis = {
            'project_name': project['name'],
            'last_updated': project['last_updated'],
            'total_notes': len(notes),
            'total_sources': len(project['sources']),
            'contributors': len(project['contributors']),
            'by_type': dict(by_type),
            'by_priority': dict(by_priority),
            'by_contributor': project['contributors'],
            'timeline': project['timeline'],
            'themes': themes,
            'action_items': action_items,
            'stats': {
                'sentiment_distribution': dict(sentiment_dist),
                'avg_confidence': sum(n.get('confidence', 0) for n in notes) / len(notes) if notes else 0
            }
        }
        
        project['insights'] = synthesis
        return synthesis
    
    def refresh_project(self, project_id: str):
        """Refresh analysis"""
        return self.synthesize_project(project_id)

print("‚úÖ PRISM Brain AI v2 module loaded successfully!")
''')

print("‚úÖ prism_brain.py created successfully!")
print("üìÅ You can now create a new notebook and import with:")
print("   from prism_brain import PRISMBrainV2")

‚úÖ prism_brain.py created successfully!
üìÅ You can now create a new notebook and import with:
   from prism_brain import PRISMBrainV2


In [1]:
with open('prism_brain.py', 'r') as f:
    lines = f.readlines()

print("=== LOOKING FOR TOKEN CONFIGURATION ===\n")
for i, line in enumerate(lines[:100], 1):  # Check first 100 lines
    if any(keyword in line.lower() for keyword in ['token', 'figma', 'figjam', 'api_key', 'auth']):
        print(f"{i:3d} | {line.rstrip()}")

=== LOOKING FOR TOKEN CONFIGURATION ===

 46 |     - FigJam: Full board analysis (sticky notes + arrows + diagrams)
 52 |     def __init__(self, training_data=None, figjam_token=None):
 54 |         self.figjam_token = figjam_token
 75 |         print("   ‚úì FigJam: URL input with full board analysis")
