# Test Biomni NIBR Setup
This notebook verifies that the Biomni container is properly configured with:
- Mounted data lake
- API keys from nibr/.env
- Working agent initialization

## 1. Environment Check

In [None]:
import os
import sys

# Report the interpreter version and the settings the Biomni container is
# expected to provide. API keys are only reported as present or absent; their
# values are never printed.
data_path_setting = os.environ.get('BIOMNI_DATA_PATH', 'NOT SET')
skip_download_setting = os.environ.get('BIOMNI_SKIP_DOWNLOAD', 'NOT SET')

# An unset key and an empty-string key are both treated as missing.
if os.environ.get('OPENAI_API_KEY'):
    openai_status = '✓ Configured'
else:
    openai_status = '✗ Missing'

if os.environ.get('ANTHROPIC_API_KEY'):
    anthropic_status = '✓ Configured'
else:
    anthropic_status = '✗ Missing'

print("Python version:", sys.version)
print("\nEnvironment variables:")
print(f"  BIOMNI_DATA_PATH: {data_path_setting}")
print(f"  BIOMNI_SKIP_DOWNLOAD: {skip_download_setting}")
print(f"  OpenAI API: {openai_status}")
print(f"  Anthropic API: {anthropic_status}")

## 2. Data Lake Verification

In [None]:
import os
import pandas as pd  # not used in this cell; the next cell reads a parquet file with it
from pathlib import Path

BYTES_PER_MB = 1024 * 1024
BYTES_PER_GB = 1024 * 1024 * 1024


def list_data_lake_files(directory):
    """Return ``(path, size_in_bytes)`` pairs for the files directly inside ``directory``.

    Every regular file is included, whatever its extension, so totals derived
    from the result describe the whole directory rather than only its parquet
    subset. Sub-directories are skipped and not descended into. Each file is
    stat'ed exactly once, and the pairs are sorted by path.

    Args:
        directory: Path (``str`` or ``Path``) of an existing directory.

    Returns:
        A list of ``(Path, int)`` tuples; empty when the directory has no files.

    Raises:
        OSError: If ``directory`` does not exist or cannot be read.
    """
    return sorted(
        (entry, entry.stat().st_size)
        for entry in Path(directory).iterdir()
        if entry.is_file()
    )


# Resolve the data lake location. `data_lake` is reused by later cells.
data_path = os.environ.get('BIOMNI_DATA_PATH', '/biomni_data')
data_lake = Path(data_path) / 'data_lake'

# is_dir() rather than exists(): a stray regular file at this path is not a
# usable mount and should be reported as missing.
if data_lake.is_dir():
    sized_files = list_data_lake_files(data_lake)
    files = [path for path, _ in sized_files]
    parquet_count = sum(1 for path in files if path.suffix == '.parquet')
    print(f"✓ Data lake found at: {data_lake}")
    # Count every file so the "total" labels are accurate; the parquet subset
    # is reported alongside it.
    print(f"✓ Total files: {len(files)} ({parquet_count} parquet)")

    # Show sample files with sizes
    print("\nSample datasets:")
    for path, size_bytes in sized_files[:10]:
        print(f"  • {path.name}: {size_bytes / BYTES_PER_MB:.2f} MB")

    # Calculate total size from the sizes already collected (no second stat)
    total_size = sum(size_bytes for _, size_bytes in sized_files) / BYTES_PER_GB
    print(f"\nTotal data lake size: {total_size:.2f} GB")
else:
    print(f"✗ Data lake not found at: {data_lake}")
    print("  Please check your data mount configuration")

## 3. Load Sample Dataset

In [None]:
# Smoke-test the mount by reading one known dataset: DisGeNET
# (disease-gene associations). Uses `data_lake` and `pd` from the
# "Data Lake Verification" cell, so that cell must run first.
disgenet_path = data_lake.joinpath('DisGeNET.parquet')

if not disgenet_path.exists():
    print(f"DisGeNET not found at {disgenet_path}")
else:
    df = pd.read_parquet(disgenet_path)
    # One print call with a newline separator emits the same four lines as
    # four separate print calls would.
    print(
        "✓ Successfully loaded DisGeNET",
        f"  Shape: {df.shape}",
        f"  Columns: {list(df.columns)}",
        "\nFirst 5 rows:",
        sep="\n",
    )
    # `display` is provided by the IPython/Jupyter runtime.
    display(df.head())

## 4. Initialize Biomni Agent

In [None]:
# The local mount wrapper is a project module, imported here at first use.
from biomni_local_mount import A1LocalMount

# Constructor settings for an agent backed by the already-mounted local data.
# 'gpt-4' selects an OpenAI model; the original note suggests 'claude-3-sonnet'
# for Anthropic.
# NOTE(review): confirm which model identifiers the wrapper accepts.
agent_options = {
    'path': '/',
    'skip_download': True,
    'validate_data': False,
    'llm': 'gpt-4',
}
agent = A1LocalMount(**agent_options)

print("✓ Biomni agent initialized successfully!")
print(f"  Agent type: {type(agent).__name__}")

# get_data_path() is optional on the wrapper; fall back to a placeholder.
if hasattr(agent, 'get_data_path'):
    agent_data_path = agent.get_data_path()
else:
    agent_data_path = 'N/A'
print(f"  Data path: {agent_data_path}")

## 5. Test Simple Query

In [None]:
# End-to-end check: send one simple biomedical question through the agent
# created in the "Initialize Biomni Agent" cell.
query = "What are the top 5 genes associated with Alzheimer's disease according to DisGeNET?"

# Hints printed when the query fails for any reason.
troubleshooting_steps = (
    "1. Check that API keys are properly set in nibr/.env",
    "2. Verify data is mounted correctly",
    "3. Ensure you have sufficient API credits",
)

print(f"Query: {query}")
print("\nProcessing...")

try:
    # NOTE(review): upstream Biomni examples drive the A1 agent through go();
    # confirm that A1LocalMount actually exposes run().
    result = agent.run(query)
    print("\n✓ Query executed successfully!")
    print("\nResult:")
    print(result)
except Exception as exc:
    # Deliberately broad: this is a diagnostic cell, so any failure is reported
    # with hints instead of stopping the notebook. The error is not re-raised.
    print(f"\n✗ Error during query execution: {exc}")
    print("\nTroubleshooting:")
    for step in troubleshooting_steps:
        print(step)

## 6. Test Tool Registry

In [None]:
# List the tools the Biomni installation exposes.
# NOTE(review): confirm that this Biomni build exports ToolRegistry from
# biomni.tool, that it can be constructed without arguments, and that
# get_all_tools() returns a mapping keyed by tool name.
from biomni.tool import ToolRegistry

SAMPLE_TOOL_LIMIT = 10

registry = ToolRegistry()
tools = registry.get_all_tools()

print(f"✓ Found {len(tools)} tools in registry")
print("\nSample tools:")
# Print only the first few names and stop early.
for position, tool_name in enumerate(tools.keys()):
    if position >= SAMPLE_TOOL_LIMIT:
        break
    print(f"  • {tool_name}")

## 7. System Information

In [None]:
import platform
import psutil


def bytes_to_gib(num_bytes):
    """Convert a byte count to binary gigabytes (1024**3 bytes each)."""
    return num_bytes / (1024**3)


# Host summary: operating system, interpreter, CPU count and memory.
print("System Information:")
print(f"  OS: {platform.system()} {platform.release()}")
print(f"  Python: {platform.python_version()}")
print(f"  CPU cores: {psutil.cpu_count()}")
print(f"  Memory: {bytes_to_gib(psutil.virtual_memory().total):.1f} GB")
print(f"  Memory available: {bytes_to_gib(psutil.virtual_memory().available):.1f} GB")

# Disk usage for the data lake path. `data_lake` comes from the
# "Data Lake Verification" cell, so that cell must run first.
if data_lake.exists():
    disk = psutil.disk_usage(str(data_lake))
    print(f"\nDisk usage for {data_lake}:")
    print(f"  Total: {bytes_to_gib(disk.total):.1f} GB")
    print(f"  Used: {bytes_to_gib(disk.used):.1f} GB ({disk.percent:.1f}%)")
    print(f"  Free: {bytes_to_gib(disk.free):.1f} GB")

## Summary

This notebook checks that:
- Environment variables are properly configured
- Data lake is mounted and accessible
- Biomni agent can be initialized
- Basic queries can be executed
- Tool registry is available

A check has passed only if its cell above printed ✓ and reported no ✗ or error. If all checks pass, your Biomni NIBR setup is ready for research!