# Clinical Trials MCP - Experiments

This notebook contains experiments and analysis for the clinical trials matching system.

In [None]:
import sys
from pathlib import Path

# Make the project's modules importable by prepending
# "<parent of the current working directory>/src" to sys.path.
# The path is resolved against the working directory, not this file's
# location, so this assumes the notebook is started from a directory that
# sits next to src/ — TODO confirm the intended layout.
sys.path.insert(0, str(Path.cwd().parent / "src"))

import pandas as pd
# Project-local modules; presumably resolved through the src/ entry added
# above, which is why these imports come after the sys.path change.
from database import DatabaseManager
from vectorstore import VectorStoreManager
from matching import TrialMatcher
from config import get_settings

## Setup

In [None]:
# Load the project configuration once; every object below is built from it.
settings = get_settings()

# Database access object, constructed from the configured database URL.
db_manager = DatabaseManager(settings.database_url)

# Vector store wrapper: both its persistence directory and its embedding
# model are taken from the settings rather than hard-coded here.
vector_manager = VectorStoreManager(
    embedding_model=settings.embedding_model,
    persist_directory=settings.vector_store_path,
)

# The matcher is handed both managers.
matcher = TrialMatcher(db_manager, vector_manager)

## Explore Data

In [None]:
# Load and explore patient data: fetch every patient through the database
# manager, report how many records came back, then preview the first rows.
# The last line is deliberately a bare expression so the notebook renders it
# as the cell's output.
# NOTE(review): .head() suggests a pandas DataFrame is returned — confirm
# against DatabaseManager.get_all_patients.
patients_df = db_manager.get_all_patients()
print(f"Total patients: {len(patients_df)}")
patients_df.head()

In [None]:
# Explore conditions: fetch every condition record through the database
# manager, report the count, then preview the first rows.
# The last line is deliberately a bare expression so the notebook renders it
# as the cell's output.
# NOTE(review): .head() suggests a pandas DataFrame is returned — confirm
# against DatabaseManager.get_all_conditions.
conditions_df = db_manager.get_all_conditions()
print(f"Total conditions: {len(conditions_df)}")
conditions_df.head()

In [None]:
# Explore clinical trials: fetch every trial record through the database
# manager, report the count, then preview the first rows.
# The last line is deliberately a bare expression so the notebook renders it
# as the cell's output.
# NOTE(review): .head() suggests a pandas DataFrame is returned — confirm
# against DatabaseManager.get_all_trials.
trials_df = db_manager.get_all_trials()
print(f"Total trials: {len(trials_df)}")
trials_df.head()

## Test Matching

In [None]:
# Test matching for a specific patient: ask the matcher for the top 5 trials
# and print the ID, title and similarity score of each one.
patient_id = "PATIENT001"  # Replace with actual patient ID
matches = matcher.find_matching_trials(patient_id, top_k=5)

print(f"Top matches for patient {patient_id}:")
if not matches:
    # Without this branch an empty result prints only the header above,
    # which is indistinguishable from a cell that silently did nothing.
    print("No matching trials found.")
else:
    # Each match is read as a mapping with the keys
    # 'trial_id', 'title' and 'similarity_score'.
    for match in matches:
        print(f"\nTrial: {match['trial_id']}")
        print(f"Title: {match['title']}")
        print(f"Similarity: {match['similarity_score']:.3f}")

## Vector Search Analysis

In [None]:
# Test semantic search: run one free-text query against the vector store and
# print each hit's ID, distance and metadata.
query = "diabetes type 2 with cardiovascular complications"
results = vector_manager.search(query, n_results=10)

# Every field is indexed with a leading [0], i.e. the hits for our single
# query string. NOTE(review): this looks like a Chroma-style batched query
# result — confirm against VectorStoreManager.search.
hit_ids = results['ids'][0]
hit_distances = results['distances'][0]
hit_metadatas = results['metadatas'][0]

print(f"Search results for: '{query}'\n")
# Walk the three parallel lists in lockstep rather than enumerating one of
# them from 1 and indexing the others with i-1.
for rank, (doc_id, distance, metadata) in enumerate(
    zip(hit_ids, hit_distances, hit_metadatas), 1
):
    print(f"{rank}. {doc_id}")
    print(f"   Distance: {distance:.3f}")
    print(f"   Metadata: {metadata}")
    print()

## Further Experiments

Add your own experiments below:
- Evaluate matching quality
- Test different embedding models
- Analyze demographic distributions
- Visualize trial coverage