# Clinical Trials MCP - Experiments

This notebook contains experiments and analysis for the clinical trials matching system.

In [None]:
import sys
from pathlib import Path

# Add src to path
sys.path.insert(0, str(Path.cwd().parent / "src"))

import pandas as pd
from database import DatabaseManager
from vectorstore import VectorStoreManager
from matching import TrialMatcher
from config import get_settings

## Setup

In [None]:
# Build the shared objects that the remaining cells rely on.
# Connection details are read from the settings object instead of being
# hard-coded in the notebook.
settings = get_settings()
db_manager = DatabaseManager(settings.database_url)
vector_manager = VectorStoreManager(
    persist_directory=settings.vector_store_path,
    embedding_model=settings.embedding_model
)
# The matcher is constructed from both the database and vector store managers.
matcher = TrialMatcher(db_manager, vector_manager)

## Explore Data

In [None]:
# Load and explore patient data
# The result must support len() and .head(); presumably a pandas DataFrame —
# confirm against DatabaseManager.get_all_patients.
patients_df = db_manager.get_all_patients()
print(f"Total patients: {len(patients_df)}")
# Trailing expression: rendered as the cell's output.
patients_df.head()

In [None]:
# Explore conditions
# The result must support len() and .head(); presumably a pandas DataFrame —
# confirm against DatabaseManager.get_all_conditions.
conditions_df = db_manager.get_all_conditions()
print(f"Total conditions: {len(conditions_df)}")
# Trailing expression: rendered as the cell's output.
conditions_df.head()

In [None]:
# Explore clinical trials
# The result must support len() and .head(); presumably a pandas DataFrame —
# confirm against DatabaseManager.get_all_trials.
trials_df = db_manager.get_all_trials()
print(f"Total trials: {len(trials_df)}")
# Trailing expression: rendered as the cell's output.
trials_df.head()

## Test Matching

In [None]:
# Test matching for a specific patient
# "PATIENT001" is a placeholder value; swap in an ID that exists in the database.
patient_id = "PATIENT001"  # Replace with actual patient ID
# Request matches for this patient, capped via top_k=5.
matches = matcher.find_matching_trials(patient_id, top_k=5)

print(f"Top matches for patient {patient_id}:")
# Each match is indexed by the keys 'trial_id', 'title' and 'similarity_score';
# the score is formatted as a float with three decimals.
for match in matches:
    print(f"\nTrial: {match['trial_id']}")
    print(f"Title: {match['title']}")
    print(f"Similarity: {match['similarity_score']:.3f}")

## Vector Search Analysis

In [None]:
# Test semantic search
query = "diabetes type 2 with cardiovascular complications"
results = vector_manager.search(query, n_results=10)

print(f"Search results for: '{query}'\n")
# `results` is read as results[key][0] for 'ids', 'distances' and 'metadatas'
# (presumably one inner list per query, and only one query was issued —
# confirm against VectorStoreManager.search).
# Walk the three parallel lists in lockstep rather than re-indexing them
# with i-1, which is easy to get wrong when the enumerate start changes.
hits = zip(results['ids'][0], results['distances'][0], results['metadatas'][0])
for rank, (doc_id, distance, metadata) in enumerate(hits, 1):
    print(f"{rank}. {doc_id}")
    print(f"   Distance: {distance:.3f}")
    print(f"   Metadata: {metadata}")
    print()

## Further Experiments

Add your own experiments below:
- Evaluate matching quality
- Test different embedding models
- Analyze demographic distributions
- Visualize trial coverage

In [1]:
lst=[{'text': 'Title: Telephone Delivered Behavioral Skills Intervention for Blacks With T2DM\nEligibility: Inclusion Criteria: * 1\\) Age ≥18 years * 2\\) Clinical diagnosis of T2DM and HbA1c ≥9% at the screening visit * 3\\) Self-identified as Black or African American * 4\\) Subject must be taking at least one oral medication for diabetes, hypertension, or hyperlipidemia and must be willing to use the MEMS cap and bottle for 12 months * 5\\) Subjects must be able to communicate in English * 6\\) Subjects must have access to a telephone (landline or cell phone) for the 12 week intervention period Exclusion Criteria: * 1\\) Mental confusion on interview suggesting significant dementia * 2\\) Participation in other diabetes clinical trials * 3\\) Alcohol or drug abuse/dependency * 4\\) Active psychosis or acute mental disorder * 5\\) Life expectancy \\<6 months\nConditions: diabetes mellitus, type 2, diabetes mellitus, adult-onset, diabetes mellitus, non-insulin-dependent, diabetes mellitus, noninsulin dependent, diabetes mellitus, type ii\nLocations: [{"facility": "Medical University of South Carolina", "city": "Charleston", "state": "South Carolina", "zip": "29425", "country": "United States", "geoPoint": {"lat": 32.77632, "lon": -79.93275}}]\nInterventions: \n  - Type: BEHAVIORAL ; Name: Diabetes Knowledge/Information ; Description: This group will receive telephone-delivered diabetes knowledge/information lasting 30 minutes for 12 weeks.\n  - Type: BEHAVIORAL ; Name: Motivation/Behavioral Skills ; Description: This intervention consists of patient activation, patient empowerment, and behavioral skills training delivered via telephone lasting 30 minutes every week for 12 weeks.\n  - Type: BEHAVIORAL ; Name: Combined Intervention ; Description: This group will receive all components of the diabetes knowledge/information and the motivation/behavioral skills interventions via telephone lasting 30 minutes every week for 12 weeks.\n  - Type: BEHAVIORAL ; Name: Usual 
Care ; Description: This group will receive telephone-delivered general health education lasting 30 minutes for 12 weeks to control for attention and content.\nOutcomes: \n  - PRIMARY: Hemoglobin A1c (HbA1c) at 12 Months Post Randomization ; Timeframe: 12-months post randomization ; Population: Final analysis used baseline A1c analysis of covariance for differences in levels of A1c between the treatment groups at 12months with baseline A1c as covariate.', 'chunk_similarity_score': 0.016, 'nct_id': 'NCT00929838', 'phase': 'NA', 'status': 'COMPLETED', 'minimum_age': 18, 'maximum_age': '', 'enrollment': '256.0', 'conditions_count': 5, 'primary_condition': 'diabetes mellitus, type 2', 'chunk_index': 0, 'conditions': 'diabetes mellitus, type 2, diabetes mellitus, adult-onset, diabetes mellitus, non-insulin-dependent, diabetes mellitus, noninsulin dependent, diabetes mellitus, type ii'}, {'text': 'Title: Model Driven Diabetes Care\nEligibility: Inclusion Criteria: * Confirmed diagnosis with Diabetes Mellitus type 1 for at least one year. * Age over 18 years * Has basic familiarity with mobile phones, and uses mobile phone on a daily basis. Exclusion Criteria: * Severe complications due to their diabetes. * Unable to understand or conform to the guidelines when presented with the phone\'s software.\nConditions: diabetes mellitus, type 1\nLocations: [{"facility": "University Hopital of North Norway", "city": "Troms\\u00f8", "zip": "9038", "country": "Norway", "geoPoint": {"lat": 69.6489, "lon": 18.95508}}]\nInterventions: \n  - Type: DEVICE ; Name: Few Touch Application ; Description: Users get access to the regular version of the Few Touch Application for Type 1 Diabetes.\n  - Type: DEVICE ; Name: Diastat ; Description: Users get the Few Touch Application with Diastat module activated.\nOutcomes: \n  - PRIMARY: Change in the Frequency of Hyper- and Hypo-glycemic Events From Baseline to Week 8-12. 
; Description: The number of self-measured blood glucose values \\< 4 mmol/L (72 mg/dL) or \\> 15 mmol/L (270 mg/dL) will be recorded during baseline (first 4 weeks post-enrollment/start of study) and during weeks 8-12 post-enrollment for all participants. ; Timeframe: Up to 12 weeks post-enrollment ; Population: 14 participants in each group were active users and had sufficient data to be analyzed for the outcome.\n  - SECONDARY: Change in HbA1c ; Description: HbA1c will be measured at the start of the study (week 1 post-enrollment) and during the last week of intervention (week 12 for the intervention group and week 20 for the active comparator group). ; Timeframe: up to 20 weeks post-enrollment ; Population: 11 participants in each group met for measurement of HbA1c. Reasons for not meeting was not recorded.\n  - OTHER_PRE_SPECIFIED: Usability ; Description: System Usability Scale (SUS) will be applied to assess usability of the approach and recorded during the last week of intervention (week 12 for the intervention group and week 20 for the active comparator group). ; Timeframe: up to 20 weeks post-enrollment\n  - OTHER_PRE_SPECIFIED: Empowerment ; Description: Diabetes Empowerment Scale-Short Form (DES-SF) will be used to assess empowerment at the start of the study (week 1 post-enrollment) and during week 12 of intervention. ; Timeframe: Up to 12 weeks post-enrollment.', 'chunk_similarity_score': 0.016, 'nct_id': 'NCT01774149', 'phase': 'NA', 'status': 'COMPLETED', 'minimum_age': 18, 'maximum_age': '', 'enrollment': '30.0', 'conditions_count': 1, 'primary_condition': 'diabetes mellitus, type 1', 'chunk_index': 0, 'conditions': 'diabetes mellitus, type 1'}]
lst

[{'text': 'Title: Telephone Delivered Behavioral Skills Intervention for Blacks With T2DM\nEligibility: Inclusion Criteria: * 1\\) Age ≥18 years * 2\\) Clinical diagnosis of T2DM and HbA1c ≥9% at the screening visit * 3\\) Self-identified as Black or African American * 4\\) Subject must be taking at least one oral medication for diabetes, hypertension, or hyperlipidemia and must be willing to use the MEMS cap and bottle for 12 months * 5\\) Subjects must be able to communicate in English * 6\\) Subjects must have access to a telephone (landline or cell phone) for the 12 week intervention period Exclusion Criteria: * 1\\) Mental confusion on interview suggesting significant dementia * 2\\) Participation in other diabetes clinical trials * 3\\) Alcohol or drug abuse/dependency * 4\\) Active psychosis or acute mental disorder * 5\\) Life expectancy \\<6 months\nConditions: diabetes mellitus, type 2, diabetes mellitus, adult-onset, diabetes mellitus, non-insulin-dependent, diabetes mellitu

In [3]:
# Check which Python type the 'conditions' field of the first record has.
type(lst[0]['conditions'])

str

In [4]:
# Display the raw 'conditions' value of the first record.
lst[0]['conditions']


'diabetes mellitus, type 2, diabetes mellitus, adult-onset, diabetes mellitus, non-insulin-dependent, diabetes mellitus, noninsulin dependent, diabetes mellitus, type ii'

In [5]:

def parse_conditions(cond, sep=","):
    """Split a delimited conditions string into a list of cleaned names.

    Args:
        cond: The conditions value, e.g. ``"asthma, copd"``. Anything falsy
            (``None``, ``""``) or not a ``str`` produces an empty list.
        sep: Non-empty delimiter string placed between names. Defaults to
            ``","``, which keeps the original behaviour for existing callers.

    Returns:
        list[str]: The names in their original order, with surrounding
        whitespace removed and empty fragments dropped.

    Note:
        Splitting is purely textual. A name that itself contains the
        delimiter is fragmented: with the default ``","`` the single name
        ``"diabetes mellitus, type 2"`` comes back as two items. When the
        data source can emit a delimiter that never occurs inside a name
        (for example ``";"``), pass it via ``sep``.
    """
    # Missing values and non-string inputs carry no parseable names.
    if not cond or not isinstance(cond, str):
        return []
    # Strip each piece once, then discard the ones that end up empty.
    stripped = (piece.strip() for piece in cond.split(sep))
    return [name for name in stripped if name]


# Run the parser on the first record's comma-joined 'conditions' string.
# Caveat: names that contain a comma themselves (e.g. "diabetes mellitus, type 2")
# come back split into separate fragments, as the printed list shows.
conditions = parse_conditions(lst[0]['conditions'])
print(conditions)

['diabetes mellitus', 'type 2', 'diabetes mellitus', 'adult-onset', 'diabetes mellitus', 'non-insulin-dependent', 'diabetes mellitus', 'noninsulin dependent', 'diabetes mellitus', 'type ii']
