In [4]:
from pycomptox.chemical import Chemical
from pycomptox.exposure import ExposurePrediction, DemographicExposure
import pandas as pd

# Initialize clients
searcher = Chemical()
exposure = ExposurePrediction()
demographic = DemographicExposure()

print("✓ Clients initialized successfully")

✓ Clients initialized successfully


## Example 1: Get Exposure Predictions for a Chemical

Retrieve predicted exposure data for a specific chemical.

In [5]:
# Search for a chemical
results = searcher.search_by_exact_value("Bisphenol A")
dtxsid = results[0]['dtxsid']
print(f"Chemical: {results[0]['preferredName']}")
print(f"DTXSID: {dtxsid}")
print()

# Get exposure predictions
predictions = exposure.general_prediction_SEEMs_by_dtxsid(dtxsid)

if predictions:
    print(f"Found {len(predictions)} exposure predictions\n")
    
    # Convert to DataFrame
    df = pd.DataFrame(predictions)
    print(f"Available columns: {', '.join(df.columns.tolist())}\n")
    
    # Show sample data
    display(df.head(10))
else:
    print("No exposure prediction data available")

Chemical: Bisphenol A
DTXSID: DTXSID7020182

Found 6 exposure predictions

Available columns: value, units, predictor



Unnamed: 0,value,units,predictor
0,2780000.0,kg/day,Production Volume
1,0.0,Presence/Absence,Stockholm Convention
2,1.0,Likelihood from 0 (none) to 1 (certain),Probability Dietary
3,1.0,Likelihood from 0 (none) to 1 (certain),Probability Residential
4,0.0,Likelihood from 0 (none) to 1 (certain),Probability Pesticde
5,0.0,Likelihood from 0 (none) to 1 (certain),Probability Industrial


## Example 2: Get Demographic-Specific Exposure Data

Retrieve exposure data broken down by demographic groups.

In [6]:
# Get demographic exposure data
demo_data = demographic.prediction_SEEMs_data_by_dtxsid(dtxsid)

if demo_data:
    print(f"Found {len(demo_data)} demographic exposure records\n")
    
    df_demo = pd.DataFrame(demo_data)
    
    # Show available columns
    print(f"Available columns: {', '.join(df_demo.columns.tolist())}\n")
    
    # Display sample data
    display(df_demo.head(15))
else:
    print("No demographic exposure data available")

Found 19 demographic exposure records

Available columns: units, demographic, median, predictor, u95



Unnamed: 0,units,demographic,median,predictor,u95
0,mg/kg/day,Total,5.5e-05,SEEM3 Consensus,0.02044
1,mg/kg/day,Total,3.77,RAIDAR,
2,mg/kg/day,Total,0.01766,Food.Contact,
3,mg/kg/day,Repro. Age Females,1.4e-05,SEEM2 Heuristic,0.004177
4,mg/kg/day,Males,3.9e-05,SEEM2 Heuristic,0.006306
5,mg/kg/day,Females,1.2e-05,SEEM2 Heuristic,0.002898
6,mg/kg/day,Age 6-11,6.3e-05,SEEM2 Heuristic,0.010537
7,mg/kg/day,Age 12-19,5.9e-05,SEEM2 Heuristic,0.017186
8,mg/kg/day,Age 20-65,5.7e-05,SEEM2 Heuristic,0.011509
9,mg/kg/day,Age 66+,6.6e-05,SEEM2 Heuristic,0.019478


## Example 3: Analyze Exposure Routes

Break down exposure by different routes (oral, dermal, inhalation).

In [7]:
if predictions and 'route' in df.columns:
    # Count exposure routes
    route_counts = df['route'].value_counts()
    print("Exposure by Route:")
    for route, count in route_counts.items():
        print(f"  {route}: {count} records")
    
    # Show statistics by route
    if 'exposureValue' in df.columns:
        print("\nExposure Statistics by Route:")
        for route in df['route'].unique():
            route_data = df[df['route'] == route]['exposureValue']
            print(f"\n  {route}:")
            print(f"    Mean: {route_data.mean():.2e}")
            print(f"    Median: {route_data.median():.2e}")
            print(f"    Range: {route_data.min():.2e} - {route_data.max():.2e}")
else:
    print("Route information not available in the data")

Route information not available in the data


## Example 4: Compare Exposure Across Demographics

Compare exposure levels across different demographic groups.

In [8]:
if demo_data:
    df_demo = pd.DataFrame(demo_data)
    
    # Group by demographic category
    if 'demographicGroup' in df_demo.columns:
        print("Exposure by Demographic Group:")
        demo_groups = df_demo['demographicGroup'].value_counts()
        for group, count in demo_groups.items():
            print(f"  {group}: {count} records")
    
    # Age group analysis
    if 'ageGroup' in df_demo.columns:
        print("\nExposure by Age Group:")
        age_groups = df_demo['ageGroup'].value_counts()
        for age, count in age_groups.items():
            print(f"  {age}: {count} records")
else:
    print("Demographic data not available")

## Example 5: Batch Exposure Analysis

Compare exposure predictions across multiple chemicals.

In [9]:
# Search for multiple chemicals
chemical_names = ["Bisphenol A", "Phthalic acid", "Triclosan"]
chemicals = []

for name in chemical_names:
    results = searcher.search_by_exact_value(name)
    if results:
        dtxsid = results[0]['dtxsid']
        chemicals.append({'name': name, 'dtxsid': dtxsid})
        print(f"✓ {name}: {dtxsid}")

print(f"\nAnalyzing exposure for {len(chemicals)} chemicals...\n")

# Get exposure data for each
exposure_summary = []

for chem in chemicals:
    preds = exposure.general_prediction_SEEMs_by_dtxsid(chem['dtxsid'])
    demo = demographic.prediction_SEEMs_data_by_dtxsid(chem['dtxsid'])
    
    exposure_summary.append({
        'Chemical': chem['name'],
        'DTXSID': chem['dtxsid'],
        'Exposure Predictions': len(preds) if preds else 0,
        'Demographic Records': len(demo) if demo else 0
    })

summary_df = pd.DataFrame(exposure_summary)
display(summary_df)

✓ Bisphenol A: DTXSID7020182
✓ Phthalic acid: DTXSID8021484
✓ Triclosan: DTXSID5032498

Analyzing exposure for 3 chemicals...

✓ Phthalic acid: DTXSID8021484
✓ Triclosan: DTXSID5032498

Analyzing exposure for 3 chemicals...



Unnamed: 0,Chemical,DTXSID,Exposure Predictions,Demographic Records
0,Bisphenol A,DTXSID7020182,6,19
1,Phthalic acid,DTXSID8021484,6,17
2,Triclosan,DTXSID5032498,6,20


## Example 6: Exposure Pathways Analysis

Analyze different exposure pathways and sources.

In [10]:
if predictions:
    df = pd.DataFrame(predictions)
    
    # Analyze by pathway
    if 'pathway' in df.columns:
        print("Exposure Pathways:")
        pathway_counts = df['pathway'].value_counts()
        for pathway, count in pathway_counts.items():
            print(f"  {pathway}: {count} records")
    
    # Analyze by source
    if 'source' in df.columns:
        print("\nExposure Sources:")
        source_counts = df['source'].value_counts()
        for source, count in source_counts.items():
            print(f"  {source}: {count} records")
    
    # Show key columns
    available_cols = [col for col in ['pathway', 'source', 'route', 'exposureValue', 'unit'] if col in df.columns]
    if available_cols:
        print("\nSample Exposure Data:")
        display(df[available_cols].head(10))

## Example 7: Population Exposure Statistics

Calculate population-level exposure statistics.

In [11]:
if predictions:
    df = pd.DataFrame(predictions)
    
    if 'exposureValue' in df.columns:
        print("Population Exposure Statistics:")
        print(f"  Total Records: {len(df)}")
        print(f"  Mean Exposure: {df['exposureValue'].mean():.2e}")
        print(f"  Median Exposure: {df['exposureValue'].median():.2e}")
        print(f"  Min Exposure: {df['exposureValue'].min():.2e}")
        print(f"  Max Exposure: {df['exposureValue'].max():.2e}")
        print(f"  Std Dev: {df['exposureValue'].std():.2e}")
        
        # Percentiles
        print("\nPercentiles:")
        for percentile in [5, 25, 50, 75, 95]:
            value = df['exposureValue'].quantile(percentile/100)
            print(f"  {percentile}th: {value:.2e}")

## Example 8: Complete Exposure Analysis

Comprehensive exposure analysis for a chemical.

In [12]:
def analyze_exposure(chemical_name):
    """Complete exposure analysis for a chemical."""
    print(f"Analyzing Exposure: {chemical_name}")
    print("="*70)
    
    # 1. Search
    results = searcher.search_by_exact_value(chemical_name)
    if not results:
        print(f"Chemical not found: {chemical_name}")
        return
    
    dtxsid = results[0]['dtxsid']
    print(f"✓ Found: {results[0]['preferredName']} ({dtxsid})")
    print()
    
    # 2. Exposure Predictions
    preds = exposure.general_prediction_SEEMs_by_dtxsid(dtxsid)
    print(f"Exposure Predictions: {len(preds) if preds else 0}")
    
    if preds:
        df = pd.DataFrame(preds)
        
        # Route breakdown
        if 'route' in df.columns:
            print("\nExposure Routes:")
            for route, count in df['route'].value_counts().items():
                print(f"  {route}: {count} records")
        
        # Exposure statistics
        if 'exposureValue' in df.columns:
            print("\nExposure Statistics:")
            print(f"  Mean: {df['exposureValue'].mean():.2e}")
            print(f"  Median: {df['exposureValue'].median():.2e}")
            print(f"  Range: {df['exposureValue'].min():.2e} - {df['exposureValue'].max():.2e}")
    
    # 3. Demographic Exposure
    demo = demographic.prediction_SEEMs_data_by_dtxsid(dtxsid)
    print(f"\nDemographic Records: {len(demo) if demo else 0}")
    
    if demo:
        df_demo = pd.DataFrame(demo)
        if 'demographicGroup' in df_demo.columns:
            print("\nDemographic Groups:")
            for group, count in df_demo['demographicGroup'].value_counts().head(5).items():
                print(f"  {group}: {count} records")
    
    print("\n" + "="*70 + "\n")

# Analyze a chemical
analyze_exposure("Caffeine")

Analyzing Exposure: Caffeine
✓ Found: Caffeine (DTXSID0020232)

Exposure Predictions: 6

Demographic Records: 17




## Example 9: High vs Low Exposure Comparison

Compare chemicals with different exposure profiles.

In [13]:
# Compare exposure across chemicals
comparison_data = []

for chem in chemicals:
    preds = exposure.general_prediction_SEEMs_by_dtxsid(chem['dtxsid'])
    
    if preds:
        df = pd.DataFrame(preds)
        if 'exposureValue' in df.columns:
            comparison_data.append({
                'Chemical': chem['name'],
                'Mean Exposure': df['exposureValue'].mean(),
                'Median Exposure': df['exposureValue'].median(),
                'Max Exposure': df['exposureValue'].max(),
                'Records': len(df)
            })

if comparison_data:
    comparison_df = pd.DataFrame(comparison_data)
    # Sort by median exposure
    comparison_df = comparison_df.sort_values('Median Exposure', ascending=False)
    
    print("Exposure Comparison (sorted by median):")
    display(comparison_df)

## Summary

This notebook demonstrated:
- Getting exposure predictions for chemicals
- Analyzing demographic-specific exposure data
- Breaking down exposure by route and pathway
- Comparing exposure across multiple chemicals
- Calculating population-level exposure statistics

The exposure module provides comprehensive data for assessing human exposure to chemicals across different populations and scenarios.