In [1]:
from pycomptox.chemical import Chemical
from pycomptox.hazard import (
    ToxValDB, ToxValDBCancer, ToxValDBGenetox, ToxValDBSkinEye,
    ToxRefDBEffects, ToxRefDBSummary, ToxRefDBData, ToxRefDBObservation,
    PPRTV, IRIS, ADMEIVIVE, HAWC
)
import pandas as pd

# Initialize clients
searcher = Chemical()
toxvaldb = ToxValDB()
toxvaldb_cancer = ToxValDBCancer()
toxvaldb_genetox = ToxValDBGenetox()
toxrefdb_effects = ToxRefDBEffects()
pprtv = PPRTV()
iris = IRIS()

print("✓ Clients initialized successfully")

✓ Clients initialized successfully


## Example 1: Get ToxValDB Data

Retrieve general toxicity values from ToxValDB.

In [3]:
# Search for a chemical
results = searcher.search_by_exact_value("Bisphenol A")
dtxsid = results[0]['dtxsid']
print(f"Chemical: {results[0]['preferredName']}")
print(f"DTXSID: {dtxsid}")
print()

# Get ToxValDB data
toxval_data = toxvaldb.get_data_by_dtxsid(dtxsid)

if toxval_data:
    print(f"Found {len(toxval_data)} ToxValDB records\n")
    
    # Convert to DataFrame
    df = pd.DataFrame(toxval_data)
    print(f"Available columns: {', '.join(df.columns.tolist()[:10])}...\n")
    
    # Show key columns
    columns = ['toxvalType', 'toxvalNumeric', 'toxvalUnits', 'studyType', 'species']
    available_cols = [col for col in columns if col in df.columns]
    if available_cols:
        display(df[available_cols].head(10))
else:
    print("No ToxValDB data available")

Chemical: Bisphenol A
DTXSID: DTXSID7020182

Found 132 ToxValDB records

Available columns: id, dtxsid, casrn, name, source, subsource, toxvalType, toxvalTypeDefinition, toxvalSubtype, toxvalTypeSuperCategory...

Found 132 ToxValDB records

Available columns: id, dtxsid, casrn, name, source, subsource, toxvalType, toxvalTypeDefinition, toxvalSubtype, toxvalTypeSuperCategory...



Unnamed: 0,toxvalType,toxvalNumeric,toxvalUnits,studyType
0,NOEL,40.0,mg/kg-day,subchronic
1,LEL,160.0,mg/kg-day,reproduction developmental
2,LOAEL,160.0,mg/kg-day,reproduction developmental
3,LEL,1280.0,mg/kg-day,developmental
4,LOAEL,1280.0,mg/kg-day,developmental
5,NEL,1280.0,mg/kg-day,developmental
6,NOAEL,1280.0,mg/kg-day,developmental
7,NOEL,0.5,mg,reproduction developmental
8,NOEL,1.0,%,chronic
9,NOEL,1.0,%,chronic


## Example 2: Get Cancer Data

Retrieve cancer-specific toxicity data.

In [5]:
# Get cancer data
cancer_data = toxvaldb_cancer.get_data_by_dtxsid(dtxsid)

if cancer_data:
    print(f"Found {len(cancer_data)} cancer records\n")
    
    df_cancer = pd.DataFrame(cancer_data)
    
    # Show key information
    columns = ['cancerType', 'studyType', 'species', 'strain', 'effectLevel']
    available_cols = [col for col in columns if col in df_cancer.columns]
    if available_cols:
        display(df_cancer[available_cols].head(10))
    else:
        display(df_cancer.head(10))
else:
    print("No cancer data available")

No cancer data available


## Example 3: Get Genotoxicity Data

Retrieve genotoxicity test results.

In [6]:
# Get genotoxicity summary
genetox_summary = toxvaldb_genetox.get_summary_by_dtxsid(dtxsid)

if genetox_summary:
    print(f"Found {len(genetox_summary)} genotoxicity records\n")
    
    df_genetox = pd.DataFrame(genetox_summary)
    
    # Show assay types
    if 'assayType' in df_genetox.columns:
        print("Genotoxicity Assay Types:")
        assay_counts = df_genetox['assayType'].value_counts()
        for assay, count in assay_counts.items():
            print(f"  {assay}: {count} tests")
        print()
    
    # Show results
    if 'result' in df_genetox.columns:
        print("Genotoxicity Results:")
        result_counts = df_genetox['result'].value_counts()
        for result, count in result_counts.items():
            print(f"  {result}: {count} tests")
    
    # Display sample data
    columns = ['assayType', 'result', 'species', 'strain']
    available_cols = [col for col in columns if col in df_genetox.columns]
    if available_cols:
        display(df_genetox[available_cols].head(10))
else:
    print("No genotoxicity data available")

Found 1 genotoxicity records



## Example 4: Get ToxRefDB Effects Data

Retrieve effects data from ToxRefDB studies.

In [8]:
# Get ToxRefDB effects by study type
effects_data = toxrefdb_effects.get_data_by_dtxsid(dtxsid)

if effects_data:
    print(f"Found {len(effects_data)} ToxRefDB effects records\n")
    
    df_effects = pd.DataFrame(effects_data)
    
    # Show study types
    if 'studyType' in df_effects.columns:
        print("Study Types:")
        study_counts = df_effects['studyType'].value_counts()
        for study, count in study_counts.items():
            print(f"  {study}: {count} records")
        print()
    
    # Show key columns
    columns = ['studyType', 'effectName', 'effectLevel', 'species', 'sex']
    available_cols = [col for col in columns if col in df_effects.columns]
    if available_cols:
        display(df_effects[available_cols].head(10))
else:
    print("No ToxRefDB effects data available")

Found 30 ToxRefDB effects records

Study Types:
  DEV: 30 records



Unnamed: 0,studyType,species,sex
0,DEV,rat,F
1,DEV,rat,F
2,DEV,rat,F
3,DEV,rat,F
4,DEV,rat,F
5,DEV,rat,F
6,DEV,rat,F
7,DEV,rat,F
8,DEV,rat,F
9,DEV,rat,F


## Example 5: Get Reference Values (PPRTV)

Retrieve Provisional Peer-Reviewed Toxicity Values.

In [10]:
# Get PPRTV data
pprtv_data = pprtv.get_all_pprtv_chemical_by_dtxsid(dtxsid)

if pprtv_data:
    print(f"Found {len(pprtv_data)} PPRTV records\n")
    
    df_pprtv = pd.DataFrame(pprtv_data)
    
    # Show available data
    display(df_pprtv.head(10))
    
    # Show key toxicity values
    if 'toxicityValue' in df_pprtv.columns:
        print("\nToxicity Values:")
        for _, row in df_pprtv.head(5).iterrows():
            val_type = row.get('valueType', 'N/A')
            value = row.get('toxicityValue', 'N/A')
            unit = row.get('unit', 'N/A')
            print(f"  {val_type}: {value} {unit}")
else:
    print("No PPRTV data available")

No PPRTV data available


## Example 6: Get IRIS Data

Retrieve Integrated Risk Information System data.

In [12]:
# Get IRIS data
iris_data = iris.get_data_by_dtxsid(dtxsid)

if iris_data:
    print(f"Found {len(iris_data)} IRIS records\n")
    
    df_iris = pd.DataFrame(iris_data)
    display(df_iris.head(10))
    
    # Show reference doses/concentrations
    if 'referenceValue' in df_iris.columns:
        print("\nReference Values:")
        for _, row in df_iris.head(5).iterrows():
            ref_type = row.get('referenceType', 'N/A')
            value = row.get('referenceValue', 'N/A')
            print(f"  {ref_type}: {value}")
else:
    print("No IRIS data available")

Found 1 IRIS records



Unnamed: 0,dtxsid,chemicalName,casrn,lastSignificantRevision,literatureScreeningReview,criticalEffectsSystems,rfdChronic,rfdSubchronic,rfcChronic,rfcSubchronic,tumorSite,irisUrl
0,DTXSID7020182,Bisphenol A,80-05-7,1988-09-26,Yes,,5 x 10 -2 mg/kg-day,,,,,https://iris.epa.gov/ChemicalLanding/&substanc...


## Example 7: Batch Hazard Analysis

Compare hazard data across multiple chemicals.

In [16]:
# Search for multiple chemicals
chemical_names = ["Bisphenol A", "Bisphenol S", "Bisphenol F"]
chemicals = []

for name in chemical_names:
    results = searcher.search_by_exact_value(name)
    if results:
        dtxsid = results[0]['dtxsid']
        chemicals.append({'name': name, 'dtxsid': dtxsid})
        print(f"✓ {name}: {dtxsid}")

print(f"\nAnalyzing hazard data for {len(chemicals)} chemicals...\n")

# Get hazard data for each
hazard_summary = []

for chem in chemicals:
    toxval = toxvaldb.get_data_by_dtxsid(chem['dtxsid'])
    cancer = toxvaldb_cancer.get_data_by_dtxsid(chem['dtxsid'])
    genetox = toxvaldb_genetox.get_summary_by_dtxsid(chem['dtxsid'])
    effects = toxrefdb_effects.get_data_by_dtxsid(chem['dtxsid'])
    
    hazard_summary.append({
        'Chemical': chem['name'],
        'DTXSID': chem['dtxsid'],
        'ToxVal Records': len(toxval) if toxval else 0,
        'Cancer Records': len(cancer) if cancer else 0,
        'Genetox Records': len(genetox) if genetox else 0,
        'Effects Records': len(effects) if effects else 0
    })

summary_df = pd.DataFrame(hazard_summary)
display(summary_df)

✓ Bisphenol A: DTXSID7020182
✓ Bisphenol S: DTXSID3022409
✓ Bisphenol F: DTXSID9022445

Analyzing hazard data for 3 chemicals...



Unnamed: 0,Chemical,DTXSID,ToxVal Records,Cancer Records,Genetox Records,Effects Records
0,Bisphenol A,DTXSID7020182,132,0,1,30
1,Bisphenol S,DTXSID3022409,95,0,1,0
2,Bisphenol F,DTXSID9022445,56,0,0,0


## Example 8: Toxicity Value Distribution

Analyze the distribution of toxicity values.

In [17]:
if toxval_data:
    df = pd.DataFrame(toxval_data)
    
    # Analyze by toxicity type
    if 'toxvalType' in df.columns:
        print("Toxicity Value Types:")
        type_counts = df['toxvalType'].value_counts()
        for tox_type, count in type_counts.head(10).items():
            print(f"  {tox_type}: {count} values")
    
    # Analyze by study type
    if 'studyType' in df.columns:
        print("\nStudy Types:")
        study_counts = df['studyType'].value_counts()
        for study, count in study_counts.head(10).items():
            print(f"  {study}: {count} studies")
    
    # Analyze by species
    if 'species' in df.columns:
        print("\nTest Species:")
        species_counts = df['species'].value_counts()
        for species, count in species_counts.head(5).items():
            print(f"  {species}: {count} studies")

Toxicity Value Types:
  LEL: 38 values
  NOEL: 21 values
  NOAEL: 14 values
  LOEL: 12 values
  LD50: 11 values
  LOAEL: 6 values
  Medium-Specific Concentration: 6 values
  MEG: 5 values
  BMDL (10): 4 values
  RfD: 3 values

Study Types:
  uterotrophic: 36 studies
  reproduction developmental: 28 studies
  developmental: 15 studies
  acute: 13 studies
  Media Exposure Guidelines: 13 studies
  short-term: 8 studies
  Toxicity Value: 7 studies
  chronic: 6 studies
  Acute Exposure Guidelines: 3 studies
  repeat dose other: 2 studies


## Example 9: Complete Hazard Profile

Generate a comprehensive hazard profile for a chemical.

In [24]:
def analyze_hazard(chemical_name):
    """Complete hazard analysis for a chemical."""
    print(f"Hazard Profile: {chemical_name}")
    print("="*70)
    
    # 1. Search
    results = searcher.search_by_exact_value(chemical_name)
    if not results:
        print(f"Chemical not found: {chemical_name}")
        return
    
    dtxsid = results[0]['dtxsid']
    print(f"✓ Found: {results[0]['preferredName']} ({dtxsid})")
    print()
    
    # 2. ToxValDB
    toxval = toxvaldb.get_data_by_dtxsid(dtxsid)
    print(f"ToxValDB Records: {len(toxval) if toxval else 0}")
    
    if toxval:
        df = pd.DataFrame(toxval)
        if 'toxvalType' in df.columns:
            print("  Top toxicity types:")
            for tox_type, count in df['toxvalType'].value_counts().head(3).items():
                print(f"    - {tox_type}: {count}")
    
    # 3. Cancer Data
    cancer = toxvaldb_cancer.get_data_by_dtxsid(dtxsid)
    print(f"\nCancer Records: {len(cancer) if cancer else 0}")
    
    # 4. Genotoxicity
    genetox = toxvaldb_genetox.get_summary_by_dtxsid(dtxsid)
    print(f"Genotoxicity Records: {len(genetox) if genetox else 0}")
    
    if genetox:
        df_genetox = pd.DataFrame(genetox)
        if 'result' in df_genetox.columns:
            print("  Results:")
            for result, count in df_genetox['result'].value_counts().items():
                print(f"    - {result}: {count}")
    
    # 5. ToxRefDB Effects
    effects = toxrefdb_effects.get_data_by_dtxsid(dtxsid)
    print(f"\nToxRefDB Effects: {len(effects) if effects else 0}")
    
    # 6. Reference Values
    pprtv_vals = pprtv.get_all_pprtv_chemical_by_dtxsid(dtxsid)
    iris_vals = iris.get_data_by_dtxsid(dtxsid)
    print(f"\nPPRTV Records: {len(pprtv_vals) if pprtv_vals else 0}")
    print(f"IRIS Records: {len(iris_vals) if iris_vals else 0}")
    
    print("\n" + "="*70 + "\n")

# Analyze a chemical
analyze_hazard("Caffeine")

Hazard Profile: Caffeine
✓ Found: Caffeine (DTXSID0020232)

ToxValDB Records: 35
  Top toxicity types:
    - NOAEL: 15
    - NOEL: 8
    - LOEL: 6

Cancer Records: 1
Genotoxicity Records: 1

ToxRefDB Effects: 0

PPRTV Records: 0
IRIS Records: 0



PPRTV Records: 0
IRIS Records: 0




## Example 10: Compare Genotoxicity Profiles

Compare genotoxicity results across related chemicals.

In [26]:
# Get genotoxicity data for each chemical
genetox_comparison = []

for chem in chemicals:
    genetox = toxvaldb_genetox.get_summary_by_dtxsid(chem['dtxsid'])
    
    if genetox:
        df = pd.DataFrame(genetox)
        
        # Count positive vs negative results
        if 'result' in df.columns:
            result_counts = df['result'].value_counts().to_dict()
            genetox_comparison.append({
                'Chemical': chem['name'],
                'Total Tests': len(df),
                'Positive': result_counts.get('Positive', 0),
                'Negative': result_counts.get('Negative', 0),
                'Equivocal': result_counts.get('Equivocal', 0)
            })

if genetox_comparison:
    comparison_df = pd.DataFrame(genetox_comparison)
    print("Genotoxicity Comparison:")
    display(comparison_df)
else:
    print("No genotoxicity data available for comparison")

No genotoxicity data available for comparison


## Summary

This notebook demonstrated:
- Retrieving toxicity data from ToxValDB
- Getting cancer and genotoxicity information
- Accessing ToxRefDB effects data
- Finding reference values from PPRTV and IRIS
- Comparing hazard profiles across chemicals
- Analyzing toxicity value distributions

The hazard module provides comprehensive toxicological data for chemical risk assessment.