In [13]:
from pycomptox.chemical import Chemical
from pycomptox.bioactivity import BioactivityData, BioactivityAOP, AssayBioactivity
import pandas as pd

# Initialize clients
searcher = Chemical()
bioactivity = BioactivityData()
aop = BioactivityAOP()
assay = AssayBioactivity()

print("✓ Clients initialized successfully")

✓ Clients initialized successfully


## Example 1: Get Bioactivity Data for a Chemical

Retrieve all bioactivity assay data for a specific chemical.

In [5]:
# Search for Bisphenol A
results = searcher.search_by_exact_value("Bisphenol A")
dtxsid = results[0]['dtxsid']
print(f"Chemical: {results[0]['preferredName']}")
print(f"DTXSID: {dtxsid}")
print()

# Get bioactivity data
data = bioactivity.get_data_by_dtxsid_and_projection(dtxsid)

print(f"Found {len(data)} bioactivity records\n")

# Convert to DataFrame for easier viewing
df = pd.DataFrame(data)
print(f"Available columns: {', '.join(df.columns.tolist())}\n")

# Show sample data
columns = ['aeid', 'assayComponentEndpointName', 'activityClassification', 'ac50', 'ac50Units']
display(df.head(10))

Chemical: Bisphenol A
DTXSID: DTXSID7020182

Found 2798 bioactivity records

Available columns: concMin, aeid, dtxsid, chnm, spid, m4id, chid, casn, bmad, respMax, respMin, maxMean, maxMeanConc, maxMed, maxMedConc, concMax, nconc, npts, nrep, nmedGtblPos, nmedGtblNeg, m5id, modl, hitc, fitc, coff, actp, modelType, chidRep, stkc, stkcUnit, testedConcUnit, mc3Param, mc4Param, mc5Param, mc6Param

Found 2798 bioactivity records

Available columns: concMin, aeid, dtxsid, chnm, spid, m4id, chid, casn, bmad, respMax, respMin, maxMean, maxMeanConc, maxMed, maxMedConc, concMax, nconc, npts, nrep, nmedGtblPos, nmedGtblNeg, m5id, modl, hitc, fitc, coff, actp, modelType, chidRep, stkc, stkcUnit, testedConcUnit, mc3Param, mc4Param, mc5Param, mc6Param



Unnamed: 0,concMin,aeid,dtxsid,chnm,spid,m4id,chid,casn,bmad,respMax,...,actp,modelType,chidRep,stkc,stkcUnit,testedConcUnit,mc3Param,mc4Param,mc5Param,mc6Param
0,0.032,3032,DTXSID7020182,Bisphenol A,EPAPLT0125C03,7827405,20182,80-05-7,6.805902,17.663843,...,,3,1,20.0,mM,uM,"{'conc': [0.2, 200.0, 20.0, 0.8, 100.0, 100.0,...","{'pow_a': 0.5101984977599341, 'pow_p': 0.61816...","{'a': 0.07151845889523176, 'er': 1.91088397495...","{'flag': ['Bmd > ac50, indication of high base..."
1,0.00516,2386,DTXSID7020182,Bisphenol A,EPAPLT0023I15,7834467,20182,80-05-7,13.639922,-4.701041,...,,4,1,20.259199,mM,uM,"{'conc': [0.1, 4.0, 0.005, 1.0, 10.0, 0.4, 4.0...","{'pow_a': -17.209965937342183, 'pow_p': 0.3352...","{'a': -17.209965937342183, 'p': 0.335214455348...","{'flag': [], 'mc6MthdId': []}"
2,0.00516,2387,DTXSID7020182,Bisphenol A,EPAPLT0023I15,7836470,20182,80-05-7,4.387449,102.938136,...,,3,1,20.259199,mM,uM,"{'conc': [30.0, 0.05, 30.0, 4.0, 0.02, 100.0, ...","{'pow_a': 0.004177427805106235, 'pow_p': 2.152...","{'a': 0.5285747716449192, 'b': -7.599838254331...",{'flag': ['Active with only highest conc above...
3,0.00516,2390,DTXSID7020182,Bisphenol A,EPAPLT0023I15,7838473,20182,80-05-7,22.470985,33.330875,...,,4,1,20.259199,mM,uM,"{'conc': [0.1, 0.05, 1.0, 100.0, 100.0, 10.0, ...","{'pow_a': -13.366232331200406, 'pow_p': 0.3952...","{'a': -13.366232331200406, 'p': 0.395229650143...","{'flag': [], 'mc6MthdId': []}"
4,0.0129,3021,DTXSID7020182,Bisphenol A,TP0001299H02,7841330,20182,80-05-7,0.04774,0.092131,...,,4,1,100.0,mM,uM,"{'conc': [0.08, 0.01, 3.0, 3.0, 20.0, 100.0, 1...","{'pow_a': 0.02391757704817587, 'pow_p': 0.3000...","{'er': -3.5995529454590565, 'ga': 0.0087882202...",{'flag': ['No median responses above baseline ...
5,0.00516,2391,DTXSID7020182,Bisphenol A,EPAPLT0023I15,7840476,20182,80-05-7,2.143691,412.992959,...,,3,1,20.259199,mM,uM,"{'conc': [0.005, 10.0, 0.05, 100.0, 1.0, 1.0, ...","{'pow_a': 0.0030927523010532477, 'pow_p': 2.34...","{'a': 1.7060760525157652, 'b': 22.399672434507...","{'flag': ['Noisy data'], 'mc6MthdId': [10]}"
6,0.0129,3019,DTXSID7020182,Bisphenol A,TP0001299H02,7841229,20182,80-05-7,0.070361,0.142321,...,,4,1,100.0,mM,uM,"{'conc': [0.5, 0.5, 20.0, 20.0, 3.0, 100.0, 0....","{'pow_a': 0.016465337566990583, 'pow_p': 0.300...","{'er': -3.3158937129978634, 'ga': 0.1037301928...",{'flag': ['No median responses above baseline ...
7,0.0129,3020,DTXSID7020182,Bisphenol A,TP0001299H02,7841279,20182,80-05-7,5.369804,9.419667,...,,3,1,100.0,mM,uM,"{'conc': [0.08, 100.0, 20.0, 0.01, 100.0, 0.01...","{'pow_a': -1.4044791789916602, 'pow_p': 0.5779...","{'p': 7.974683673059571, 'er': 1.3400492599801...","{'flag': ['Bmd > ac50, indication of high base..."
8,0.0129,3022,DTXSID7020182,Bisphenol A,TP0001299H02,7841380,20182,80-05-7,7.197189,19.625031,...,,4,1,100.0,mM,uM,"{'conc': [20.0, 0.01, 0.5, 0.5, 0.01, 20.0, 0....","{'pow_a': -0.0006559877988666447, 'pow_p': 2.4...","{'a': -0.0006559877988666447, 'p': 2.425495588...",{'flag': ['Active with only highest conc above...
9,0.0129,3023,DTXSID7020182,Bisphenol A,TP0001299H02,7841429,20182,80-05-7,0.120938,0.133176,...,,4,1,100.0,mM,uM,"{'conc': [0.08, 0.01, 3.0, 0.5, 100.0, 20.0, 0...","{'pow_a': -0.02584123190894807, 'pow_p': 0.424...","{'a': -0.0018911327993540597, 'er': -2.3459978...","{'flag': ['Bmd > ac50, indication of high base..."


## Example 2: Filter Active Assays

Find assays where the chemical shows activity.

In [14]:
# TBD

## Example 3: Analyze Assay Targets

Look at what biological targets are being tested.

In [15]:
# Group by biological process
if 'intendedTargetFamily' in df.columns:
    target_counts = df['intendedTargetFamily'].value_counts()
    print("Top 10 Target Families:")
    for target, count in target_counts.head(10).items():
        print(f"  {target}: {count} assays")

# Count by assay source
if 'assaySource' in df.columns:
    print("\nAssays by Source:")
    print(df['assaySource'].value_counts())

## Example 4: Get AOP Information

Retrieve Adverse Outcome Pathway data to understand potential biological effects.

In [17]:
# TBD

## Example 5: Get Bioactivity Summary Statistics

Calculate summary statistics for bioactivity data.

In [19]:
# TBD

## Example 6: Batch Analysis

Compare bioactivity profiles across multiple chemicals.

In [22]:
# Search for multiple chemicals
chemical_names = ["Bisphenol A", "Bisphenol S", "Bisphenol F"]
chemicals = []

for name in chemical_names:
    results = searcher.search_by_exact_value(name)
    if results:
        dtxsid = results[0]['dtxsid']
        chemicals.append({'name': name, 'dtxsid': dtxsid})
        print(f"✓ {name}: {dtxsid}")

print(f"\nAnalyzing {len(chemicals)} chemicals...\n")

# Get bioactivity data for each
summary_data = []

for chem in chemicals:
    data = bioactivity.get_data_by_dtxsid_and_projection(chem['dtxsid'])
    df_chem = pd.DataFrame(data)
    
    # active_count = len(df_chem[df_chem['activityClassification'] == 'Active'])
    
    summary_data.append({
        'Chemical': chem['name'],
        'DTXSID': chem['dtxsid'],
        'Total Assays': len(df_chem),
        # 'Active Assays': active_count,
        # 'Active %': f"{active_count/len(df_chem)*100:.1f}%" if len(df_chem) > 0 else "N/A"
    })

summary_df = pd.DataFrame(summary_data)
display(summary_df)

✓ Bisphenol A: DTXSID7020182
✓ Bisphenol S: DTXSID3022409
✓ Bisphenol F: DTXSID9022445

Analyzing 3 chemicals...



Unnamed: 0,Chemical,DTXSID,Total Assays
0,Bisphenol A,DTXSID7020182,2798
1,Bisphenol S,DTXSID3022409,703
2,Bisphenol F,DTXSID9022445,465


## Example 7: Find Common Active Assays

Identify assays that are active across multiple related chemicals.

In [26]:
# Get active assays for each chemical
active_assays = {}

for chem in chemicals:
    data = bioactivity.get_data_by_dtxsid_and_projection(chem['dtxsid'])
    df_chem = pd.DataFrame(data)
    # active = df_chem[df_chem['activityClassification'] == 'Active']
    # active_assays[chem['name']] = set(active['assayComponentEndpointName'].tolist())

# Find common assays
if len(active_assays) >= 2:
    common_assays = set.intersection(*active_assays.values())
    print(f"Assays active in all {len(chemicals)} chemicals: {len(common_assays)}\n")
    
    if common_assays:
        print("Common active assays:")
        for assay in sorted(list(common_assays))[:20]:
            print(f"  - {assay}")

## Example 8: Complete Bioactivity Analysis

Comprehensive analysis combining bioactivity data and AOP information.

In [30]:
def analyze_bioactivity(chemical_name):
    """Complete bioactivity analysis for a chemical."""
    print(f"Analyzing: {chemical_name}")
    print("="*70)
    
    # 1. Search
    results = searcher.search_by_exact_value(chemical_name)
    if not results:
        print(f"Chemical not found: {chemical_name}")
        return
    
    dtxsid = results[0]['dtxsid']
    print(f"✓ Found: {results[0]['preferredName']} ({dtxsid})")
    print()
    
    # 2. Bioactivity Data
    data = bioactivity.get_data_by_dtxsid_and_projection(dtxsid)
    df = pd.DataFrame(data)
    print(f"Total Bioactivity Records: {len(df)}")
    
    # 3. Activity Classification
    # activity_counts = df['activityClassification'].value_counts()
    # TBD
# Analyze a chemical
analyze_bioactivity("Caffeine")

Analyzing: Caffeine
✓ Found: Caffeine (DTXSID0020232)

Total Bioactivity Records: 918
