# PubChem GHS Safety Data Links Examples

This notebook demonstrates how to use the `PubChemLink` class to check, through the CompTox Dashboard, whether PubChem GHS safety data is available for a chemical.

In [None]:
from pycomptox import PubChemLink
import pandas as pd

# Initialize the PubChemLink client. This `client` instance is reused by the
# single-chemical, batch, and chemical-search examples below.
client = PubChemLink()

## Example 1: Check PubChem Data for a Single Chemical

In [None]:
# Look up a single chemical (Bisphenol A) by its DTXSID.
dtxsid = "DTXSID7020182"
result = client.check_existence_by_dtxsid(dtxsid)

print(f"Chemical: {dtxsid}")

# Report the link when the service flags safety data, otherwise say so.
has_ghs_data = result['isSafetyData']
if not has_ghs_data:
    print("✗ No PubChem GHS data")
else:
    print("✓ PubChem GHS data available")
    print(f"  URL: {result['safetyUrl']}")

## Example 2: Batch PubChem Check

In [None]:
# DTXSIDs to look up with one batch call.
chemicals = [
    "DTXSID7020182",  # Bisphenol A
    "DTXSID2021315",  # Caffeine
    "DTXSID5020001",  # 1,2,3-Trichloropropane
    "DTXSID3020637",  # Formaldehyde
    "DTXSID6020139",  # Benzene
]
results = client.check_existence_by_dtxsid_batch(chemicals)

# Tabulate the responses; `has_pubchem` mirrors the `isSafetyData` column.
df = pd.DataFrame(results).assign(
    has_pubchem=lambda frame: frame['isSafetyData']
)

available_count = df['has_pubchem'].sum()
total_count = len(df)
print(f"PubChem GHS data available: {available_count}/{total_count}")
print("\nResults:")
print(df)

## Example 3: Integration with Chemical Search

In [None]:
from pycomptox import Chemical

# How many search hits to look up, and how much of each URL to show.
MAX_RESULTS = 5
URL_PREVIEW_CHARS = 80

# Search for phthalates
chem = Chemical()
search_results = chem.search_by_starting_value("phthalate")

# Slice once so the batch request and the report loop use the same hits.
top_hits = search_results[:MAX_RESULTS]

# Get PubChem data for the selected hits
dtxsids = [hit['dtxsid'] for hit in top_hits]
pubchem_data = client.check_existence_by_dtxsid_batch(dtxsids)

# Index the batch response by DTXSID once instead of rescanning the list for
# every hit. setdefault keeps the first record per DTXSID, matching what a
# first-match scan would return.
pubchem_by_dtxsid = {}
for entry in pubchem_data:
    pubchem_by_dtxsid.setdefault(entry['dtxsid'], entry)

# Combine results
for search_result in top_hits:
    dtxsid = search_result['dtxsid']
    pubchem_result = pubchem_by_dtxsid.get(dtxsid)

    print(f"\n{search_result['preferredName']} ({dtxsid})")
    if pubchem_result and pubchem_result['isSafetyData']:
        url = pubchem_result['safetyUrl']
        # Only add the ellipsis when the URL was actually shortened, so a
        # short URL is not shown as if it had been cut off.
        if len(url) > URL_PREVIEW_CHARS:
            url = url[:URL_PREVIEW_CHARS] + "..."
        print(f"  PubChem: {url}")
    else:
        print("  PubChem: No GHS data")

## Example 4: Compare Wikipedia vs PubChem Data

In [None]:
from pycomptox import WikiLink, PubChemLink

wiki = WikiLink()
pubchem = PubChemLink()

# Test chemicals
test_dtxsids = ["DTXSID7020182", "DTXSID2021315", "DTXSID5020001"]

# Get data from both sources
wiki_results = wiki.check_existence_by_dtxsid_batch(test_dtxsids)
pubchem_results = pubchem.check_existence_by_dtxsid_batch(test_dtxsids)


def _first_by_dtxsid(records):
    """Return a dict mapping each DTXSID to the first record that carries it."""
    indexed = {}
    for record in records:
        indexed.setdefault(record['dtxsid'], record)
    return indexed


wiki_by_dtxsid = _first_by_dtxsid(wiki_results)
pubchem_by_dtxsid = _first_by_dtxsid(pubchem_results)

# Create comparison DataFrame.
# Both flags are wrapped in bool() so a DTXSID missing from either response
# shows up as False rather than None, keeping both columns purely boolean.
comparison = [
    {
        'DTXSID': dtxsid,
        'Wikipedia': bool(wiki_by_dtxsid.get(dtxsid, {}).get('safetyUrl')),
        'PubChem': bool(pubchem_by_dtxsid.get(dtxsid, {}).get('isSafetyData')),
    }
    for dtxsid in test_dtxsids
]

df_comparison = pd.DataFrame(comparison)
print("GHS Data Availability Comparison:")
print(df_comparison)

## Example 5: Visualize Data Coverage

In [None]:
import matplotlib.pyplot as plt

# Label and colour for each value of the `has_pubchem` flag.
SLICE_STYLE = {
    False: ('No PubChem Data', 'lightcoral'),
    True: ('Has PubChem Data', 'lightblue'),
}

# value_counts() orders its result by frequency and leaves out values that
# never occur, so positional labels can end up on the wrong slice (or not
# match the number of slices at all). Fix the order explicitly, then drop
# empty categories so each remaining wedge gets its own label and colour.
coverage = df['has_pubchem'].value_counts().reindex([False, True], fill_value=0)
coverage = coverage[coverage > 0]

slice_labels = [SLICE_STYLE[bool(flag)][0] for flag in coverage.index]
slice_colors = [SLICE_STYLE[bool(flag)][1] for flag in coverage.index]

# Create pie chart
fig, ax = plt.subplots(figsize=(8, 6))
ax.pie(coverage.values, labels=slice_labels,
       autopct='%1.1f%%', startangle=90, colors=slice_colors)
ax.set_title('PubChem GHS Data Coverage')
plt.show()

## Example 6: Filter by Data Availability

In [None]:
# Keep only the rows of `df` whose `has_pubchem` flag equals True.
has_data_mask = df['has_pubchem'].eq(True)
with_pubchem = df.loc[has_data_mask]

print(f"Chemicals with PubChem GHS data: {len(with_pubchem)}")

# Walk the two columns in parallel: identifier first, then the URL preview.
for chem_id, url in zip(with_pubchem['dtxsid'], with_pubchem['safetyUrl']):
    print(f"  {chem_id}")
    print(f"    {url[:80]}...")

## Example 7: Complete Safety Profile with Multiple Sources

In [None]:
from pycomptox import ChemicalDetails, WikiLink, PubChemLink

dtxsid = "DTXSID7020182"

# Chemical details used for the profile header.
details = ChemicalDetails()
info = details.data_by_dtxsid(dtxsid)

# One client per safety-data source, each queried for the same DTXSID.
wiki = WikiLink()
pubchem = PubChemLink()
wiki_data = wiki.check_existence_by_dtxsid(dtxsid)
pubchem_data = pubchem.check_existence_by_dtxsid(dtxsid)

# Profile header framed by a 60-character rule.
banner = "=" * 60
print(banner)
print(f"Chemical Safety Profile: {info['preferredName']}")
print(banner)
print(f"DTXSID: {dtxsid}")

# Optional fields fall back to 'N/A' when absent from `info`.
for label, key in (("CASRN", 'casrn'), ("Formula", 'molFormula')):
    print(f"{label}: {info.get(key, 'N/A')}")

print("\nGHS Safety Data Sources:")

wiki_url = wiki_data.get('safetyUrl')
if not wiki_url:
    print("  ✗ Wikipedia: No data")
else:
    print(f"  ✓ Wikipedia: {wiki_url}")

if not pubchem_data['isSafetyData']:
    print("  ✗ PubChem: No data")
else:
    print(f"  ✓ PubChem: {pubchem_data['safetyUrl']}")

## Example 8: Export Data

In [None]:
# Output file names for the two exports.
all_links_csv = 'pubchem_links.csv'
available_links_csv = 'pubchem_links_available.csv'

# Full batch table (`df` from Example 2), written without the index column.
df.to_csv(all_links_csv, index=False)
print(f"✓ Exported to {all_links_csv}")

# Subset with PubChem data (`with_pubchem` from Example 6).
with_pubchem.to_csv(available_links_csv, index=False)
print(f"✓ Exported chemicals with PubChem data to {available_links_csv}")