# Wikipedia Links Examples

This notebook demonstrates how to use the `WikiLink` class to check the availability of Wikipedia GHS (Globally Harmonized System) safety data from the CompTox Dashboard.

In [None]:
from pycomptox import WikiLink
import pandas as pd

# Initialize the WikiLink client once; the examples below reuse this
# same `wiki` instance for every single and batch lookup.
wiki = WikiLink()

## Example 1: Check Wikipedia Data for a Single Chemical

In [None]:
# Check for Bisphenol A
dtxsid = "DTXSID7020182"
result = wiki.check_existence_by_dtxsid(dtxsid)

print(f"Chemical: {dtxsid}")

# A falsy `safetyUrl` (e.g. None or an empty string) is reported as
# "no data"; any other value is printed as the link.
safety_url = result['safetyUrl']
if safety_url:
    print("✓ Wikipedia GHS data available")
    print(f"  URL: {safety_url}")
else:
    print("✗ No Wikipedia GHS data")

## Example 2: Batch Wikipedia Check

In [None]:
# DTXSIDs to look up in one batch request
chemicals = [
    "DTXSID7020182",  # Bisphenol A
    "DTXSID2021315",  # Caffeine
    "DTXSID5020001",  # 1,2,3-Trichloropropane
    "DTXSID3020637",  # Formaldehyde
    "DTXSID6020139",  # Benzene
]

results = wiki.check_existence_by_dtxsid_batch(chemicals)

# One row per returned record, plus a boolean `has_wiki` flag that is
# True whenever the record's `safetyUrl` is truthy.
df = pd.DataFrame(results).assign(
    has_wiki=lambda frame: frame['safetyUrl'].map(bool)
)

print(f"Wikipedia GHS data available: {df['has_wiki'].sum()}/{len(df)}")
print("\nResults:")
print(df)

## Example 3: Integration with Chemical Search

In [None]:
from pycomptox import Chemical

# Search for phthalates
chem = Chemical()
search_results = chem.search_by_starting_value("phthalate")

MAX_RESULTS = 5      # number of search hits to look up
MAX_URL_CHARS = 80   # longest URL that is printed without truncation

# Slice once so the lookup and the report use the same hits
top_results = search_results[:MAX_RESULTS]

# Get Wikipedia data for the selected hits; skip the request entirely
# when the search returned nothing.
dtxsids = [r['dtxsid'] for r in top_results]
wiki_data = wiki.check_existence_by_dtxsid_batch(dtxsids) if dtxsids else []

# Index the Wikipedia records by DTXSID so each hit is resolved with a
# dict lookup instead of a scan; `setdefault` keeps the first record
# seen for a DTXSID.
wiki_by_dtxsid = {}
for w in wiki_data:
    wiki_by_dtxsid.setdefault(w['dtxsid'], w)

# Combine results
for search_result in top_results:
    dtxsid = search_result['dtxsid']
    wiki_result = wiki_by_dtxsid.get(dtxsid)

    print(f"\n{search_result['preferredName']} ({dtxsid})")
    safety_url = wiki_result['safetyUrl'] if wiki_result else None
    if safety_url:
        # Only add the ellipsis when the URL was actually cut short
        if len(safety_url) > MAX_URL_CHARS:
            safety_url = safety_url[:MAX_URL_CHARS] + "..."
        print(f"  Wikipedia: {safety_url}")
    else:
        print("  Wikipedia: No GHS data")

## Example 4: Filter Chemicals with Wikipedia Data

In [None]:
# Get chemicals with Wikipedia data. `has_wiki` is already boolean, so
# it is used directly as the row mask (no `== True` comparison).
with_wiki = df[df['has_wiki']]

MAX_URL_CHARS = 80  # longest URL that is printed without truncation

print(f"Chemicals with Wikipedia GHS data: {len(with_wiki)}")
# Iterate the two needed columns directly rather than building a Series
# per row with iterrows().
for chem_id, safety_url in zip(with_wiki['dtxsid'], with_wiki['safetyUrl']):
    print(f"  {chem_id}")
    # Only add the ellipsis when the URL was actually cut short
    if len(safety_url) > MAX_URL_CHARS:
        safety_url = safety_url[:MAX_URL_CHARS] + "..."
    print(f"    {safety_url}")

## Example 5: Visualize Wikipedia Coverage

In [None]:
import matplotlib.pyplot as plt

# Count chemicals with and without Wikipedia data
coverage = df['has_wiki'].value_counts()

# value_counts() orders its result by frequency and leaves out
# categories that never occur, so the wedge order is not fixed. Derive
# each wedge's label and colour from the index value it belongs to
# instead of assuming "False first, True second".
wedge_style = {
    False: ('No Wikipedia Data', 'lightcoral'),
    True: ('Has Wikipedia Data', 'lightgreen'),
}
wedge_labels = [wedge_style[bool(flag)][0] for flag in coverage.index]
wedge_colors = [wedge_style[bool(flag)][1] for flag in coverage.index]

# Create pie chart
fig, ax = plt.subplots(figsize=(8, 6))
ax.pie(coverage.values, labels=wedge_labels,
       autopct='%1.1f%%', startangle=90, colors=wedge_colors)
ax.set_title('Wikipedia GHS Data Coverage')
plt.show()

## Example 6: Complete Safety Profile

In [None]:
from pycomptox import ChemicalDetails

dtxsid = "DTXSID7020182"

# Get detailed info
details = ChemicalDetails()
info = details.data_by_dtxsid(dtxsid)

# Get Wikipedia link
wiki_data = wiki.check_existence_by_dtxsid(dtxsid)

# Display complete profile. Every field is read with .get(..., 'N/A')
# so a missing field prints a placeholder instead of raising KeyError.
print("=" * 60)
print(f"Chemical Profile: {info.get('preferredName', 'N/A')}")
print("=" * 60)
print(f"DTXSID: {dtxsid}")
print(f"CASRN: {info.get('casrn', 'N/A')}")
print(f"Formula: {info.get('molFormula', 'N/A')}")

# A falsy `safetyUrl` is reported as "no data available"
safety_url = wiki_data['safetyUrl']
if safety_url:
    print("\nWikipedia GHS Safety Data:")
    print(f"  {safety_url}")
else:
    print("\nNo Wikipedia GHS safety data available")

## Example 7: Compare Chemical Classes

In [None]:
# Chemical classes to compare, each mapped to its member DTXSIDs
chemical_classes = {
    'Bisphenols': ['DTXSID7020182', 'DTXSID4020216', 'DTXSID1020265'],
    'Phthalates': ['DTXSID5020607', 'DTXSID6021232', 'DTXSID2021781'],
}

# One summary record per class that was processed without error
coverage_data = []

for class_name, class_dtxsids in chemical_classes.items():
    try:
        class_results = wiki.check_existence_by_dtxsid_batch(class_dtxsids)
        n_total = len(class_results)
        # Count the records whose `safetyUrl` is truthy
        n_with_wiki = sum(bool(entry['safetyUrl']) for entry in class_results)
        pct_covered = (n_with_wiki / n_total) * 100
    except Exception as e:
        # Report the failure and continue with the next class
        print(f"Error processing {class_name}: {e}")
    else:
        coverage_data.append({
            'Class': class_name,
            'Total': n_total,
            'With Wikipedia': n_with_wiki,
            'Coverage %': pct_covered,
        })

# Display results
coverage_df = pd.DataFrame(coverage_data)
print("\nWikipedia GHS Data Coverage by Chemical Class:")
print(coverage_df)

## Example 8: Export Wikipedia Links

In [None]:
# Each entry: (frame to write, destination CSV, confirmation message).
# The full table is written first, then only the chemicals that have
# Wikipedia data.
exports = [
    (df, 'wikipedia_links.csv',
     "✓ Exported to wikipedia_links.csv"),
    (with_wiki, 'wikipedia_links_available.csv',
     "✓ Exported chemicals with Wikipedia data to wikipedia_links_available.csv"),
]

for frame, csv_path, message in exports:
    frame.to_csv(csv_path, index=False)
    print(message)