# Metals Ontology API Examples

This notebook demonstrates all API functions from the metals module, following Section 5 of METALS_ONTOLOGY_PLAN.md.

In [None]:
# Import required modules
import sys
import os
sys.path.insert(0, os.path.abspath('..'))

from entityidentity import (
    metal_identifier,
    match_metal,
    list_metals,
    load_metals,
    extract_metals_from_text,
    extract_metal_pairs
)
import pandas as pd

## 1. Load Metals Database

The `load_metals()` function loads the compiled Parquet database and caches it.

In [None]:
# Read the metals table and summarise its size and schema
metals_df = load_metals()
row_count = len(metals_df)
print(f"Loaded {row_count} metals")
column_list = ', '.join(metals_df.columns)
print(f"\nColumns: {column_list}")
print("\nFirst 5 metals:")
# Bare last expression so the notebook renders the preview as a rich table
preview_columns = ['name', 'symbol', 'category_bucket', 'cluster_id']
metals_df[preview_columns].head()

## 2. Metal Identifier - Core Resolution Function

The `metal_identifier()` function resolves metal names to their canonical form.

In [None]:
# Example 1: look a metal up by its chemical symbol
result = metal_identifier("Pt")
if result:
    print("Resolved 'Pt':")
    # (label, field) pairs are printed in order; each field is read with
    # plain indexing, so a missing key still raises at the same point.
    for label, field in (
        ("Name", "name"),
        ("Symbol", "symbol"),
        ("Category", "category_bucket"),
        ("Default unit", "default_unit"),
    ):
        print(f"  {label}: {result[field]}")

In [None]:
# Example 2: pass a category hint alongside the query
result = metal_identifier("chrome", category="ferroalloy")
if result:
    print("Resolved 'chrome' with ferroalloy hint:")
    print(f"  Name: {result['name']}")
    # Optional fields fall back to 'N/A' when absent
    for label, field in (("Code", "code"), ("Default basis", "default_basis")):
        print(f"  {label}: {result.get(field, 'N/A')}")

In [None]:
# Example 3: resolve a trade specification string
apt = metal_identifier("APT 88.5%")
if apt:
    print("Resolved 'APT 88.5%':")
    print(f"  Name: {apt['name']}")
    # Optional fields fall back to 'N/A' when absent
    for label, field in (
        ("Formula", "formula"),
        ("Code", "code"),
        ("Default basis", "default_basis"),
    ):
        print(f"  {label}: {apt.get(field, 'N/A')}")

In [None]:
# Example 4: resolve a chemical form by name
li_carb = metal_identifier("lithium carbonate")
if li_carb:
    print("Resolved 'lithium carbonate':")
    print(f"  Name: {li_carb['name']}")
    formula = li_carb.get('formula', 'N/A')  # optional field
    print(f"  Formula: {formula}")
    print(f"  Category: {li_carb['category_bucket']}")
    cluster = li_carb.get('cluster_id', 'N/A')  # optional field
    print(f"  Cluster: {cluster}")

In [None]:
# Example 5: the same unknown query at two match thresholds
result = metal_identifier("unobtainium", threshold=95)
print(f"'unobtainium' with threshold 95: {result}")

# Retry with a lower threshold, which might find something
result = metal_identifier("unobtainium", threshold=50)
# Show only the matched name, or None when nothing was resolved
matched_name = result.get('name') if result else None
print(f"'unobtainium' with threshold 50: {matched_name}")

## 3. Match Metal - Top-K Candidates

The `match_metal()` function returns top-K candidates with scores, useful for review UIs.

In [None]:
# Ask for the five best candidates for "tungsten"
candidates = match_metal("tungsten", k=5)

print("Top 5 matches for 'tungsten':")
for rank, candidate in enumerate(candidates, start=1):
    print(f"  {rank}. {candidate['name']} (score: {candidate.get('score', 'N/A')})")
    # Only show a code line when the candidate has a non-empty code
    code = candidate.get('code')
    if code:
        print(f"     Code: {code}")

In [None]:
# Ask for the five best candidates for "lithium"
candidates = match_metal("lithium", k=5)

print("Top 5 matches for 'lithium':")
for rank, candidate in enumerate(candidates, start=1):
    print(f"  {rank}. {candidate['name']}")
    # Only show a formula line when the candidate has a non-empty formula
    formula = candidate.get('formula')
    if formula:
        print(f"     Formula: {formula}")

## 4. List Metals - Filter by Category or Cluster

The `list_metals()` function returns a DataFrame of metals filtered by category or supply chain cluster.

In [None]:
# Fetch every metal in the "pgm" category and print a three-column summary
pgm_metals = list_metals(category="pgm")
print("PGM Complex metals:")
summary_columns = ['name', 'symbol', 'default_unit']
pgm_summary = pgm_metals[summary_columns]
print(pgm_summary.to_string())

In [None]:
# Fetch the metals that belong to the porphyry copper cluster
copper_chain = list_metals(cluster="porphyry_copper_chain")
print("Porphyry copper chain metals:")
for _, row in copper_chain.iterrows():
    # 'notes' is optional; fall back to an empty string when it is absent
    notes = row.get('notes', '')
    print(f"  - {row['name']}: {notes}")

In [None]:
# List all battery metals, split into lithium forms and everything else
battery_metals = list_metals(category="battery")
print(f"Battery metals ({len(battery_metals)} total):")

# Build the name mask once and reuse it for both groups. na=False treats a
# missing name as "not lithium", so no NaN can end up in the mask; a NaN there
# can make the boolean indexing and the ~ negation below raise.
is_lithium = battery_metals['name'].str.contains('Lithium', case=False, na=False)

print("\nLithium forms:")
li_metals = battery_metals[is_lithium]
for _, metal in li_metals.iterrows():
    # 'formula' is optional; fall back to 'N/A' when the column is absent
    print(f"  - {metal['name']}: {metal.get('formula', 'N/A')}")

print("\nOther battery metals:")
other = battery_metals[~is_lithium]
for _, metal in other.iterrows():
    print(f"  - {metal['name']}")

In [None]:
# Tally how many metals each category contains
categories = ["precious", "pgm", "base", "battery", "ree", "ferroalloy", "specialty"]

print("Metal counts by category:")
for category_name in categories:
    member_count = len(list_metals(category=category_name))
    # Pad the label to 12 characters and the count to 3 so the rows line up
    print(f"  {category_name:12} : {member_count:3} metals")

## 5. Extract Metals from Text

The `extract_metals_from_text()` function identifies metal references in unstructured text.

In [None]:
# Sample passage mixing symbols, trade specs and chemical forms
text = """
The Pt/Pd ratio in automotive catalysts has shifted due to price changes.
APT 88.5% is trading at $320/mtu while lithium carbonate demand grows.
The mine produces copper concentrate with Au and Ag credits.
Battery-grade cobalt sulfate and nickel sulfate are critical for EVs.
"""

# Pull every metal reference out of the passage
metals_found = extract_metals_from_text(text)

print("Metals found in text:")
for mention in metals_found:
    # The first two entries of 'span' are used as slice bounds into `text`
    span = mention['span']
    matched_text = text[span[0]:span[1]]
    print(f"  '{matched_text}' -> {mention['query']} (type: {mention['hint']})")
    # The category line is printed only when the extractor supplied that key
    if 'category' in mention:
        print(f"    Category: {mention['category']}")

In [None]:
# Resolve each extracted reference (metals_found comes from the previous cell)
print("\nResolving extracted metals to canonical forms:")
print("-" * 50)

for mention in metals_found:
    query = mention['query']
    # Pass the extractor's category along as a hint; None when it gave none
    resolved = metal_identifier(query, category=mention.get('category'))
    if not resolved:
        # References that do not resolve are skipped without output
        continue
    print(f"'{query}' -> {resolved['name']} ({resolved.get('metal_key', 'N/A')})")
    formula = resolved.get('formula')
    if formula:
        print(f"  Formula: {formula}")
    basis = resolved.get('default_basis')
    if basis:
        print(f"  Basis: {basis}")

In [None]:
# Find metal pairings written with "/" or "-" separators
pair_text = "The Pt/Pd ratio, Ni-Co laterites, and Cu/Au/Ag deposits are important."
pairs = extract_metal_pairs(pair_text)

print("Metal pairs found:")
for entry in pairs:
    # Only the first two members of each entry are shown
    first, second = entry[0], entry[1]
    print(f"  {first}/{second}")

## 6. Supply Chain Analysis

Analyze metal relationships through supply chain clusters.

In [None]:
# Compare the membership of several supply chain clusters
clusters = ["pgm_complex", "porphyry_copper_chain", "lead_zinc_chain", "nickel_cobalt_chain"]

print("Supply chain cluster analysis:")
print("=" * 50)

for cluster_id in clusters:
    members = list_metals(cluster=cluster_id)
    if members.empty:
        # Nothing is printed for a cluster with no metals
        continue
    print(f"\n{cluster_id}:")
    # The first two rows are labelled primary/major, the remainder by-products
    primary_names = members.head(2)['name'].tolist()
    print(f"  Primary/major: {', '.join(primary_names)}")
    if len(members) > 2:
        byproduct_names = members.iloc[2:]['name'].tolist()
        print(f"  By-products: {', '.join(byproduct_names)}")

## 7. Commercial Specifications

Examine commercial trading specifications and units.

In [None]:
# Look up unit and basis for a handful of traded forms
metal_forms = ["APT", "ferrochrome", "lithium carbonate", "cobalt sulfate", "platinum"]

print("Commercial pricing basis:")
print("-" * 50)

for form_name in metal_forms:
    resolved = metal_identifier(form_name)
    if not resolved:
        # Forms that do not resolve are skipped without output
        continue
    print(f"{resolved['name']}:")
    for label, field in (("Unit", "default_unit"), ("Basis", "default_basis")):
        print(f"  {label}: {resolved.get(field, 'N/A')}")
    # The PRA line appears only when a non-empty hint is present
    pra_hint = resolved.get('pra_hint')
    if pra_hint:
        print(f"  PRA: {pra_hint}")

## 8. Data Sources Documentation

Show the authoritative sources used for metal data (Section 10 of METALS_ONTOLOGY_PLAN.md).

In [None]:
# Static summary of the source hierarchy, printed verbatim
sources_info = """
DATA SOURCE PRIORITY (Section 10 of METALS_ONTOLOGY_PLAN.md):

1. IUPAC (International Union of Pure and Applied Chemistry)
   - Element names and symbols
   - Atomic properties
   - Source priority: 1

2. USGS (United States Geological Survey)
   - Supply chain relationships
   - By-product associations
   - Deposit models
   - Source priority: 2

3. WCO-HS (World Customs Organization - Harmonized System)
   - HS 2022 classification codes
   - Trade nomenclature
   - Source priority: 3

4. Fastmarkets (formerly Metal Bulletin)
   - PRA price specifications
   - Market standard units/basis
   - Commercial grades
   - Source priority: 4
"""

print(sources_info)

# Print the 'sources' field recorded for a few resolved metals
sample_metals = ['platinum', 'APT', 'lithium carbonate']
print("\nSource attributions in data:")
for sample_name in sample_metals:
    resolved = metal_identifier(sample_name)
    # Skip anything that did not resolve or has no 'sources' entry
    if not resolved or 'sources' not in resolved:
        continue
    print(f"  {resolved['name']}: {resolved.get('sources', 'N/A')}")