In [None]:
from pathlib import Path

import pandas as pd
import plotly.graph_objects as go

# With USGS data

The USGS datasets can be downloaded from [mrdata.usgs.gov](https://mrdata.usgs.gov/) and viewed on this [global map](https://mrdata.usgs.gov/general/map-global.html).

## Major mineral deposits

In [None]:
# Load the USGS major mineral deposits table.
# Forward slashes are used so the relative path resolves on Windows, macOS and Linux alike
# (backslashes are only treated as path separators on Windows).
usgs_major = pd.read_csv('../data/USGS/Major mineral deposits of the world/ofr20051294/deposit.csv')

In [None]:
# Show the loaded major-deposits table (the cell's last expression is rendered as output)
usgs_major

In [None]:
# Group deposits from 'deposit.csv' by commodity and deposit type.
# For every group, report the number of rows and the sorted, comma-separated list of countries.
usgs_major_summary = (
    usgs_major
    .groupby(['commodity', 'dep_type'])
    .agg(
        # 'size' counts every row of the group, including rows with a missing country
        count=('country', 'size'),
        # dropna/astype(str) keep sorted() and join() from raising a TypeError when a
        # country entry is missing or is not text
        countries=('country', lambda s: ', '.join(sorted(s.dropna().astype(str).unique()))),
    )
    .reset_index()
)
usgs_major_summary

## Global Critical Minerals dataset

In [None]:
# Load the 'pp1802_critmin_pts' sheet of the USGS global critical minerals workbook
usgs_cm = pd.read_excel(r'../data/USGS/Global-critical-minerals-usgs.xlsx', sheet_name='pp1802_critmin_pts')

In [None]:
# Show the loaded critical-minerals table (the cell's last expression is rendered as output)
usgs_cm

In [None]:
# Group critical minerals from 'Global-critical-minerals-usgs.xlsx' by CRITICAL_M and DEPOSIT_TY.
# For every group, report the number of rows and the sorted, comma-separated list of locations.
usgs_cm_summary = (
    usgs_cm
    .groupby(['CRITICAL_M', 'DEPOSIT_TY'])
    .agg(
        # 'size' counts every row of the group, including rows with a missing LOCATION
        count=('LOCATION', 'size'),
        # dropna/astype(str) keep sorted() and join() from raising a TypeError when a
        # LOCATION entry is missing or is not text
        countries=('LOCATION', lambda s: ', '.join(sorted(s.dropna().astype(str).unique()))),
    )
    .reset_index()
)
usgs_cm_summary

In [None]:
# Stack both USGS summaries into one frame with a shared column layout:
# Commodity, Deposit Type, Count, Countries.
# Each tuple below names a summary frame together with its commodity and deposit type columns.
df_usgs = pd.concat(
    [
        summary.rename(columns={
            commodity_col: 'Commodity',
            deposit_col: 'Deposit Type',
            'count': 'Count',
            'countries': 'Countries',
        })[['Commodity', 'Deposit Type', 'Count', 'Countries']]
        for summary, commodity_col, deposit_col in (
            (usgs_major_summary, 'commodity', 'dep_type'),
            (usgs_cm_summary, 'CRITICAL_M', 'DEPOSIT_TY'),
        )
    ],
    ignore_index=True,
)

In [None]:
# Write the combined USGS summary to a CSV file (relative path, index column omitted)
df_usgs.to_csv(r'usgs_deposit_type_count_per_country.csv', index=False)

In [None]:
# Pathway lookup for the USGS data, keyed by (commodity, deposit type).
# The table is written grouped by commodity and flattened into tuple keys below.
usgs_pathway_mapping = {
    (commodity, deposit_type): pathway
    for commodity, pathway_by_deposit_type in {
        'Aluminum': {
            'Hydrothermal': 'Bauxite Pathway',
            'Igneous': 'High-Alumina Clays Pathway',
            'Metamorphic': 'High-Alumina Clays Pathway',
            'Sedimentary': 'High-Alumina Clays Pathway',
            'Surficial': 'Bauxite Pathway',
        },
        'Copper': {
            'Hydrothermal': 'Sulfide Ores (Pyrometallurgy)',
            'Igneous': 'Sulfide Ores (Pyrometallurgy)',
            'Sedimentary': 'Oxide Ores (Hydrometallurgy - SX-EW)',
        },
        'Gold': {
            'Hydrothermal': 'Free-Milling Ores',
            'Igneous': 'Refractory Ores',
            'Metamorphic': 'Refractory Ores',
            'Sedimentary': 'Refractory Ores',
        },
        'Nickel': {
            'Igneous': 'Sulfide Ores (Pyrometallurgy)',
            'Surficial': 'Laterite Ores (Hydrometallurgy - HPAL)',
        },
        'Titanium': {
            'Surficial': 'Rutile Pathway (Chloride Process)',
            'Igneous': 'Ilmenite Pathway',
        },
        'Uranium': {
            'Sedimentary': 'Sandstone Deposits (ISR)',
            'Igneous': 'Hard Rock Deposits',
        },
    }.items()
    for deposit_type, pathway in pathway_by_deposit_type.items()
}


def assign_usgs_pathway(row):
    """Return the mineral pathway for one row of the USGS summary.

    Parameters
    ----------
    row : mapping
        Anything indexable by 'Commodity' and 'Deposit Type'
        (for example a DataFrame row passed by ``apply(..., axis=1)``).

    Returns
    -------
    str
        The entry of ``usgs_pathway_mapping`` for the (commodity, deposit type)
        pair, or 'Unknown Pathway' when the pair is not in the table.
    """
    return usgs_pathway_mapping.get(
        (row['Commodity'], row['Deposit Type']),
        'Unknown Pathway',
    )

# Look up the pathway for every row of the combined USGS summary (`df_usgs`).
# `assign` returns a new frame, so `df_usgs` itself is left unchanged and this cell
# can be re-run safely. `usgs_data` was previously used here without ever being defined.
usgs_data = df_usgs.assign(**{'Mineral Pathway': df_usgs.apply(assign_usgs_pathway, axis=1)})

# Display the updated dataset for review (rendered as the cell output)
usgs_data


# With CMMI data

The CMMI data can be viewed and downloaded from the [Geoscience Australia portal](https://portal.ga.gov.au/persona/cmmi).

In [None]:
# Load the CMMI critical mineral deposits geochemistry table
cmmi = pd.read_csv(r'../data/CMMI/CriticalMineralDepositsGeochemistry.csv')

In [None]:
# Show the loaded CMMI table (the cell's last expression is rendered as output)
cmmi

In [None]:
# Keep only the columns needed for the summary and derive a country code column.
# `assign` builds a new frame instead of writing into a column selection of `cmmi`,
# which avoids pandas' SettingWithCopyWarning.
cmmi_relevant_columns = (
    cmmi[['DEPOSIT_GROUP', 'DEPOSIT_TYPE', 'PRIMARY_COMMODITIES', 'SECONDARY_COMMODITIES', 'DEPOSIT_UID']]
    # Country code = first three characters of DEPOSIT_UID (e.g. AUS)
    .assign(Country=lambda d: d['DEPOSIT_UID'].str[:3])
)

# Group by DEPOSIT_GROUP, DEPOSIT_TYPE and PRIMARY_COMMODITIES.
# For every group, report the number of rows and the sorted, comma-separated country codes.
cmmi_summary = (
    cmmi_relevant_columns
    .groupby(['DEPOSIT_GROUP', 'DEPOSIT_TYPE', 'PRIMARY_COMMODITIES'])
    .agg(
        # 'size' counts every row of the group, including rows without a country code
        count=('Country', 'size'),
        # a missing DEPOSIT_UID yields a missing country code; dropna/astype(str) keep
        # sorted() and join() from raising a TypeError in that case
        countries=('Country', lambda s: ', '.join(sorted(s.dropna().astype(str).unique()))),
    )
    .reset_index()
)
cmmi_summary

In [None]:
# Write the CMMI summary to a CSV file (relative path, index column omitted)
# NOTE(review): the file name starts with "cmms" while the dataset is called CMMI — confirm this is intended
cmmi_summary.to_csv(r'cmms_deposit_type_count_per_country.csv', index=False)

In [None]:
# Pathway lookup for the CMMI data, keyed by (DEPOSIT_GROUP, PRIMARY_COMMODITIES).
# The table is written grouped by deposit group and flattened into tuple keys below.
pathway_mapping = {
    (deposit_group, commodity): pathway
    for deposit_group, pathway_by_commodity in {
        'Epithermal': {
            'Ag': 'Free-Milling Ores (Gravity Separation, Cyanidation)',
        },
        'Porphyry': {
            'Cu': 'Sulfide Ores (Pyrometallurgy)',
            'Au': 'Sulfide Ores (Pyrometallurgy)',
        },
        'Orogenic': {
            'Ag': 'Free-Milling Ores (Gravity Separation, Cyanidation)',
        },
        'Laterite': {
            'Ni': 'Laterite Ores (Hydrometallurgy - HPAL)',
            'Co': 'Laterite Ores (Hydrometallurgy - HPAL)',
        },
        'Magmatic': {
            'Ni': 'Sulfide Ores (Pyrometallurgy)',
            'Cu': 'Sulfide Ores (Pyrometallurgy)',
        },
        'Sedimentary': {
            'U': 'Sandstone Deposits (ISR)',
            'Cu': 'Oxide Ores (Hydrometallurgy - SX-EW)',
            'Co': 'Oxide Ores (Hydrometallurgy - SX-EW)',
        },
        'IOCG': {
            'Cu': 'Sulfide Ores (Pyrometallurgy)',
            'Au': 'Sulfide Ores (Pyrometallurgy)',
        },
        'Placer': {
            'Ti': 'Rutile Pathway (Chloride Process)',
            'Zr': 'Rutile Pathway (Chloride Process)',
        },
    }.items()
    for commodity, pathway in pathway_by_commodity.items()
}


def assign_mineral_pathway(row):
    """Return the mineral pathway for one row of the CMMI summary.

    Parameters
    ----------
    row : mapping
        Anything indexable by 'DEPOSIT_GROUP' and 'PRIMARY_COMMODITIES'
        (for example a DataFrame row passed by ``apply(..., axis=1)``).

    Returns
    -------
    str
        The entry of ``pathway_mapping`` for the (deposit group, primary
        commodities) pair, or 'Unknown Pathway' when the pair is not in the table.
    """
    return pathway_mapping.get(
        (row['DEPOSIT_GROUP'], row['PRIMARY_COMMODITIES']),
        'Unknown Pathway',
    )

# Add a 'Mineral Pathway' column by looking up each summary row in pathway_mapping
# (rows whose key is not in the mapping get 'Unknown Pathway')
cmmi_summary['Mineral Pathway'] = cmmi_summary.apply(assign_mineral_pathway, axis=1)


# Visualisation

In [None]:
# Build the Sankey input from the USGS critical-minerals table (`usgs_cm`).
# Deposit type names are truncated to the first 10 characters for readability.
# `assign` returns a new frame, so `usgs_cm` keeps its full deposit type names and
# re-running this cell always starts from the same input.
data = usgs_cm.assign(DEPOSIT_TY=usgs_cm['DEPOSIT_TY'].str[:10])

# Node labels: countries first, then minerals, then the truncated deposit types
countries = data['LOCATION'].unique()
minerals = data['CRITICAL_M'].unique()
deposit_types = data['DEPOSIT_TY'].unique()

nodes = list(countries) + list(minerals) + list(deposit_types)
# Map every label to its position in `nodes`.
# NOTE(review): a label that occurs in more than one group resolves to its last position.
node_indices = {name: i for i, name in enumerate(nodes)}

# Parallel lists describing the flows: links['source'][k] -> links['target'][k]
links = {
    'source': [],
    'target': [],
    'value': []
}

# Each table row contributes two unit-weight links:
# country -> mineral and mineral -> deposit type
for location, mineral, deposit_type in zip(data['LOCATION'], data['CRITICAL_M'], data['DEPOSIT_TY']):
    country_idx = node_indices[location]
    mineral_idx = node_indices[mineral]
    deposit_type_idx = node_indices[deposit_type]

    # Country to Mineral
    links['source'].append(country_idx)
    links['target'].append(mineral_idx)
    links['value'].append(1)

    # Mineral to Deposit Type
    links['source'].append(mineral_idx)
    links['target'].append(deposit_type_idx)
    links['value'].append(1)

In [None]:
# Sankey diagram: country -> critical mineral -> deposit type
fig = go.Figure(data=[go.Sankey(
    node=dict(
        pad=20,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=nodes,
    ),
    link=dict(
        source=links['source'],
        target=links['target'],
        value=links['value'],
        color="lightgrey",  # light grey flows keep the node labels readable
    ),
)])

# Large canvas so the node labels have room
fig.update_layout(
    title_text="Country, Critical Minerals, and Deposit Types from USGS",
    font_size=14,
    width=1600,
    height=1000,
)

# Save the figure as a standalone HTML file.
# The output folder is created first because writing fails when it does not exist.
html_path_updated = 'results/sankey_deposits_world_usgs.html'
Path(html_path_updated).parent.mkdir(parents=True, exist_ok=True)
fig.write_html(html_path_updated)

html_path_updated

## Canada

In [None]:
# Filter the USGS critical-minerals table (`usgs_cm`) for Canada and shorten the
# deposit type names to 10 characters, the same label length as the world diagram.
# `assign` returns a new frame, so `usgs_cm` itself is not modified.
canada_data = (
    usgs_cm[usgs_cm['LOCATION'] == "Canada"]
    .assign(DEPOSIT_TY=lambda d: d['DEPOSIT_TY'].str[:10])
)

# Node labels: minerals first, then the truncated deposit types
minerals_canada = canada_data['CRITICAL_M'].unique()
deposit_types_canada = canada_data['DEPOSIT_TY'].unique()

nodes_canada = list(minerals_canada) + list(deposit_types_canada)
# Map every label to its position in `nodes_canada`.
# NOTE(review): a label that is both a mineral and a deposit type resolves to its last position.
node_indices_canada = {name: i for i, name in enumerate(nodes_canada)}

# Parallel lists describing the flows: source[k] -> target[k] with weight value[k]
links_canada = {
    'source': [],
    'target': [],
    'value': []
}

# Each Canadian row contributes one unit-weight link: mineral -> deposit type
for mineral, deposit_type in zip(canada_data['CRITICAL_M'], canada_data['DEPOSIT_TY']):
    mineral_idx = node_indices_canada[mineral]
    deposit_type_idx = node_indices_canada[deposit_type]

    links_canada['source'].append(mineral_idx)
    links_canada['target'].append(deposit_type_idx)
    links_canada['value'].append(1)

# Canada-specific Sankey diagram: critical mineral -> deposit type
fig_canada = go.Figure(data=[go.Sankey(
    node=dict(
        pad=20,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=nodes_canada,
    ),
    link=dict(
        source=links_canada['source'],
        target=links_canada['target'],
        value=links_canada['value'],
        color="lightgrey",
    ),
)])

# Large canvas and font so the node labels stay legible
fig_canada.update_layout(
    title_text="Canada's Critical Minerals and Deposit Types (USGS)",
    font_size=18,
    width=1600,
    height=1000,
)

# Save the figure as a standalone HTML file.
# The output folder is created first because writing fails when it does not exist.
html_path_canada = 'results/sankey_deposits_canada_usgs.html'
Path(html_path_canada).parent.mkdir(parents=True, exist_ok=True)
fig_canada.write_html(html_path_canada)

html_path_canada