In [1]:
import pandas as pd 
import plotly.graph_objects as go

# With USGS data

They can be downloaded from [here](https://mrdata.usgs.gov/) and seen on this [map](https://mrdata.usgs.gov/general/map-global.html)

## Major mineral deposits

In [13]:
usgs_major = pd.read_csv(r'..\data\USGS\Major mineral deposits of the world\ofr20051294\deposit.csv')

In [14]:
usgs_major

Unnamed: 0,gid,dep_name,country,state,latitude,longitude,commodity,dep_type,type_detail,model_code,model_name,metallic,citation
0,1,Obatu-Shela,Afghanistan,,32.00000,66.28330,Aluminum,Surficial,,,,,"Orris and Bliss, 2002"
1,2,Hagigak,Afghanistan,,34.67780,68.06250,Barite,Hydrothermal,,,,,"Orris and Bliss, 2002"
2,3,Darrahe-Nur,Afghanistan,,34.66110,70.54170,"Beryllium-niobium,tin",Igneous,,,,,"Orris and Bliss, 2002"
3,4,Loghar,Afghanistan,,34.16700,68.03300,Chromium,Igneous,,,,,"Orris and Bliss, 2002"
4,5,Aynak,Afghanistan,,34.26600,69.30100,Copper,Sedimentary,,,,,"ESCAP, 1996"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3163,3164,Dorowa,Zimbabwe,,-19.03330,31.75000,"Phosphate, Iron",Igneous,,,,,
3164,3165,Shawa,Zimbabwe,,-19.18330,31.71670,"Phosphate, Vermiculite",Igneous,,,,,
3165,3166,Benson Pegmatites,Zimbabwe,,-17.02000,32.26670,Tantalum,Igneous,,,,,
3166,3167,Kamativi,Zimbabwe,,-18.33333,27.06667,"Tin, Tantalum, Tungsten, Columbium(Niobium)",Igneous,,,,,


In [19]:
# Grouping deposits by commodity and deposit type from 'deposit.csv'
usgs_major_summary = usgs_major.groupby(['commodity', 'dep_type']).agg(count=('country', 'size'), countries=('country', lambda x: ', '.join(sorted(x.unique())))).reset_index()
usgs_major_summary

Unnamed: 0,commodity,dep_type,count,countries
0,Aluminum,Hydrothermal,3,"Kazakhstan, Russia"
1,Aluminum,Igneous,5,"Mongolia, Russia, Ukraine"
2,Aluminum,Metamorphic,1,Kazakhstan
3,Aluminum,Sedimentary,7,"Kazakhstan, Russia"
4,Aluminum,Surficial,57,"Afghanistan, Australia, Brazil, Cambodia, Came..."
...,...,...,...,...
701,zeolite,Unclassified,2,United States
702,zinc,Hydrothermal,2,United States
703,"zinc, lead",Hydrothermal,3,United States
704,"zinc, lead, silver",Hydrothermal,2,United States


## Global Critical Minerals dataset

In [6]:
usgs_cm = pd.read_excel(r'../data/USGS/Global-critical-minerals-usgs.xlsx', sheet_name='pp1802_critmin_pts')

In [7]:
usgs_cm

Unnamed: 0,DEPOSIT_NA,CRITICAL_M,DEPOSIT_TY,LATITUDE,LONGITUDE,LOCATION,LOC_DETAIL
0,Coeur d'Alene district,Antimony,Simple Sb,47.53407,-116.248500,United States of America,Idaho
1,Krasna Gora,Antimony,Au-Sb,49.58199,14.335470,Czech Republic,
2,Hillgrove,Antimony,Au-Sb,-30.57138,151.906870,Australia,
3,Blue Spec,Antimony,Au-Sb,-21.82987,120.249290,Australia,
4,Becker Cochrane,Antimony,Simple Sb,60.18333,-135.216660,Canada,
...,...,...,...,...,...,...,...
2116,Manono-Kitolo Mine,Lithium; Beryllium,Pegmatite,-7.27750,27.450833,Democratic Republic of the Congo,
2117,Odisha,Titanium; Zirconium and Hafnium; Manganese,Sedimentary related; Coastal placer or paleopl...,21.63000,85.670000,India,
2118,Pudozhgorsk,Vanadium,Vanadiferous Titanomagnetite,61.61900,36.245000,Russia,
2119,Jinduicheng,Rhenium,Porphyry Mo,34.33000,100.950000,China,


In [21]:
# Grouping critical minerals by CRITICAL_M and DEPOSIT_TY from 'Global-critical-minerals-usgs.xlsx'
usgs_cm_summary = usgs_cm.groupby(['CRITICAL_M', 'DEPOSIT_TY']).agg(count=('LOCATION', 'size'), countries=('LOCATION', lambda x: ', '.join(sorted(x.unique())))).reset_index()
usgs_cm_summary

Unnamed: 0,CRITICAL_M,DEPOSIT_TY,count,countries
0,Antimony,Au-Sb,10,"Australia, Bolivia, Czech Republic, France, Po..."
1,Antimony,Deposit type unclassified,13,"Argentina, Austria, Bolivia, China, Kyrgyzstan..."
2,Antimony,Disseminated Sb,2,"Canada, Mexico"
3,Antimony,Sb-Hg,1,Kyrgyzstan
4,Antimony,Simple Quartz-Stibnite,2,"China, South Africa"
...,...,...,...,...
247,Zirconium and Hafnium,Coastal placer or paleoplacer,13,"Australia, Brazil, India, United States of Ame..."
248,Zirconium and Hafnium,Igneous,1,Russia
249,Zirconium and Hafnium,Oil sands,1,Canada
250,Zirconium and Hafnium; Titanium,Coastal placer or paleoplacer,2,"Australia, Sri Lanka"


In [22]:
# Combine the two datasets into a single clean dataframe with the requested structure
df_usgs = pd.concat([
    usgs_major_summary.rename(columns={
        'commodity': 'Commodity',
        'dep_type': 'Deposit Type',
        'count': 'Count',
        'countries': 'Countries'
    })[['Commodity', 'Deposit Type', 'Count', 'Countries']],
    usgs_cm_summary.rename(columns={
        'CRITICAL_M': 'Commodity',
        'DEPOSIT_TY': 'Deposit Type',
        'count': 'Count',
        'countries': 'Countries'
    })[['Commodity', 'Deposit Type', 'Count', 'Countries']]
], ignore_index=True)

In [24]:
df_usgs.to_csv(r'usgs_deposit_type_count_per_country.csv', index=False)

In [None]:
# Define pathway mapping specific to the USGS deposit types and commodities
usgs_pathway_mapping = {
    ('Aluminum', 'Hydrothermal'): 'Bauxite Pathway',
    ('Aluminum', 'Igneous'): 'High-Alumina Clays Pathway',
    ('Aluminum', 'Metamorphic'): 'High-Alumina Clays Pathway',
    ('Aluminum', 'Sedimentary'): 'High-Alumina Clays Pathway',
    ('Aluminum', 'Surficial'): 'Bauxite Pathway',
    ('Copper', 'Hydrothermal'): 'Sulfide Ores (Pyrometallurgy)',
    ('Copper', 'Igneous'): 'Sulfide Ores (Pyrometallurgy)',
    ('Copper', 'Sedimentary'): 'Oxide Ores (Hydrometallurgy - SX-EW)',
    ('Gold', 'Hydrothermal'): 'Free-Milling Ores',
    ('Gold', 'Igneous'): 'Refractory Ores',
    ('Gold', 'Metamorphic'): 'Refractory Ores',
    ('Gold', 'Sedimentary'): 'Refractory Ores',
    ('Nickel', 'Igneous'): 'Sulfide Ores (Pyrometallurgy)',
    ('Nickel', 'Surficial'): 'Laterite Ores (Hydrometallurgy - HPAL)',
    ('Titanium', 'Surficial'): 'Rutile Pathway (Chloride Process)',
    ('Titanium', 'Igneous'): 'Ilmenite Pathway',
    ('Uranium', 'Sedimentary'): 'Sandstone Deposits (ISR)',
    ('Uranium', 'Igneous'): 'Hard Rock Deposits',
}

# Function to assign pathways for USGS data
def assign_usgs_pathway(row):
    key = (row['Commodity'], row['Deposit Type'])
    return usgs_pathway_mapping.get(key, 'Unknown Pathway')

# Apply the function to the dataset
usgs_data['Mineral Pathway'] = usgs_data.apply(assign_usgs_pathway, axis=1)

# Display the updated dataset for review
tools.display_dataframe_to_user(name="USGS Data with Mineral Pathways", dataframe=usgs_data)


# With CMMI data

They can be seen and downloaded [here](https://portal.ga.gov.au/persona/cmmi)

In [25]:
cmmi = pd.read_csv(r'../data/CMMI/CriticalMineralDepositsGeochemistry.csv')

  cmmi = pd.read_csv(r'../data/CMMI/CriticalMineralDepositsGeochemistry.csv')


In [26]:
cmmi

Unnamed: 0,FID,DEPOSIT_UID,DEPOSIT_NAME,DEPOSIT_LOCAL_ID,DEPOSIT_ENVIRONMENT,DEPOSIT_GROUP,DEPOSIT_TYPE,PRIMARY_COMMODITIES,SECONDARY_COMMODITIES,ALL_COMMODITIES,...,SAMPLE_LONGITUDE_WGS84,SAMPLE_LATITUDE_WGS84,SAMPLE_EASTING,SAMPLE_NORTHING,SAMPLE_UTM_ZONE,SAMPLE_LOCATION_DESCRIPTION,LOCATION_ACCURACY,COMMENTS,LAST_UPDATE,SAMPLE_GEOMETRY
0,CriticalMineralDepositsGeochemistry.ga.4003.14...,AUS.NSW.335841,Woodlawn,GA.335841,Volcanic basin hydrothermal,Volcanogenic massive sulfide (VMS),Bimodal-felsic VMS,"Zn, Cu, Pb","Ag, Au","Zn, Cu, Pb, (Ag, Au)",...,149.5706,-35.0605,734421.0,6117230.0,55S,"Woodlawn mine, 50m from ore body",1000.0,,2022-01-25,POINT (-35.060462 149.57055)
1,CriticalMineralDepositsGeochemistry.ga.4004.14...,AUS.NSW.335841,Woodlawn,GA.335841,Volcanic basin hydrothermal,Volcanogenic massive sulfide (VMS),Bimodal-felsic VMS,"Zn, Cu, Pb","Ag, Au","Zn, Cu, Pb, (Ag, Au)",...,149.5705,-35.0578,734421.0,6117530.0,55S,"Woodlawn mine, 50m from ore body",1000.0,,2022-01-25,POINT (-35.057759 149.570466)
2,CriticalMineralDepositsGeochemistry.ga.4005.14...,AUS.NSW.335841,Woodlawn,GA.335841,Volcanic basin hydrothermal,Volcanogenic massive sulfide (VMS),Bimodal-felsic VMS,"Zn, Cu, Pb","Ag, Au","Zn, Cu, Pb, (Ag, Au)",...,149.5705,-35.0578,734421.0,6117530.0,55S,"Woodlawn mine, 50m from ore body",1000.0,,2022-01-25,POINT (-35.057759 149.570466)
3,CriticalMineralDepositsGeochemistry.ga.4006.14...,AUS.NSW.335841,Woodlawn,GA.335841,Volcanic basin hydrothermal,Volcanogenic massive sulfide (VMS),Bimodal-felsic VMS,"Zn, Cu, Pb","Ag, Au","Zn, Cu, Pb, (Ag, Au)",...,149.5705,-35.0578,734421.0,6117530.0,55S,"Woodlawn mine, 50m from ore body",1000.0,,2022-01-25,POINT (-35.057759 149.570466)
4,CriticalMineralDepositsGeochemistry.ga.4007.14...,AUS.NSW.335841,Woodlawn,GA.335841,Volcanic basin hydrothermal,Volcanogenic massive sulfide (VMS),Bimodal-felsic VMS,"Zn, Cu, Pb","Ag, Au","Zn, Cu, Pb, (Ag, Au)",...,149.5705,-35.0578,734421.0,6117530.0,55S,"Woodlawn mine, 50m from ore body",1000.0,,2022-01-25,POINT (-35.057759 149.570466)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26833,CriticalMineralDepositsGeochemistry.USA.NV.NV0...,USA.NV.NV00095g,Screamer,USA.NV.NV00095g,Magmatic hydrothermal,Carlin-type,Carlin-type Au,Au,,Au,...,-116.3932,40.9870,,,,,,,2024-05-07,POINT (40.987 -116.3932)
26834,CriticalMineralDepositsGeochemistry.USA.NV.NV0...,USA.NV.NV00095g,Screamer,USA.NV.NV00095g,Magmatic hydrothermal,Carlin-type,Carlin-type Au,Au,,Au,...,-116.3932,40.9870,,,,,,,2024-05-07,POINT (40.987 -116.3932)
26835,CriticalMineralDepositsGeochemistry.USA.NV.NV0...,USA.NV.NV00095g,Screamer,USA.NV.NV00095g,Magmatic hydrothermal,Carlin-type,Carlin-type Au,Au,,Au,...,-116.3932,40.9870,,,,,,,2024-05-07,POINT (40.987 -116.3932)
26836,CriticalMineralDepositsGeochemistry.USA.NV.NV0...,USA.NV.NV00095g,Screamer,USA.NV.NV00095g,Magmatic hydrothermal,Carlin-type,Carlin-type Au,Au,,Au,...,-116.3932,40.9870,,,,,,,2024-05-07,POINT (40.987 -116.3932)


In [28]:
# Filter relevant columns
cmmi_relevant_columns = cmmi[['DEPOSIT_GROUP', 'DEPOSIT_TYPE', 'PRIMARY_COMMODITIES', 'SECONDARY_COMMODITIES', 'DEPOSIT_UID']]

# Extract country codes from DEPOSIT_UID (first three letters, e.g., AUS)
cmmi_relevant_columns['Country'] = cmmi_relevant_columns['DEPOSIT_UID'].str[:3]

# Group by DEPOSIT_TYPE and PRIMARY_COMMODITIES
cmmi_summary = (cmmi_relevant_columns.groupby(['DEPOSIT_GROUP', 'DEPOSIT_TYPE', 'PRIMARY_COMMODITIES']).agg(count=('Country', 'size'),  countries=('Country', lambda x: ', '.join(sorted(x.unique())))).reset_index())
cmmi_summary

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cmmi_relevant_columns['Country'] = cmmi_relevant_columns['DEPOSIT_UID'].str[:3]


Unnamed: 0,DEPOSIT_GROUP,DEPOSIT_TYPE,PRIMARY_COMMODITIES,count,countries
0,Alkali-calcic,Albitite-hosted U,U,19,AUS
1,Alkali-calcic,Albitite-hosted U,"U, U3O8, Cu",1,AUS
2,Alkali-calcic,Albitite-hosted U,"U3O8, U",12,AUS
3,Apatite intrusion,Apatite intrusion REE,REO,1,AUS
4,Black shale,Black shale V ± Mo ± Ni,Au,1,USA
...,...,...,...,...,...
520,unknown,unknown,Fl,3,AUS
521,unknown,unknown,Mg,3,AUT
522,unknown,unknown,"Pb, Co, Ag, Ni, Zn, Cu",3,AUS
523,unknown,unknown,Sb,3,DEU


In [29]:
cmmi_summary.to_csv(r'cmms_deposit_type_count_per_country.csv', index=False)

In [None]:
# Define pathway mapping based on the provided template
# This dictionary maps DEPOSIT_GROUP and PRIMARY_COMMODITIES to pathways
pathway_mapping = {
    ('Epithermal', 'Ag'): 'Free-Milling Ores (Gravity Separation, Cyanidation)',
    ('Porphyry', 'Cu'): 'Sulfide Ores (Pyrometallurgy)',
    ('Porphyry', 'Au'): 'Sulfide Ores (Pyrometallurgy)',
    ('Orogenic', 'Ag'): 'Free-Milling Ores (Gravity Separation, Cyanidation)',
    ('Laterite', 'Ni'): 'Laterite Ores (Hydrometallurgy - HPAL)',
    ('Laterite', 'Co'): 'Laterite Ores (Hydrometallurgy - HPAL)',
    ('Magmatic', 'Ni'): 'Sulfide Ores (Pyrometallurgy)',
    ('Magmatic', 'Cu'): 'Sulfide Ores (Pyrometallurgy)',
    ('Sedimentary', 'U'): 'Sandstone Deposits (ISR)',
    ('Sedimentary', 'Cu'): 'Oxide Ores (Hydrometallurgy - SX-EW)',
    ('Sedimentary', 'Co'): 'Oxide Ores (Hydrometallurgy - SX-EW)',
    ('IOCG', 'Cu'): 'Sulfide Ores (Pyrometallurgy)',
    ('IOCG', 'Au'): 'Sulfide Ores (Pyrometallurgy)',
    ('Placer', 'Ti'): 'Rutile Pathway (Chloride Process)',
    ('Placer', 'Zr'): 'Rutile Pathway (Chloride Process)',
}

# Function to assign pathways based on DEPOSIT_GROUP and PRIMARY_COMMODITIES
def assign_mineral_pathway(row):
    key = (row['DEPOSIT_GROUP'], row['PRIMARY_COMMODITIES'])
    return pathway_mapping.get(key, 'Unknown Pathway')

# Apply the function to the dataset
cmmi_summary['Mineral Pathway'] = cmmi_summary.apply(assign_mineral_pathway, axis=1)


# Visualisation

In [4]:
# Truncate deposit type names to the first 10 characters for readability
data['DEPOSIT_TY'] = data['DEPOSIT_TY'].str[:10]  # Limit deposit type to 10 characters

# Recreate nodes and links after truncation
countries = data['LOCATION'].unique()
minerals = data['CRITICAL_M'].unique()
deposit_types = data['DEPOSIT_TY'].unique()

nodes = list(countries) + list(minerals) + list(deposit_types)
node_indices = {name: i for i, name in enumerate(nodes)}

# Define the links with truncated deposit type labels
links = {
    'source': [],
    'target': [],
    'value': []
}

# Populate the links with truncated deposit types
for _, row in data.iterrows():
    country_idx = node_indices[row['LOCATION']]
    mineral_idx = node_indices[row['CRITICAL_M']]
    deposit_type_idx = node_indices[row['DEPOSIT_TY']]
    
    # Country to Mineral
    links['source'].append(country_idx)
    links['target'].append(mineral_idx)
    links['value'].append(1)

    # Mineral to Deposit Type
    links['source'].append(mineral_idx)
    links['target'].append(deposit_type_idx)
    links['value'].append(1)

In [5]:
# Adjusting the color of the flows to light grey and text to a darker color for better readability
fig = go.Figure(data=[go.Sankey(
    node=dict(
        pad=20,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=nodes,
        #color="darkblue"  # Set nodes color to dark blue for contrast
    ),
    link=dict(
        source=links['source'],
        target=links['target'],
        value=links['value'],
        color="lightgrey"  # Set flow color to light grey for readability
    )
)])

# Update layout with larger dimensions and save as HTML
fig.update_layout(
    title_text="Country, Critical Minerals, and Deposit Types from USGS",
    font_size=14,
    width=1600,
    height=1000
)

# Save the updated figure as an HTML file for better visualization
html_path_updated = 'results/sankey_deposits_world_usgs.html'
fig.write_html(html_path_updated)

html_path_updated

'results/sankey_deposits_world_usgs.html'

## Canada

In [6]:
# Filter the data specifically for Canada
canada_data = data[data['LOCATION'] == "Canada"]

# Extract nodes for just minerals and deposit types for Canada
minerals_canada = canada_data['CRITICAL_M'].unique()
deposit_types_canada = canada_data['DEPOSIT_TY'].unique()

# Create nodes for the Sankey diagram (minerals and deposit types)
nodes_canada = list(minerals_canada) + list(deposit_types_canada)
node_indices_canada = {name: i for i, name in enumerate(nodes_canada)}

# Define the links for the Canada-specific Sankey diagram
links_canada = {
    'source': [],
    'target': [],
    'value': []
}

# Populate links between minerals and deposit types for Canada
for _, row in canada_data.iterrows():
    mineral_idx = node_indices_canada[row['CRITICAL_M']]
    deposit_type_idx = node_indices_canada[row['DEPOSIT_TY']]

    # Mineral to Deposit Type link
    links_canada['source'].append(mineral_idx)
    links_canada['target'].append(deposit_type_idx)
    links_canada['value'].append(1)

# Create the Canada-specific Sankey figure
fig_canada = go.Figure(data=[go.Sankey(
    node=dict(
        pad=20,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=nodes_canada,
        #color="darkblue"
    ),
    link=dict(
        source=links_canada['source'],
        target=links_canada['target'],
        value=links_canada['value'],
        color="lightgrey"
    )
)])

# Update layout and save as an HTML file for viewing
fig_canada.update_layout(
    title_text="Canada's Critical Minerals and Deposit Types (USGS)",
    font_size=18,
    width=1600,
    height=1000
)

# Save the figure as an HTML file for Canada-specific Sankey diagram
html_path_canada = 'results/sankey_deposits_canada_usgs.html'
fig_canada.write_html(html_path_canada)

html_path_canada

'results/sankey_deposits_canada_usgs.html'