# Examine GOLD Path to ENVO Triad
#### This notebook examines the GOLD environmental paths for possible biome-material-feature triads.

In [1]:
from lib import data_operations as dop
from pandasql import sqldf

def pysqldf(q):
    """Run the SQL query string ``q`` via ``sqldf``, passing this module's globals as the environment."""
    namespace = globals()
    return sqldf(q, namespace)

## Read GOLD path spreadsheet

In [2]:
# Load the GOLD five-level ecosystem classification paths from the Excel workbook.
eco_paths_df = dop.make_dataframe("data/GOLDs5levelEcosystemClassificationPaths.xlsx", file_type="excel")
# Cast the path id column to str. Bracket indexing is used for the assignment because
# attribute-style assignment can silently set an attribute instead of a column.
eco_paths_df["ecosystem_path_id"] = eco_paths_df["ecosystem_path_id"].astype(str)
eco_paths_df.head() # peek at data

Unnamed: 0,ecosystem_path_id,ecosystem,ecosystem_category,ecosystem_type,ecosystem_subtype,specific_ecosystem
0,4845,Engineered,Artificial ecosystem,Mud microcosm,Unclassified,Unclassified
1,4536,Engineered,Bioreactor,Aerobic,Unclassified,Unclassified
2,4912,Engineered,Bioreactor,Anaerobic,Food waste,Unclassified
3,4914,Engineered,Bioreactor,Anaerobic,Manure,Unclassified
4,4442,Engineered,Bioreactor,Anaerobic,Unclassified,Unclassified


## Create aquatic subset
#### i.e., Environmental > Aquatic
**NOTE:** For now, we will filter out 'Unclassified' specific ecosystems  
See https://www.w3.org/TR/skos-primer/#sechierarchy for a description of the SKOS modifiers

In [3]:
# Select every column of eco_paths_df for Environmental > Aquatic paths,
# dropping rows whose specific_ecosystem is 'Unclassified'.
q = """
select 
    * 
from
    eco_paths_df
where
    ecosystem = 'Environmental'
and
    ecosystem_category = 'Aquatic'
and
    specific_ecosystem != 'Unclassified'
"""
# The query result is kept as aquatic_df; the later queries in this notebook select from it.
aquatic_df = sqldf(q)
aquatic_df.head() # peek at data

Unnamed: 0,ecosystem_path_id,ecosystem,ecosystem_category,ecosystem_type,ecosystem_subtype,specific_ecosystem
0,4059,Environmental,Aquatic,Freshwater,Drinking water,Chlorinated
1,4058,Environmental,Aquatic,Freshwater,Drinking water,Delivery networks
2,5122,Environmental,Aquatic,Freshwater,Drinking water,Filtered water
3,4547,Environmental,Aquatic,Freshwater,Drinking water,Unchlorinated
4,4164,Environmental,Aquatic,Freshwater,Groundwater,Acid Mine Drainage


#### Get distinct list of aquatic ecosystem types

In [4]:
# List each distinct ecosystem_type value found in aquatic_df.
q = """
select distinct
    ecosystem_type
from
    aquatic_df
"""
sqldf(q)

Unnamed: 0,ecosystem_type
0,Freshwater
1,Marine
2,Non-marine Saline and Alkaline
3,Thermal springs


## Get distinct subtypes and specific ecosystems for each aquatic type

#### The goal is to search for patterns that fit the biome-material-feature triad

## Freshwater
#### patterns
* All have ecosystem_type: **Freshwater**
  - biome: skos:exactMatch ENVO_00000873 # freshwater biome
* ecosystem_subtype: **Drinking water**
  - biome: skos:exactMatch ENVO_00000873 # freshwater biome 
  - material: skos:broadMatch ENVO_00003064 # drinking water 
  - feature: difficult to determine; most look like qualities
* ecosystem_subtype: **Groundwater**
  - biome: skos:exactMatch ENVO_00000873 # freshwater biome 
  - material: skos:exactMatch ENVO_00002041 # ground water 
  - feature: search ENVO for environmental feature (e.g., cave, mine)
* ecosystem_subtype: **Ice**
  - biome: skos:exactMatch ENVO_00000873 # freshwater biome 
  - material: skos:exactMatch ENVO_01001125 # ice 
  - feature: search ENVO for environmental feature (e.g., glacier)
* ecosystem_subtype: **Lake**
  - biome: skos:exactMatch ENVO_00000873 # freshwater biome 
  - material: skos:exactMatch ENVO_00002007 # sediment THERE IS ONLY ONE RECORD
  - feature: skos:exactMatch ENVO_00000021 # freshwater lake
* ecosystem_subtype: **Lentic**
  - biome: skos:exactMatch ENVO_00000873 # freshwater biome 
  - material: skos:broadMatch ENVO_00002011 # freshwater
  - feature: search specific ecosystem otherwise use skos:broadMatch ENVO_01000617 # lentic water body
* ecosystem_subtype: **Pond**
  - biome: skos:exactMatch ENVO_00000873 # freshwater biome 
  - material: skos:exactMatch ENVO_00002007 # sediment THERE IS ONLY ONE RECORD
  - feature: skos:exactMatch ENVO_00000033 # pond
* ecosystem_subtype: **River**
  - biome: skos:exactMatch ENVO_00000873 # freshwater biome 
  - material: skos:exactMatch ENVO_00002007 # sediment THERE IS ONLY ONE RECORD
  - feature: skos:exactMatch ENVO_01000297 # freshwater river
* ecosystem_subtype: **Storm water**
  - biome: skos:exactMatch ENVO_00000873 # freshwater biome 
  - material: skos:exactMatch ENVO_01001267 # stormwater 
  - feature: skos:broadMatch ENVO_00002034 # biofilm
* ecosystem_subtype: **Wetlands**
  - biome: skos:exactMatch ENVO_00000873 # freshwater biome 
  - material: with the exception of sediment, this is difficult to determine; perhaps freshwater?
  - feature: 
    + skos:exactMatch ENVO_00000035 # marsh
    + skos:broadMatch ENVO_00000044 # peatland
    + skos:exactMatch ENVO_01001208 # swamp area


In [5]:
# Show the type, subtype and specific ecosystem of every Freshwater row in aquatic_df.
q = """
select
    ecosystem_type, ecosystem_subtype, specific_ecosystem
from
    aquatic_df
where
    ecosystem_type = 'Freshwater'
"""
sqldf(q)

Unnamed: 0,ecosystem_type,ecosystem_subtype,specific_ecosystem
0,Freshwater,Drinking water,Chlorinated
1,Freshwater,Drinking water,Delivery networks
2,Freshwater,Drinking water,Filtered water
3,Freshwater,Drinking water,Unchlorinated
4,Freshwater,Groundwater,Acid Mine Drainage
5,Freshwater,Groundwater,Cave water
6,Freshwater,Groundwater,Coalbed water
7,Freshwater,Groundwater,Contaminated
8,Freshwater,Groundwater,Mine
9,Freshwater,Groundwater,Mine drainage


## Marine
#### patterns
* All have ecosystem_type: **Marine**
  - biome: skos:exactMatch ENVO_00000447 # marine biome
* ecosystem_subtype: **Coastal**
  - biome: skos:exactMatch ENVO_00000447 # marine biome
  - material: skos:exactMatch ENVO_00002007 # sediment THERE IS ONLY ONE RECORD
  - feature: skos:closeMatch ENVO_00000303 # sea coast
* ecosystem_subtype: **Cold seeps**
  - biome: skos:exactMatch ENVO_00000447 # marine biome
  - material: skos:exactMatch ENVO_00002007 # sediment THERE IS ONLY ONE RECORD
  - feature: skos:exactMatch ENVO_01000263 # cold seep
* ecosystem_subtype: **Fossil**
  - biome: skos:exactMatch ENVO_00000447 # marine biome
  - material: skos:exactMatch ENVO_01000140 # whale fall
  - feature: skos:exactMatch ENVO_00002164 # fossil
* ecosystem_subtype: **Hydrothermal vents**
  - biome: skos:exactMatch ENVO_00000447 # marine biome
  - material: many specific ecosystems seem like qualities (e.g., Diffuse flow)
  - feature: skos:exactMatch ENVO_00000215 # hydrothermal vent
* ecosystem_subtype: **Intertidal zone**
  - biome: skos:exactMatch ENVO_00000447 # marine biome
  - material: specific ecosystems seem like features (e.g., coral reef)
  - feature: skos:exactMatch ENVO_00000316 # intertidal zone
* ecosystem_subtype: **Neritic zone**
  - biome: skos:exactMatch ENVO_00000447 # marine biome
  - material: 
    + skos:exactMatch ENVO_00002007 # sediment (2 records)
    + skos:broadMatch ENVO_00002007 # sediment (1 record)
  - feature: skos:exactMatch ENVO_00000206 # marine neritic zone
* ecosystem_subtype: **Oceanic**
  - biome: skos:exactMatch ENVO_00000447 # marine biome
  - material: 
    + skos:broadMatch ENVO_01000134 # hydrothermal fluid (crustal fluid?)
    + skos:exactMatch ENVO_00002007 # sediment
    + skos:broadMatch ENVO_00002007 # sediment (oil-contaminated sediment)
  - feature: 
    + skos:exactMatch ENVO_00000244 # abyssal plain
    + skos:exactMatch ENVO_00000210 # marine aphotic zone
    + skos:exactMatch ENVO_01000105 # marine benthic feature
    + skos:exactMatch ENVO_01000008 # microbial mat
    + skos:exactMatch ENVO_01000749 # oceanic crust
    + skos:exactMatch ENVO_00000209 # marine photic zone
* ecosystem_subtype: **Subtidal zone**
  - biome: skos:exactMatch ENVO_00000447 # marine biome
  - material: skos:exactMatch ENVO_00002007 # sediment THERE IS ONLY ONE RECORD
  - feature: skos:broadMatch ENVO_01001341 # marine tidal flow zone
* ecosystem_subtype: **Unclassified/Oil-contaminated sediment**
  - biome: skos:exactMatch ENVO_00000447 # marine biome
  - material: skos:broadMatch ENVO_00002007 # sediment (oil-contaminated sediment) THERE IS ONLY ONE RECORD
  - feature: UNDETERMINED
* ecosystem_subtype: **Wetlands**
  - biome: skos:exactMatch ENVO_00000447 # marine biome
  - material: skos:exactMatch ENVO_00002007 # sediment THERE IS ONLY ONE RECORD
  - feature: skos:exactMatch ENVO_00000240 # saline wetland  

In [6]:
# Show the type, subtype and specific ecosystem of every Marine row in aquatic_df.
q = """
select
    ecosystem_type, ecosystem_subtype, specific_ecosystem
from
    aquatic_df
where
    ecosystem_type = 'Marine'
"""
sqldf(q)

Unnamed: 0,ecosystem_type,ecosystem_subtype,specific_ecosystem
0,Marine,Coastal,Sediment
1,Marine,Cold seeps,Sediment
2,Marine,Fossil,Whale fall
3,Marine,Hydrothermal vents,Black smokers
4,Marine,Hydrothermal vents,Diffuse flow
5,Marine,Hydrothermal vents,Microbial mats
6,Marine,Hydrothermal vents,Sediment
7,Marine,Intertidal zone,Beach
8,Marine,Intertidal zone,Coral reef
9,Marine,Intertidal zone,Estuary


## Non-marine Saline and Alkaline

In [7]:
# Show the type, subtype and specific ecosystem of every
# 'Non-marine Saline and Alkaline' row in aquatic_df.
q = """
select
    ecosystem_type, ecosystem_subtype, specific_ecosystem
from
    aquatic_df
where
    ecosystem_type = 'Non-marine Saline and Alkaline'
"""
sqldf(q)

Unnamed: 0,ecosystem_type,ecosystem_subtype,specific_ecosystem
0,Non-marine Saline and Alkaline,Alkaline,Carbonate
1,Non-marine Saline and Alkaline,Alkaline,Microbial mats
2,Non-marine Saline and Alkaline,Alkaline,Sediment
3,Non-marine Saline and Alkaline,Hypersaline,Microbial mats
4,Non-marine Saline and Alkaline,Hypersaline,Sediment
5,Non-marine Saline and Alkaline,Near-boiling (>90C),Alkaline
6,Non-marine Saline and Alkaline,Saline,Athalassic
7,Non-marine Saline and Alkaline,Saline,Epilimnion
8,Non-marine Saline and Alkaline,Saline,Hypolimnion
9,Non-marine Saline and Alkaline,Saline,Microbial mats


## Thermal springs

In [8]:
# Show the type, subtype and specific ecosystem of every 'Thermal springs' row in aquatic_df.
q = """
select
    ecosystem_type, ecosystem_subtype, specific_ecosystem
from
    aquatic_df
where
    ecosystem_type = 'Thermal springs'
"""
sqldf(q)

Unnamed: 0,ecosystem_type,ecosystem_subtype,specific_ecosystem
0,Thermal springs,Hot (42-90C),Acidic
1,Thermal springs,Hot (42-90C),Alkaline
2,Thermal springs,Hot (42-90C),Microbial mats
3,Thermal springs,Hot (42-90C),Neutral
4,Thermal springs,Hot (42-90C),Sediment
5,Thermal springs,Near-boiling (>90C),Alkaline
6,Thermal springs,Tepid (25-34C),Sediment
7,Thermal springs,Warm (34-42C),Acidic
8,Thermal springs,Warm (34-42C),Neutral
9,Thermal springs,Warm (34-42C),Sediment
