## Candidate Observation Datasets with Primary Production Theme

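This short script downloads a list of candidate Simons CMAP observation datasets related to primary production, saves each one as a CSV file under `./datasets/`, and optionally writes a second, colocalized copy with environmental context sampled from the `tblWOA_Climatology` and `tblPisces_NRT` tables.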

In [31]:
import os
import pycmap

# Uncomment and insert your CMAP API key; download_datasets() below uses this `api` client.
# api = pycmap.API(token="YOUR_API_KEY")

# Output directory for every CSV written by this notebook (trailing slash is
# required: file names are appended to it by plain string concatenation).
PP_DIR = "./datasets/"

# Datasets with primary production observations.
# Each entry pairs a CMAP table name with its catalog page; the pairs below are
# (table name, catalog slug) and are expanded into {"table": ..., "url": ...}.
pp_ds = [
    {"table": table, "url": f"https://simonscmap.com/catalog/datasets/{slug}"}
    for table, slug in (
        ("tblHOT_PP_v2022", "HOT_PP_v2022"),
        ("tblHOT_Bottle_ALOHA", "HOT_Bottle_ALOHA"),
        ("tblBATS_Primary_Production", "BATS_Primary_Production"),
        ("tblMGL1704_Gradients2_Diazotroph", "MGL1704_Gradients2_Diazotroph"),
        ("tblKOK1606_Gradients1_Diazotroph", "KOK1606_Gradients1_Diazotroph"),
        # NOTE(review): table name ends in "02_3" but the slug ends in "0_2_3" —
        # kept exactly as found; confirm which spelling the catalog uses.
        ("tblHOT242_252_14C_PP_02_3", "HOT242_252_14C_PP_0_2_3"),
        ("tblHOT242_252_14C_PP_WSW_group_sort", "HOT242_252_14C_PP_WSW_group_sort"),
        ("tblKM1709_MS_Fe_14C_PP", "KM1709_MS_Fe_14C_PP"),
        ("tblHOT221_247_14C_PP_02um", "HOT221_247_14C_PP_02um"),
        ("tblKOK1606_Gradients1_15N13C", "Gradients1_KOK1606_15N13C"),
        ("tblHOT242_252_14C_PP_3_WSW", "HOT242_252_14C_PP_3_WSW"),
        ("tblphytoplankton_physiology", "phytoplankton_physiology"),
        ("tblGradients5_TN412_15N13C", "Gradients5_TN412_15N13C"),
        ("tblGradients4_TN397_14C_NPP_DailyOnDeck", "Gradients4_TN397_14C_NPP_DailyOnDeck"),
        ("tblTN397_Gradients4_15N13C", "Gradients4_TN397_15N13C"),
        ("tblTN397_Gradients4_14C_NPP_Arrays", "Gradients4_TN397_14C_NPP_Arrays"),
        ("tblKM1513_HOE_legacy_2A_14C_NPP", "HOELegacy2A_KM1513_14C_NPP"),
        ("tblMGL1704_Gradients2_14C_NPP", "Gradients2_MGL1704_14C_NPP"),
        ("tblKM1906_Gradients3_15N13C", "Gradients3_KM1906_15N13C"),
        ("tblKM1605_HL3_14C_carbon_fixation", "KM1605_HL3_14C_carbon_fixation"),
        ("tblKOK1507_HLB2_dark_14C_Carbon_fixation", "KOK1507_HLB2_dark_14C_Carbon_fixation"),
    )
]

def download_datasets(pp_ds: list[dict[str, str]], colocalize: bool = True, api=None):
    """Download each listed table to CSV and optionally save a colocalized copy.

    For every entry, the table is fetched with ``api.get_dataset`` and written
    to ``<PP_DIR><table>.csv``. When ``colocalize`` is true, the downloaded
    frame is also passed through ``add_env_context`` and the result is written
    to ``<PP_DIR><table>_colocalized.csv``.

    Parameters
    ----------
    pp_ds : list of dict
        One dict per dataset. Only the ``"table"`` key is read here.
    colocalize : bool, default True
        Whether to also produce the colocalized CSV for each dataset.
    api : optional
        Client object exposing ``get_dataset(table_name)``. When omitted, a
        notebook-level ``api`` variable is used if one exists; otherwise a new
        ``pycmap.API()`` is created (expected to pick up the locally stored
        API key -- confirm against the pycmap documentation).

    Returns
    -------
    None
        Results are written to disk; progress is printed.
    """
    # The original body read a global `api` that this notebook never defines
    # (its only definition is commented out), so a fresh kernel raised
    # NameError. Resolve the client explicitly, keeping the old global as the
    # first fallback so existing sessions behave exactly as before.
    if api is None:
        api = globals().get("api")
    if api is None:
        api = pycmap.API()

    os.makedirs(PP_DIR, exist_ok=True)
    n_total = len(pp_ds)
    for position, ds in enumerate(pp_ds, start=1):
        table = ds["table"]
        print(f"({position}/{n_total}) Downloading {table} ...")
        fname = f"{PP_DIR}{table}.csv"
        df = api.get_dataset(table)
        df.to_csv(fname, index=False)
        print(f"\t Dataset saved at {fname}")
        if colocalize:
            print("Adding environmental context")
            co_fname = f"{PP_DIR}{table}_colocalized.csv"
            add_env_context(df).to_csv(co_fname, index=False)
            print(f"\n\t Colocalized dataset saved at {co_fname}")
        
        

def add_env_context(df):
    """Sample environmental variables along the rows of ``df`` via pycmap.Sample.

    Two target tables are requested: ``tblWOA_Climatology`` (temperature,
    density, salinity, nitrate, phosphate, silicate, oxygen) and
    ``tblPisces_NRT`` (NO3, PO4, O2, Si, PP, CHL, PHYC). Each target carries a
    four-element ``tolerances`` list -- presumably [time, lat, lon, depth]
    matching tolerances; confirm order and units against the pycmap docs.

    Returns whatever ``pycmap.Sample`` returns; the caller in this notebook
    calls ``.to_csv`` on it, so it is presumably a pandas DataFrame.
    """
    woa_climatology = {
        "variables": [
            "sea_water_temp_WOA_clim",
            "density_WOA_clim",
            "salinity_WOA_clim",
            "nitrate_WOA_clim",
            "phosphate_WOA_clim",
            "silicate_WOA_clim",
            "oxygen_WOA_clim",
        ],
        "tolerances": [0, 0.75, 0.75, 5],
    }
    pisces_nrt = {
        "variables": ["NO3", "PO4", "O2", "Si", "PP", "CHL", "PHYC"],
        "tolerances": [4, 0.5, 0.5, 5],
    }
    sampling_targets = {
        "tblWOA_Climatology": woa_climatology,
        "tblPisces_NRT": pisces_nrt,
    }
    # NOTE(review): the keyword spelling `replaceWithMonthlyClimatolog` (no
    # trailing "y") is kept exactly as in the original call, which ran
    # successfully -- do not "correct" it without checking pycmap's signature.
    return pycmap.Sample(
        source=df,
        targets=sampling_targets,
        replaceWithMonthlyClimatolog=True,
        servers=["Rossby"],
    )
    
# Run the full batch: fetch every table listed in pp_ds, save it as CSV under
# PP_DIR, and also write the colocalized copy for each one.
download_datasets(pp_ds, colocalize=True)


(1/22) Downloading tblHOT_PP_v2022 ...
	 Dataset saved at ./datasets/tblHOT_PP_v2022.csv
Adding environmental context
Gathering metadata .... 
Sampling starts
Sampling finished                                                                                                    
	 Colocalized dataset saved at ./datasets/tblHOT_PP_v2022_colocalized.csv
(2/22) Downloading tblHOT_Bottle_ALOHA ...
	 Dataset saved at ./datasets/tblHOT_Bottle_ALOHA.csv
Adding environmental context
Gathering metadata .... 
Sampling starts
Sampling finished                                                                                                    
	 Colocalized dataset saved at ./datasets/tblHOT_Bottle_ALOHA_colocalized.csv
(3/22) Downloading tblBATS_Primary_Production ...
	 Dataset saved at ./datasets/tblBATS_Primary_Production.csv
Adding environmental context
Gathering metadata .... 
Sampling starts
Sampling finished                                                                                        