# [Feature Name]
***TRY DB Extraction***

In [None]:
# IMPORTS
import TRYpros
import numpy as np
import pandas as pd
# Set pandas to show all columns.
pd.set_option("display.max_columns", None) 

In [2]:
# EXTENSIONS
# Automatically reload external imported files 
# when their content changes.
%load_ext autoreload
%autoreload 2

## 1. Prepare Environment

In [3]:
# SETTINGS
pd.set_option("display.max_columns", None) # Set pandas to show all columns.

In [None]:
# CONSTANTS
# Directory holding the raw TRY data request files.
DATA_DIR = "/path/to/directory/containing/raw/try/data"
# Raw TRY file to process (passed to the FeatureHandler as path_src).
PATH_SRC = f"{DATA_DIR}/path/to/specific/file.txt"
# Folder passed to TRYpros.save_data as dest_fold at the end of the notebook.
# Plain string: there is nothing to interpolate, so no f-prefix is needed.
PATH_DST = "/path/to/directory/in/which/to/save/extracted/data"
# Name of the feature being extracted (passed to the FeatureHandler).
FEATURE_NAME = "a_suitable_name_for_file_contents"
FEATURE_TYPE = "str"  # also may be "float" or "int"
# Reference file passed to the FeatureHandler as path_ref.
PATH_REF = "./dataset_ids.txt"
# Columns to ignore from the TRY DB (passed to FH.load_big_data).
DROP_COLS = [
    "LastName", "FirstName", "Dataset", "Reference",
    "Replicates", "SpeciesName", "ValueKindName",
    "OrigUncertaintyStr", "UncertaintyName",
    "RelUncertaintyPercent",
]

# DEFINE FEATURE HANDLER
# Single handler object used by every subsequent cell of this notebook.
FH = TRYpros.FeatureHandler(
    path_src=PATH_SRC,
    path_ref=PATH_REF,
    feature_name=FEATURE_NAME,
    d_type=FEATURE_TYPE,
)

## 2. Load Data

In [None]:
# LOAD DATA
FH.load_big_data(drop_cols=DROP_COLS)

## 3. Process Trait Data

### 3.1. ID Review

In [None]:
# VIEW KNOWN IDS
# Single quotes inside the replacement field keep this cell valid on
# Python < 3.12; reusing the outer quote type inside an f-string is
# only accepted from Python 3.12 onwards (PEP 701).
print(f"Keep IDs = {FH.keep_ids['trait']}")
FH.known_ids["trait"]

### 3.2. Extract Data

In [None]:
# EXTRACT TRAIT DATA & COVARIATE DATA (METADATA)
FH.extract_trait_covariate_data()

### 3.3. Manual Investigation

In [None]:
# QUESTION: Are values numeric/categorical? What forms are they in?
FH.view_units_value_forms(data_type="trait")

In [None]:
# QUESTION: What contextual information is available?
FH.get_context(FH.data_trait["non_std"])

In [None]:
# QUESTION: What does data associated with specific value forms look like?
FH.get_unique_matches(
    data=FH.data_trait["non_std"],
    match_col='value_form',
    to_match=["na", "/", ".", "-"])

In [None]:
# ASK YOUR QUESTIONS ...

### 3.4. Define & Apply Transformation Functions

In [None]:
# DEFINE DATA TRANSFORMATIONS
# Alternate unit spellings mapped to the one standard notation used for
# each unit. Exponents are always written explicitly (e.g. "g^{1}").
_UNIT_NOTATION = {
    "mm2 mg-1": "mm^{2}mg^{-1}",
    "mm2/mg": "mm^{2}mg^{-1}",
    "m2/kg": "m^{2}kg^{-1}",
    "m2 kg-1": "m^{2}kg^{-1}",
    "cm2/g": "cm^{2}g^{-1}",
    "g/cm2": "g^{1}cm^{-2}",
    "(g/cm2)": "g^{1}cm^{-2}",
}


def example_unit_standardization_function(r):
    """Map alternate unit notations to one standard notation.

    Args:
        r: A row-like object exposing the original unit string as the
            attribute ``OrigUnitStr``.

    Returns:
        The standardized unit notation when the unit is a known
        alternate spelling; otherwise the unit unchanged (this includes
        missing values such as NaN, which never match a known spelling).
    """
    unit = r.OrigUnitStr
    # Unknown units and NaN fall through untouched via the default.
    return _UNIT_NOTATION.get(unit, unit)

# WRAP EACH TRANSFORMATION FUNCTION IN 
# ONE DFColValTransformation OBJECT
t_unit_std = TRYpros.DFColValTransformation(
    f=example_unit_standardization_function, 
    col="OrigUnitStr")

In [None]:
# TEST TRANSFORMATIONS BEFORE APPLICATION
res_t = FH.data_trait["non_std"]
res_t = t_unit_std(res_t)
# t_unit_std rewrites the "OrigUnitStr" column, so list its unique
# values to verify that every unit notation was standardized.
print(res_t.OrigUnitStr.unique())
print(res_t.value_form.unique())

In [None]:
# CONFIGURE TRANSFORMATIONS
# Add each transformation object to list of transformations to be 
# applied to each data type and standardization type.
FH.transforms["non_std"]["trait"] = [t_unit_std]

In [None]:
# APPLY CONFIGURED TRANSFORMATIONS
FH.apply_transformations("trait")

In [None]:
# CHECK SUCCESSFUL APPLICATION
# NOTE(review): the only transformation configured for trait data in
# this template (t_unit_std) targets "OrigUnitStr", while this cell
# inspects "OrigValueStr" -- also check
# FH.data_trait["non_std"].OrigUnitStr.unique() to confirm the unit
# standardization took effect.
FH.data_trait["non_std"].OrigValueStr.unique()

## 4. Process Lon Lat Data

### 4.1. ID Review

In [None]:
# VIEW KNOWN IDS
# Single quotes inside the replacement field keep this cell valid on
# Python < 3.12; reusing the outer quote type inside an f-string is
# only accepted from Python 3.12 onwards (PEP 701).
print(f"Keep IDs = {FH.keep_ids['lonlat']}")
FH.known_ids["lonlat"]

### 4.2. Extract Data

In [None]:
# EXTRACT LON LAT DATA
FH.extract_lonlat_data()

### 4.3. Manual Investigation

In [None]:
# CONSIDER COMBINING LON LAT DATA FOR EASIER INVESTIGATION
data_latlon_std = FH.get_combine_lonlat("std")
data_latlon_non_std = FH.get_combine_lonlat("non_std")

In [None]:
# QUESTION: What do value units and forms look like?
FH.view_units_value_forms(data_type="lonlat")

In [None]:
# QUESTION: Do some columns provide useful context information?
FH.get_context(data_latlon_non_std)

In [None]:
# QUESTION: What are all the different forms of value expression?
data_latlon_non_std.value_form.unique()

In [None]:
# QUESTION: What data is available in the UTM format?
data_utm = FH.get_utm_data()
data_utm

In [None]:
# QUESTION: What data is available in the NZTM format?
data_nztm = FH.get_nztm_data()
FH.get_context(data_nztm)

In [None]:
# ASK YOUR QUESTIONS ...

### 4.4. Define & Apply Transformation Functions

In [None]:
# DEFINE FUNCTIONS

# TRYpros provides a few pre-defined transformation objects, such as:

# 1. A transformation object for mapping
# alternate notations of longitude/latitude
# decimal degrees into the standard numeric notation with values
# in the ranges [-180, 180] (longitude) and [-90, 90] (latitude).
ll_val_std_deg = TRYpros.get_transformation_lonlat_std()

# 2. A transformation object that recomputes the value form column. It is
# intended to be used after some other value transformation so that the
# value form column reflects the latest form of the values.
ll_form_recompute = TRYpros.get_transformation_get_value_form("OrigValueStr")

In [None]:
# TEST TRANSFORMATIONS PRIOR TO APPLICATION
print("Unique Value Forms (Before):", 
      data_latlon_non_std.value_form.unique())
res_ll = data_latlon_non_std
res_ll = ll_val_std_deg(res_ll)
res_ll = ll_form_recompute(res_ll)
print("Unique Value Forms (After):", 
      res_ll.value_form.unique())

In [None]:
# CONFIGURE TRANSFORMATIONS
FH.transforms["non_std"]["lonlat"] = [ll_val_std_deg, ll_form_recompute]

In [None]:
# APPLY TRANSFORMATIONS
FH.apply_transformations("lonlat")

In [None]:
# OPTIONALLY PERFORM OTHER PROCESSING STEPS
# TRYpros provides the following functions to convert
# UTM / NZTM values into decimal degrees.

# CONVERT UTM VALUES
FH.lonlat_utm_to_decimal_degrees()

# CONVERT NZTM VALUES
FH.lonlat_nztm_to_decimal_degrees()

# AVG TRAIT VALUES
FH.avg_trait_values(data_type="lonlat", id1=4710, id2=4711)

In [None]:
# CHECK SUCCESSFUL APPLICATION
FH.view_range(data_type="lonlat", std_type="std")
FH.view_range(data_type="lonlat", std_type="non_std")
# If there are abnormal values, then something may be wrong.

## 5. Process Year Data

### 5.1. ID Review

In [None]:
# VIEW KNOWN IDS
# Single quotes inside the replacement field keep this cell valid on
# Python < 3.12; reusing the outer quote type inside an f-string is
# only accepted from Python 3.12 onwards (PEP 701).
print(f"Keep IDs = {FH.keep_ids['year']}")
FH.known_ids["year"]

In [None]:
# UPDATE KEEP LIST
# Only data corresponding to Trait/Data IDs in the keep list are loaded
# into the FeatureHandler's attribute (data_trait, data_lonlat, or data_year).
FH.keep_ids["year"] = [241, 212, 696, 2254, 2255, 6601, 8571, 8737, 9732]
FH.get_considered_traits("year")

### 5.2. Extract Data

In [None]:
# EXTRACT YEAR/DATE RELATED DATA
FH.extract_year_data()

### 5.3. Manual Investigation

In [None]:
# QUESTION: What do value units and forms look like?
FH.view_units_value_forms(data_type="year")

In [None]:
# QUESTION: What context information is available?
FH.get_context(FH.data_year["non_std"], context_cols=[
    "OriglName"])["OriglName"].tolist()

In [None]:
# QUESTION: What data is associated with specific DatasetIDs?
display(FH.get_unique_matches(
    data=FH.data_year["non_std"], match_col="DatasetID",
    to_match=[1], keep=["DataID", "OrigValueStr", "value_form"]))
display(FH.get_unique_matches(
    data=FH.data_year["non_std"], match_col="DataID",
    to_match=[241], keep=["DataID", "OrigValueStr", "value_form"]))

In [None]:
# ASK YOUR QUESTIONS ...

### 5.4. Define & Apply Transformation Functions

In [None]:
# DEFINE TRANSFORMATIONS
# TRYpros also provides a function that gets a transformation
# object that extracts year from various date notations in the TRY DB.
y_ext_year_std = TRYpros.get_transformation_get_year("StdValue")
y_ext_year_non_std = TRYpros.get_transformation_get_year("OrigValueStr")

# The value form must be recomputed from the same column that the
# matching year extraction rewrote: "StdValue" for standardized data
# and "OrigValueStr" for non-standardized data.
y_get_val_form_std = TRYpros.get_transformation_get_value_form("StdValue")
y_get_val_form_non_std = TRYpros.get_transformation_get_value_form("OrigValueStr")

In [None]:
# TEST TRANSFORMATIONS PRIOR TO APPLICATION
res_y = FH.data_year["non_std"]
print("before:", res_y["value_form"].unique())
res_y = y_ext_year_non_std(res_y)
res_y = y_get_val_form_non_std(res_y)
print("after:", res_y["value_form"].unique())

In [None]:
# CONFIGURE TRANSFORMATIONS
FH.transforms["std"]["year"] = [y_ext_year_std, y_get_val_form_std]
FH.transforms["non_std"]["year"] = [y_ext_year_non_std, y_get_val_form_non_std]

In [None]:
# APPLY CONFIGURED TRANSFORMATIONS
FH.apply_transformations(data_type="year")

In [None]:
# CHECK TO ENSURE TRANSFORMATION SUCCESS
FH.view_range("year", "non_std")
FH.view_range("year", "std")

## 6. Combine Data

In [None]:
# COMBINE TRAIT, LONLAT AND YEAR DATA
data_extracted = FH.combine_data()

# VIEW ON MAP
TRYpros.map_plot(data_extracted, 
                 title=f"TRY Data Distribution: {FH.feature_name}")

## 7. Save Extracted Data

In [None]:
# SAVE PROCESSED EXTRACTED DATA
TRYpros.save_data(
    data = data_extracted, 
    dest_fold = PATH_DST,
    feature_name = FH.feature_name,
    feature_unit = FH.get_feature_unit())