# MiBiPreT data analysis mock-up

## User story
The company Tauw wants to analyse the data from a specific field site to evaluate whether biodegradation is a feasible option for contaminant removal.

In [None]:
import mibipret
from mibipret.data import test_data

%matplotlib inline

In [None]:
# Load the example data provided by the package: either a small dataset stored
# in the repository or one downloaded from an online resource.
# NOTE(review): this assignment rebinds the name `test_data`, which the import
# cell also binds via `from mibipret.data import test_data` — confirm that the
# shadowing is intended.
test_data = mibipret.data.test_data()

In [None]:
# Alternative 1: read the Excel workbook one sheet at a time.
metabolites_data = mibipret.data.load_excel(
    file_path="path/to/data/data.xlsx",
    sheet=1,
    verbose=True,
    store_provenance=True,
)
hydro_data = mibipret.data.load_excel(
    file_path="path/to/data/data.xlsx",
    sheet=2,
    verbose=True,
    store_provenance=True,
)
contaminants_data = mibipret.data.load_excel(
    file_path="path/to/data/data.xlsx",
    sheet=5,
    verbose=True,
    store_provenance=True,
)

# Alternative 2: request several sheets in a single call and unpack the
# results, one name per requested sheet.
metabolites, hydro, contaminants = mibipret.data.load_excel(
    file_path="path/to/data/data.xlsx",
    sheet=[1, 2, 5],
    verbose=True,
    store_provenance=True,
)

# Alternative 3: the same kind of loading from a CSV file.
# Note that this rebinds `metabolites_data` from alternative 1.
metabolites_data = mibipret.data.load_csv(
    file_path="path/to/data/data.csv",
    verbose=True,
    store_provenance=True,
)

In [None]:
# `standardize` runs check_units, check_columns and/or validation under the
# hood and creates a new, standardized dataset.
# `validation` is similar in that it combines various checks, but unlike
# `standardize` it does not create a new standardized dataset.
st_sample_data = mibipret.data.standardize(
    data=[contaminants, metabolites],
    data_type="sample",
    store_csv=True,
    verbose=True,
    store_provenance=True,
)

In [None]:
# Use the `options` function to check which types of analysis, modelling,
# visualization and reports are possible for the dataset.
# If the `func` argument is provided, `options` checks whether that specific
# function can be applied and, if not, reports what else is needed.
# The function is referenced as `na_traffic3d`, the same name under which the
# traffic-light visualization is called later in this notebook.
mibipret.decision_support.options(
    st_sample_data,
    func=mibipret.visualize.na_traffic3d,
)

In [None]:
# Perform natural attenuation (NA) screening for the contaminants given in the
# list; when no list is given, the default set "BTEXIIN" is used.
# na_screening uses stoichiometric equations to analyze the electron balance;
# these equations are contained in an included file
# (potentially to be linked to an online database).
# If geographical data (x, y, z) for each well is in the original dataset, it
# is also stored in na_output.
# The methods in mibipret.analysis.sample all produce output per analyzed
# sample, which can optionally be added to the original standardized dataframe
# using the in_place argument.
na_output = mibipret.analysis.sample.na_screening(
    # the standardized dataset created by `mibipret.data.standardize`
    data=st_sample_data,
    contaminants=["name1", "name2", "name3"],
    in_place=True,
)
na_output

In [None]:
# With the NA screening done, the result can be shown as traffic lights
# plotted for each sample in space. This requires that spatial information
# is available in the data.
# Because na_screening was run with in_place=True, its output was also added
# to the standardized dataset that was passed to it, so this method could be
# run on that dataset instead of on na_output.
mibipret.visualize.na_traffic3d(
    data=na_output,
    save_fig="plot_name.jpg",
)

In [None]:
# Some analysis methods do not return a value per sample/row but instead
# combine/aggregate/reduce the information of all (or a selection of) rows.
# These methods are grouped under the mibipret.analysis.reduction module and
# always create a new object holding the output data.

# Placeholder column names: replace with the species columns of the dataset.
column_list = ["species1", "species2", "species3"]

pca_output = mibipret.analysis.reduction.pca(
    # the standardized dataset created by `mibipret.data.standardize`
    data=st_sample_data,
    species=column_list,
    n_comp=2,
)

In [None]:
# Visualize the PCA result (`pca_output`, produced by
# mibipret.analysis.reduction.pca) with the `ordination` visualization.
mibipret.visualize.ordination(data=pca_output)

In [None]:
# Use the `requirements` function in the decision_support module to obtain a
# backtrace of the steps and input data required for a specific function,
# here the `keeling` visualization.
mibipret.decision_support.requirements(func=mibipret.visualize.keeling)

In [None]:
# Final step: build the report. The report may take all intermediate results
# of the notebook as input, or only the standardized data, in which case it
# executes every step that is necessary itself.
# Users might contribute such decision_support "recipes" themselves, either in
# notebook format or in some other workflow specification (still to be decided).
# NOTE(review): `provenance` is not assigned anywhere in the visible notebook;
# presumably it is meant to come from the `store_provenance=True` calls —
# confirm how it should be obtained.
mibipret.decision_support.report(
    input=[na_output, pca_output, provenance],
    format="latex",
)