## Converting HydroShoot Impulse Experiment data

In [4]:
import os
import sys

import numpy as np
import pandas as pd

# Make the project-local `src` package importable by adding the directory two
# levels above sys.path[0] to the module search path.
# NOTE(review): this relies on sys.path[0] being the notebook's directory - confirm
# for the kernel in use.
project_root = os.path.join(sys.path[0], '../../')
if project_root not in sys.path:  # keep the cell idempotent when it is re-run
  sys.path.insert(1, project_root)  # for importing local packages from src

In [11]:
"""Configure data sources"""

import os  # local import so this cell does not rely on another cell's imports

# The results directory is machine-specific. Set the HYDROSHOOT_EXPERIMENT_ROOT
# environment variable to point at another location instead of editing this cell;
# when it is unset (or empty) the original default below is used.
# Roots used previously:
#   /thesis-cyborg-plants/results/fixed_predawn_7days_jun_jul_aug
#   /media/mcndt/thesis/thesis-cyborg-plants/results/fixed_predawn_7days_jun_jul_aug
EXPERIMENT_ROOT = (
  os.environ.get('HYDROSHOOT_EXPERIMENT_ROOT')
  or 'F:/thesis-cyborg-plants/results/meteo_impulse_hydroshoot'
)

In [13]:
"""Loading dataset"""
from src.model.hydroshoot.hydroshoot_experiment import load_runs, HydroShootExperiment

# Fail early with a readable message; a bare assert carries no explanation and is
# skipped entirely when Python runs with -O.
if not os.path.isdir(EXPERIMENT_ROOT):
  raise NotADirectoryError(f'EXPERIMENT_ROOT is not an existing directory: {EXPERIMENT_ROOT!r}')

runs = load_runs(EXPERIMENT_ROOT)
print(len(runs))

# The summary below inspects the first run, so an empty result is reported explicitly
# instead of surfacing as an IndexError on runs[0].
if len(runs) == 0:
  raise RuntimeError(f'load_runs found no runs in {EXPERIMENT_ROOT!r}')

print(runs[0])
print('Targets:', runs[0].get_targets())
print('Variables:', runs[0].get_state_variables())

30
HydroShootExperiment(n_steps=240, state_size=360)
Targets: ('input_Tac', 'input_u', 'input_hs', 'input_Rg', 'output_Rg', 'output_An', 'output_E', 'output_Tleaf')
Variables: ('Eabs', 'Ci', 'psi_head', 'gs', 'gb', 'Ei', 'An', 'FluxC', 'u', 'E', 'Tlc', 'Flux', 'par_photo.dHd')


In [28]:
from src.util import get_dirs_in_directory

run_dirs = get_dirs_in_directory(EXPERIMENT_ROOT)

# Each run id is the directory entry without its first 15 and last 2 characters.
# NOTE(review): presumably this strips a fixed-width prefix and a short suffix from
# the run directory names - verify against what get_dirs_in_directory returns.
# NOTE(review): the names are later paired with `runs` by position, which assumes
# load_runs and get_dirs_in_directory list the runs in the same order - confirm.
experiment_names = [dir_entry[15:-2] for dir_entry in run_dirs]

experiment_names[:10]

['2012-06-08__width1__0',
 '2012-06-08__width1__1500',
 '2012-06-08__width2__0',
 '2012-06-08__width2__1500',
 '2012-06-08__width3__0',
 '2012-06-08__width3__1500',
 '2012-06-08__width4__0',
 '2012-06-08__width4__1500',
 '2012-06-08__width5__0',
 '2012-06-08__width5__1500']

### Converting dataset


In [29]:
"""Methods for converting data to DataFrame format (from previous notebook)"""

def HydroShoot_input_df(experiment: HydroShootExperiment):
  """Return the experiment's inputs as a frame tagged with type 'INPUT'.

  Every column of ``experiment.inputs`` except 'time' is renamed to
  ``input_<column>``, the index is reset to 0..n-1, and a constant 'type'
  column holding 'INPUT' is inserted at position 1. The work is done on a
  renamed copy, not on ``experiment.inputs`` itself.
  """
  raw_inputs = experiment.inputs
  renamed = {name: f'input_{name}' for name in raw_inputs.columns if name != 'time'}
  tagged = raw_inputs.rename(columns=renamed).reset_index(drop=True)
  tagged.insert(1, 'type', 'INPUT')
  return tagged


def HydroShoot_output_df(experiment: HydroShootExperiment):
  """Return the experiment's outputs as a frame tagged with type 'OUTPUT'.

  Every column of ``experiment.outputs`` except 'time' is renamed to
  ``output_<column>``, the index is reset to 0..n-1, and a constant 'type'
  column holding 'OUTPUT' is inserted at position 1. The work is done on a
  renamed copy, not on ``experiment.outputs`` itself.
  """
  raw_outputs = experiment.outputs
  renamed = {name: f'output_{name}' for name in raw_outputs.columns if name != 'time'}
  tagged = raw_outputs.rename(columns=renamed).reset_index(drop=True)
  tagged.insert(1, 'type', 'OUTPUT')
  return tagged


def HydroShoot_state_df(experiment: HydroShootExperiment):
  """Flatten the per-state series of one experiment into a long DataFrame.

  One row is produced per (step, state id) pair, with steps in the outer loop.
  Each row holds 'time' (``experiment.inputs['time']`` at that step), 'type'
  (always 'STATE'), 'state_id', 'state_type' (always 'leaf') and one
  ``state_<variable>`` column per state variable.

  Args:
    experiment: object exposing ``states.get_variables()``,
      ``states._states[variable][state_id][step]``, ``n_steps()`` and an
      ``inputs`` frame with a 'time' column.

  Returns:
    pd.DataFrame with the columns described above. The frame is empty when
    there are no state variables, no state ids or no steps.

  Raises:
    KeyError: if a state id of the first variable is missing for another variable.
  """
  varnames = experiment.states.get_variables()

  # NOTE: this reads the private `_states` mapping of the states container.
  # Resolve each variable's mapping once instead of on every row.
  series_by_var = {varname: experiment.states._states[varname] for varname in varnames}

  # State ids are taken from the first variable; every other variable is indexed
  # with the same ids. Without any variable there is nothing to enumerate.
  state_ids = list(series_by_var[varnames[0]].keys()) if len(varnames) > 0 else []

  times = experiment.inputs['time']

  rows = []
  for step in range(experiment.n_steps()):
    timestamp = times.iloc[step]  # identical for every state at this step
    for state_id in state_ids:
      row = {'time': timestamp, 'type': 'STATE', 'state_id': state_id, 'state_type': 'leaf'}
      for varname, series_by_id in series_by_var.items():
        row[f'state_{varname}'] = series_by_id[state_id][step]
      rows.append(row)

  return pd.DataFrame(rows)

In [30]:
"""Method for creating a single DataFrame for an experiment, including run id"""

def HydroShoot_experiment_df(experiment: HydroShootExperiment, run_id) -> pd.DataFrame:
  """Stack the input, output and state frames of one experiment and label them.

  The three frames are concatenated in the order input, output, state, each
  keeping its own row index, and ``run_id`` is inserted as the first column
  'run_id' on every row.
  """
  builders = (HydroShoot_input_df, HydroShoot_output_df, HydroShoot_state_df)
  parts = [build(experiment) for build in builders]
  combined = pd.concat(parts)
  combined.insert(loc=0, column='run_id', value=run_id)
  return combined

# Preview the conversion on the first run only.
# NOTE: `dataset_df` is reassigned further down with the frame covering all runs.
dataset_df = HydroShoot_experiment_df(runs[0], run_id=experiment_names[0])
dataset_df.head(n=1)

Unnamed: 0,run_id,time,type,input_Tac,input_u,input_hs,input_Rg,output_Rg,output_An,output_E,...,state_gs,state_gb,state_Ei,state_An,state_FluxC,state_u,state_E,state_Tlc,state_Flux,state_par_photo.dHd
0,2012-06-08__width1__0,2012-06-08,INPUT,17.58,0.418,82.0,0.0,,,,...,,,,,,,,,,


In [33]:
"""Create a single dataframe for all experiment data across all runs."""
from typing import List

def HydroShoot_multirun_df(experiments: List[HydroShootExperiment], experiment_ids) -> pd.DataFrame:
  """Build one DataFrame holding the converted data of every run.

  Args:
    experiments: the runs to convert.
    experiment_ids: run identifiers; ``experiment_ids[i]`` labels ``experiments[i]``.

  Returns:
    The per-run frames concatenated in order, with a fresh 0..n-1 index.

  Raises:
    ValueError: if the number of ids differs from the number of experiments,
      or (raised by ``pd.concat``) if ``experiments`` is empty.
  """
  experiments = list(experiments)

  # Ids are matched to runs by position only. A length mismatch means at least one
  # run would be dropped or mislabelled, so refuse it instead of continuing silently.
  if len(experiments) != len(experiment_ids):
    raise ValueError(
      'experiments and experiment_ids must have the same length, '
      f'got {len(experiments)} and {len(experiment_ids)}'
    )

  run_dfs = [HydroShoot_experiment_df(run, run_id=experiment_ids[i]) for i, run in enumerate(experiments)]
  return pd.concat(run_dfs, ignore_index=True)

# Convert every loaded run, labelling each with the experiment name at the same position.
dataset_df = HydroShoot_multirun_df(experiments=runs, experiment_ids=experiment_names)

In [34]:
# Show the last five rows of the combined frame.
dataset_df.tail(n=5)


Unnamed: 0,run_id,time,type,input_Tac,input_u,input_hs,input_Rg,output_Rg,output_An,output_E,...,state_gs,state_gb,state_Ei,state_An,state_FluxC,state_u,state_E,state_Tlc,state_Flux,state_par_photo.dHd
2606395,2012-08-19__width5__1500,2012-08-28 23:00:00,STATE,,,,,,,,...,-2.778902e-14,0.77088,0.0,-0.559761,-0.009458,0.755,0.0,20.076093,0.0,200.0
2606396,2012-08-19__width5__1500,2012-08-28 23:00:00,STATE,,,,,,,,...,6.990325e-14,0.779614,0.0,-0.555621,-0.008582,0.755,4.105306e-16,20.360084,1.142338e-19,200.0
2606397,2012-08-19__width5__1500,2012-08-28 23:00:00,STATE,,,,,,,,...,-1.162114e-13,0.789021,0.0,-0.561875,-0.00788,0.755,0.0,20.159643,0.0,200.0
2606398,2012-08-19__width5__1500,2012-08-28 23:00:00,STATE,,,,,,,,...,-6.706823e-14,0.798394,0.0,-0.574871,-0.007327,0.755,0.0,20.158075,0.0,200.0
2606399,2012-08-19__width5__1500,2012-08-28 23:00:00,STATE,,,,,,,,...,-1.208258e-13,0.80811,0.0,-0.562382,-0.006509,0.755,0.0,20.272962,0.0,200.0


In [35]:
import os  # local import so this cell does not rely on another cell's imports

# to_csv does not create missing directories, so make sure the target folder exists
# before writing; otherwise the export fails only after the whole conversion has run.
output_csv = '../datasets/hydroshoot_impulse.csv'
os.makedirs(os.path.dirname(output_csv), exist_ok=True)
dataset_df.to_csv(output_csv, index=False)