## Converting HydroShoot Impulse Experiment data

In [1]:
import os
import sys

import numpy as np
import pandas as pd

# `os` and `sys` are used below (and `os` again when the dataset is loaded), so they
# must be imported here for the notebook to run on a fresh kernel.
# Adds the directory two levels above sys.path[0] to the module search path.
sys.path.insert(1, os.path.join(sys.path[0], '../../'))  # for importing local packages from src

In [9]:
"""Configure data sources"""

# Previously used experiment locations, kept commented out for reference only.
# EXPERIMENT_ROOT = '/thesis-cyborg-plants/results/fixed_predawn_7days_jun_jul_aug'
# EXPERIMENT_ROOT = '/media/mcndt/thesis/thesis-cyborg-plants/results/fixed_predawn_7days_jun_jul_aug'
# EXPERIMENT_ROOT = 'F:/thesis-cyborg-plants/results/meteo_impulse_hydroshoot'
# Active experiment root. This is a relative path, so it is resolved against the
# kernel's current working directory.
EXPERIMENT_ROOT = '../../results/meteo_impulse_hydroshoot_v2'

In [10]:
"""Loading dataset"""
from src.model.hydroshoot.hydroshoot_experiment import load_runs, HydroShootExperiment

# Stop right away if the configured experiment directory does not exist.
assert os.path.isdir(EXPERIMENT_ROOT)
runs = load_runs(EXPERIMENT_ROOT)

# Summary of what was loaded: number of runs, then the first run together with
# its target names and state variable names.
print(len(runs))
print(runs[0])
print('Targets:', runs[0].get_targets())
print('Variables:', runs[0].get_state_variables())

11
HydroShootExperiment(n_steps=240, state_size=360)
Targets: ('input_Tac', 'input_u', 'input_hs', 'input_Rg', 'output_Rg', 'output_An', 'output_E', 'output_Tleaf')
Variables: ('psi_head', 'Tlc', 'Flux', 'Eabs', 'Ei', 'E', 'FluxC', 'An', 'Ci', 'u', 'gs', 'gb', 'par_photo.dHd')


In [11]:
from src.util import get_dirs_in_directory

# Sub-directories of the experiment root (presumably one per run — confirm).
run_dirs = get_dirs_in_directory(EXPERIMENT_ROOT)

# Derive a run label from each directory name by cutting off the first 15 and the
# last 2 characters.
# NOTE(review): these offsets are tied to the directory naming scheme — confirm
# they still match if the naming changes.
# NOTE(review): the labels are paired with `runs` by position later on, which
# presumes both listings come back in the same order — confirm.
experiment_names = [dir_name[15:-2] for dir_name in run_dirs]

experiment_names[:11]

['2012-08-01__width0__0',
 '2012-08-01__width1__0',
 '2012-08-01__width1__1500',
 '2012-08-01__width3__0',
 '2012-08-01__width3__1500',
 '2012-08-01__width5__0',
 '2012-08-01__width5__1500',
 '2012-08-01__width7__0',
 '2012-08-01__width7__1500',
 '2012-08-01__width9__0',
 '2012-08-01__width9__1500']

### Converting dataset


In [12]:
"""Methods for converting data to DataFrame format (from previous notebook)"""

def HydroShoot_input_df(experiment: HydroShootExperiment):
  """Return the inputs of a run as a DataFrame tagged as INPUT rows.

  Every column of ``experiment.inputs`` except ``time`` gets an ``input_`` prefix,
  the index is replaced by a fresh 0..n-1 range, and a constant ``type`` column
  holding ``'INPUT'`` is placed at position 1. ``experiment.inputs`` itself is not
  modified.
  """
  source = experiment.inputs
  prefixed = {name: f'input_{name}' for name in source.columns if name != 'time'}
  tagged = source.rename(columns=prefixed).reset_index(drop=True)
  # Position 1 puts the marker right after the first column.
  tagged.insert(1, 'type', 'INPUT')
  return tagged


def HydroShoot_output_df(experiment: HydroShootExperiment):
  """Return the outputs of a run as a DataFrame tagged as OUTPUT rows.

  Every column of ``experiment.outputs`` except ``time`` gets an ``output_`` prefix,
  the index is replaced by a fresh 0..n-1 range, and a constant ``type`` column
  holding ``'OUTPUT'`` is placed at position 1. ``experiment.outputs`` itself is
  not modified.
  """
  raw_outputs = experiment.outputs
  column_map = {}
  for label in raw_outputs.columns:
    if label != 'time':
      column_map[label] = f'output_{label}'
  result = raw_outputs.rename(columns=column_map).reset_index(drop=True)
  # Position 1 puts the marker right after the first column.
  result.insert(1, 'type', 'OUTPUT')
  return result


def HydroShoot_state_df(experiment: HydroShootExperiment):
  """Flatten the per-step state values of one run into a long DataFrame.

  One row is produced per (time step, state id) pair, ordered by step and then by
  state id. Each row holds the step's timestamp taken from
  ``experiment.inputs['time']``, the constant columns ``type='STATE'`` and
  ``state_type='leaf'``, the ``state_id``, and one ``state_<variable>`` column per
  state variable.

  Args:
    experiment: run whose ``states``, ``inputs`` and ``n_steps()`` are read.

  Returns:
    The state DataFrame. It is an empty DataFrame when the run has no state
    variables, no state ids or no steps.
  """
  varnames = experiment.states.get_variables()
  if len(varnames) == 0:
    # No variables means there is no first variable to take the state ids from;
    # return an empty frame instead of failing with IndexError.
    return pd.DataFrame([])

  # NOTE: this reads the private `_states` mapping, indexed as
  # [variable][state_id][step].
  states = experiment.states._states
  # The ids recorded for the first variable are used for every variable.
  state_ids = list(states[varnames[0]].keys())
  if not state_ids:
    return pd.DataFrame([])

  rows = []
  for step in range(experiment.n_steps()):
    # Look the timestamp up once per step rather than once per (step, state id) pair.
    timestamp = experiment.inputs['time'].iloc[step]
    for state_id in state_ids:
      row = {'time': timestamp, 'type': 'STATE', 'state_id': state_id, 'state_type': 'leaf'}
      for varname in varnames:
        row[f'state_{varname}'] = states[varname][state_id][step]
      rows.append(row)

  # Build the frame once from the collected records.
  return pd.DataFrame(rows)

In [13]:
"""Method for creating a single DataFrame for an experiment, including run id"""

def HydroShoot_experiment_df(experiment: HydroShootExperiment, run_id) -> pd.DataFrame:
  """Combine the input, output and state rows of one run into a single DataFrame.

  The three per-run frames are stacked in the order input, output, state, and a
  ``run_id`` column holding ``run_id`` for every row is placed first.

  Args:
    experiment: the run to convert.
    run_id: label written to the ``run_id`` column of every row.

  Returns:
    The stacked DataFrame with a unique 0..n-1 index.
  """
  parts = [
    HydroShoot_input_df(experiment),
    HydroShoot_output_df(experiment),
    HydroShoot_state_df(experiment),
  ]
  # Each part carries its own 0..n-1 index, so a plain concat would repeat index
  # labels across the three parts. ignore_index=True renumbers the rows.
  experiment_df = pd.concat(parts, ignore_index=True)
  experiment_df.insert(0, 'run_id', run_id)
  return experiment_df

# Preview the conversion on the first run only. The preview gets its own name so
# that re-running this cell can never overwrite the multi-run `dataset_df` that is
# built and exported later in the notebook.
single_run_df = HydroShoot_experiment_df(runs[0], experiment_names[0])
single_run_df.head(1)

Unnamed: 0,run_id,time,type,input_Tac,input_u,input_hs,input_Rg,output_Rg,output_An,output_E,...,state_Eabs,state_Ei,state_E,state_FluxC,state_An,state_Ci,state_u,state_gs,state_gb,state_par_photo.dHd
0,2012-08-01__width0__0,2012-08-01,INPUT,20.74,0.03,53.7,0.0,,,,...,,,,,,,,,,


In [14]:
"""Create a single dataframe for all experiment data across all runs."""
from typing import List

def HydroShoot_multirun_df(experiments: List[HydroShootExperiment], experiment_ids) -> pd.DataFrame:
  """Stack the per-run DataFrames of several runs into one DataFrame.

  The run at position ``i`` in ``experiments`` is labelled with
  ``experiment_ids[i]``. The result is renumbered with a single 0..n-1 index.
  """
  frames = []
  for position, experiment in enumerate(experiments):
    frames.append(HydroShoot_experiment_df(experiment, run_id=experiment_ids[position]))
  return pd.concat(frames, ignore_index=True)

# Build the combined frame for all loaded runs; run ids are taken from
# `experiment_names` by position.
dataset_df = HydroShoot_multirun_df(runs, experiment_names)

In [15]:
# Show the last rows of the combined frame as a quick sanity check.
dataset_df.tail()


Unnamed: 0,run_id,time,type,input_Tac,input_u,input_hs,input_Rg,output_Rg,output_An,output_E,...,state_Eabs,state_Ei,state_E,state_FluxC,state_An,state_Ci,state_u,state_gs,state_gb,state_par_photo.dHd
955675,2012-08-01__width9__1500,2012-08-10 23:00:00,STATE,,,,,,,,...,0.0,0.0,0.0,-0.012042,-0.710804,-11845110.0,0.237,-5.000767e-14,0.436211,200.0
955676,2012-08-01__width9__1500,2012-08-10 23:00:00,STATE,,,,,,,,...,0.0,0.0,0.0,-0.011043,-0.715353,-82683370.0,0.237,-7.161751e-15,0.44135,200.0
955677,2012-08-01__width9__1500,2012-08-10 23:00:00,STATE,,,,,,,,...,0.0,0.0,7.592222e-16,-0.010354,-0.738731,11905210.0,0.237,4.982423e-14,0.446676,200.0
955678,2012-08-01__width9__1500,2012-08-10 23:00:00,STATE,,,,,,,,...,0.0,0.0,0.0,-0.009588,-0.751502,-14178620.0,0.237,-4.181083e-14,0.451982,200.0
955679,2012-08-01__width9__1500,2012-08-10 23:00:00,STATE,,,,,,,,...,0.0,0.0,5.241271e-16,-0.008416,-0.729363,17698960.0,0.237,3.354787e-14,0.457706,200.0


In [16]:
# Write the combined frame to CSV. index=False leaves the row index out of the file.
# The path is relative to the kernel's working directory, and the target directory
# must already exist.
dataset_df.to_csv('../datasets/hydroshoot_impulse_v2.csv', index=False)