## A data wrangling application for Hydrus-2D
#### Structure 
1. Simple Example for using `sample_items`, custom module to load hydrus 2d outputs 
2. Workflow for automated section output into csv 


In [2]:
import sys 
import os
from samples import sample_items, read_item, get_sets_from_collection, get_set_from_collection, write_horizontal_sheet
import xarray as xr
import numpy as np
import pandas as pd

### 1. Simple Example for using `sample_items`, custom module to load hydrus 2d outputs 

* uses file naming structure of hydrus-2d outputs to give meaningful attributes to data
* uses meaningful attributes to build collection of hydrus-2d transects with meaningful metadata

In [3]:
# directory of data in owncloud **MODIFY FOR YOUR FILES**
# NOTE(review): this is an absolute, user-specific path, so the notebook only
# runs unmodified on the original author's machine.
directory = 'c:/Users/QuinnHull/ownCloud-active jobs/BHP Chile/2150 - BHP Chile - MEL Permeability Testing Program for SHL Ores/Hydrus model/Sulphide AND SCC'

# custom module `sample_items, read in from samples.py`
# The object is built from the export directory; every lookup below is pulled from it.
sample = sample_items(sample_directory=directory)
times = sample.get_unique_times() # export times (mapping; its .values() are printed in the next cell)
schemes = sample.get_unique_schemes() # experiment schemes (mapping; its .values() are printed in the next cell)
sections = sample.get_unique_sections() # section names (mapping; its .keys() are printed in the next cell)
paths = sample.get_paths() # paths to data
time_info = sample.get_time_info() # detailed information about the meaning of time; indexed by column name (e.g. 'period') later on
sample_collection = sample.make_sample_collection() # collection of data, passed to get_set_from_collection / write_horizontal_sheet below


  self.time_info = np.genfromtxt(fname=path, names=True, delimiter=',', dtype=None)


#### Interrogate Attributes of Collection from `sample_items`
* this is what is read in from the file structure of the directory to make sense of the contents of files

In [4]:
# Show the lookups that `sample_items` produced, one labelled line each.
print('times', list(times.values()), '\n')

print('schemes', list(map(str, schemes.values())), '\n')
print('sections', list(sections.keys()), '\n')
# Fetch the time-info table again and list the distinct values of the two
# columns that are used as filters further down.
time_info = sample.get_time_info()
print('where_periods', np.unique(time_info['period']), '\n')
print('where_irrigation', np.unique(time_info['ï»¿cum_irr_cm']), '\n')

times [24.0, 384.0, 12.0, 48.0, 273.0, 96.0, 12.0, 6.0, 4.75, 1.0, 8.0, 6.0, 3.5, 420.0, 4.75, 2.0, 48.0, 88.0, 48.0, 5.375, 48.0, 384.0, 12.0, 6.5, 384.0, 48.0, 12.0, 3.5, 12.0, 276.0, 6.0] 

schemes ['1', '2', '3', '4', '5', '6', '7'] 

sections ['Cross-Section_No_1_-_Top', 'Cross-Section_No_2_-_5_cm', 'Cross-Section_No_3_-_10_cm', 'Cross-Section_No_4_-_15cm', 'Cross-Section_No_5_-_30cm', 'Cross-Section_No_6_-_60cm', 'Cross-Section_No_7_-_100cm', 'Cross-Section_No_8_-_200cm', 'Mesh-Line_No_1_-_V0cm', 'Mesh-Line_No_2_-_V25cm'] 

where_periods [b'After ON period' b'Before next ON period' b'Mid OFF period'
 b'Mid ON period'] 

where_irrigation [   9.55   19.1    38.2    57.3    76.39  152.79 1680.68] 



#### Basic Data Extraction
* this is how the module combines the metadata and data to build custom mini-collections of data. 
    * This is used later to write into excel documents
    * Also helpful for custom plotting in python (tbd)

In [5]:
# Selection criteria for a single mini-collection.
section = 'Cross-Section_No_1_-_Top'
scheme = '1'
where_period = b'Before next ON period'
# With None the result below spans several cumulative-irrigation values;
# presumably a number such as 9.55 narrows it to one -- confirm in samples.py.
where_irrigation = None

li = get_set_from_collection(
    scheme=scheme,
    section=section,
    sample_collection=sample_collection,
    where_period=where_period,
    where_irrigation=where_irrigation,
)

# Each returned item carries its metadata in `.attrs`.
print('Example Content for metadata on cumulative irrigation fraction')
print([entry.attrs['section'] for entry in li])
print([entry.attrs['time_info']['cumulative_irrigation_cm'][0] for entry in li])
print([entry.attrs['scheme'] for entry in li])

Example Content for metadata on cumulative irrigation fraction
['Cross-Section_No_1_-_Top', 'Cross-Section_No_1_-_Top', 'Cross-Section_No_1_-_Top', 'Cross-Section_No_1_-_Top']
[19.1, 38.2, 152.79, 1680.68]
['1', '1', '1', '1']


### Putting it all together for automated Series Creation

##### Reinitializing Everything

In [6]:
# Same export directory as in the simple example, declared again as part of
# the re-initialisation. **MODIFY FOR YOUR FILES**
# NOTE(review): absolute, user-specific path -- consider defining it once.
directory = 'c:/Users/QuinnHull/ownCloud-active jobs/BHP Chile/2150 - BHP Chile - MEL Permeability Testing Program for SHL Ores/Hydrus model/Sulphide AND SCC'

# Rebuild the `sample_items` object from the directory.
sample = sample_items(sample_directory=directory)

# Pull every lookup again so the export cells below start from fresh values.
times = sample.get_unique_times()  # export times
schemes = sample.get_unique_schemes()  # experiment schemes
sections = sample.get_unique_sections()  # section names
paths = sample.get_paths()  # paths to data
time_info = sample.get_time_info()  # time metadata table, indexed by column name below
sample_collection = sample.make_sample_collection()  # collection handed to write_horizontal_sheet

# Distinct values of the two time-info columns.
periods = np.unique(time_info['period'])
irrigation_rates = np.unique(time_info['ï»¿cum_irr_cm'])

# Flatten the section lookup to a plain list of its names, then split it:
# the last two entries go to `vsections`, everything before to `hsections`.
# (In the listing printed earlier the last two are the Mesh-Line entries.)
sections = list(sections)
hsections = sections[:-2]
vsections = sections[-2:]

# Scheme labels are regenerated as '1', '2', ... purely from position; the
# original lookup's own values are not consulted here.
schemes = [str(position + 1) for position, _ in enumerate(schemes)]



#### Saving Directories

In [7]:
# Output root is named after the sample, with one sub-folder per distinct period.
# NOTE: `periods` holds bytes values, so str(period) produces folder names of
# the form "b'After ON period'". The vertical-section export further down
# builds its target path with the same str(...) call, so both must stay in sync.
save_dir = sample.sample_name
save_dir_arr = [os.path.join(save_dir, str(period)) for period in periods]
save_dir_arr.append(save_dir)

# exist_ok=True keeps the cell re-runnable and replaces the separate
# exists() check, which was open to a check-then-create race.
for save in save_dir_arr:
    os.makedirs(save, exist_ok=True)

#### Creating Vertical and Horizontal Sections

In [8]:
# Call write_horizontal_sheet once per horizontal section, filtered to the
# 'Before next ON period' entries and saved directly under save_dir.
for section in hsections:
    write_horizontal_sheet(
        section=section,
        schemes=schemes,
        sample_collection=sample_collection,
        where_period=b'Before next ON period',
        where_irrigation=None,
        save_dir=save_dir,
        insert_list=['cumulative_irrigation_cm', 'scheme'],
    )


In [9]:
# For every vertical section, call write_horizontal_sheet once per period and
# save into that period's sub-folder (named with str(where_period)).
for section in vsections:
    for where_period in periods:
        write_horizontal_sheet(
            section=section,
            schemes=schemes,
            sample_collection=sample_collection,
            where_period=where_period,
            where_irrigation=None,
            save_dir=os.path.join(save_dir, str(where_period)),
            insert_list=['cumulative_irrigation_cm', 'scheme'],
        )


No Data in collection 
No Data in collection 
No Data in collection 
No Data in collection 
No Data in collection 
No Data in collection 
No Data in collection 
No Data in collection 
No Data in collection 
No Data in collection 
No Data in collection 
No Data in collection 
No Data in collection 
No Data in collection 
