# Dataset Definition DataFrame

For easier interaction, a user can supply a DataFrame of runs to process. This is a simple example of how to do that.

In [None]:
import pandas as pd
from mt_metadata.transfer_functions.processing.aurora import Processing

In [None]:
# Build the dataset-definition table: one row per (run, time window, station).
# Every row records the station and run identifiers, the time window covered,
# the MTH5 file path, the sample rate, the input/output channel lists and
# whether the station is flagged as a remote reference.

# Paired start/end times; element k of `starts` goes with element k of `ends`.
starts = ["2020-01-01T00:00:00", "2020-02-02T00:00:00"]
ends = ["2020-01-31T12:00:00", "2020-02-28T12:00:00"]

# (station id, remote flag): one local station followed by two remotes.
# The order here fixes the row order inside each (run, window) group.
station_roles = [("mt01", False), ("rr01", True), ("rr02", True)]

# A single record template replaces three copy-pasted dicts that differed only
# in "station" and "remote". The channel lists are literals inside the
# comprehension, so each row gets its own list objects (no shared mutables).
data_list = [
    {
        "station": station,
        "run": f"{ii:03}",  # zero-padded run label: "000", "001", "002"
        "start": start,
        "end": end,
        "mth5_path": r"/home/mth5_path.h5",
        "sample_rate": 10,
        "input_channels": ["hx", "hy"],
        "output_channels": ["hz", "ex", "ey"],
        "remote": remote,
    }
    for ii in range(3)
    for start, end in zip(starts, ends)
    for station, remote in station_roles
]

sdf = pd.DataFrame(data_list)

# Convert the ISO-8601 strings to datetime64 columns. Bracket indexing is used
# instead of attribute assignment so it is unambiguous that an existing column
# is being replaced.
sdf["start"] = pd.to_datetime(sdf["start"])
sdf["end"] = pd.to_datetime(sdf["end"])

In [None]:
# Display the dataset-definition DataFrame.
# NOTE(review): the saved output lists `station_id`/`run_id` columns, while the
# cell that builds `sdf` writes the keys `station`/`run` — the saved output
# looks stale; re-run the notebook to refresh it.
sdf

Unnamed: 0,station_id,run_id,start,end,mth5_path,sample_rate,input_channels,output_channels,remote
0,mt01,0,2020-01-01,2020-01-31 12:00:00,/home/mth5_path.h5,10,"[hx, hy]","[hz, ex, ey]",False
1,rr01,0,2020-01-01,2020-01-31 12:00:00,/home/mth5_path.h5,10,"[hx, hy]","[hz, ex, ey]",True
2,rr02,0,2020-01-01,2020-01-31 12:00:00,/home/mth5_path.h5,10,"[hx, hy]","[hz, ex, ey]",True
3,mt01,0,2020-02-02,2020-02-28 12:00:00,/home/mth5_path.h5,10,"[hx, hy]","[hz, ex, ey]",False
4,rr01,0,2020-02-02,2020-02-28 12:00:00,/home/mth5_path.h5,10,"[hx, hy]","[hz, ex, ey]",True
5,rr02,0,2020-02-02,2020-02-28 12:00:00,/home/mth5_path.h5,10,"[hx, hy]","[hz, ex, ey]",True
6,mt01,1,2020-01-01,2020-01-31 12:00:00,/home/mth5_path.h5,10,"[hx, hy]","[hz, ex, ey]",False
7,rr01,1,2020-01-01,2020-01-31 12:00:00,/home/mth5_path.h5,10,"[hx, hy]","[hz, ex, ey]",True
8,rr02,1,2020-01-01,2020-01-31 12:00:00,/home/mth5_path.h5,10,"[hx, hy]","[hz, ex, ey]",True
9,mt01,1,2020-02-02,2020-02-28 12:00:00,/home/mth5_path.h5,10,"[hx, hy]","[hz, ex, ey]",False


In [None]:
# Create an empty aurora Processing object and fill its `stations` section
# from the dataset DataFrame. The repr in the next cell shows "mt01" (the
# remote == False rows) stored under `stations.local`.
p = Processing()
p.stations.from_dataset_dataframe(sdf)

In [None]:
# Display the Processing object to inspect what was loaded from `sdf`.
p

{
    "processing": {
        "channel_nomenclature.ex": "ex",
        "channel_nomenclature.ey": "ey",
        "channel_nomenclature.hx": "hx",
        "channel_nomenclature.hy": "hy",
        "channel_nomenclature.hz": "hz",
        "decimations": [],
        "id": null,
        "stations.local.id": "mt01",
        "stations.local.mth5_path": "/home/mth5_path.h5",
        "stations.local.remote": false,
        "stations.local.runs": [
            {
                "run": {
                    "id": "000",
                    "input_channels": [
                        {
                            "channel": {
                                "id": "hx",
                                "scale_factor": 1.0
                            }
                        },
                        {
                            "channel": {
                                "id": "hy",
                                "scale_factor": 1.0
                            }
                        }
       

In [None]:
# Round trip: export the stations held by `p` back to a dataset DataFrame and
# display it. Compared with `sdf`, the saved output has an extra `index` column
# and a `channel_scale_factors` column, and its timestamps carry a +00:00 offset.
df2 = p.stations.to_dataset_dataframe()
df2

  local_df = local_df.append(rr.to_dataset_dataframe())
  local_df = local_df.append(rr.to_dataset_dataframe())


Unnamed: 0,index,station_id,run_id,start,end,mth5_path,sample_rate,input_channels,output_channels,remote,channel_scale_factors
0,0,mt01,0,2020-01-01 00:00:00+00:00,2020-01-31 12:00:00+00:00,/home/mth5_path.h5,10.0,"[hx, hy]","[hz, ex, ey]",False,"{'hx': 1.0, 'hy': 1.0, 'hz': 1.0, 'ex': 1.0, '..."
1,1,mt01,0,2020-02-02 00:00:00+00:00,2020-02-28 12:00:00+00:00,/home/mth5_path.h5,10.0,"[hx, hy]","[hz, ex, ey]",False,"{'hx': 1.0, 'hy': 1.0, 'hz': 1.0, 'ex': 1.0, '..."
2,2,mt01,1,2020-01-01 00:00:00+00:00,2020-01-31 12:00:00+00:00,/home/mth5_path.h5,10.0,"[hx, hy]","[hz, ex, ey]",False,"{'hx': 1.0, 'hy': 1.0, 'hz': 1.0, 'ex': 1.0, '..."
3,3,mt01,1,2020-02-02 00:00:00+00:00,2020-02-28 12:00:00+00:00,/home/mth5_path.h5,10.0,"[hx, hy]","[hz, ex, ey]",False,"{'hx': 1.0, 'hy': 1.0, 'hz': 1.0, 'ex': 1.0, '..."
4,4,mt01,2,2020-01-01 00:00:00+00:00,2020-01-31 12:00:00+00:00,/home/mth5_path.h5,10.0,"[hx, hy]","[hz, ex, ey]",False,"{'hx': 1.0, 'hy': 1.0, 'hz': 1.0, 'ex': 1.0, '..."
5,5,mt01,2,2020-02-02 00:00:00+00:00,2020-02-28 12:00:00+00:00,/home/mth5_path.h5,10.0,"[hx, hy]","[hz, ex, ey]",False,"{'hx': 1.0, 'hy': 1.0, 'hz': 1.0, 'ex': 1.0, '..."
6,0,rr01,0,2020-01-01 00:00:00+00:00,2020-01-31 12:00:00+00:00,/home/mth5_path.h5,10.0,"[hx, hy]","[hz, ex, ey]",True,"{'hx': 1.0, 'hy': 1.0, 'hz': 1.0, 'ex': 1.0, '..."
7,1,rr01,0,2020-02-02 00:00:00+00:00,2020-02-28 12:00:00+00:00,/home/mth5_path.h5,10.0,"[hx, hy]","[hz, ex, ey]",True,"{'hx': 1.0, 'hy': 1.0, 'hz': 1.0, 'ex': 1.0, '..."
8,2,rr01,1,2020-01-01 00:00:00+00:00,2020-01-31 12:00:00+00:00,/home/mth5_path.h5,10.0,"[hx, hy]","[hz, ex, ey]",True,"{'hx': 1.0, 'hy': 1.0, 'hz': 1.0, 'ex': 1.0, '..."
9,3,rr01,1,2020-02-02 00:00:00+00:00,2020-02-28 12:00:00+00:00,/home/mth5_path.h5,10.0,"[hx, hy]","[hz, ex, ey]",True,"{'hx': 1.0, 'hy': 1.0, 'hz': 1.0, 'ex': 1.0, '..."


In [None]:
# Show `df2` with a fresh 0..n-1 index. The result is only displayed, not
# assigned, so `df2` itself is unchanged. Because `df2` already has a column
# named `index`, the previous index appears as `level_0` in the output.
df2.reset_index()

Unnamed: 0,level_0,index,station_id,run_id,start,end,mth5_path,sample_rate,input_channels,output_channels,remote,channel_scale_factors
0,0,0,mt01,0,2020-01-01 00:00:00+00:00,2020-01-31 12:00:00+00:00,/home/mth5_path.h5,10.0,"[hx, hy]","[hz, ex, ey]",False,"{'hx': 1.0, 'hy': 1.0, 'hz': 1.0, 'ex': 1.0, '..."
1,1,1,mt01,0,2020-02-02 00:00:00+00:00,2020-02-28 12:00:00+00:00,/home/mth5_path.h5,10.0,"[hx, hy]","[hz, ex, ey]",False,"{'hx': 1.0, 'hy': 1.0, 'hz': 1.0, 'ex': 1.0, '..."
2,2,2,mt01,1,2020-01-01 00:00:00+00:00,2020-01-31 12:00:00+00:00,/home/mth5_path.h5,10.0,"[hx, hy]","[hz, ex, ey]",False,"{'hx': 1.0, 'hy': 1.0, 'hz': 1.0, 'ex': 1.0, '..."
3,3,3,mt01,1,2020-02-02 00:00:00+00:00,2020-02-28 12:00:00+00:00,/home/mth5_path.h5,10.0,"[hx, hy]","[hz, ex, ey]",False,"{'hx': 1.0, 'hy': 1.0, 'hz': 1.0, 'ex': 1.0, '..."
4,4,4,mt01,2,2020-01-01 00:00:00+00:00,2020-01-31 12:00:00+00:00,/home/mth5_path.h5,10.0,"[hx, hy]","[hz, ex, ey]",False,"{'hx': 1.0, 'hy': 1.0, 'hz': 1.0, 'ex': 1.0, '..."
5,5,5,mt01,2,2020-02-02 00:00:00+00:00,2020-02-28 12:00:00+00:00,/home/mth5_path.h5,10.0,"[hx, hy]","[hz, ex, ey]",False,"{'hx': 1.0, 'hy': 1.0, 'hz': 1.0, 'ex': 1.0, '..."
6,6,0,rr01,0,2020-01-01 00:00:00+00:00,2020-01-31 12:00:00+00:00,/home/mth5_path.h5,10.0,"[hx, hy]","[hz, ex, ey]",True,"{'hx': 1.0, 'hy': 1.0, 'hz': 1.0, 'ex': 1.0, '..."
7,7,1,rr01,0,2020-02-02 00:00:00+00:00,2020-02-28 12:00:00+00:00,/home/mth5_path.h5,10.0,"[hx, hy]","[hz, ex, ey]",True,"{'hx': 1.0, 'hy': 1.0, 'hz': 1.0, 'ex': 1.0, '..."
8,8,2,rr01,1,2020-01-01 00:00:00+00:00,2020-01-31 12:00:00+00:00,/home/mth5_path.h5,10.0,"[hx, hy]","[hz, ex, ey]",True,"{'hx': 1.0, 'hy': 1.0, 'hz': 1.0, 'ex': 1.0, '..."
9,9,3,rr01,1,2020-02-02 00:00:00+00:00,2020-02-28 12:00:00+00:00,/home/mth5_path.h5,10.0,"[hx, hy]","[hz, ex, ey]",True,"{'hx': 1.0, 'hy': 1.0, 'hz': 1.0, 'ex': 1.0, '..."


In [None]:
from mth5.mth5 import MTH5
from mt_metadata import MT_EXPERIMENT_MULTIPLE_RUNS
from mt_metadata.timeseries import Experiment

In [None]:
# Create an empty Experiment and populate it from the example
# MT_EXPERIMENT_MULTIPLE_RUNS imported from mt_metadata.
# NOTE(review): presumably this constant is a path to a bundled XML file — confirm.
experiment = Experiment()
experiment.from_xml(MT_EXPERIMENT_MULTIPLE_RUNS)

In [None]:
# Open test_dataset_definition.h5 in mode "w"; the log line below reports a
# new MTH5 0.2.0 file being initialized.
# NOTE(review): mode "w" presumably overwrites an existing file of that name — confirm.
m = MTH5()
m.open_mth5("test_dataset_definition.h5", "w")

[1m2023-09-26T11:52:45.573114-0700 | INFO | mth5.mth5 | _initialize_file | Initialized MTH5 0.2.0 file test_dataset_definition.h5 in mode w[0m


In [None]:
# Load the experiment into the open MTH5 file; the channel summary built in
# the next cell lists the survey/station/run/channel entries that result.
m.from_experiment(experiment)

In [None]:
# Rebuild the channel summary: clear the existing table, summarize again,
# then convert it to a DataFrame (one row per channel in the saved output) and display it.
# NOTE(review): presumably summarize() repopulates the table from the file contents — confirm.
m.channel_summary.clear_table()
m.channel_summary.summarize()
channel_df = m.channel_summary.to_dataframe()
channel_df

Unnamed: 0,survey,station,run,latitude,longitude,elevation,component,start,end,n_samples,sample_rate,measurement_type,azimuth,tilt,units,hdf5_reference,run_hdf5_reference,station_hdf5_reference
0,CONUS South,UTS14,a,37.563198,-113.301663,2490.775,ex,2020-07-05 23:19:41+00:00,2020-07-06 00:11:55+00:00,3134,1.0,electric,11.193362,0.0,counts,<HDF5 object reference>,<HDF5 object reference>,<HDF5 object reference>
1,CONUS South,UTS14,a,37.563198,-113.301663,2490.775,ey,2020-07-05 23:19:41+00:00,2020-07-06 00:11:55+00:00,3134,1.0,electric,101.193362,0.0,counts,<HDF5 object reference>,<HDF5 object reference>,<HDF5 object reference>
2,CONUS South,UTS14,a,37.563198,-113.301663,2490.775,hx,2020-07-05 23:19:41+00:00,2020-07-06 00:11:55+00:00,3134,1.0,magnetic,11.193362,0.0,counts,<HDF5 object reference>,<HDF5 object reference>,<HDF5 object reference>
3,CONUS South,UTS14,a,37.563198,-113.301663,2490.775,hy,2020-07-05 23:19:41+00:00,2020-07-06 00:11:55+00:00,3134,1.0,magnetic,101.193362,0.0,counts,<HDF5 object reference>,<HDF5 object reference>,<HDF5 object reference>
4,CONUS South,UTS14,a,37.563198,-113.301663,2490.775,hz,2020-07-05 23:19:41+00:00,2020-07-06 00:11:55+00:00,3134,1.0,magnetic,0.0,90.0,counts,<HDF5 object reference>,<HDF5 object reference>,<HDF5 object reference>
5,CONUS South,UTS14,b,37.563198,-113.301663,2490.775,ex,2020-07-06 00:32:41+00:00,2020-07-20 17:43:45+00:00,1271464,1.0,electric,11.193368,0.0,counts,<HDF5 object reference>,<HDF5 object reference>,<HDF5 object reference>
6,CONUS South,UTS14,b,37.563198,-113.301663,2490.775,ey,2020-07-06 00:32:41+00:00,2020-07-20 17:43:45+00:00,1271464,1.0,electric,101.193368,0.0,counts,<HDF5 object reference>,<HDF5 object reference>,<HDF5 object reference>
7,CONUS South,UTS14,b,37.563198,-113.301663,2490.775,hx,2020-07-06 00:32:41+00:00,2020-07-20 17:43:45+00:00,1271464,1.0,magnetic,11.193368,0.0,counts,<HDF5 object reference>,<HDF5 object reference>,<HDF5 object reference>
8,CONUS South,UTS14,b,37.563198,-113.301663,2490.775,hy,2020-07-06 00:32:41+00:00,2020-07-20 17:43:45+00:00,1271464,1.0,magnetic,101.193368,0.0,counts,<HDF5 object reference>,<HDF5 object reference>,<HDF5 object reference>
9,CONUS South,UTS14,b,37.563198,-113.301663,2490.775,hz,2020-07-06 00:32:41+00:00,2020-07-20 17:43:45+00:00,1271464,1.0,magnetic,0.0,90.0,counts,<HDF5 object reference>,<HDF5 object reference>,<HDF5 object reference>


### Run Summary [new feature in MTH5]
Compresses the table down to individual runs.

In [None]:
# Display the run summary of the open MTH5 file (attribute access only;
# nothing is assigned, and no output is saved for this cell).
m.run_summary

In [13]:
# Close the MTH5 file; the log line below reports flushing and closing
# test_dataset_definition.h5.
m.close_mth5()

[1m2023-09-26T11:52:46.392872-0700 | INFO | mth5.mth5 | close_mth5 | Flushing and closing test_dataset_definition.h5[0m
