Scratchwork and loose record keeping of how I have been updating metadata fields to reflect the newest version of the experiment and to harmonize import into the database.

* Using AWS and rclone to sync **FROM:s3 TO:local** (using exclude file):
    * `rclone sync antenna-aws:antenna-fs ~/data/antenna-fs --verbose -P --exclude-from sync_exclude.txt --dry-run`
* Sync **FROM:local TO:s3** (using exclude file):
    * `rclone sync ~/data/antenna-fs antenna-aws:antenna-fs --verbose -P --exclude-from sync_exclude.txt --dry-run`
* remove `--dry-run` after testing

In [1]:
import json
import tqdm
from itertools import chain
from pathlib import Path

In [26]:
%pdb off

Automatic pdb calling has been turned OFF


Add `probe_angle` field to metadata since it may now change with a new platform/holder.

```
# Add a default `probe_angle` of 15 to every session.json that lacks the field.
# Files that already define `probe_angle` are left untouched.
data_path = Path('~/data/antenna-fs/').expanduser()
session_json_paths = data_path.glob('*/exp_ephys_*/*/session.json')

for session_json_path in tqdm.tqdm(session_json_paths):
    # read inside a context manager so the handle is closed before rewriting
    with session_json_path.open() as file:
        session_meta = json.load(file)

    if 'probe_angle' in session_meta:
        continue

    session_meta['probe_angle'] = 15
    with session_json_path.open('w') as file:
        json.dump(session_meta, file, indent=4)  # matches formatting from matlab
```

Rename `params_hash_piezo` to `stim_hash_piezo` in all files where it is required

```
# Rename the `params_hash_piezo` key to `stim_hash_piezo` in every trial json
# under raw/. Files that already contain `stim_hash_piezo` are left untouched.
data_path = Path('~/data/antenna-fs/').expanduser().joinpath('raw')
trial_json_paths = data_path.glob('exp_ephys_*/session_*/*/*.json')

for trial_json_path in tqdm.tqdm(trial_json_paths):
    # rsync/osx garbage
    if trial_json_path.name.startswith('.'):
        continue

    # read inside a context manager so the handle is closed before rewriting
    with trial_json_path.open() as file:
        trial_meta = json.load(file)

    if 'stim_hash_piezo' in trial_meta:
        continue

    # pop + assign moves the value to the new key in one step; a file with
    # neither key raises KeyError before anything is modified or written
    trial_meta['stim_hash_piezo'] = trial_meta.pop('params_hash_piezo')

    with trial_json_path.open('w') as file:
        json.dump(trial_meta, file, indent=4)  # matches formatting from matlab
```

Add `antenna_side` because (yay!) I can now choose based on driver line. All old experiments in this dataset (so, NOT suction electrode experiments) use the _fly's_ right side. 

```
# Add `antenna_side: 'right'` to every session.json that lacks the field.
# Files that already define `antenna_side` are left untouched.
data_path = Path('~/data/antenna-fs/').expanduser()
session_json_paths = data_path.glob('*/exp_ephys_*/*/session.json')

for session_json_path in tqdm.tqdm(session_json_paths):
    # read inside a context manager so the handle is closed before rewriting
    with session_json_path.open() as file:
        session_meta = json.load(file)

    if 'antenna_side' in session_meta:
        continue

    session_meta['antenna_side'] = 'right'
    with session_json_path.open('w') as file:
        json.dump(session_meta, file, indent=4)  # matches formatting from matlab
```

Add `physiology_objective` field to metadata since it affects probe travel calculation and multiple have now been used. (will manually edit the json files where I have used a different objective before syncing)

```
# Add a default `physiology_objective` to every session.json that lacks the
# field. Files that already define it are left untouched.
data_path = Path('~/data/antenna-fs/').expanduser()
session_json_paths = data_path.glob('*/exp_ephys_*/*/session.json')

for session_json_path in tqdm.tqdm(session_json_paths):
    # read inside a context manager so the handle is closed before rewriting
    with session_json_path.open() as file:
        session_meta = json.load(file)

    if 'physiology_objective' in session_meta:
        continue

    session_meta['physiology_objective'] = 'OLYMPUS_LUMFLN60XW'  # other is 'OLYMPUS_LUMPLFLN40XW'
    with session_json_path.open('w') as file:
        json.dump(session_meta, file, indent=4)  # matches formatting from matlab
```

Switch from a `use_ignore_trials.json` file to a `trial_id_flags.csv` file that lists every `trial_id` together with a `use_flag` of 0 (ignore) or 1 (use).

```
import os
import pandas as pd
import numpy as np

# Convert each session's use_ignore_trials.json into a trial_id_flags.csv that
# holds one row per trial_id with a use_flag of 1 (use) or 0 (ignore).
data_path = Path('~/data/antenna-fs/').expanduser()
# Materialize the glob: a bare generator would be exhausted by the main loop,
# leaving nothing for the (disabled) cleanup loop at the bottom to iterate.
use_ignore_json_paths = list(data_path.glob('*/exp_ephys_*/*/use_ignore_trials.json'))

# trials stored in these folders are set to use_flag = 1 by default
always_use_folders = ('camera_calibration', 'unstructured_recordings')

for use_ignore_json_path in use_ignore_json_paths:
    session_path = use_ignore_json_path.parent

    # gather all the trial ids (json files one folder below the session
    # folder), remembering which of them live in an always-use folder
    trial_ids = []
    always_use_ids = []
    for trial_meta_path in session_path.glob('*/*.json'):
        # rsync/osx garbage
        if trial_meta_path.name.startswith('.'):
            continue
        with trial_meta_path.open() as file:
            trial_id = json.load(file)['trial_id']
        trial_ids.append(trial_id)
        if trial_meta_path.parent.name in always_use_folders:
            always_use_ids.append(trial_id)
    trial_ids = np.sort(trial_ids)

    with use_ignore_json_path.open() as file:
        use_ignore_dict = json.load(file)

    # every trial starts out as usable
    use_df = pd.DataFrame(data={'trial_id': trial_ids, 'use_flag': np.ones_like(trial_ids)})

    # trials before start_trial are ignored
    if use_ignore_dict['start_trial'] > 0:
        use_df.loc[use_df.trial_id < use_ignore_dict['start_trial'], 'use_flag'] = 0

    # trials after end_trial are ignored; -1 means there is no upper limit
    if use_ignore_dict['end_trial'] != -1:
        use_df.loc[use_df.trial_id > use_ignore_dict['end_trial'], 'use_flag'] = 0

    # explicitly listed trials are ignored
    use_df.loc[use_df.trial_id.isin(use_ignore_dict['ignore_trials']), 'use_flag'] = 0

    # applied last so it overrides the start/end/ignore rules above
    use_df.loc[use_df.trial_id.isin(always_use_ids), 'use_flag'] = 1

    use_df.to_csv(session_path.joinpath('trial_id_flags.csv'), index=False)

# Cleanup, left disabled: uncomment to delete the old json files.
# for use_ignore_json_path in use_ignore_json_paths:
#     os.remove(use_ignore_json_path.as_posix())

```