In [2]:
import os
import re
import sys
from datetime import datetime

import numpy as np
import pandas as pd

In [29]:
# --------------------------------------------------
def create_dict(directory, plants=False):
    """Build the ``{"DATA_FILE_LIST": [...]}`` record list used for bundling.

    Two modes are supported:

    * ``plants`` is truthy: return one ``{"plant": name}`` record per entry of
      ``plants``; ``directory`` is not touched.
    * ``plants`` is falsey (the default, but also an empty list): walk
      ``directory`` and emit one record per ``.json`` file whose name contains
      a UUID-shaped token.

    Args:
        directory: Root directory to walk when no plants are supplied.
        plants: Optional iterable of plant names.

    Returns:
        dict: ``{"DATA_FILE_LIST": records}``. In directory mode each record has
        the keys ``DATE``, ``RAW_DATA_PATH``, ``SUBDIR`` and ``UUID``.

    Raises:
        ValueError: If a directory path contains a ``dddd-dd-dd`` token that is
            not a valid calendar date.
    """
    if plants:
        return {"DATA_FILE_LIST": [{"plant": p} for p in plants]}

    date_pattern = re.compile(r'\d{4}-\d{2}-\d{2}')
    uuid_pattern = re.compile(r'\w{8}-\w{4}-\w{4}-\w{4}-\w{12}')

    dir_list = []
    for root, dirs, files in os.walk(directory):
        date_match = date_pattern.search(root)
        if date_match is None:
            # No date in this path (typically the walk root itself): nothing to
            # tag its files with, so skip it instead of crashing on None.group().
            continue
        date = datetime.strptime(date_match.group(), '%Y-%m-%d').date()

        for f in files:
            if '.json' not in f:
                continue

            uuid_match = uuid_pattern.search(f)
            if uuid_match is None:
                # JSON file without a UUID in its name; it cannot be indexed.
                continue

            dir_list.append({
                # Trailing separator is kept on both path-like fields.
                "DATE": os.path.join(str(date), ''),
                "RAW_DATA_PATH": os.path.join(root, ''),
                "SUBDIR": os.path.basename(root),
                "UUID": uuid_match.group()
            })

    return {"DATA_FILE_LIST": dir_list}


# --------------------------------------------------
def bundle_data(file_list, data_per_bundle):
    """Split ``file_list`` into consecutive bundles of ``data_per_bundle`` items.

    Args:
        file_list: Iterable of records to distribute.
        data_per_bundle: Maximum number of records per bundle.

    Returns:
        list: Dicts of the form ``{"DATA_SETS": [...], "ID": n}`` where ``ID``
        is the bundle's position in the returned list. The final bundle holds
        whatever remains, so an empty ``file_list`` still yields one bundle
        with an empty ``DATA_SETS``.
    """
    # Always start with one open chunk; a new one is opened at every
    # multiple of data_per_bundle except position 0.
    chunks = [[]]
    for position, record in enumerate(file_list):
        if position % data_per_bundle == 0 and position != 0:
            chunks.append([])
        chunks[-1].append(record)

    return [
        {"DATA_SETS": chunk, "ID": bundle_id}
        for bundle_id, chunk in enumerate(chunks)
    ]

In [30]:
# Load the season-10 lettuce full-season plant clustering CSV from CyVerse and
# drop the leftover index / geometry / ID columns listed below.
df = pd.read_csv(
    'https://data.cyverse.org/dav-anon/iplant/projects/phytooracle/season_10_lettuce_yr_2020/level_3/stereoTop/season10_plant_clustering/stereoTop_full_season_clustering.csv'
).drop(columns=['Unnamed: 0', 'Unnamed: 0.1', 'id', 'geometry', 'index_right', 'ID'])

In [31]:
# Build one {"plant": name} record per unique plant name. When a non-empty plant
# list is passed, create_dict does not read the 'test' directory argument.
# NOTE: despite its name, file_dict is a list of dicts (the "DATA_FILE_LIST" value).
file_dict = create_dict('test', df['plant_name'].unique().tolist())["DATA_FILE_LIST"]

In [33]:
# Display the per-plant records built above.
file_dict

[{'plant': 'Green_Thunder_72'},
 {'plant': 'Green_Thunder_179'},
 {'plant': 'Green_Thunder_26'},
 {'plant': 'Green_Thunder_27'},
 {'plant': 'Green_Thunder_111'},
 {'plant': 'Green_Thunder_23'},
 {'plant': 'Green_Thunder_100'},
 {'plant': 'Green_Thunder_49'},
 {'plant': 'Green_Thunder_91'},
 {'plant': 'Green_Thunder_112'},
 {'plant': 'Green_Thunder_114'},
 {'plant': 'Green_Thunder_89'},
 {'plant': 'Green_Thunder_4'},
 {'plant': 'Green_Thunder_239'},
 {'plant': 'Green_Thunder_20'},
 {'plant': 'Green_Thunder_113'},
 {'plant': 'Green_Thunder_144'},
 {'plant': 'Green_Thunder_132'},
 {'plant': 'Green_Thunder_172'},
 {'plant': 'Green_Thunder_213'},
 {'plant': 'Green_Thunder_176'},
 {'plant': 'Green_Thunder_10'},
 {'plant': 'Green_Thunder_177'},
 {'plant': 'Green_Thunder_67'},
 {'plant': 'Green_Thunder_135'},
 {'plant': 'Green_Thunder_149'},
 {'plant': 'Green_Thunder_13'},
 {'plant': 'Green_Thunder_97'},
 {'plant': 'Green_Thunder_33'},
 {'plant': 'Green_Thunder_54'},
 {'plant': 'Green_Thunder_

In [36]:
# data_per_bundle=1 places every plant record in its own bundle.
bundle_list = bundle_data(file_list=file_dict, data_per_bundle=1)

In [37]:
# Display the bundles: each entry has a "DATA_SETS" list and a sequential "ID".
bundle_list

[{'DATA_SETS': [{'plant': 'Green_Thunder_72'}], 'ID': 0},
 {'DATA_SETS': [{'plant': 'Green_Thunder_179'}], 'ID': 1},
 {'DATA_SETS': [{'plant': 'Green_Thunder_26'}], 'ID': 2},
 {'DATA_SETS': [{'plant': 'Green_Thunder_27'}], 'ID': 3},
 {'DATA_SETS': [{'plant': 'Green_Thunder_111'}], 'ID': 4},
 {'DATA_SETS': [{'plant': 'Green_Thunder_23'}], 'ID': 5},
 {'DATA_SETS': [{'plant': 'Green_Thunder_100'}], 'ID': 6},
 {'DATA_SETS': [{'plant': 'Green_Thunder_49'}], 'ID': 7},
 {'DATA_SETS': [{'plant': 'Green_Thunder_91'}], 'ID': 8},
 {'DATA_SETS': [{'plant': 'Green_Thunder_112'}], 'ID': 9},
 {'DATA_SETS': [{'plant': 'Green_Thunder_114'}], 'ID': 10},
 {'DATA_SETS': [{'plant': 'Green_Thunder_89'}], 'ID': 11},
 {'DATA_SETS': [{'plant': 'Green_Thunder_4'}], 'ID': 12},
 {'DATA_SETS': [{'plant': 'Green_Thunder_239'}], 'ID': 13},
 {'DATA_SETS': [{'plant': 'Green_Thunder_20'}], 'ID': 14},
 {'DATA_SETS': [{'plant': 'Green_Thunder_113'}], 'ID': 15},
 {'DATA_SETS': [{'plant': 'Green_Thunder_144'}], 'ID': 16},


In [None]:
    # Per-season, per-sensor pipeline configuration: the containers to pull and,
    # for each pipeline stage, the shell commands to run and the output naming.
    # Every 'commands' value is an ordered list (not a set): the jx2json step
    # writes the JSON file that the following makeflow step reads, so order
    # matters, and lists (unlike sets) can be serialized to JSON.
    season_dict = {
        '10': {
            'scanner3DTop': {
                'containers': {
                    'gantry_notifications': {
                        'simg': 'gantry_notifications.simg',
                        'dockerhub_path': 'docker://phytooracle/slack_notifications:latest'
                    },
                    'preprocessing': {
                        'simg': '3d_preprocessing.simg',
                        'dockerhub_path': 'docker://phytooracle/3d_preprocessing:latest'
                    },
                    'sequential_alignment': {
                        'simg': '3d_sequential_align.simg',
                        'dockerhub_path': 'docker://phytooracle/3d_sequential_align:latest'
                    }
                },
                'workflow_1': {
                    'commands': [
                        'jx2json main_workflow_phase1.jx -a bundle_list.json > main_workflow_phase1.json',
                        'makeflow -T wq --json main_workflow_phase1.json -a -N phytooracle_3d -M phytooracle_3d -r 3 -p 0 -dall -o dall.log $@'
                    ],
                    'outputs': {
                        'pipeline_out': 'preprocessing_out',
                        'tag': 'preprocessed',
                        'outdir': 'preprocessing'
                    }
                },
                'intermediate': {
                    'commands': [
                        'singularity run 3d_sequential_align.simg -i preprocessing_out/ -o sequential_alignment_out/'
                    ],
                    'outputs': {
                        'pipeline_out': 'sequential_alignment_out',
                        'tag': 'aligned',
                        'outdir': 'alignment'
                    }
                },
                'workflow_2': {
                    'commands': [
                        'jx2json main_workflow_phase-2.jx -a bundle_list.json > main_workflow_phase2.json',
                        'makeflow -T wq --json main_workflow_phase2.json -a -r 2 -M phytooracle_3d -N phytooracle_3d -p 60221 -dall -o dall.log --disable-cache $@'
                    ],
                    # Output naming for this stage is left blank in the source.
                    'outputs': {
                        'pipeline_out': '',
                        'tag': '',
                        'outdir': ''
                    }
                }
            }
        }
    }
