# In [36]:
from pathlib import Path
from io import StringIO
import os
import json

import girder_client

class ExporterBase:
    '''
    Base class for exporting the contents of a hidebound directory.

    export() walks the asset metadata files and passes each parsed JSON
    document, and the JSON document of every file the asset lists, to
    _export_asset and _export_file. Subclasses must implement both hooks.
    '''
    def _enforce_directory_structure(self, hidebound_dir):
        '''
        Ensure that the hidebound directory has the expected layout.

        Args:
            hidebound_dir (str or Path): Hidebound directory.

        Raises:
            FileNotFoundError: If data, metadata, metadata/asset or
                metadata/file is not an existing directory.
        '''
        data = Path(hidebound_dir, 'data')
        meta = Path(hidebound_dir, 'metadata')
        asset_dir = Path(meta, 'asset')
        file_dir = Path(meta, 'file')
        for path in [data, meta, asset_dir, file_dir]:
            if not path.is_dir():
                msg = f'{path.as_posix()} directory does not exist.'
                raise FileNotFoundError(msg)

    @staticmethod
    def _read_json(filepath):
        '''
        Parse a JSON file and return its content.

        Args:
            filepath (str or Path): JSON file to read.

        Returns:
            object: Decoded JSON content.
        '''
        with open(filepath, encoding='utf-8') as f:
            return json.load(f)

    def export(self, hidebound_dir):
        '''
        Export every asset found in hidebound_dir/metadata/asset, each one
        followed by the files named in its 'file_ids' list.

        Args:
            hidebound_dir (str or Path): Hidebound directory. May be
                absolute or relative to the current working directory.

        Raises:
            FileNotFoundError: If the directory layout is incomplete or a
                metadata file listed in 'file_ids' does not exist.
        '''
        self._enforce_directory_structure(hidebound_dir)

        asset_dir = Path(hidebound_dir, 'metadata', 'asset')
        file_dir = Path(hidebound_dir, 'metadata', 'file')
        for asset in os.listdir(asset_dir):
            # export asset
            asset_meta = self._read_json(Path(asset_dir, asset))
            self._export_asset(asset_meta)

            # export files
            # Each path is joined to file_dir exactly once. Joining it a
            # second time duplicates the prefix whenever hidebound_dir is
            # relative, which made every file lookup fail.
            for file_id in asset_meta['file_ids']:
                file_meta = self._read_json(Path(file_dir, f'{file_id}.json'))
                self._export_file(file_meta)

    def _export_asset(self, metadata):
        '''
        Export a single asset. Must be overridden by subclasses.

        Args:
            metadata (dict): Parsed asset metadata.

        Raises:
            NotImplementedError: Always, in the base class.
        '''
        msg = '_export_asset method must be implemented by subclass.'
        raise NotImplementedError(msg)

    def _export_file(self, metadata):
        '''
        Export a single file. Must be overridden by subclasses.

        Args:
            metadata (dict): Parsed file metadata.

        Raises:
            NotImplementedError: Always, in the base class.
        '''
        msg = '_export_file method must be implemented by subclass.'
        raise NotImplementedError(msg)
        
class DsaExporter(ExporterBase):
    '''
    Exports hidebound assets and files to a Girder server using
    girder_client.

    Directory assets become Girder folders carrying the asset metadata.
    Each file becomes an item, created inside the folder chain that mirrors
    its relative parent directory, with the file content uploaded to it.
    '''
    def __init__(self, token, root_id, root_type='collection', host='0.0.0.0', port=8080):
        '''
        Create a Girder client and authenticate it with an API key.

        Args:
            token (str): Girder API key.
            root_id (str): ID of the Girder resource under which all
                folders are created.
            root_type (str, optional): Girder parent type of root_id.
                Default: 'collection'.
            host (str, optional): Host of the Girder API.
                Default: '0.0.0.0'.
            port (int, optional): Port of the Girder API. Default: 8080.
        '''
        # sudo ip addr show docker0 | grep inet | grep docker0 | awk '{print $2}' | sed 's/\/.*//'
        # will give you the ip address of the docker network which binds to localhost
        client = girder_client.GirderClient(apiUrl=f'http://{host}:{port}/api/v1')
        client.authenticate(apiKey=token)
        self._client = client
        self._root_id = root_id
        self._root_type = root_type

    def _export_dirs(self, dirpath, metadata=None):
        '''
        Create, or reuse, one nested Girder folder per component of dirpath
        beneath the root resource. Only the last folder receives metadata.

        Args:
            dirpath (str or Path): Relative directory path.
            metadata (dict, optional): Metadata for the last folder.
                Default: None, which is sent as an empty dict.

        Returns:
            dict: createFolder response for the last folder. If dirpath has
                no components (for example '.'), no folder is created and
                a dict holding only the root '_id' is returned.
        '''
        # None replaces the former mutable `{}` default; an empty dict is
        # still what gets sent, so the request is unchanged.
        if metadata is None:
            metadata = {}

        dirs = Path(dirpath).parts

        # Path('.').parts is empty: the target is the root itself. Without
        # this guard dirs[-1] raises IndexError for files that sit directly
        # at the top of the relative tree.
        # NOTE(review): metadata is not applied to the root in this case.
        if not dirs:
            return dict(_id=self._root_id)

        # export all parent directories, only the first one hangs off root
        parent_id = self._root_id
        parent_type = self._root_type
        for dir_ in dirs[:-1]:
            response = self._client.createFolder(
                parent_id,
                dir_,
                reuseExisting=True,
                parentType=parent_type,
            )
            parent_id = response['_id']
            parent_type = 'folder'

        # then export last directory with metadata
        return self._client.createFolder(
            parent_id,
            dirs[-1],
            metadata=metadata,
            reuseExisting=True,
            parentType=parent_type,
        )

    def _export_asset(self, metadata):
        '''
        Export an asset as a folder, unless its asset_type is 'file'.

        Args:
            metadata (dict): Asset metadata. Reads 'asset_type' and
                'asset_path_relative'.
        '''
        if metadata['asset_type'] != 'file':
            self._export_dirs(metadata['asset_path_relative'], metadata=metadata)

    def _export_file(self, metadata):
        '''
        Create an item for a file inside its parent folder and upload the
        file content to that item.

        Args:
            metadata (dict): File metadata. Reads 'filepath_relative',
                'filename' and 'filepath'.

        Returns:
            object: Response of GirderClient.uploadFileToItem.
        '''
        parent_dir = Path(metadata['filepath_relative']).parent
        folder_id = self._export_dirs(parent_dir)['_id']

        item = self._client.createItem(folder_id, metadata['filename'], metadata=metadata)
        return self._client.uploadFileToItem(item['_id'], metadata['filepath'])
    
# Run the export only when executed as a script or notebook cell, so that
# importing this module does not trigger an upload.
if __name__ == '__main__':
    # The Girder API key is a credential and must not live in source
    # control. Supply it through the DSA_API_KEY environment variable.
    # NOTE(review): the key that used to be hard-coded here should be
    # revoked on the server.
    token = os.environ.get('DSA_API_KEY')
    if not token:
        msg = 'DSA_API_KEY environment variable must be set to a Girder API key.'
        raise RuntimeError(msg)

    host = '172.17.0.1'
    root_id = '5ed735c8d8dd6242642406e5'
    uploader = DsaExporter(token, root_id, host=host)
    uploader.export('/tmp/hidebound')