# In [6]: (IPython cell prompt left over from the notebook export)
import os
import argparse
import glob
import zipfile
from shutil import copy
import sys

def chunker(seq, size):
    """Return a generator over consecutive slices of ``seq``.

    Each slice holds at most ``size`` items; only the last slice can be
    shorter. An empty ``seq`` yields nothing at all.

    :param seq: any sliceable sequence with a length (list, tuple, str, ...)
    :param size: maximum number of items per slice; must be non-zero
    :raises ValueError: if ``size`` is 0 (``range`` rejects a zero step)
    """
    # ``range`` exists on both Python 2 and 3, whereas ``xrange`` is
    # Python 2 only and raises NameError on Python 3.
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))

def str_to_bool(value):
    """Convert a command-line token such as 'true' or '0' into a real bool.

    argparse's ``type=bool`` cannot be used for this: it calls ``bool()`` on
    the raw string, and every non-empty string (including 'False') is truthy.

    :param value: a bool, or a string spelling of one (case-insensitive)
    :raises argparse.ArgumentTypeError: if the token is not recognised
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    if token in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean, got %r' % (value,))


parser = argparse.ArgumentParser(description='Lazy ingestion of bulk data into FEWS')
parser.add_argument('root_directory', type=str, default='.', nargs='?',
                    help='directory to run the script in. Default is "."')
parser.add_argument('include_zip', type=str_to_bool, default=True, nargs='?',
                    help='include files in zip-folders. Default is True')
parser.add_argument('file_mask_regex', default="*.*", nargs='?',
                    help='file name mask regex. Default is "*.*"')
parser.add_argument('chunk_size', type=int, default=10, nargs='?',
                    help='chunk size to ingest. Default is 10')
parser.add_argument('chunk_folder', type=str, default='chunk', nargs='?',
                    help='name of chunk folder. Default is chunk')

try:
    # The rest of the script subscripts ``args`` like a dict, so convert the
    # Namespace returned by argparse into one.
    args = vars(parser.parse_args())
except SystemExit as exit_request:
    # argparse exits with status 0 after printing --help; honour that instead
    # of silently carrying on and moving files around.
    if exit_request.code in (0, None):
        raise
    # A non-zero status means the command line could not be parsed (as
    # happens when the code runs inside a notebook kernel, whose argv is not
    # meant for this parser). Fall back to the hard-coded settings.
    args = {'root_directory': r'D:\FEWS\trunk\FEWS_VOLTA\ImportBackup\GPM',
            'include_zip': True,
            'file_mask_regex': "*.*",
            'chunk_size': 10,
            'chunk_folder': 'chunk'}

# Move one chunk of files from the root directory into the chunk folder, or,
# when no loose files are left, unpack the next zip archive so that a later
# run can pick up its contents. Every run performs at most one of these
# actions and then exits with status 0.

# Work on a plain dict whether ``args`` is the fallback dict or an argparse
# Namespace (a Namespace cannot be subscripted).
settings = args if isinstance(args, dict) else vars(args)

path_inc_regex = os.path.join(settings['root_directory'], settings['file_mask_regex'])
path_inc_zip = os.path.join(settings['root_directory'], '*zip')
path_chunk_folder = os.path.join(settings['root_directory'], settings['chunk_folder'])

# Candidate files are everything matching the mask except zip archives.
# Only regular files are kept (``copy`` cannot handle a directory), and the
# result is sorted so the processing order is the same on every run.
zip_matches = set(glob.glob(path_inc_zip))
select_regex = sorted(
    path for path in set(glob.glob(path_inc_regex)) - zip_matches
    if os.path.isfile(path)
)
if settings['include_zip']:
    select_zip = sorted(path for path in zip_matches if os.path.isfile(path))
else:
    select_zip = []

# check if chunk folder exist, and create if not
if not os.path.exists(path_chunk_folder):
    print('created chunk folder')
    os.makedirs(path_chunk_folder)

# check if chunk folder contains data, if not abort script
if os.listdir(path_chunk_folder):
    print(path_chunk_folder + ' contains data, wait for FEWS to ingest, or empty manually')
    print('abort script')
    sys.exit(0)

# chunk folder is empty, get new chunk if possible. The ``[]`` default keeps
# ``next`` from raising StopIteration when there are no files left, so the
# zip handling below can actually be reached.
chunk = next(chunker(select_regex, settings['chunk_size']), [])

if chunk:
    # chunk list contains files
    print(len(chunk))
    for src_file in chunk:
        copy(src_file, path_chunk_folder)
        os.remove(src_file)
    print('chunk succesfully copied over, abort script')
    sys.exit(0)

# no loose files left in the root directory
print('no chunk or empty chunk')

# unzip zip folder if zip folders are included and still exist
# (``and`` rather than ``&``: ``&`` binds tighter than ``==`` and would try
# to evaluate ``True & select_zip``, which raises TypeError for a list)
if settings['include_zip'] and select_zip:
    print('zips included and/or available')
    with zipfile.ZipFile(select_zip[0], "r") as z:
        z.extractall(settings['root_directory'])

    # remove the unzipped zip folder and abort script
    os.remove(select_zip[0])
    print('unzip action finished, abort script')
    sys.exit(0)

# no zips are included or none are available
print('no zips folders included or not available')
print('no files in root directory')
print('abort script')
sys.exit(0)