# Info
Purpose:

    Take incoming folder for sunsat and parse the subfolders and incoming files

Input:

    None

Output:

    Moved and categorized files

Keywords:

    none

Dependencies:

    - os
    - dateutil
    - re
    - pathlib2
    - datefinder

Needed Files:
  - None

Modification History:

    Written: Samuel LeBlanc, Santa Cruz, CA, 2020-11-06
    Modified: Samuel LeBlanc, Santa Cruz, CA, 2020-12-02
             - added support for files without a date in the name, using either the directory's date or the file's date.


# Set up the background functions

In [2]:
from __future__ import print_function 
from parse_and_move_incoming_fx import get_date_and_string, \
     pull_labels, get_season, filetypes, get_newfilepath, get_filters_from_json, \
     recurse_through_dir, make_temp_mfile, move_files

# Prepare the command line argument parser

In [11]:
import argparse

In [12]:
# Text passed as the `description` of the command line argument parser below.
# The literal is kept exactly as written: its line breaks and indentation are part of the string.
long_description = """    Run the incoming file parser and moves the files to the desired sunsat locations
    The File locations and folders are defined by the json file: .filters.json
    Please update the date ranges within that file for any new field mission, 
        if not then assumes rooftop measurements for the season_year
    Can run a call to matlab for any incoming 4STAR raw data"""

In [13]:
# Build the command line interface: three on/off switches and two optional paths.
parser = argparse.ArgumentParser(description=long_description)

# -d / -q : boolean switches, False unless given on the command line
parser.add_argument(
    '-d', '--dry_run',
    action='store_true',
    help='if set, turn on dry runs, and not move or delete any file/folder')
parser.add_argument(
    '-q', '--quiet',
    action='store_true',
    help='if set, quiet the comments')

# -i / -r : directories to read from and to write to, each with a default location
parser.add_argument(
    '-i', '--in_dir',
    nargs='?',
    default='/data/sunsat/_incoming_gdrive/',
    help='Input directory to recurse files, parse, and move')
parser.add_argument(
    '-r', '--root_dir',
    nargs='?',
    default='/data/sunsat/',
    help='full file path of the root directory to save to')

# -m : boolean switch enabling the matlab processing step
parser.add_argument(
    '-m', '--run_matlab',
    action='store_true',
    help='if set, will run the matlab calls if there is 4STAR/4STARB raw files')

_StoreTrueAction(option_strings=['-m', '--run_matlab'], dest='run_matlab', nargs=0, const=True, default=False, type=None, choices=None, help='if set, will run the matlab calls if there is 4STAR/4STARB raw files', metavar=None)

In [14]:
# parse_known_args returns (namespace, leftover arguments); only the recognized
# options are kept, as a plain dict keyed by option name.
known_args, _unparsed_args = parser.parse_known_args()
in_ = vars(known_args)

# Load the modules and get the defaults

In [15]:
import os, zipfile
import dateutil.parser
import re
from pathlib2 import Path
from datefinder import find_dates
from datetime import date, datetime
import json
import filecmp
import subprocess
import threading
from aeronet import get_AERONET_file_v2

In [16]:
# Folder to scan for incoming files, and root folder that files are sorted into.
in_directory, root_folder = (
    in_.get('in_dir', '/data/sunsat/_incoming_gdrive/'),
    in_.get('root_dir', '/data/sunsat/'),
)

In [17]:
# Run-time switches taken from the parsed options.
# verbose is the inverse of --quiet; dry_run falls back to True if the key is missing.
verbose = not in_.get('quiet', False)
dry_run, run_matlab = in_.get('dry_run', True), in_.get('run_matlab', False)

In [18]:
# Echo the parsed options so that a log of the run records how it was called.
if verbose:
    print(in_)

{'in_dir': '/data/sunsat/_incoming_gdrive/', 'root_dir': '/data/sunsat/', 'quiet': False, 'dry_run': False, 'run_matlab': False}


In [19]:
# Go through and unzip any archive found directly inside the incoming directory.
# Each archive is extracted into a sibling folder named after the archive
# (without the .zip suffix) and the archive itself is then deleted.
# In a dry run the archive is only opened and reported; nothing is written or removed.
prefix = '*DRY RUN*: ' if dry_run else ''
incoming_root = Path(in_directory)
for item in os.listdir(in_directory): # loop through items in dir
    if not item.lower().endswith('.zip'): # only handle ".zip" files
        continue
    # Join as path components so that in_directory works with or without a trailing separator
    file_name = incoming_root.joinpath(item)
    extract_dir = file_name.parent.joinpath(file_name.stem)
    # The with-block guarantees the archive handle is closed even if extraction fails
    with zipfile.ZipFile(str(file_name)) as zip_ref:
        if verbose:
            print( '{prefix}found zip file: {file_name}, extracting here.'.format(prefix=prefix,file_name=file_name))
        if not dry_run:
            extract_dir.mkdir(parents=True,exist_ok=True) # make a dir to extract to
            zip_ref.extractall(str(extract_dir)) # extract file to dir
    # Only reached when extraction succeeded, so a failed extraction keeps the archive
    if not dry_run:
        os.remove(str(file_name)) # delete zipped file

In [21]:
# Load the filter definitions for the incoming directory
# (the command line help names the json file .filters.json).
filters = get_filters_from_json(in_directory)

# Walk the incoming directory with those filters; the result is handed to move_files below.
fl_arr = recurse_through_dir(in_directory, filters=filters, verbose=verbose)

201115_201115_NASA_Ames.lev15 -> None None NASA_Ames 2020-11-15 00:00:00 201115 201115_201115_NASA_Ames
20201112_4STARsisters_AmesRoof -> None 4STARsisters_AmesRoof None 2020-11-12 00:00:00 20201112 20201112_4STARsisters_AmesRoof
20201112_4STARB -> 4STARB 4STARsisters_AmesRoof None 2020-11-12 00:00:00 20201112 20201112_4STARB
4STARB_20201112_003_NIR_SUN.dat -> 4STARB 4STARsisters_AmesRoof 003_NIR_SUN 2020-11-12 00:00:00 20201112 4STARB_20201112_003_NIR_SUN
4STARB_20201112_001_VIS_SUN.dat -> 4STARB 4STARsisters_AmesRoof 001_VIS_SUN 2020-11-12 00:00:00 20201112 4STARB_20201112_001_VIS_SUN
4STARB_20201112_004_TRACK.dat -> 4STARB 4STARsisters_AmesRoof 004_TRACK 2020-11-12 00:00:00 20201112 4STARB_20201112_004_TRACK
4STARB_20201112_003_VIS_SUN.dat -> 4STARB 4STARsisters_AmesRoof 003_VIS_SUN 2020-11-12 00:00:00 20201112 4STARB_20201112_003_VIS_SUN
4STARB_20201112_002_TRACK.dat -> 4STARB 4STARsisters_AmesRoof 002_TRACK 2020-11-12 00:00:00 20201112 4STARB_20201112_002_TRACK
4STARB_20201112_001

In [22]:
# Move (or, in a dry run, only report) the parsed files.
# data_raw_found / data_raw_files drive the AERONET and matlab steps further down;
# the n* counters are reported in the final summary line (nexact is not used in the visible cells).
(data_raw_found, data_raw_files,
 nexact, nmoved, ncreated, ndataraw) = move_files(fl_arr, filters, dry_run=dry_run, verbose=verbose)

~Moving file from /data/sunsat/_incoming_gdrive/201115_201115_NASA_Ames.lev15
   to new path: /data/sunsat/rooftop/Fall_2020/data_processed/aeronet/201115_201115_NASA_Ames.lev15
Exact same file already exists at: /data/sunsat/rooftop/Fall_2020/data_raw/4STARB_20201112_4STARsisters_AmesRoof/4STARB_20201112_003_NIR_SUN.dat, removing incoming file
Exact same file already exists at: /data/sunsat/rooftop/Fall_2020/data_raw/4STARB_20201112_4STARsisters_AmesRoof/4STARB_20201112_001_VIS_SUN.dat, removing incoming file
Exact same file already exists at: /data/sunsat/rooftop/Fall_2020/data_raw/4STARB_20201112_4STARsisters_AmesRoof/4STARB_20201112_004_TRACK.dat, removing incoming file
Exact same file already exists at: /data/sunsat/rooftop/Fall_2020/data_raw/4STARB_20201112_4STARsisters_AmesRoof/4STARB_20201112_003_VIS_SUN.dat, removing incoming file
Exact same file already exists at: /data/sunsat/rooftop/Fall_2020/data_raw/4STARB_20201112_4STARsisters_AmesRoof/4STARB_20201112_002_TRACK.dat, remo

Exact same file already exists at: /data/sunsat/rooftop/Fall_2020/data_raw/4STAR_20201104_4STARboth_AmesRoof_Clear/4STAR_20201104_002_TRACK.dat, removing incoming file
Exact same file already exists at: /data/sunsat/rooftop/Fall_2020/data_raw/4STAR_20201104_4STARboth_AmesRoof_Clear/4STAR_20201105_002_TRACK.dat, removing incoming file
Exact same file already exists at: /data/sunsat/rooftop/Fall_2020/data_raw/4STAR_20201104_4STARboth_AmesRoof_Clear/4STAR_20201105_008_TRACK.dat, removing incoming file
Exact same file already exists at: /data/sunsat/rooftop/Fall_2020/data_raw/4STAR_20201104_4STARboth_AmesRoof_Clear/4STAR_20201105_007_TRACK.dat, removing incoming file
Exact same file already exists at: /data/sunsat/rooftop/Fall_2020/data_raw/4STAR_20201104_4STARboth_AmesRoof_Clear/4STAR_20201105_012_TRACK.dat, removing incoming file
Exact same file already exists at: /data/sunsat/rooftop/Fall_2020/data_raw/4STARB_20201104_4STARboth_AmesRoof_Clear/4STARB_20201104_003_VIS_SKYA.dat, removing i

In [65]:
# check if there are raw data files, and if so, get the aeronet
# One AERONET level 1.5 file (site NASA_Ames) is requested per measurement day,
# and only when that day is assigned to a campaign whose name contains 'rooftop'.
# NOTE(review): the download is not gated on dry_run - confirm that is intended.
if data_raw_found:
    daystrss = []  # days that were already handled
    for k in data_raw_files:
        # keys are expected to hold exactly two '_'-separated parts: instrument name and day string
        instname, daystr0 = k.split('_')
        # Several instruments can share one day; test the day parsed from THIS key
        # so that each day is processed only once.
        if daystr0 in daystrss:
            continue
        daystrss.append(daystr0)

        fa_tmp = filetypes('{daystr}_AERONET_NASA_Ames.lev15'.format(daystr=daystr0))
        # The return value is not used; the call is presumably needed to fill in
        # fa_tmp.newpath, which is read below - TODO confirm
        fmla_tmp = get_newfilepath(fa_tmp,filters=filters,fake_file=True,root_folder=root_folder)
        if fa_tmp.campaign.find('rooftop') >= 0:
            aeronet_file = get_AERONET_file_v2(date=fa_tmp.fdate,site='NASA_Ames',path=str(fa_tmp.newpath))
            if verbose: print('Downloaded AERONET file: {}'.format(aeronet_file))

Downloaded AERONET file: /data/sunsat/rooftop/Fall_2020/data_processed/aeronet/201112_201112_NASA_Ames.lev15




  soup = BeautifulSoup(html)


In [66]:
# clean up folders after move
# Walk bottom-up so that sub-folders are handled before their parents.
# The incoming directory itself is never removed.
for dirpath, dirnames, filenames in os.walk(in_directory,topdown=False):
    if dirpath == in_directory:
        continue
    if verbose: print( '{pre}-removing :{path}'.format(pre=prefix,path=dirpath))
    if dry_run:
        continue
    try:
        os.rmdir(dirpath)
    except OSError:
        # Best effort: os.rmdir only removes empty directories, so a folder that
        # still holds files (or cannot be removed) is simply left in place.
        # Only OSError is ignored, so Ctrl-C and programming errors still surface.
        pass

-removing :/data/sunsat/_incoming_gdrive/20201112_4STARsisters_AmesRoof/20201112_4STARB
-removing :/data/sunsat/_incoming_gdrive/20201112_4STARsisters_AmesRoof/20201112_4STARA
-removing :/data/sunsat/_incoming_gdrive/20201112_4STARsisters_AmesRoof
-removing :/data/sunsat/_incoming_gdrive/20201104_4STARboth_AmesRoof_Clear/20201104_4STARA
-removing :/data/sunsat/_incoming_gdrive/20201104_4STARboth_AmesRoof_Clear/20201104_4STARB
-removing :/data/sunsat/_incoming_gdrive/20201104_4STARboth_AmesRoof_Clear


In [75]:
def _run_matlab_batch(mfile, verbose=True):
    """Run the matlab script `mfile` in batch mode and return its exit code.

    matlab is started in the folder holding the script (through `cwd`, so the
    working directory of this python process is left unchanged) and is given the
    script name without extension. stderr is merged into stdout and the combined
    stream is read until it closes: no pipe can fill up and block matlab, and no
    trailing output is lost. Output lines are printed when `verbose` is set.
    """
    pmfile = Path(mfile)
    process = subprocess.Popen(['matlab','-nodisplay','-batch',"{}".format(pmfile.stem)],
                               shell=False, cwd=str(pmfile.parent),
                               stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                               universal_newlines=True)
    # readline returns '' only at end of stream, i.e. once matlab closed its output
    for output in iter(process.stdout.readline, ''):
        if verbose: print(output.strip())
    process.stdout.close()
    return process.wait()


# For every instrument/day with raw data: work out where the processed products
# belong, create those folders, write a temporary matlab script and run it.
nmats = 0  # number of matlab runs that were launched (whatever their exit code)
if run_matlab:
    prefix = '*DRY RUN*: ' if dry_run else ''
    for dr,drs in data_raw_files.items():
        # get the position of the new star.mat and starsun.mat files
        # (the get_newfilepath return values are kept but unused here; the calls
        #  presumably fill in the .newpath/.newfile attributes read below - TODO confirm)
        f = filetypes('{}star.mat'.format(dr),filters=filters)
        fml = get_newfilepath(f,filters=filters,fake_file=True,root_folder=root_folder)
        if not dry_run: f.newpath.mkdir(parents=True,exist_ok=True)
        fs = filetypes('{}starsun.mat'.format(dr),filters=filters)
        fmls = get_newfilepath(fs,filters=filters,fake_file=True,root_folder=root_folder)
        if not dry_run: fs.newpath.mkdir(parents=True,exist_ok=True)

        # make the position of the new quicklook file
        instname,daystr = dr.split('_')
        fq = filetypes('{daystr}_{instname}_Quicklooks.pptx'.format(instname=instname,daystr=daystr))
        fmlq = get_newfilepath(fq,filters=filters,fake_file=True,root_folder=root_folder)
        if not dry_run: fq.newpath.mkdir(parents=True,exist_ok=True)

        # make the position of the new figure files (the parent of newpath is what gets created and used)
        ff = filetypes('{daystr}_{instname}_plots.png'.format(instname=instname,daystr=daystr))
        fmlf = get_newfilepath(ff,filters=filters,fake_file=True,root_folder=root_folder)
        if not dry_run: ff.newpath.parent.mkdir(parents=True,exist_ok=True)

        # make the position of the aeronet files
        fa = filetypes('{daystr}_AERONET_NASA_Ames.lev15'.format(instname=instname,daystr=daystr))
        fmla = get_newfilepath(fa,filters=filters,fake_file=True,root_folder=root_folder)
        if not dry_run: fa.newpath.mkdir(parents=True,exist_ok=True)

        # make the position of the gas_summary files
        fg = filetypes('{instname}_{daystr}_gas_summary.mat'.format(instname=instname,daystr=daystr))
        fmlg = get_newfilepath(fg,filters=filters,fake_file=True,root_folder=root_folder)
        if not dry_run: fg.newpath.mkdir(parents=True,exist_ok=True)

        # make a string of the raw files: 'file1';'file2';...
        filelist = "'"+"';'".join(drs)+"'"
        if f.instname not in ('4STAR','4STARB'): # only for 4STARs for now.
            continue

        # os.path.join gives the same paths as plain concatenation when in_directory
        # ends with a separator, and still gives valid paths when it does not.
        # NOTE(review): the temporary script is also requested during a dry run - confirm intended.
        mfile = make_temp_mfile(os.path.join(in_directory,'temp.m'),filelist=filelist,starmat=str(f.newfile),\
                                starsun=str(fs.newfile),quicklooks=os.path.join(in_directory,str(fq.newfile.name)),\
                                fig_path=str(ff.newpath.parent)+'/', aero_path=str(fa.newpath)+'/',\
                                gas_path=str(fg.newpath)+'/', sun_path=str(fs.newpath)+'/',incoming_path=in_directory)

        if verbose:
            print( ' '.join(['{}matlab'.format(prefix),'-nodisplay','-batch',"{}".format(Path(mfile).stem)]))
        if not dry_run:
            rc = _run_matlab_batch(mfile,verbose=verbose)
            if verbose: print(rc)
            nmats = nmats + 1

            # keep the temporary script after a failed run so that it can be inspected
            if rc==0:
                os.remove(mfile)

*DRY RUN*: matlab -nodisplay -batch temp


In [140]:
# One-line summary of the run: local timestamp followed by the counters collected above.
run_stamp = datetime.now().strftime("%c")
run_counts = dict(nmoved=nmoved, ncreated=ncreated, ndataraw=ndataraw, nmats=nmats)
print(run_stamp + ' :Python moved {nmoved} files, Created {ncreated} folders, found {ndataraw} files, and generated {nmats} starmats/suns'.format(**run_counts))

'Thu Dec  3 10:24:19 2020'