In [2]:
# untar.ipynb
# Author: Mollie Gaines
# Last Edited: 2/13/2020
# Purpose: extract INWM raster from DSWE .tar files in a user-specified working directory into 
#          the appropriate year/season folder
# Usage: input directory containing .tar files
# Example Input: Q:/My Drive/Research/DSWE_SE/Bulk Order 1060486/Dynamic Surface Water Extent
# Example Output: Q:/My Drive/Research/data/DSWE_SE/2017/Fall/LC08_CU_019013_20171118_20181203_C01_V01_INWM.tif


import os, tarfile

In [None]:
def untar(in_dir, out_root='../data/DSWE_SE'):
    ''' Extract the INWM raster from every DSWE tarfile in a directory.

        Each file in `in_dir` whose name ends with ".tar" is opened and its
        INWM GeoTIFF member is extracted into <out_root>/<year>/<season>/.
        The year and month of collection are read from fixed character
        positions in the tarfile name (e.g.
        LC08_CU_019013_20171118_20181203_C01_V01_SW.tar -> 2017, 11).

        Parameters
        ----------
        in_dir : str
            Directory containing the DSWE .tar files.
        out_root : str, optional
            Root folder under which the year/season folders are written.
            Defaults to '../data/DSWE_SE' (relative to the current working
            directory), which is the location the notebook has always used.

        Returns
        -------
        None
            Files are written to disk; nothing is returned.

        Raises
        ------
        KeyError
            If the characters at the month position of a tarfile name are
            not a two-digit month, or if the expected INWM member is not
            present in the archive.
    '''

    print("Beginning Decompression")

    # Month of collection -> name of the season folder
    seasons = {'03': 'Spring', '04': 'Spring', '05': 'Spring',
               '06': 'Summer', '07': 'Summer', '08': 'Summer',
               '09': 'Fall', '10': 'Fall', '11': 'Fall',
               '12': 'Winter', '01': 'Winter', '02': 'Winter'}

    # Get list of all tar files in our input directory
    tar_lst = [t for t in os.listdir(in_dir) if t.endswith(".tar")]
    n_tars = len(tar_lst)

    for i, tarfilename in enumerate(tar_lst):
        yr = tarfilename[15:19]        # index file name to get year of collection
        mnth = tarfilename[19:21]      # index to get month of collection
        szn = seasons[mnth]

        # January/February belong to the winter that started the previous
        # December (Dec18, Jan19, Feb19 all go in ./2018/Winter).
        if mnth in ('01', '02'):
            yr = str(int(yr) - 1)

        out_dir = os.path.join(out_root, yr, szn)

        # The INWM member shares the archive's base name.
        inwm_file = tarfilename.replace("SW.tar", "INWM.tif")
        tarfile_path = os.path.join(in_dir, tarfilename)

        # Context manager closes the archive even if extraction fails.
        with tarfile.open(tarfile_path) as tar:
            tar.extract(inwm_file, path=out_dir)

        if not (i % 500):  # most folders with tar files have 2,000 so we should get updates every 25%
            print('{}% complete.'.format(i/n_tars * 100))

    print("Decompression Complete")
    return

In [None]:
# Ask the user for the folder that holds the DSWE .tar files, then extract them.
print('Please input your .tar files directory: ')
# Pasted paths often carry stray whitespace or wrapping quotes (e.g. from a
# file manager's "copy as path"); remove them so the directory listing done
# by untar() receives the real folder path.
input_dir = input().strip().strip('"\'')

untar(input_dir)

In [20]:
# Scratch check of the progress condition used in untar(): a value that is a
# multiple of 500 leaves remainder 0, so `not (... % 500)` evaluates to True.
not (1000 % 500)

True

In [27]:
# Scratch check of the percentage formatting used in untar(): 500 out of 1000,
# times 100, should print as 50.0.
print('{} % should = 50'.format(500/1000 * 100))

50.0 % should = 50


In [24]:
# Scratch check that `/` performs true division here (result is 0.5, not 0).
500/1000

0.5