In [12]:
import os
import subprocess
import datetime
import sys
import glob
import tarfile


# Script to tar and gzip the Ultima data

In [13]:
# ------------------------------------------------------------------------------
# System stuff
# ------------------------------------------------------------------------------
# Path of the python file
def get_script_path():
    """Return the directory that holds the file named by ``sys.argv[0]``.

    The path is passed through ``os.path.realpath`` first, so symbolic links
    are resolved and a relative ``sys.argv[0]`` is made absolute.

    NOTE(review): inside a Jupyter kernel ``sys.argv[0]`` may name the kernel
    launcher rather than this notebook -- confirm the result is the directory
    that actually contains the ``Unison`` and ``libs`` folders.
    """
    resolved = os.path.realpath(sys.argv[0])
    return os.path.dirname(resolved)
# Directory used as the anchor for the bundled Unison executable and libraries.
scriptPath = get_script_path()

# Path to unison.exe
unisonPath = os.path.join(scriptPath, 'Unison', 'Unison-2.40.61 Text.exe')

# Directory holding the GTK runtime libraries that Unison requires.
unisonLibs = os.path.join(scriptPath, 'libs', 'gtk-runtime-2.16.6.0', 'Gtk', 'bin')

# Kept for backward compatibility: the original cell registered the directory
# on Python's module search path.
if unisonLibs not in sys.path:
    sys.path.append(unisonLibs)

# sys.path only steers Python imports. Unison is launched as an external
# process through subprocess, which inherits os.environ, so the library
# directory has to be on the PATH environment variable for the executable to
# locate its libraries.
_envPath = os.environ.get('PATH', '')
if unisonLibs not in _envPath.split(os.pathsep):
    if _envPath:
        os.environ['PATH'] = _envPath + os.pathsep + unisonLibs
    else:
        os.environ['PATH'] = unisonLibs
# ------------------------------------------------------------------------------

In [14]:
# ------------------------------------------------------------------------------
# User configuration. Every value below is a plain python string, so keep the
# single quotes.
# ------------------------------------------------------------------------------
# Mode to run the script in
mode = 'archiving'

# Directory that is searched for the local data.
sourcePath = '/Users/karllapo/Desktop/data/darkmix_data_2017/field/EBG/'
# Directory where the data should end up.
targetPath = '/Users/karllapo/Desktop/data/darkmix_data_2017/field/EBG/'

# Roots handed to the synchronisation command:
#   rootLocal     - the local archive (same location as targetPath).
#   rootMobile    - the removable backup disk. Leave it as the empty string ''
#                   when no mobile backup is wanted.
#   logfileMobile - file the synchronisation log is written to; its name is
#                   rootMobile with '_logfile.txt' appended.
rootMobile = ''
rootLocal = targetPath
logfileMobile = '{}_logfile.txt'.format(rootMobile)

# Names of the channels. The archiving cell uses the first entry of `channels`
# as the prefix of both the source file names and the archive names.
channel_1, channel_2, channel_3, channel_4 = (
    'channel 1',
    'channel 2',
    'channel 3',
    'channel 4',
)
channels = [channel_1, channel_2, channel_3, channel_4]

In [15]:
# The EBG tests were spread across several directories. The code below builds
# the list of all of those directories for Kermit and Piggy.
# NOTE(review): numpy is no longer needed by this cell (the built-in range does
# the counting). The import is kept only so the name `np` stays defined for any
# other cell that may rely on it -- move it to the import cell or drop it.
import numpy as np

# Piggy: EBG_Harptest1 ... EBG_Harptest8 plus the un-numbered EBG_Harptest.
sourcePathAppends_Piggie = ['Piggy/EBG_Harptest' + str(n) for n in range(1, 9)]
sourcePathAppends_Piggie.append('Piggy/EBG_Harptest')

# Kermit: EBG_Harptest1 ... EBG_Harptest4 plus the un-numbered EBG_Harptest.
sourcePathAppends_Kermit = ['Kermit/EBG_Harptest' + str(n) for n in range(1, 5)]
sourcePathAppends_Kermit.append('Kermit/EBG_Harptest')

sourcePathAppends = sourcePathAppends_Kermit + sourcePathAppends_Piggie

# Complete path of every directory to archive. os.path.join is used instead of
# plain string concatenation so the result is still correct when sourcePath is
# given without a trailing path separator.
sourcePathAll = [os.path.join(sourcePath, sPA) for sPA in sourcePathAppends]

In [31]:
# ------------------------------------------------------------------------------
# Sub functions -- actually zips and archives data
# ------------------------------------------------------------------------------
# Python libraries for controlling the tarballing
def make_tarfile(tarName, filesToZip):
    """Write a gzip-compressed tarball containing ``filesToZip``.

    INPUT:
        tarName = Path of the tarball to create. It is opened in "w:gz" mode.
        filesToZip = Either a single path (file or directory), which is added
            as-is, or an expression containing the wildcard '*', which is
            expanded with glob and every match is added.

    Each entry is stored under its base name only, i.e. without the leading
    directories of the path it was found at. A wildcard that matches nothing
    produces an archive without members.
    """
    with tarfile.open(tarName, "w:gz") as archive:
        # A wildcard expression expands to its matches; anything else is
        # treated as the one and only member.
        members = glob.glob(filesToZip) if '*' in filesToZip else [filesToZip]
        for member in members:
            archive.add(member, arcname=os.path.basename(member))

def archiveTool(outFile, sourceFile):
    '''
    Compress the DTS XML files matching sourceFile into the tar.gz file outFile.

    On Windows (os.name == 'nt') the work is handed to the external `bsdtar`
    program; on POSIX systems it is done in-process by make_tarfile. The
    original files are left in place (they are not deleted).

    INPUT:
        outFile = Name of the tar.gz file to create
        sourceFile = Name of the source files to compress. Normally a glob
            expression ending in a wildcard (*); the directory part of this
            path must exist.
    OUTPUT:
        flag = True if the archive was successfully created. False if the
            source directory does not exist or an error was detected while
            archiving.
    '''
    # Validate the directory the caller actually asked for, rather than a
    # module-level setting. A bare file name refers to the current directory.
    sourceDir = os.path.dirname(sourceFile) or os.curdir
    if not os.path.isdir(sourceDir):
        print('Could not find files in specified paths. Please check sourcePath')
        return False

    print('Source files: ' + sourceFile)
    print('Archiving to: ' + outFile)

    # We are on a windows machine (likely the Ultimas/XT)
    if os.name == 'nt':
        # One argv element per argument; the wildcard is expanded here because
        # no shell is involved.
        mergeCommand = ['bsdtar', '-czf', outFile] + glob.glob(sourceFile)
        try:
            subprocess.check_output(mergeCommand)
        # The tar command indicated an error.
        except subprocess.CalledProcessError:
            print('tar failed with non-zero exit.')
            return False
        # bsdtar could not be started at all (e.g. it is not installed).
        except OSError as err:
            print('Could not run bsdtar: ' + str(err))
            return False

    # We are on a unix system (likely archiving post-sampling)
    elif os.name == 'posix':
        try:
            make_tarfile(outFile, sourceFile)
        # Report failures through the return flag, as documented, instead of
        # letting the exception escape.
        except (OSError, tarfile.TarError) as err:
            print('Archiving failed: ' + str(err))
            return False
        print('')

    return True
    

In [33]:
# ------------------------------------------------------------------------------
# Archive data
# ------------------------------------------------------------------------------
def _file_hour(fileName):
    '''
    Return the datetime, truncated to the hour, encoded in an xml file name.

    The time stamp is the text between the last underscore and the next
    period; its first ten characters are read as yyyymmddHH.
    '''
    stamp = fileName.split('_')[-1].split('.')[0]
    return datetime.datetime(int(stamp[0:4]), int(stamp[4:6]),
                             int(stamp[6:8]), int(stamp[8:10]), 0)


# Only the first channel is archived; its name prefixes both the source files
# and the archives.
ch = channels[0]
for subpaths in sourcePathAll:
    # The entries of sourcePathAll are already complete paths (they include
    # sourcePath), so they are used directly instead of being joined onto
    # sourcePath a second time.
    channelPath = subpaths

    ########
    # Archive: To archive previously acquired data.
    # If the directory does not exist, move on to the next one.
    if not os.path.isdir(channelPath):
        continue

    # Only select xml files, sorted alphabetically so that the first and last
    # entries give the time span.
    contents = sorted(c for c in os.listdir(channelPath) if '.xml' in c)
    # Nothing to archive here: skip rather than fail on contents[0].
    if not contents:
        print('No xml files found in ' + channelPath + '. Skipping.')
        continue

    # First and last hour present in the directory.
    dtInit = _file_hour(contents[0])
    dtFinal = _file_hour(contents[-1])

    # Span the time found in the specified directory, one archive per hour.
    dt = dtInit
    while dt <= dtFinal:
        # Zero-padded date and hour, so the names line up with the yyyymmddHH
        # stamps of the files for every month, day and hour.
        dateStamp = dt.strftime('%Y%m%d')
        hh = dt.strftime('%H')

        # Create file names for this hour
        outFile = os.path.join(channelPath,
                               ch + '_' + dateStamp + '-' + hh + '.tar.gz')
        sourceFile = os.path.join(channelPath, ch + '_' + dateStamp + hh + '*')

        # Zip and archive this time period
        runFlag = archiveTool(outFile, sourceFile)
        if not runFlag:
            # Stops the remaining hours of this directory only.
            print('Error detected... exiting.')
            break

        # Iterate the time
        dt = dt + datetime.timedelta(hours=1)

    print('Done with ' + ch + '. Backup files in: ' + targetPath)
# ------------------------------------------------------------------------------

Source files: /Users/karllapo/Desktop/data/darkmix_data_2017/field/EBG/Piggy/EBG_Harptest1/channel 1_2017121522*
Archiving to: /Users/karllapo/Desktop/data/darkmix_data_2017/field/EBG/Piggy/EBG_Harptest1/channel 1_20171215-22.tar.gz
Source files: /Users/karllapo/Desktop/data/darkmix_data_2017/field/EBG/Piggy/EBG_Harptest1/channel 1_2017121523*
Archiving to: /Users/karllapo/Desktop/data/darkmix_data_2017/field/EBG/Piggy/EBG_Harptest1/channel 1_20171215-23.tar.gz
Source files: /Users/karllapo/Desktop/data/darkmix_data_2017/field/EBG/Piggy/EBG_Harptest1/channel 1_2017121600*
Archiving to: /Users/karllapo/Desktop/data/darkmix_data_2017/field/EBG/Piggy/EBG_Harptest1/channel 1_20171216-00.tar.gz
Source files: /Users/karllapo/Desktop/data/darkmix_data_2017/field/EBG/Piggy/EBG_Harptest1/channel 1_2017121601*
Archiving to: /Users/karllapo/Desktop/data/darkmix_data_2017/field/EBG/Piggy/EBG_Harptest1/channel 1_20171216-01.tar.gz
Source files: /Users/karllapo/Desktop/data/darkmix_data_2017/field/E

Source files: /Users/karllapo/Desktop/data/darkmix_data_2017/field/EBG/Piggy/EBG_Harptest4/channel 1_2017121705*
Archiving to: /Users/karllapo/Desktop/data/darkmix_data_2017/field/EBG/Piggy/EBG_Harptest4/channel 1_20171217-05.tar.gz
Source files: /Users/karllapo/Desktop/data/darkmix_data_2017/field/EBG/Piggy/EBG_Harptest4/channel 1_2017121706*
Archiving to: /Users/karllapo/Desktop/data/darkmix_data_2017/field/EBG/Piggy/EBG_Harptest4/channel 1_20171217-06.tar.gz
Source files: /Users/karllapo/Desktop/data/darkmix_data_2017/field/EBG/Piggy/EBG_Harptest4/channel 1_2017121707*
Archiving to: /Users/karllapo/Desktop/data/darkmix_data_2017/field/EBG/Piggy/EBG_Harptest4/channel 1_20171217-07.tar.gz
Source files: /Users/karllapo/Desktop/data/darkmix_data_2017/field/EBG/Piggy/EBG_Harptest4/channel 1_2017121708*
Archiving to: /Users/karllapo/Desktop/data/darkmix_data_2017/field/EBG/Piggy/EBG_Harptest4/channel 1_20171217-08.tar.gz
Done with channel 1. Backup files in: /Users/karllapo/Desktop/data/d

Source files: /Users/karllapo/Desktop/data/darkmix_data_2017/field/EBG/Piggy/EBG_Harptest8/channel 1_2017121811*
Archiving to: /Users/karllapo/Desktop/data/darkmix_data_2017/field/EBG/Piggy/EBG_Harptest8/channel 1_20171218-11.tar.gz
Source files: /Users/karllapo/Desktop/data/darkmix_data_2017/field/EBG/Piggy/EBG_Harptest8/channel 1_2017121812*
Archiving to: /Users/karllapo/Desktop/data/darkmix_data_2017/field/EBG/Piggy/EBG_Harptest8/channel 1_20171218-12.tar.gz
Source files: /Users/karllapo/Desktop/data/darkmix_data_2017/field/EBG/Piggy/EBG_Harptest8/channel 1_2017121813*
Archiving to: /Users/karllapo/Desktop/data/darkmix_data_2017/field/EBG/Piggy/EBG_Harptest8/channel 1_20171218-13.tar.gz
Source files: /Users/karllapo/Desktop/data/darkmix_data_2017/field/EBG/Piggy/EBG_Harptest8/channel 1_2017121814*
Archiving to: /Users/karllapo/Desktop/data/darkmix_data_2017/field/EBG/Piggy/EBG_Harptest8/channel 1_20171218-14.tar.gz
Source files: /Users/karllapo/Desktop/data/darkmix_data_2017/field/E

In [11]:
# ------------------------------------------------------------------------------
# Sync to the mobile backup drive.
# Runs only when rootMobile names an existing directory; an empty rootMobile
# therefore falls through to the warning at the bottom.
if os.path.isdir(rootMobile):
    print('Backing up archives to mobile drive')
    print('Syncing ' + rootLocal + ' to ' + rootMobile)

    # Check the os. Unix = rsync; Windows = Unison.
    mergeCommand = None
    if os.name == 'nt':
        # Every option and every option value is its own argv element; a
        # combined string such as '-logfile <path>' would reach Unison as a
        # single, unrecognised argument.
        mergeCommand = [unisonPath, rootLocal, rootMobile,
                        '-logfile', logfileMobile,
                        '-force', rootLocal,
                        '-batch',
                        '-nodeletion', rootMobile]

    elif os.name == 'posix':
        # Add a trailing forward slash to make rsync behave as expected
        # (copy the contents of rootLocal rather than the directory itself).
        if not rootMobile.endswith('/'):
            rootMobile = rootMobile + '/'
        # An empty rootLocal is left alone so it can never turn into '/'.
        if rootLocal and not rootLocal.endswith('/'):
            rootLocal = rootLocal + '/'
        mergeCommand = ['rsync', '-az', rootLocal, rootMobile]

    # Run the synchronisation exactly once.
    if mergeCommand is not None:
        try:
            subprocess.check_output(mergeCommand)
        # The sync program exited with an error, or could not be started.
        except (subprocess.CalledProcessError, OSError):
            print('Warning: syncing to the mobile backup failed.')

else:
    print('Warning: Mobile back-up was not found in the specified path.')
# ------------------------------------------------------------------------------

Backing up archives to mobile drive
Syncing /Users/karllapo/Desktop/archive to /Users/karllapo/Desktop/proj/test_archive/
