# Download Low Rate Data Logger Files From AWS S3 - All Data for a Logger That Exists on S3

Download all files from a logger and concatenate to a single file.

- Get list of contents of lake.
- For a specific logger:
    - Download each day file
    - Concatenate all of these
    - Remove the repeated header lines that each day file contributes (only the first copy is kept)
- The data file produced is named 'loggername_Table1.csv'

**This version uses list_objects_v2 to find the objects to download**

In [1]:
import boto3
import os
import glob
import shutil

In [2]:
#name of the S3 bucket that is listed and downloaded from
bucket = 'dev-data-logger-lake.geonet.org.nz'

#top folder where the final CSV files are saved
dlsav = '/home/sherburn/GeoNet/datalogger/auto_download'
#folder the daily CSV files are downloaded into
dltmp = '/home/sherburn/GeoNet/datalogger/auto_download/tmp'

#temporary file: daily files concatenated, daily headers still present
tmpfile = os.path.join(dlsav, 'tmpfile.csv')

#loggers to download data from
loggers = [
    'infernocratertest',
    'lowertemaari',
    'foxglacierlandslide',
]

In [3]:
def remove_duplines(tmpfile, completefile):
    """Copy tmpfile to completefile, dropping every line already written.

    Only the first occurrence of each distinct line is kept, and the order
    of the surviving lines is unchanged. Used to strip the header lines
    that are repeated once per concatenated day file.

    Parameters
    ----------
    tmpfile : path of the concatenated input file to read
    completefile : path of the output file (overwritten if it exists)
    """
    lines_seen = set() # set gives constant-time membership tests
    # context managers close both files even if reading or writing fails
    with open(tmpfile, 'r') as infile, open(completefile, 'w') as outfile:
        for line in infile:
            if line not in lines_seen: # not a duplicate
                outfile.write(line)
                lines_seen.add(line)

In [4]:
#create client
s3 = boto3.client('s3')

#list contents of bucket
#a single list_objects_v2 call returns at most 1000 keys, so page through
#every result page to make sure no object is missed
paginator = s3.get_paginator('list_objects_v2')
keys = []
for page in paginator.paginate(Bucket=bucket):
    #'Contents' is absent from the response when no objects are returned
    keys.extend(obj['Key'] for obj in page.get('Contents', []))

#loop for each logger
for logger in loggers:
    print ('downloading from logger:', logger)

    os.makedirs(dltmp, exist_ok=True) #make tmp directory for downloaded files
    try:
        #download every Table1 object whose key contains the logger name
        #NOTE(review): substring match - a logger whose name contains another
        #logger's name would also match; confirm against the key layout
        for key in keys:
            if logger in key and 'Table1' in key:
                s3.download_file(bucket, key, os.path.join(dltmp, os.path.basename(key)))

        #concat all files for the logger
        #sorted by file name to get data in time order
        files = sorted(glob.glob(os.path.join(dltmp, '*.csv')))
        with open(tmpfile, 'w') as outfile:
            for file in files:
                with open(file, 'r') as readfile:
                    shutil.copyfileobj(readfile, outfile)
    finally:
        #always remove the tmp directory so that files left over from a failed
        #download are never mixed into the next logger's data
        shutil.rmtree(dltmp)

    #remove unwanted header lines from temporary file
    completefile = os.path.join(dlsav, logger, logger+'_Table1.csv')
    #the per-logger output folder may not exist yet
    os.makedirs(os.path.dirname(completefile), exist_ok=True)
    remove_duplines(tmpfile, completefile)
    #remove temporary file
    os.remove(tmpfile)

downloading from logger: infernocratertest
downloading from logger: lowertemaari
downloading from logger: foxglacierlandslide
