Skip to content
This repository has been archived by the owner on Jan 12, 2020. It is now read-only.

Commit

Permalink
Put S3 archival into housekeeping
Browse files Browse the repository at this point in the history
  • Loading branch information
keithrozario committed Sep 23, 2018
1 parent 59a15d9 commit f634c9b
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 1 deletion.
6 changes: 6 additions & 0 deletions custom_config.py
Expand Up @@ -79,3 +79,9 @@
# DynamoDB backup housekeeping
retirement_age = 3 # Only delete backups older than this many days (caveat: max_backups)
max_backups = 2 # keep at least this many backups present (regardless of age)
table_name = 'siteAuditGovMy'

# S3 Buckets (full downloads)
# S3 upload dir is in aws configuration section above
file_max_backups = 7 # always keep at least this many zip backups (regardless of age)
file_retirement_age = 7 # only archive zip files once they are this many days old
archive_directory = 'archives' # S3 key prefix that retired zips are copied into before deletion
36 changes: 35 additions & 1 deletion house_keeping.py
Expand Up @@ -47,4 +47,38 @@
# Get latest list (post-deletion) and log each DynamoDB backup that survived
# housekeeping. (The original diff rendering duplicated the logger line —
# only one log call per backup is intended.)
response = client.list_backups(TableName=custom_config.dynamo_table_name)
for backup in response['BackupSummaries']:
    logger.info('Retained Backup: %s ' % backup)


# --- S3 full-download housekeeping -------------------------------------------
# Keep the newest file_max_backups zip files; among the remainder, any file
# older than file_retirement_age days is copied into the archive directory and
# then deleted from the upload directory.
file_upper_bound = (datetime.now(timezone(timedelta(hours=custom_config.timezone_delta)))
                    - timedelta(days=custom_config.file_retirement_age))

# list out scans in S3 bucket
logger.info("Retrieving list of files in S3 bucket")
client = boto3.client('s3')
file_prefix = custom_config.s3_upload_dir + "/"
# NOTE(review): list_objects_v2 returns at most 1000 keys per call; switch to
# a paginator if the upload directory can ever exceed that.
response = client.list_objects_v2(Bucket=custom_config.bucket_name,
                                  Prefix=file_prefix)

# Sort latest-first so the slice below skips the newest file_max_backups files.
# .get() avoids a KeyError when the prefix matches no objects at all.
contents = response.get('Contents', [])
contents.sort(key=operator.itemgetter('LastModified'), reverse=True)

# loop through only backups after max_backup:
for content in contents[custom_config.file_max_backups:]:
    file_name = content['Key'][len(file_prefix):]

    if file_name == '':
        continue  # S3 lists the folder itself as an object (skip folder)

    if content['LastModified'] < file_upper_bound:
        # Archive file: copy it under the archive directory before deleting
        client.copy({'Bucket': custom_config.bucket_name, 'Key': content['Key']},
                    custom_config.bucket_name,
                    custom_config.archive_directory + '/' + file_name)
        logger.info("%s : Archived" % file_name)
        # Delete file. Use a distinct name so the listing response above is
        # not clobbered inside the loop.
        delete_response = client.delete_object(Bucket=custom_config.bucket_name,
                                               Key=content['Key'])
        # 'DeleteMarker' only appears in the response for versioned buckets;
        # indexing it directly raised KeyError on plain buckets. A 204 status
        # is the reliable success signal for delete_object.
        status = delete_response.get('ResponseMetadata', {}).get('HTTPStatusCode')
        if status == 204 or delete_response.get('DeleteMarker'):
            logger.info("Successfully Deleted %s" % file_name)
        else:
            logger.info("Unable to delete %s" % file_name)

0 comments on commit f634c9b

Please sign in to comment.