diff --git a/custom_config.py b/custom_config.py
index 81be61a..42849eb 100644
--- a/custom_config.py
+++ b/custom_config.py
@@ -79,3 +79,9 @@
 retirement_age = 3 # Only delete backups older than this many days (caveat: max_backups)
 max_backups = 2 # keep at least this many backups present (regardless of age)
 table_name = 'siteAuditGovMy'
+
+# S3 Buckets (full downloads)
+# The S3 upload dir is set in the AWS configuration section above
+file_max_backups = 7 # always keep at least this many backups
+file_retirement_age = 7 # Only archive zip files once they are this many days old
+archive_directory = 'archives'
\ No newline at end of file
diff --git a/house_keeping.py b/house_keeping.py
index 18d8d3a..da66151 100644
--- a/house_keeping.py
+++ b/house_keeping.py
@@ -47,4 +47,41 @@
 # Get latest list (post-deletion)
 response = client.list_backups(TableName=custom_config.dynamo_table_name)
 for backup in response['BackupSummaries']:
-    logger.info('Retained Backup: %s ' % backup)
\ No newline at end of file
+    logger.info('Retained Backup: %s ' % backup)
+
+
+# Archive and delete old full-download zip files in S3
+file_upper_bound = (datetime.now(timezone(timedelta(hours=custom_config.timezone_delta)))
+                    - timedelta(days=custom_config.file_retirement_age))
+
+# List the uploaded files in the S3 bucket
+logger.info("Retrieving list of files in S3 bucket")
+client = boto3.client('s3')
+file_prefix = custom_config.s3_upload_dir + "/"
+response = client.list_objects_v2(Bucket=custom_config.bucket_name,
+                                  Prefix=file_prefix)
+
+# Sort the listing (latest files first)
+response['Contents'].sort(key=operator.itemgetter('LastModified'), reverse=True)
+
+# Only consider files beyond the newest file_max_backups entries
+for content in response['Contents'][custom_config.file_max_backups:]:
+    file_name = content['Key'][len(file_prefix):]
+
+    if file_name == '':
+        continue  # list_objects_v2 returns the folder itself as an object (skip it)
+
+    if content['LastModified'] < file_upper_bound:
+        # Archive the file by copying it into the archive directory
+        client.copy({'Bucket': custom_config.bucket_name, 'Key': content['Key']},
+                    custom_config.bucket_name,
+                    custom_config.archive_directory + '/' + file_name)
+        logger.info("%s : Archived" % file_name)
+        # Delete the original file
+        response = client.delete_object(Bucket=custom_config.bucket_name,
+                                        Key=content['Key'])
+        # 'DeleteMarker' is only returned for versioned buckets; also accept a 204 status
+        if response.get('DeleteMarker') or response['ResponseMetadata']['HTTPStatusCode'] == 204:
+            logger.info("Successfully deleted %s" % file_name)
+        else:
+            logger.info("Unable to delete %s" % file_name)