Skip to content

Commit

Permalink
Common: fixed deleteFromS3 to delete all files, not just first 1000 (closes #18)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexanderdean committed Aug 25, 2015
1 parent 1482614 commit 39dbf76
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 6 deletions.
Expand Up @@ -80,7 +80,7 @@ object Singular {
// 1. Setup
// TODO: initialize for each database
implicit val s3Client = FileTasks.initializeS3Client(config.s3.access_key_id, config.s3.secret_access_key)
FileTasks.deleteFromS3(s3Client, config.s3.bucket, config.s3.folder_path)
FileTasks.deleteFromS3(s3Client, config.s3.bucket, Left(config.s3.folder_path))

// 2. Pagination
// TODO: this should be in parallel
Expand Down
21 changes: 16 additions & 5 deletions src/main/scala/com.snowplowanalytics/huskimo/tasks/FileTasks.scala
Expand Up @@ -30,10 +30,12 @@ import au.com.bytecode.opencsv.CSVWriter
// AWS
import com.amazonaws.auth.BasicAWSCredentials
import com.amazonaws.services.s3.AmazonS3Client
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.model.PutObjectRequest
import com.amazonaws.services.s3.model.ObjectListing

// Scala
import scala.collection.JavaConversions._
import scala.annotation.tailrec

// Scalaz
import scalaz._
Expand Down Expand Up @@ -73,13 +75,22 @@ object FileTasks {
*
* @param s3Client Our S3 client from the AWS SDK
* @param bucket The bucket to delete from
* @param folderPath The path to delete from
* @param target Either the folder path to start
* deleting from, or the next ObjectListing
* to delete
*/
def deleteFromS3(s3Client: AmazonS3Client, bucket: String, folderPath: String) {
val objcts = s3Client.listObjects(bucket, folderPath)
for (file <- objcts.getObjectSummaries) {
@tailrec
def deleteFromS3(s3Client: AmazonS3Client, bucket: String, target: Either[String, ObjectListing]): Unit = {
  // Resolve the listing to work on: a Left asks for a fresh listing under
  // the given folder path, a Right carries a listing that was already fetched.
  val listing = target match {
    case Left(folderPath) => s3Client.listObjects(bucket, folderPath)
    case Right(nextBatch) => nextBatch
  }
  // Delete every object named in this listing, one deleteObject call per key.
  for (file <- listing.getObjectSummaries) {
    s3Client.deleteObject(bucket, file.getKey)
  }
  // When the listing reports it was truncated, fetch the next batch and
  // continue with it. The call is the last expression of the method, which
  // is what allows @tailrec to accept it.
  if (listing.isTruncated) {
    deleteFromS3(s3Client, bucket, Right(s3Client.listNextBatchOfObjects(listing)))
  }
}

/**
Expand Down

0 comments on commit 39dbf76

Please sign in to comment.