
Merge pull request #2 from lovegandhi/multi_part_upload

Multi part upload and cron style scheduler
2 parents 43642ab + cb13da5 · commit bc1553188df97d3df825de6d826b34ab7185a431 · Marc Boeker committed Feb 13, 2013
Showing with 281 additions and 76 deletions.
  1. +19 −0 README.md
  2. +6 −1 examples/jobs.yml
  3. +117 −26 lib/mongolicious/backup.rb
  4. +14 −14 lib/mongolicious/db.rb
  5. +53 −18 lib/mongolicious/filesystem.rb
  6. +72 −17 lib/mongolicious/storage.rb
README.md
@@ -20,6 +20,9 @@ backup jobs, that will be run in the defined interval.
db: mongodb://user:password@host:port/database
location: bucket_name/prefix
versions: 5
+ compress_tar_file: False
+ temp_directory: /mnt/some_ebs_location/backups
+ cron: 0 22 * * 1-5
The s3 section contains the credentials used to authenticate with AWS S3. The
jobs section contains a list of jobs that will be executed in the given
@@ -29,6 +32,22 @@ interval. Each job must contain the following keys:
* **db** - Is a URI, that defines the database host, database name and auth credentials.
* **location** - The location is the S3 bucket, where to put the dump and a prefix.
* **versions** - Keep the latest X versions of the backup.
+* **compress_tar_file** - True/False. A large backup might take too long to compress on smaller EC2 instances, so compression can be turned off.
+* **temp_directory** - (optional) Use this directory for storing temporary dump and tar files. If not provided, the system's temp directory is used.
+* **cron** - (optional) A cron expression, e.g. `0 22 * * 1-5` runs every weekday (Monday to Friday) at 22:00 (10pm).
+  If it is not provided, **interval** is used instead.
+
+Cron explained:
+
+|Field name |Mandatory |Allowed values |Allowed special characters|
+|:------------|:--------:|:---------------|:-------------------------|
+|Minutes |Yes |0-59 |* / , - |
+|Hours |Yes |0-23 |* / , - |
+|Day of month |Yes |1-31 |* / , - ? L W |
+|Month |Yes |1-12 or JAN-DEC |* / , - |
+|Day of week |Yes |0-6 or SUN-SAT |* / , - ? L # |
+|Year |No |1970-2099 |* / , - |
+
Please note that the location option works like this:
examples/jobs.yml
@@ -7,9 +7,14 @@ jobs:
db: mongodb://user:password@host:port/database
location: bucket_name/prefix
versions: 5
+ compress_tar_file: False
+ temp_directory: /mnt/some_ebs_location/backups
+ cron: 0 1 * * 0-6
- interval: 1d
db: mongodb://user:password@host:port/database2
location: bucket_name/prefix_db2
versions: 2
-
+ compress_tar_file: True
+ temp_directory: /mnt/some_ebs_location/backups
+ cron: 0 2 * * 0-6
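
The cron entries above follow the field layout described in the README table. Below is a minimal sketch of how such an expression is consumed, mirroring the rufus-scheduler calls added in backup.rb further down; the job hash is a hypothetical stand-in for one parsed jobs.yml entry.

```ruby
require 'rufus/scheduler'

# Hypothetical stand-in for one parsed job entry from jobs.yml.
job = { 'db' => 'mongodb://user:password@host:27017/database', 'cron' => '0 1 * * 0-6' }

scheduler = Rufus::Scheduler.start_new

# '0 1 * * 0-6' fires at minute 0 of hour 1, on any day of the month, in any
# month, Sunday through Saturday; that is, every day at 01:00.
scheduler.cron job['cron'] do
  puts "Would run the backup for #{job['db'].split('/').last} now"
end

scheduler.join
```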
lib/mongolicious/backup.rb
@@ -1,80 +1,171 @@
module Mongolicious
class Backup
-
+
# Initialize the backup system.
#
# @param [String] jobfile the path of the job configuration file.
#
# @return [Backup]
def initialize(jobfile)
@conf = parse_jobfile(jobfile)
-
+
@storage = Storage.new(@conf['s3'])
@filesystem = Filesystem.new
@db = DB.new
-
+
schedule_jobs(@conf['jobs'])
end
-
+
protected
# Parse YAML job configuration.
#
# @param [String] jobfile the path of the job configuration file.
#
# @return [Hash]
- def parse_jobfile(jobfile)
- YAML.load(File.read(jobfile))
+ def parse_jobfile(jobfile)
+ YAML.load(File.read(jobfile))
rescue Errno::ENOENT
Mongolicious.logger.error("Could not find job file at #{ARGV[0]}")
exit
rescue ArgumentError => e
Mongolicious.logger.error("Could not parse job file #{ARGV[0]} - #{e}")
exit
end
-
+
# Schedule the jobs to be executed in the given interval.
#
# This method will block and keep running until it gets interrupted.
#
# @param [Array] jobs the list of jobs to be scheduled.
#
- # @return [nil]
+ # @return [nil]
def schedule_jobs(jobs)
scheduler = Rufus::Scheduler.start_new
-
+
jobs.each do |job|
- Mongolicious.logger.info("Scheduled new job for #{job['db'].split('/').last} with interval #{job['interval']}")
- scheduler.every job['interval'] do
- backup(job)
+ if job['cron']
+ Mongolicious.logger.info("Scheduled new job for #{job['db'].split('/').last} with cron: #{job['cron']}")
+ scheduler.cron job['cron'] do
+ backup(job)
+ end
+ else
+   Mongolicious.logger.info("Scheduled new job for #{job['db'].split('/').last} with interval: #{job['interval']}")
+   scheduler.every job['interval'] do
+     backup(job)
+   end
end
- end
-
+ end
+
scheduler.join
- end
-
+ end
+
# Dump database, compress and upload it.
#
# @param [Hash] job the job to execute.
#
# @return [nil]
def backup(job)
- path = @filesystem.get_tmp_path
+ path = @filesystem.get_tmp_path(job['temp_directory'])
s3 = @storage.parse_location(job['location'])
db = @db.get_opts(job['db'])
-
+
Mongolicious.logger.info("Starting job for #{db[:host]}:#{db[:port]}/#{db[:db]}")
@db.dump(db, path)
- @filesystem.compress(path)
-
- key = "#{s3[:prefix]}_#{Time.now.strftime('%m%d%Y_%H%M%S')}.tar.bz2"
- @storage.upload(s3[:bucket], key, path)
-
- @filesystem.cleanup(path)
+ path = @filesystem.compress(path, job['compress_tar_file'])
+ key = "#{s3[:prefix]}_#{Time.now.strftime('%Y%m%d_%H%M%S')}.tar.bz2"
+
+ min_file_size = 5 * (1024 * 1024) # 5 MB
+ max_file_size = 4 * (1024 * 1024 * 1024) # 4 GB
+ split_size = max_file_size
+ file_size = File.size("#{path}")
+ Mongolicious.logger.info("Total backup size: #{file_size} bytes")
+
+ if file_size > max_file_size
+ split_parts = file_size / max_file_size + (file_size % max_file_size > 0 ? 1 : 0)
+
+ last_part_size_in_bytes = file_size -
+ (max_file_size * ((split_parts - 1) <= 0 ? 1: (split_parts - 1)))
+
+ if last_part_size_in_bytes < min_file_size
+ # If we are sending the file in chunks, we need to make sure that the last part of the
+ # file is bigger than 5MB, otherwise the whole upload will fail.
+ # If the last part is smaller than 5MB, we distribute its bytes across the other parts.
+ split_size = max_file_size +
+ (last_part_size_in_bytes/((split_parts - 1) <= 0 ? 1 : (split_parts - 1)))
+ end
+
+ Mongolicious.logger.info("Splitting file into #{split_size} bytes/part before uploading.")
+ system("split -b #{split_size} #{path} #{path}.")
+
+ Mongolicious.logger.info("Deleting tar file: #{path}")
+ @filesystem.cleanup_tar_file(path)
+
+ # Get a list of all the split files bigfile.gzip.aa/ab/ac...
+ file_parts = Dir.glob("#{path}.*").sort
+ upload_id = @storage.initiate_multipart_upload(s3[:bucket], key)
+ part_ids = []
+
+ Mongolicious.logger.info("Uploading #{path} in #{file_parts.count} parts.")
+
+ file_parts.each_with_index do |part, position|
+ Mongolicious.logger.info("Uploading file part: #{part}")
+ part_number = (position + 1).to_s
+
+ File.open part do |file_part|
+ attempts = 0
+ max_attempts = 3
+
+ begin
+ # In production we would see frequent "Connection reset by peer" errors while uploading to S3.
+ # A retry can re-enter this begin block 30-40 minutes later, so we must not reuse the
+ # same socket, as that one has timed out by then.
+ # See http://scie.nti.st/2008/3/14/amazon-s3-and-connection-reset-by-peer for an explanation
+ # of "connection reset by peer" and what you can do to fix the issue.
+ #
+ # Known issue with fog 0.5.1: https://github.com/fog/fog/issues/327
+ # Fixed with: https://github.com/fog/fog/commit/597acf03631d3c21442f036a0433a2aa24f98345
+ # Fog 0.5.1 was released on January 31, 2011; the fix was issued on May 25, 2011.
+ # Whenever there is a connection reset, fog would not set the content length to the right value.
+
+ etag = @storage.upload_part(s3[:bucket], key, upload_id, part_number, file_part)
+ rescue Exception => exception
+ attempts += 1
+ Mongolicious.logger.warn("Retry #{attempts} of #{max_attempts}. Error while uploading part: #{part}")
+ Mongolicious.logger.warn(exception.message)
+ Mongolicious.logger.warn(exception.backtrace)
+ retry unless attempts >= max_attempts
+
+ Mongolicious.logger.error("Aborting upload! Error uploading part: #{part}")
+ @filesystem.cleanup_parts(file_parts)
+
+ # tell S3 that we are aborting the upload.
+ @storage.abort_multipart_upload(s3[:bucket], key, upload_id)
+
+ # There is nothing more we can do at this point.
+ # Return from the method without raising, so that subsequent jobs can still fire as scheduled.
+ return
+ end
+
+ part_ids << etag
+ end
+ end
+
+ Mongolicious.logger.info("Completing multipart upload.")
+ response = @storage.complete_multipart_upload(s3[:bucket], key, upload_id, part_ids)
+ Mongolicious.logger.info("#{response.inspect}\n\n")
+
+ @filesystem.cleanup_parts(file_parts)
+ else
+ @storage.upload(s3[:bucket], key, path)
+ @filesystem.cleanup_tar_file(path)
+ end
+
@storage.cleanup(s3[:bucket], s3[:prefix], job['versions'])
-
- Mongolicious.logger.info("Finishing job for #{db[:host]}:#{db[:port]}/#{db[:db]}")
+
+ Mongolicious.logger.info("Finishing job for #{db[:host]}:#{db[:port]}/#{db[:db]}")
end
end
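
The part-size arithmetic in the backup method is easier to follow with concrete numbers. Here is a small standalone sketch of the same calculation, assuming an illustrative 9 GB dump; the constant and method names are not from the commit.

```ruby
# Mirrors the split-size logic in Backup#backup: parts are capped at 4 GB, but
# S3 rejects multipart uploads whose final part is smaller than 5 MB, so a tiny
# last part has its bytes redistributed across the other parts.
MIN_PART = 5 * 1024 * 1024          # 5 MB
MAX_PART = 4 * 1024 * 1024 * 1024   # 4 GB

def split_size_for(file_size)
  return file_size if file_size <= MAX_PART   # small files are uploaded in one piece

  parts     = file_size / MAX_PART + (file_size % MAX_PART > 0 ? 1 : 0)
  last_part = file_size - MAX_PART * (parts - 1)

  if last_part < MIN_PART
    # e.g. a file 1 MB over 8 GB would naively split into 4 GB + 4 GB + 1 MB;
    # instead the 1 MB remainder is folded into the first two parts.
    MAX_PART + last_part / (parts - 1)
  else
    MAX_PART
  end
end

puts split_size_for(9 * 1024 * 1024 * 1024)   # => 4294967296 (parts of 4 GB, 4 GB and 1 GB)
```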
lib/mongolicious/db.rb
@@ -3,44 +3,44 @@ class DB
# Initialize a new DB object.
#
- # @return [DB]
+ # @return [DB]
def initialize
-
+
end
# Parse the MongoDB URI.
#
# @param [String] db_uri the DB URI.
#
- # @return [Hash]
+ # @return [Hash]
def get_opts(db_uri)
uri = URI.parse(db_uri)
-
+
{
- :host => uri.host,
- :port => uri.port,
- :user => uri.user,
- :password => uri.password,
+ :host => uri.host,
+ :port => uri.port,
+ :user => uri.user,
+ :password => uri.password,
:db => uri.path.gsub('/', '')
}
- end
+ end
# Dump database using mongodump.
#
# @param [Hash] db the DB connection opts.
# @param [String] path the path, where the dump should be stored.
#
- # @return [nil]
+ # @return [nil]
def dump(db, path)
Mongolicious.logger.info("Dumping database #{db[:db]}")
-
+
cmd = "mongodump -d #{db[:db]} -h #{db[:host]}:#{db[:port]} -o #{path}"
cmd << " -u '#{db[:user]}' -p '#{db[:password]}'" unless (db[:user].nil? || db[:user].empty?)
cmd << " > /dev/null"
-
+
system(cmd)
raise "Error while backuing up #{db[:db]}" if $?.to_i != 0
end
-
+
end
-end
+end
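
For reference, a short sketch of what DB#get_opts extracts from a connection URI and the mongodump command that DB#dump then builds from it; the URI, credentials, and dump path below are placeholders.

```ruby
require 'uri'

# Placeholder URI; host, credentials and database name are made up.
uri = URI.parse('mongodb://backup_user:secret@db.example.com:27017/myapp_production')

db = {
  :host     => uri.host,                # "db.example.com"
  :port     => uri.port,                # 27017
  :user     => uri.user,                # "backup_user"
  :password => uri.password,            # "secret"
  :db       => uri.path.gsub('/', '')   # "myapp_production"
}

# DB#dump then shells out roughly like this (auth flags only when a user is set):
cmd = "mongodump -d #{db[:db]} -h #{db[:host]}:#{db[:port]} -o /tmp/some_dump_path"
cmd << " -u '#{db[:user]}' -p '#{db[:password]}'" unless db[:user].nil? || db[:user].empty?
cmd << " > /dev/null"
puts cmd
```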