Skip to content

Commit

Permalink
cleanup: use File.join to concat file path correctly, introduced opti…
Browse files Browse the repository at this point in the history
…on ":url" in case web_url differs from file path, introduced ":full_path" to specify document_root and path in one go.
  • Loading branch information
Tobias Bielohlawek committed Mar 8, 2011
1 parent 5db6cbd commit 337fc63
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 46 deletions.
5 changes: 4 additions & 1 deletion README.rdoc
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,11 @@ Via gem:

* <code>:url_options</code> -- hash with <code>:host</code>, optionally <code>:port</code> and <code>:protocol</code>
* <code>:base_url</code> -- string alternative to <code>:url_options</code>, e.g. <code>'https://example.com:8080/'</code>
* <code>:url_path</code> -- string, URL path under which the sitemap files are served; defaults to the value of <code>:document_path</code>
* <code>:document_root</code> -- string, root directory against which <code>:document_path</code> is resolved
* <code>:path</code> -- string defaults to <code>'sitemaps'</code>, which places sitemap files under the <code>/sitemaps</code> directory
* <code>:document_path</code> -- string, path of the generation folder relative to <code>:document_root</code>; defaults to <code>'sitemaps/'</code>
* <code>:path</code> -- string, alias for ":document_path" for legacy reasons
* <code>:document_full</code> -- string, absolute path of the generation folder; defaults to <code>:document_root</code> joined with <code>:document_path</code>
* <code>:max_per_sitemap</code> -- <code>50000</code>, which is the limit dictated by Google but can be less
* <code>:batch_size</code> -- <code>1001</code> (not <code>1000</code> due to a bug in DataMapper)
* <code>:gzip</code> -- <code>true</code>
Expand Down
70 changes: 29 additions & 41 deletions lib/big_sitemap.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ class BigSitemap
DEFAULTS = {
:max_per_sitemap => Builder::MAX_URLS,
:batch_size => 1001,
:path => 'sitemaps',
:document_path => 'sitemaps/',
:gzip => true,

# opinionated
Expand All @@ -24,34 +24,31 @@ class BigSitemap

def initialize(options={})
@options = DEFAULTS.merge options

@default_url_options = options.delete(:default_url_options) || {}
@options[:document_path] ||= @options[:path] #for legacy reasons

if @options[:max_per_sitemap] <= 1
raise ArgumentError, '":max_per_sitemap" must be greater than 1'
end

if @options[:url_options]
@default_url_options.update @options[:url_options]
elsif @options[:base_url]
uri = URI.parse(@options[:base_url])
@default_url_options[:host] = uri.host
@default_url_options[:port] = uri.port
@default_url_options[:protocol] = uri.scheme
else
@options[:base_url] = URI::Generic.build( {:scheme => "http"}.merge(@options.delete(:url_options)) ).to_s
end

unless @options[:base_url]
raise ArgumentError, 'you must specify either ":url_options" hash or ":base_url" string'
end
@options[:url_path] ||= @options[:document_path]

if @options[:batch_size] > @options[:max_per_sitemap]
raise ArgumentError, '":batch_size" must be less than ":max_per_sitemap"'
end

unless @options[:document_root]
raise ArgumentError, 'Document root must be specified with the ":document_root" option'
@options[:document_full] ||= File.join(@options[:document_root], @options[:document_path])
unless @options[:document_full]
raise ArgumentError, 'Document root must be specified with the ":document_root" option, the full path with ":document_full"'
end

@file_path = "#{@options[:document_root]}/#{strip_leading_slash(@options[:path])}"
Dir.mkdir(@file_path) unless File.exists? @file_path
Dir.mkdir(@options[:document_full]) unless File.exists?(@options[:document_full])

@sources = []
@models = []
Expand Down Expand Up @@ -98,11 +95,15 @@ def table_name(model)

# Builds the base path of a sitemap file inside the generation folder.
#
# name - a String, used verbatim; any other object is first converted
#        with table_name.
#
# Returns a String of the form "<@options[:document_full]>/sitemap_<name>".
def file_name(name)
  name = table_name(name) unless name.is_a? String
  # File.join inserts exactly one separator, whether or not the folder
  # option ends with a slash.
  File.join(@options[:document_full], "sitemap_#{name}")
end

# Glob pattern that matches the sitemap files (".xml" and ".xml.gz")
# in the generation folder.
#
# Returns a String meant to be handed to Dir[].
def dir_files
  folder = @options[:document_full]
  pattern = "sitemap_*.{xml,xml.gz}"
  File.join(folder, pattern)
end

def clean
Dir["#{@file_path}/sitemap_*.{xml,xml.gz}"].each do |file|
Dir[dir_files].each do |file|
FileUtils.rm file
end
self
Expand Down Expand Up @@ -171,12 +172,12 @@ def generate_models

param_method = pick_method(record, PARAM_METHODS)

location = options[:location]
if location.is_a?(Proc)
location = location.call(record)
else
location = "#{root_url}/#{strip_leading_slash(options[:path])}/#{record.send(param_method)}"
end
location =
if options[:location].is_a?(Proc)
options[:location].call(record)
else
File.join @options[:base_url], options[:path], record.send(param_method).to_s
end

change_frequency = options[:change_frequency] || 'weekly'
freq = change_frequency.is_a?(Proc) ? change_frequency.call(record) : change_frequency
Expand Down Expand Up @@ -206,7 +207,7 @@ def generate_static

# Create a sitemap index document
def generate_sitemap_index(files = nil)
files ||= Dir["#{@file_path}/sitemap_*.{xml,xml.gz}"]
files ||= Dir[dir_files]
with_sitemap 'index', :type => 'index' do |sitemap|
for path in files
next if path =~ /index/
Expand Down Expand Up @@ -246,17 +247,6 @@ def ping_search_engines
end
end

# Assembles the site's root URL from @default_url_options and memoizes
# it in @root_url.
#
# Reads :protocol (defaults to 'http'), :host and :port. The port is
# appended only when it is set and differs from 80.
#
# Returns a String such as "http://example.com" or "http://example.com:8080".
def root_url
  @root_url ||= begin
    # Assign the port before it is used: in the modifier form
    # `... ":#{port}" if port = ...`, the left-hand `port` is parsed as a
    # method call and raises NameError at runtime.
    port = @default_url_options[:port]

    url = ''.dup
    url << (@default_url_options[:protocol] || 'http')
    # A protocol that already carries "://" is left as given.
    url << '://' unless url.match('://')
    url << @default_url_options[:host]
    url << ":#{port}" if port && port != 80
    url
  end
end

private

def prepare_update
Expand All @@ -271,11 +261,13 @@ def prepare_update
end

# Creates the generator lock file inside the generation folder.
#
# lock_file - file name of the lock, resolved against
#             @options[:document_full] (default: 'generator.lock').
#
# Returns the File object produced by File.open; it is not closed here.
#
# NOTE(review): with a String mode, File.open takes its third positional
# argument as permission bits, not open flags, so File::EXCL may not make
# this open exclusive. Confirm against the callers' rescue clauses before
# changing the flags.
def lock!(lock_file = 'generator.lock')
  lock_file = File.join(@options[:document_full], lock_file)
  File.open(lock_file, 'w', File::EXCL)
end

# Removes the generator lock file from the generation folder.
#
# lock_file - file name of the lock, resolved against
#             @options[:document_full] (default: 'generator.lock').
#
# Raises whatever FileUtils.rm raises when the file cannot be removed
# (for example when it does not exist).
def unlock!(lock_file = 'generator.lock')
  lock_file = File.join(@options[:document_full], lock_file)
  FileUtils.rm lock_file
end

def with_sitemap(name, options={})
Expand All @@ -302,10 +294,6 @@ def with_sitemap(name, options={})
end
end

# Removes a single "/" from the very beginning of +str+.
#
# str - the String to clean; it is not modified.
#
# Returns a new String. Only the first character of the whole string is
# considered: the pattern is anchored with \A rather than ^, because ^
# also matches after every newline and could strip a slash from a later
# line.
def strip_leading_slash(str)
  str.sub(%r{\A/}, '')
end

def get_last_id(filename)
Dir["#{filename}*.{xml,xml.gz}"].map do |file|
file.to_s.scan(/#{filename}_(.+).xml/).flatten.last.to_i
Expand All @@ -328,7 +316,7 @@ def escape_if_string(value)
end

# Public URL of a generated sitemap file.
#
# path - file system path of the sitemap; only its base name is used.
#
# Returns a String built from @options[:base_url], @options[:url_path]
# and the file's base name, joined with single "/" separators.
def url_for_sitemap(path)
  File.join @options[:base_url], @options[:url_path], File.basename(path)
end

end
Expand Down
16 changes: 12 additions & 4 deletions test/big_sitemap_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,18 @@ def teardown

# A :base_url passed as a string must end up unchanged in the instance's
# options. The description is specific to this case so that it cannot
# collide with another test carrying the same description.
should 'keep the given :base_url' do
  options = {:document_root => tmp_dir}
  url = 'http://example.com'
  sitemap = BigSitemap.new(options.merge(:base_url => url))

  assert_equal url, sitemap.instance_variable_get(:@options)[:base_url]
end

# A :url_options hash containing only :host must produce an "http" base
# URL for that host. The description is specific to this case so that it
# cannot collide with another test carrying the same description.
should 'build the base URL from :url_options' do
  options = {:document_root => tmp_dir}
  url = 'http://example.com'
  sitemap = BigSitemap.new(options.merge(:url_options => {:host => 'example.com'}))

  assert_equal url, sitemap.instance_variable_get(:@options)[:base_url]
end

should 'generate a sitemap index file' do
Expand Down

0 comments on commit 337fc63

Please sign in to comment.