Permalink
Browse files

added license, cleanup, added docs

  • Loading branch information...
1 parent 4464dd8 commit 90dcf3323821baab5165d3a92ea048574b6a83a0 Tobias Bielohlawek committed Feb 15, 2012
View
@@ -4,10 +4,11 @@
## v2.0.x - ???
- * updated Docu
+ * update Docu
* switch to writer chain
+ * move manifest to its own writer
+ * nested writing
* add BigSitemap API
- * support nested writing
## v2.0.0 - 13-02-2012
_initial release_
View
26 LICENCE
@@ -0,0 +1,26 @@
+Copyright (c) 2012, SoundCloud, Tobias Bielohlawek
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+- Neither the name of the SoundCloud nor the names of its contributors may be
+ used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
View
@@ -1,6 +1,4 @@
-# MassiveSitemap
-
-[![](http://travis-ci.org/rngtng/massive_sitemap.png)](http://travis-ci.org/rngtng/massive_sitemap)
+# MassiveSitemap [![](http://travis-ci.org/rngtng/massive_sitemap.png)](http://travis-ci.org/rngtng/massive_sitemap)
Build painfree sitemaps for websites with millions of pages
@@ -10,13 +8,36 @@ It implements various generation stategies, e.g. to split large Sitemaps into mu
## Usage
```ruby
- index_url = MassiveSitemap.generate(:url => 'test.de/') do
- add "dummy"
- end
- MassiveSitemap.ping(index_url)
+require 'massive_sitemap'
+
+index_url = MassiveSitemap.generate(:url => 'test.de/') do
+ add "dummy"
+end
+MassiveSitemap.ping(index_url)
+
```
* clear structure
* allows extension (S3)
MassiveSitemap - build huge sitemaps painfree. Differential updates keep generation time short and reduce load on the DB. It's heavily inspired by BigSitemap and offers a compatible API.
+
+## Dependencies
+
+Depends on an S3 library: the [S3 gem](https://github.com/qoobaa/s3)
+
+
+## Contributing
+
+We'll check out your contribution if you:
+
+- Provide a comprehensive suite of tests for your fork.
+- Have a clear and documented rationale for your changes.
+- Package these up in a pull request.
+
+We'll do our best to help you out with any contribution issues you may have.
+
+
+## License
+
+The license is included as LICENCE in this directory.
View
@@ -1,31 +1,35 @@
+# Copyright (c) 2012, SoundCloud Ltd., Tobias Bielohlawek
+
require 'massive_sitemap/writer/file'
require 'massive_sitemap/writer/gzip_file'
require 'massive_sitemap/builder/rotating'
require 'massive_sitemap/builder/index'
require 'massive_sitemap/lock'
require 'massive_sitemap/ping'
-# Page at -> <url>
-# http://example.de/dir/
-
-# Index at -> <index_url>
-# http://sitemap.example.de/index-dir/
-
-# Save at -> <root>
-# /root/dir/ -> <document_root>/<document_path>
+# MassiveSitemap
+# Example Standard setup of a writer, rotating and index builder.
+# Common parameters:
+# required:
+# :url - Url of your website e.g http://example.de/dir/
+#
+# optional:
+# :index_url - Url of your index website e.g http://example.de/sitemap
+# :root - directory where files get written to e.g. /var/sitemap
+# :gzip - whether to gzip files or not
+# :writer - custom writer
module MassiveSitemap
DEFAULTS = {
# global
:index_url => nil,
+ :index_filename => "sitemap_index.xml",
:gzip => false,
- :writer => MassiveSitemap::Writer::File,
# writer
:root => '.',
:force_overwrite => false,
:filename => "sitemap.xml",
- :index_filename => "sitemap_index.xml",
# builder
:url => nil,
@@ -37,18 +41,18 @@ def generate(options = {}, &block)
@options = DEFAULTS.merge options
unless @options[:url]
- raise ArgumentError, 'you must specify ":url" string'
+ raise ArgumentError, %Q(":url" not given)
end
@options[:index_url] ||= @options[:url]
if @options[:max_urls] && !Builder::Rotating::NUM_URLS.member?(@options[:max_urls])
- raise ArgumentError, %Q(":max_urls" must be greater than #{NUM_URLS.min} and smaller than #{NUM_URLS.max})
+ raise ArgumentError, %Q(":max_urls" must be greater than #{Builder::Rotating::NUM_URLS.min} and smaller than #{Builder::Rotating::NUM_URLS.max})
end
- @options[:writer] = Writer::GzipFile if @options[:gzip]
+ @writer = @options.delete(:writer)
+ @writer ||= (@options.delete(:gzip) ? Writer::GzipFile : Writer::File).new
- @writer = @options.delete(:writer).new @options
- Builder::Rotating.generate(@writer, @options, &block)
+ Builder::Rotating.generate(@writer.set(@options), @options, &block)
@writer.set(:filename => @options[:index_filename])
Builder::Index.generate(@writer, @options.merge(:url => @options[:index_url]))
@@ -1,3 +1,5 @@
+# Copyright (c) 2012, SoundCloud Ltd., Tobias Bielohlawek
+
require "massive_sitemap/builder/base"
module MassiveSitemap
@@ -1,3 +1,5 @@
+# Copyright (c) 2012, SoundCloud Ltd., Tobias Bielohlawek
+
# MassiveSitemap Builder
# The purpose of a builder is to create the XML files, containing the header and all other tags (with attributes).
#
@@ -1,3 +1,5 @@
+# Copyright (c) 2012, SoundCloud Ltd., Tobias Bielohlawek
+
require "massive_sitemap/builder/base"
module MassiveSitemap
@@ -1,3 +1,5 @@
+# Copyright (c) 2012, SoundCloud Ltd., Tobias Bielohlawek
+
require "massive_sitemap/builder/base"
module MassiveSitemap
@@ -1,7 +1,8 @@
-require 'fileutils'
+# Copyright (c) 2012, SoundCloud Ltd., Tobias Bielohlawek
-# Create Lock
+require 'fileutils'
+# Simple locker to make sure no second process is running in parallel
module MassiveSitemap
LOCK_FILE = 'generator.lock'
@@ -1,6 +1,9 @@
+# Copyright (c) 2012, SoundCloud Ltd., Tobias Bielohlawek
+
require 'cgi'
require 'open-uri'
+# Ping Search Engines to pull the latest update
module MassiveSitemap
ENGINES_URLS = {
:google => 'http://www.google.com/webmasters/tools/ping?sitemap=%s',
@@ -1,3 +1,5 @@
+# Copyright (c) 2012, SoundCloud Ltd., Tobias Bielohlawek
+
# MassiveSitemap Writer
# The purpose of a writer is to store the written data, and to keep the state of existing data.
# It offers an API that a builder can talk to, and an interface which other writers have to implement
@@ -1,7 +1,10 @@
+# Copyright (c) 2012, SoundCloud Ltd., Tobias Bielohlawek
+
require 'fileutils'
require 'massive_sitemap/writer/base'
-# Write into File
+# MassiveSitemap Writer File
+# Extension to base writer for writing into file(s).
module MassiveSitemap
module Writer
@@ -1,7 +1,10 @@
-require 'zlib'
+# Copyright (c) 2012, SoundCloud Ltd., Tobias Bielohlawek
+require 'zlib'
require "massive_sitemap/writer/file"
-# Write into GZipped File
+
+# MassiveSitemap Writer GzipFile
+# Extension to file writer for gzip support
module MassiveSitemap
module Writer
@@ -1,3 +1,5 @@
+# Copyright (c) 2012, SoundCloud Ltd., Tobias Bielohlawek
+
require 'stringio'
require "massive_sitemap/writer/base"
@@ -57,7 +57,7 @@ def gz_filename(file = filename)
it 'takes custom writer' do
expect do
- MassiveSitemap.generate(:url => 'test.de/', :writer => MassiveSitemap::Writer::GzipFile) do
+ MassiveSitemap.generate(:url => 'test.de/', :writer => MassiveSitemap::Writer::GzipFile.new) do
add "dummy"
end
end.to change { ::File.exists?(gz_filename) }.to(true)
@@ -157,7 +157,7 @@ def gz_filename(file = filename)
it 'creates sitemap file' do
expect do
- MassiveSitemap.generate(:url => 'test.de/', :writer => MassiveSitemap::Writer::GzipFile) do
+ MassiveSitemap.generate(:url => 'test.de/', :writer => MassiveSitemap::Writer::GzipFile.new) do
add "dummy"
end
end.to change { ::File.exists?(gz_filename(index_filename)) }.to(true)
View
@@ -186,29 +186,29 @@
it { writer.send(:chaos_monkey_stream_ids, %w(sitemap-1.xml), 1).should == %w(sitemap-1.xml) }
it "keeps file for 2 days" do
- Time.stub!(:now).and_return(Time.parse("1-1-2012"))
+ Time.stub!(:now).and_return(Time.parse("1-1-2012").utc)
writer.send(:chaos_monkey_stream_ids, %w(sitemap-1.xml), 2).should == []
end
it "deletes file on snd day" do
- Time.stub!(:now).and_return(Time.parse("2-1-2012"))
+ Time.stub!(:now).and_return(Time.parse("2-1-2012").utc)
writer.send(:chaos_monkey_stream_ids, %w(sitemap-1.xml), 2).should == %w(sitemap-1.xml)
end
end
context "many files" do
it "keeps file for 2 days" do
- Time.stub!(:now).and_return(Time.parse("1-1-2012"))
+ Time.stub!(:now).and_return(Time.parse("1-1-2012").utc)
writer.send(:chaos_monkey_stream_ids, %w(sitemap-1.xml sitemap-2.xml sitemap-3.xml), 2).should == %w(sitemap-2.xml)
end
it "deletes file on 2nd day" do
- Time.stub!(:now).and_return(Time.parse("2-1-2012"))
+ Time.stub!(:now).and_return(Time.parse("2-1-2012").utc)
writer.send(:chaos_monkey_stream_ids, %w(sitemap-1.xml sitemap-2.xml sitemap-3.xml), 2).should == %w(sitemap-1.xml sitemap-3.xml)
end
it "deletes file on 3rd day" do
- Time.stub!(:now).and_return(Time.parse("3-1-2012"))
+ Time.stub!(:now).and_return(Time.parse("3-1-2012").utc)
writer.send(:chaos_monkey_stream_ids, %w(sitemap-1.xml sitemap-2.xml sitemap-3.xml), 2).should == %w(sitemap-2.xml)
end
end

0 comments on commit 90dcf33

Please sign in to comment.