Skip to content

Commit

Permalink
first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
noguchi999 committed May 3, 2012
1 parent 8da0500 commit 7ab9cf0
Show file tree
Hide file tree
Showing 4 changed files with 140 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .autotest
@@ -0,0 +1,3 @@
require 'autotest/growl'

# Point Autotest::Growl at an image directory located under the current
# user's home directory (taken from the HOME environment variable).
growl_image_dir = File.join(ENV['HOME'], 'Pictures/growl/autotest_image')
Autotest::Growl.image_dir = growl_image_dir
1 change: 1 addition & 0 deletions .rspec
@@ -0,0 +1 @@
--colour
30 changes: 30 additions & 0 deletions spec/weblinks_spec.rb
@@ -0,0 +1,30 @@
# coding: utf-8
require 'rspec'
require File.expand_path("weblinks")

describe Weblinks, "instance when it " do
  # Weblinks.new fetches the start URL right away, so every example below
  # talks to the live site and needs network access.
  before do
    opts = {url: 'http://ec2-46-51-232-200.ap-northeast-1.compute.amazonaws.com/2013/madorin/'}
    @weblinks = Weblinks.new(opts)
  end

  it "method dump should create files name of weblinks.log and weblinks_error.log in ./log/ ." do
    app_log = File.expand_path("log/weblinks.log")
    app_error_log = File.expand_path("log/weblinks_error.log")

    # Remove leftovers one file at a time. A single rm over both paths stops
    # at the first missing file and leaves the other one behind, which would
    # let a stale log from an earlier run satisfy the expectations below.
    [app_log, app_error_log].each do |path|
      begin
        File.delete(path) if File.exist?(path)
      rescue SystemCallError => e
        # Best effort, as before: report the problem and carry on.
        puts e
      end
    end

    @weblinks.dump

    # Checked separately so a failure names the file that is missing.
    File.exist?(app_log).should be_true
    File.exist?(app_error_log).should be_true
  end

  it "method to_a should return Array size 43" do
    results = @weblinks.to_a

    results.size.should eql 43
  end
end
106 changes: 106 additions & 0 deletions weblinks.rb
@@ -0,0 +1,106 @@
# coding: utf-8
require 'mechanize'
require 'logger'

# Crawls a web site with Mechanize, starting from a given URL and following
# the links found on each page, and records "<page uri> : <page title>" for
# every page reached. Failures while following a link are recorded separately.
#
# Results are available as an Array (#to_a) or as log files (#dump).
class Weblinks
  attr_reader :agent, :url, :before_link, :already_linked, :wrong_url, :exclusion_link

  # Directory, next to this file, that receives every log file.
  LOG_DIR = File.join(File.dirname(__FILE__), 'log')

  # Logger.new below cannot create the missing directory itself.
  Dir.mkdir(LOG_DIR) unless File.directory?(LOG_DIR)

  Mechanize.log = Logger.new(File.join(LOG_DIR, "access_#{Time.now.to_i}.log"))

  # Builds the Mechanize agent and immediately fetches the start page.
  #
  # options - Hash of settings merged over the defaults:
  #           :url            - start page; also the prefix that decides
  #                             whether an absolute link belongs to the site.
  #           :user_agent     - Mechanize user agent alias.
  #           :wrong_url      - links (URI objects or strings) never followed.
  #           :exclusion_link - links (URI objects or strings) never followed.
  #           :auth           - accepted, but not used anywhere in this class.
  def initialize(options={})
    opts = {url: nil, user_agent: 'Windows Mozilla', wrong_url: [], exclusion_link: [], auth: nil}.merge(options)

    @url = opts[:url]
    @before_link = nil
    @already_linked = []
    @wrong_url = opts[:wrong_url]
    @exclusion_link = opts[:exclusion_link]
    @app_logs = []
    @app_error_logs = []

    @agent = Mechanize.new
    @agent.user_agent_alias = opts[:user_agent]
    @agent.follow_meta_refresh = true
    @agent.get(@url)
  end

  # Crawls the site and writes the collected entries to the log files.
  def dump
    execute

    dump_log
  end

  # Crawls the site and returns the page entries followed by the error
  # entries as a single Array of Strings.
  def to_a
    execute

    @app_logs + @app_error_logs
  end

  private

  # Runs the crawl from the agent's current page, then sorts and
  # de-duplicates both result lists once, after the whole crawl.
  def execute
    crawl

    @app_logs = @app_logs.sort.uniq
    @app_error_logs = @app_error_logs.sort.uniq
  end

  # Follows every followable link of the agent's current page and recurses
  # into each page reached that still offers something new.
  def crawl
    @agent.page.links.each do |link|
      next unless followable?(link)

      begin
        @before_link = link.uri
        @already_linked << @before_link
        link.click
        dump_link
        # Descend only when the new page has links left to visit. The page
        # is not requested a second time when it has none: the extra request
        # is redundant and a relative URI could resolve to the wrong address.
        crawl if white_link_count(@agent.page.links) > 0
      rescue => e
        @app_error_logs << "#{link.uri.to_s} : #{link} #{e} -----------------------------------------"
      end
    end
  end

  # True when the link is neither excluded, already visited, nor rejected.
  def followable?(link)
    !(exclusion_link?(link) || already_linked?(link) || wrong_url?(link))
  end

  # True when the link is absolute (starts with "http") and does not start
  # with the configured start URL. Plain prefix comparison is used so that
  # characters such as "." or "?" in the URL are matched literally.
  def outer_url?(link)
    target = link.uri.to_s
    target.start_with?('http') && !target.start_with?(@url.to_s)
  end

  # True when the link is listed in :wrong_url or points outside the site.
  def wrong_url?(link)
    listed?(@wrong_url, link) || outer_url?(link)
  end

  # True when the link's URI has already been followed.
  def already_linked?(link)
    @already_linked.include? link.uri
  end

  # True when the link is listed in :exclusion_link.
  def exclusion_link?(link)
    listed?(exclusion_link, link)
  end

  # True when the list holds the link's URI, either as a URI object or as
  # its String form. A URI object never equals a String, so both are tried.
  def listed?(list, link)
    uri = link.uri
    list.include?(uri) || list.include?(uri.to_s)
  end

  # Records the agent's current page as "<uri> : <title>".
  def dump_link
    @app_logs << "#{@agent.page.uri} : #{@agent.page.title}"
  end

  # Number of the given links that the crawl would still follow.
  def white_link_count(links)
    links.count { |link| followable?(link) }
  end

  # Writes the page entries to "<this file's basename>.log" and the error
  # entries to "<this file's basename>_error.log" in LOG_DIR, replacing
  # existing files.
  def dump_log
    Dir.mkdir(LOG_DIR) unless File.directory?(LOG_DIR)
    base = File.basename(__FILE__, '.rb')

    write_lines(File.join(LOG_DIR, "#{base}.log"), @app_logs)
    write_lines(File.join(LOG_DIR, "#{base}_error.log"), @app_error_logs)
  end

  # Writes each entry on its own line; an empty list yields an empty file.
  def write_lines(path, entries)
    File.open(path, 'w') do |file|
      entries.each { |entry| file.puts entry }
    end
  end
end

0 comments on commit 7ab9cf0

Please sign in to comment.