Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Add Mechanize gem to do scraping

  • Loading branch information...
commit 47e598b4b4dc4c654f851cfd1972c43f0844b2f2 1 parent 3f1ade7
samgooi4189 authored
View
1  Gemfile
@@ -3,6 +3,7 @@ source 'https://rubygems.org'
gem 'rails', '3.2.8'
gem 'therubyracer'
gem 'heroku'
+gem 'mechanize'
# Bundle edge Rails instead:
# gem 'rails', :git => 'git://github.com/rails/rails.git'
View
19 Gemfile.lock
@@ -38,6 +38,8 @@ GEM
coffee-script-source
execjs
coffee-script-source (1.3.3)
+ domain_name (0.5.4)
+ unf (~> 0.0.3)
erubis (2.7.0)
excon (0.16.4)
execjs (1.4.0)
@@ -64,9 +66,21 @@ GEM
i18n (>= 0.4.0)
mime-types (~> 1.16)
treetop (~> 1.4.8)
+ mechanize (2.5.1)
+ domain_name (~> 0.5, >= 0.5.1)
+ mime-types (~> 1.17, >= 1.17.2)
+ net-http-digest_auth (~> 1.1, >= 1.1.1)
+ net-http-persistent (~> 2.5, >= 2.5.2)
+ nokogiri (~> 1.4)
+ ntlm-http (~> 0.1, >= 0.1.1)
+ webrobots (~> 0.0, >= 0.0.9)
mime-types (1.19)
multi_json (1.3.6)
+ net-http-digest_auth (1.2.1)
+ net-http-persistent (2.7)
netrc (0.7.7)
+ nokogiri (1.5.5)
+ ntlm-http (0.1.1)
pg (0.14.1)
polyglot (0.3.3)
rack (1.4.1)
@@ -118,6 +132,10 @@ GEM
uglifier (1.3.0)
execjs (>= 0.3.0)
multi_json (~> 1.0, >= 1.0.2)
+ unf (0.0.5)
+ unf_ext
+ unf_ext (0.0.5)
+ webrobots (0.0.13)
PLATFORMS
ruby
@@ -126,6 +144,7 @@ DEPENDENCIES
coffee-rails (~> 3.2.1)
heroku
jquery-rails
+ mechanize
pg
rails (= 3.2.8)
sass-rails (~> 3.2.3)
View
16 script/googlefetch.rb
@@ -0,0 +1,16 @@
+require 'rubygems'
+require 'mechanize'
+
+a = Mechanize.new{ |agent|
+ agent.user_agent_alias = "Linux Firefox"
+}
+
+a.get('http://google.com/') do |page|
+ search_result = page.form_with(:name => 'f') do |search|
+ search.q = 'Hello World'
+ end.submit
+
+ search_result.links.each do |link|
+ puts link.text
+ end
+end
View
18 script/rubyforge.rb
@@ -0,0 +1,18 @@
+require 'rubygems'
+require 'mechanize'
+
+a = Mechanize.new
+a.get('http://rubyforge.org/') do |page|
+ login_page = a.click(page.link_with(:text => %rLog In/))
+
+ my_page = login_page.form_with(:action = '/account/login.php') do |f|
+ f.form_loginname = ARGV[0]
+ f.form_pw
+ end.click_button
+
+ my_page.links.each do |link|
+ text = link.text.strip
+ next unless text.length > 0
+ puts text
+ end
+end
View
13 script/wiscEvent.rb
@@ -0,0 +1,13 @@
+require 'rubygems'
+require 'mechanize'
+
+a = Mechanize.new
+a.user_agent_alias = 'Linux Firefox'
+a.get('http://www.today.wisc.edu/') do |page|
+ #puts page.links.inspect
+ puts page.parser.css(".title a").collect{|link| link["href"]}
+
+ #event_page = a.click(page.link_with(parser.css(".title a")))
+ #puts event_page.text
+end
+
Please sign in to comment.
Something went wrong with that request. Please try again.