Update scraper.rb

ostephens · Nov 29, 2016 · bfed7ab · bfed7ab
1 parent 41b40dd
commit bfed7ab
Showing 1 changed file with 35 additions and 24 deletions.
diff --git a/scraper.rb b/scraper.rb
@@ -1,25 +1,36 @@
-# This is a template for a Ruby scraper on morph.io (https://morph.io)
-# including some code snippets below that you should find helpful
+require 'scraperwiki'
+require 'nokogiri'
 
-# require 'scraperwiki'
-# require 'mechanize'
-#
-# agent = Mechanize.new
-#
-# # Read in a page
-# page = agent.get("http://foo.com")
-#
-# # Find somehing on the page using css selectors
-# p page.at('div.content')
-#
-# # Write out to the sqlite database using scraperwiki library
-# ScraperWiki.save_sqlite(["name"], {"name" => "susan", "occupation" => "software developer"})
-#
-# # An arbitrary query against the database
-# ScraperWiki.select("* from data where 'name'='peter'")
-
-# You don't have to do things with the Mechanize or ScraperWiki libraries.
-# You can use whatever gems you want: https://morph.io/documentation/ruby
-# All that matters is that your final data is written to an SQLite database
-# called "data.sqlite" in the current working directory which has at least a table
-# called "data".
+# A named collection page (name + URL) whose links to individual titles can be scraped.
+class Collection
+  attr_reader :name, :url
+  attr_accessor :titles
+
+  def initialize(name, url)
+    @url = url
+    @name = name
+    @titles = []
+  end
+
+  # Scrapes @url and appends one Title per <a> whose href contains 'titles'.
+  def getTitles
+    doc = Nokogiri::HTML(ScraperWiki.scrape(@url)) # HTML parser: //a must match regardless of namespaces
+    links = doc.xpath("//a[contains(@href, 'titles')]") # no trailing '/': that is not valid XPath
+    @titles.concat(links.map { |a| Title.new(a.inner_text.strip, a['href']) }) # Node#[] reads the attribute
+  end
+end
+
+# One title listed in a collection: its display text and link target, plus detail fields.
+class Title
+  attr_reader :title, :url
+  attr_accessor :coverage, :publisher, :issn, :publisher_url, :frequency, :notes
+
+  # Must be spelled `initialize`: Ruby only calls that name from Title.new(title, url).
+  def initialize(title, url)
+    @title = title
+    @url = url
+  end
+
+  def getDetails; end # not implemented yet; returns nil
+end
+