# frozen_string_literal: true

require 'scraperwiki'
require 'nokogiri'

# A collection page on the scraped site: a named URL whose page links out to
# a number of title pages. Calling #getTitles downloads the page and fills
# @titles with Title objects.
class Collection
  # name - display name of the collection
  # url  - URL of the collection's index page
  def initialize(name, url)
    @url = url
    @name = name
    @titles = [] # populated by #getTitles
  end

  attr_reader :name, :url
  attr_accessor :titles

  # Scrape the collection page and collect every link whose href contains
  # 'titles': the link text becomes the title's name, the href its URL.
  # Appends to @titles, so call once per Collection to avoid duplicates.
  def getTitles
    html = ScraperWiki.scrape(@url)
    # Scraped pages are HTML, so use the lenient HTML parser; the strict
    # XML parser (used previously) chokes on real-world tag soup.
    doc = Nokogiri::HTML(html)
    # The previous XPath ended with a stray '/' ("...]/") — invalid XPath
    # that raises a SyntaxError in Nokogiri.
    doc.xpath("//a[contains(@href, 'titles')]").each do |t|
      # Nokogiri nodes expose attributes via #[], not a #href method.
      @titles.push(Title.new(t.inner_text.strip, t['href']))
    end
  end
end

# A single title linked from a Collection page. The detail fields are
# writable so #getDetails (or a caller) can populate them after construction.
class Title
  # BUG FIX: this was misspelled `initialise`, so Ruby never invoked it and
  # @title/@url were always nil after Title.new.
  def initialize(title, url)
    @title = title
    @url = url
  end

  attr_reader :title, :url
  attr_accessor :coverage, :publisher, :issn, :publisher_url, :frequency, :notes

  # Placeholder: fetch and parse this title's detail page, filling in the
  # accessor fields above. Not yet implemented.
  def getDetails
  end
end