From afc595ea30ba3613767ea279e1957a44cfec5483 Mon Sep 17 00:00:00 2001 From: Ritesh Agrawal Date: Thu, 29 Mar 2012 18:17:02 -0700 Subject: [PATCH] First update --- Memento.rb | 57 ++++++++ MementoException.rb | 8 ++ parser/Parser.rb | 82 +++++++++++ parser/ParserManager.rb | 45 ++++++ parser/text/AmazonXMLParser.rb | 50 +++++++ parser/text/BibTexParser.rb | 7 + parser/url/ACMPortalParser.rb | 19 +++ parser/url/ASMParser.rb | 24 ++++ parser/url/AmazonParser.rb | 44 ++++++ parser/url/BlackwellSynergyParser.rb | 32 +++++ parser/url/HubMedParser.rb | 20 +++ parser/url/IngentaConnectParser.rb | 38 +++++ parser/url/LeoonlineParser.rb | 41 ++++++ parser/url/PubmedParser.rb | 21 +++ parser/url/SageParser.rb | 18 +++ test/ACMPortalParserTest.rb | 7 + test/ASMParserTest.rb | 14 ++ test/AbstractTest.rb | 6 + test/ParserManagerTest.rb | 15 ++ test/TextParserTest.rb | 11 ++ test/UrlParserTest.rb | 29 ++++ test/mybib.bib | 120 ++++++++++++++++ test/test.rb | 5 + writer/Bib2OfficeXML.rb.deprecated | 202 +++++++++++++++++++++++++++ writer/OfficeXML.rb | 107 ++++++++++++++ writer/Writer.rb | 11 ++ writer/WriterManager.rb | 17 +++ 27 files changed, 1050 insertions(+) create mode 100644 Memento.rb create mode 100644 MementoException.rb create mode 100644 parser/Parser.rb create mode 100644 parser/ParserManager.rb create mode 100644 parser/text/AmazonXMLParser.rb create mode 100644 parser/text/BibTexParser.rb create mode 100644 parser/url/ACMPortalParser.rb create mode 100644 parser/url/ASMParser.rb create mode 100644 parser/url/AmazonParser.rb create mode 100644 parser/url/BlackwellSynergyParser.rb create mode 100644 parser/url/HubMedParser.rb create mode 100644 parser/url/IngentaConnectParser.rb create mode 100644 parser/url/LeoonlineParser.rb create mode 100644 parser/url/PubmedParser.rb create mode 100644 parser/url/SageParser.rb create mode 100644 test/ACMPortalParserTest.rb create mode 100644 test/ASMParserTest.rb create mode 100644 test/AbstractTest.rb create mode 100644 
# Converts citation data from one representation to another.
#
# @param input_format [String] 'site' when +value+ is a URL whose citation
#   should be fetched; otherwise the name of a text format that
#   Memento::ParserManager.get_text_parser understands (e.g. 'bibtex').
# @param output_format [String] name of a writer that
#   Memento::WriterManager.get_writer understands.
# @param value [String] the URL, or the raw citation text.
# @return whatever the selected writer's #export returns.
# @raise [MementoException] when an argument is nil or blank, or when no
#   parser could be found.
def self.transform(input_format, output_format, value)
  # Sanity checks. Whitespace-only arguments count as missing; previously
  # they slipped through and failed later with a misleading message.
  required = { 'input_format' => input_format, 'output_format' => output_format, 'text' => value }
  required.each do |name, argument|
    raise MementoException, "Error: Missing required parameter: #{name}" if argument.nil? || argument.to_s.strip.empty?
  end

  # 'site' means +value+ is a website URL, so a URL parser fetches the text.
  parser = if input_format.to_s.downcase.strip == 'site'
             Memento::ParserManager.get_url_parser(value)
           else
             Memento::ParserManager.get_text_parser(input_format)
           end
  raise MementoException, "Unable to find required parser" if parser.nil?

  # Resolve the writer before parsing so that an unsupported output format
  # fails before any (possibly remote) citation fetch happens.
  writer = Memento::WriterManager.get_writer(output_format)

  parser.value = value
  writer.export(parser.get_data)
end
module Memento
  module Parser
    # Base class for parsers that turn raw citation text (assigned to
    # #value) into parsed entries. Subclasses implement #get_data.
    class TextParser
      # The raw citation text to parse.
      attr_accessor :value

      # Ensures there is something to parse.
      #
      # @raise [MementoException] when #value is nil or blank.
      def validate
        # The original checked an undefined local `text`, which raised
        # NameError instead of the intended MementoException.
        raise MementoException, "Error: Missing required text" if @value.nil? || @value.to_s.strip.empty?
      end

      # Extracts citation information from #value.
      # Abstract: subclasses must override.
      def get_data
        raise 'calling abstract method: get_data'
      end
    end

    # Base class for parsers that start from an article URL (assigned to
    # #value), download the site's citation export and hand the result to
    # the matching text parser.
    class UrlParser
      # The article URL to process.
      attr_accessor :value

      # @raise [MementoException] when #value is nil or blank.
      def validate
        raise MementoException, 'Invalid Url' if @value.nil? || @value.to_s.strip.empty?
      end

      # Downloads the citation for #value and parses it.
      #
      # @return whatever the text parser selected by #get_citation_format
      #   returns from its #get_data.
      # @raise [MementoException] when the URL is invalid or the download
      #   came back empty.
      def get_data
        validate
        citation = Memento.get_page(get_citation_url, get_form_parameters, get_referrer)
        raise MementoException, 'Error: Unable to fetch citation details' if citation.to_s.strip.empty?

        # No debug printing here: this is library code and callers own stdout.
        text_parser = Memento::ParserManager.get_text_parser(get_citation_format)
        text_parser.value = citation
        text_parser.get_data
      end

      protected

      # Form parameters to POST when the citation export sits behind a form.
      # An empty hash means a plain GET.
      def get_form_parameters
        {}
      end

      # Referrer to present when requesting the citation: the article URL.
      # The original returned @url, which is never assigned (always nil).
      def get_referrer
        @value
      end

      #============ ABSTRACT METHODS ============#
      # Subclasses of UrlParser need to implement at least these two methods.
      #==========================================#

      # Returns the URL from which the citation details can be fetched.
      def get_citation_url
        raise MementoException, 'Called abstract method: get_citation_url'
      end

      # Returns the format name of the downloaded citation (e.g. 'bibtex').
      def get_citation_format
        raise MementoException, 'Called abstract method: get_citation_format'
      end
    end
  end
end
"http://onlinelibrary.wiley.com", 'parser' => 'BlackwellSynergyParser'} + + } + + def self.get_text_parser(format) + raise MementoException, "Error: Missing require parameter: format" if format.nil? or format.empty? + info = TEXT_PARSER[format.to_s.downcase.strip] + raise MementoException, "Error: unsupported text format: #{format}" if info.nil? + Kernel.const_get(info['parser']).new + end + + def self.get_url_parser(url) + raise MementoException, "Error: Missing required parameter url" if url.nil? or url.empty? + WEBSITES.each do |key, value| + return Kernel.const_get(value['parser']).new if url =~ /#{key}/ + end + raise MementoException, "Error: Parsing is not supported for this website" + end #get_parser + + end +end \ No newline at end of file diff --git a/parser/text/AmazonXMLParser.rb b/parser/text/AmazonXMLParser.rb new file mode 100644 index 0000000..e7d434c --- /dev/null +++ b/parser/text/AmazonXMLParser.rb @@ -0,0 +1,50 @@ +require 'libxml' + +class AmazonXMLParser < Memento::Parser::UrlParser + + def get_data + validate + doc = XML::Parser.string(@value) + item = doc.parse + item = item.root.find('./Items/Item') + if('Book' != item.attributes) + + $item = $xml->Items->Item; + if('Book' != (string)$item->ItemAttributes->ProductGroup) + throw new Exception("Currently only books can be imported from Amazon"); + + $article['doctype'] = 'book'; + $article['url'] = trim($item->DetailPageURL); + $article['title'] = trim($item->ItemAttributes->Title); + $article['publisher']=trim($item->ItemAttributes->Publisher); + $article['pages'] = trim($item->ItemAttributes->NumberOfPages); + $imgUrl = trim($item->SmallImage->URL); + + if(!empty($imgUrl)){ + $ch = curl_init($imgUrl); + $ext = strtolower(end(explode('.', $imgUrl))); + uses('neat_string'); + $neat = new NeatString(); + $filename = $neat->randomPassword(10) . '.' . $ext; + $fp = fopen(ARTICLE_ICON . 
# URL parser for the ACM Digital Library (dl.acm.org).
#
# Builds the BibTeX export URL from the article id found in the citation
# page URL.
class ACMPortalParser < Memento::Parser::UrlParser
  PATTERNS = [
    # The id must be a query parameter of its own. The original
    # case-insensitive 'id=(\d*)' could match inside another parameter such
    # as 'CFID=...' when that came first, and it accepted an empty id.
    # NOTE(review): 'doid=' is still accepted because the original pattern
    # matched it too - confirm whether ACM URLs really use it.
    Regexp.new('[?&](?:do)?id=(\d+)', Regexp::IGNORECASE)
  ].freeze
  BASE_URL = 'http://dl.acm.org/exportformats.cfm?expformat=bibtex&id='

  # @return [String] the URL of the BibTeX export for the article in #value.
  # @raise [MementoException] when #value is blank or contains no article id.
  def get_citation_url
    validate
    PATTERNS.each do |pattern|
      found = @value.match(pattern)
      return BASE_URL + found[1] if found
    end
    raise MementoException, "Error: Unable to find citation information"
  end

  # @return [String] the format of the downloaded citation.
  def get_citation_format
    'bibtex'
  end
end
# Builds the citation-manager URL for the ASM article in #value.
#
# Each pattern in PATTERNS captures the journal sub-domain (group 1) and
# the article path (group 2); the export URL lives on the same sub-domain
# and receives "journal;path" as its +gca+ parameter.
#
# @return [String] the BibTeX export URL.
# @raise [MementoException] when #value matches none of the patterns.
def get_citation_url
  PATTERNS.each do |pattern|
    match = @value.match(pattern)
    next unless match && match.length == 3

    journal = match[1]
    article = match[2]
    # URI.escape was removed in Ruby 3.0; encode the value as a regular
    # query-string component instead.
    return BASE_URL.sub('DOMAIN', journal) + URI.encode_www_form_component("#{journal};#{article}")
  end
  raise MementoException, "Error: Unable to find link to bibtex"
end
# URL parser for Wiley Online Library (formerly Blackwell Synergy).
#
# The citation is obtained by POSTing the article's DOI to Wiley's
# citation download form.
class BlackwellSynergyParser < Memento::Parser::UrlParser
  PATTERNS = [
    # '(?:abstract|full)' is a real alternation. The original
    # '[abstract|full]' was a character class matching one single letter,
    # and its greedy '(.*)' could swallow trailing path segments.
    Regexp.new('doi/(.+?)/(?:abstract|full)', Regexp::IGNORECASE)
  ].freeze
  BASE_URL = 'http://onlinelibrary.wiley.com/documentcitationdownloadformsubmit'

  # Form fields that are the same for every request.
  FORM_DEFAULTS = {
    'hasAbstract' => 'CITATION_AND_ABSTRACT',
    'fileFormat' => 'BIBTEX',
    'submit' => 'Submit'
  }.freeze

  # @return [Hash] the form fields to POST, including the article's 'doi'.
  # @raise [MementoException] when no DOI can be found in #value.
  def get_form_parameters
    doi = nil
    PATTERNS.each do |pattern|
      match = @value.to_s.match(pattern)
      next unless match
      doi = match[1]
      break
    end
    raise MementoException, "Error: Unable to find DOI" if doi.nil?

    FORM_DEFAULTS.merge('doi' => doi)
  end

  # @return [String] the URL of the citation download form.
  def get_citation_url
    BASE_URL
  end

  # @return [String] the format of the downloaded citation.
  def get_citation_format
    'bibtex'
  end
end
# URL parser for IngentaConnect (ingentaconnect.com).
#
# Ported from the PHP implementation that used to be pasted into this class
# body (where it was a Ruby syntax error):
#   1. fetch the article page and look for its "BibText Export" link;
#   2. otherwise derive the export URL from the article URL itself by
#      appending '?format=bib'.
class IngentaConnectParser < Memento::Parser::UrlParser
  # Export link inside the article page's HTML.
  EXPORT_LINK = Regexp.new('title="BibText Export" href="([^\"]*)"', Regexp::IGNORECASE)
  # Article path inside the article URL.
  ARTICLE_PATH = Regexp.new('ingentaconnect\.com[^/]*/(.*)', Regexp::IGNORECASE)
  PATTERNS = [EXPORT_LINK, ARTICLE_PATH].freeze
  BASE_URL = 'http://www.ingentaconnect.com'

  # @return [String] the URL of the BibTeX export for the article in #value.
  # @raise [MementoException] when neither the page nor the URL yields one.
  #   The original fell off the end of the loop and returned PATTERNS.
  def get_citation_url
    # The export link is part of the page content, not of the URL, so the
    # page has to be downloaded first.
    page = Memento.get_page(@value).to_s
    link = page.match(EXPORT_LINK)
    return BASE_URL + link[1] if link

    path = @value.to_s.match(ARTICLE_PATH)
    return "#{BASE_URL}/#{path[1]}?format=bib" if path

    raise MementoException, "Error: Unable to find bibtext link"
  end

  # @return [String] the format of the downloaded citation.
  def get_citation_format
    'bibtex'
  end
end
# URL parser for SAGE Journals (sagepub.com).
#
# Group 1 of each pattern is the journal sub-domain, group 2 the
# volume/issue/page path; both are combined into the +gca+ parameter of
# SAGE's citation manager.
class SageParser < Memento::Parser::UrlParser
  PATTERNS = [
    # '(?:abstract|reprint)' is a real alternation; the original
    # '[abstract|reprint]' was a character class matching one single letter.
    # '[\d\/]+' requires a non-empty article path.
    Regexp.new('https?:\/\/([^\.]*).*(?:abstract|reprint)\/([\d\/]+)', Regexp::IGNORECASE)
  ].freeze
  BASE_URL = 'http://online.sagepub.com/cgi/citmgr?type=bibtex&gca=sp'

  # @return [String] the BibTeX export URL for the article in #value.
  # @raise [MementoException] when #value matches none of the patterns.
  def get_citation_url
    PATTERNS.each do |pattern|
      match = @value.to_s.match(pattern)
      return "#{BASE_URL}#{match[1]};#{match[2]}" if match
    end
    raise MementoException, "Error: Unable to find unique identifier"
  end

  # @return [String] the format of the downloaded citation.
  def get_citation_format
    'bibtex'
  end
end

# The class in SageParser.rb was originally named ACMPortal, apparently a
# copy-paste slip. The old name is kept as an alias so existing references
# keep working; remove it once nothing uses it.
ACMPortal = SageParser
# Checks the text (non-URL) conversion path: BibTeX in, MS Office XML out.
class TextParserTest < Test::Unit::TestCase
  # Resolved relative to this file so the test does not depend on the
  # directory the test runner was started from.
  BIB_FIXTURE = File.expand_path('mybib.bib', File.dirname(__FILE__))

  def test_bibtex2office
    # File.read closes the handle; File.open(...).read left it open.
    xml = Memento.transform('bibtex', 'msofficexml', File.read(BIB_FIXTURE))

    # Assert instead of printing, so a broken conversion fails the test.
    assert_not_nil xml, 'Failed to convert the BibTeX fixture'
    assert !xml.to_s.empty?, 'Conversion returned an empty document'
  end
end
diff --git a/test/UrlParserTest.rb b/test/UrlParserTest.rb new file mode 100644 index 0000000..4805b9e --- /dev/null +++ b/test/UrlParserTest.rb @@ -0,0 +1,29 @@ +require_relative 'AbstractTest' + +class HubMedParserTest < AbstractTest + TEST_URL = [ + + # ACM PORTAL + 'http://dl.acm.org/citation.cfm?id=505168.505187&coll=DL&dl=GUIDE', + + # ASM + 'http://jvi.asm.org/content/85/23/12474.abstract', + + #Blackwell Synergey or Wiley + 'http://onlinelibrary.wiley.com/doi/10.1002/smr.509/abstract', + + #HUBMED + 'http://www.hubmed.org/display.cgi?uids=21809171', + + #PubMed - ncbi.nlm.nih.gov + 'http://www.ncbi.nlm.nih.gov/pubmed/22454401' + ] + + def test_officeXML + TEST_URL.each do |url| + result= Memento.transform('site', 'msofficexml', url) + puts result + assert_not_nil result, "Failed: #{url}" + end + end +end \ No newline at end of file diff --git a/test/mybib.bib b/test/mybib.bib new file mode 100644 index 0000000..caf49d2 --- /dev/null +++ b/test/mybib.bib @@ -0,0 +1,120 @@ +@InProceedings{ 2008hst..prop11557C, + author = "G. {Canalizo}", + title = "{The Nature of low-ionization BAL QSOs}", + booktitle = "HST Proposal", + year = 2008, + month = jul, + pages = "11557--+", + url = "http://adsabs.harvard.edu/abs/2008hst..prop11557C", + adsnote = "Provided by the SAO/NASA Astrophysics Data System" +} + +@InProceedings{ 2008AIPC.1053...63C, + author = "G. {Canalizo} and M. {Wold} and M. {Lazarova} and M. 
{Lacy}", + title = "{Quasar Black Hole Masses from Velocity Dispersions}", + keywords = "Quasars, Galactic nuclei, circumnuclear matter, and bulges, Solid solution hardening, precipitation hardening, and dispersion hardening, aging", + booktitle = "American Institute of Physics Conference Series", + year = 2008, + series = "American Institute of Physics Conference Series", + volume = 1053, + archivePrefix = "arXiv", + eprint = "0807.2433", + editor = "{S.~K.~Chakrabarti \& A.~S.~Majumdar}", + month = oct, + pages = "63--66", + doi = "10.1063/1.3009525", + url = "http://adsabs.harvard.edu/abs/2008AIPC.1053...63C", + adsnote = "Provided by the SAO/NASA Astrophysics Data System" +} + +@InProceedings{ 2008sptz.prop50792C, + author = "G. {Canalizo} and M. {Lacy} and M. {Lazarova}", + title = "{The nature of low-ionization BAL QSOs}", + booktitle = "Spitzer Proposal ID 50792", + year = 2008, + month = mar, + pages = "50792--+", + url = "http://adsabs.harvard.edu/abs/2008sptz.prop50792C", + adsnote = "Provided by the SAO/NASA Astrophysics Data System" +} + +@Article{ 2007ApJ...669..801C, + author = "G. {Canalizo} and N. {Bennert} and B. {Jungwiert} and A. {Stockton} and F. {Schweizer} and M. {Lacy} and C. {Peng}", + title = "{Spectacular Shells in the Host Galaxy of the QSO MC2 1635+119}", + journal = "ApJ", + archivePrefix = "arXiv", + eprint = "0707.2951", + keywords = "Galaxies: Active, Galaxies: Evolution, Galaxies: Interactions, Galaxies: Quasars: General, quasars: individual (MC2 1635+119)", + year = 2007, + month = nov, + volume = 669, + pages = "801--809", + doi = "10.1086/521721", + url = "http://adsabs.harvard.edu/abs/2007ApJ...669..801C", + adsnote = "Provided by the SAO/NASA Astrophysics Data System" +} +@ARTICLE{Bailey, + author = "D. H. Bailey and P. N. Swarztrauber", + title = "The fractional {F}ourier transform and applications", + journal = "SIAM Rev.", + volume = 33, + number = 3, + pages = "389--404", + year = 1991 + } + +@ARTICLE{Bay1, + author = "A. 
Bayliss and C. I. Goldstein and E. Turkel", + title = "An iterative method for the {H}elmholtz equation", + journal = "J. Comp. Phys.", + volume = 49, + pages = "443--457", + year = 1983 + } + +@TECHREPORT{Ernst, + author = "O. Ernst and G. Golub", + title = "A domain decomposition approach to solving the {H}elmholtz + equation with a radiation boundary condition", + number = "NA-92-08", + school = "Stanford University, Computer Science Department", + year = "August 1992" + } + +@TECHREPORT{Fujitsu, + organization = "Fujitsu", + title = "FACOM OS IV SSL II USER'S GUIDE, 99SP0050E5", + year = 1990 + } + +@ARTICLE{Gold3, + author = "C. I. Goldstein", + title = "Multigrid methods for elliptic problems in unbounded domains", + journal = "SIAM J. Numer. Anal.", + volume = 30, + pages = "159--183", + year = 1993 + } + +@BOOK{Hale, + author = "J. K. Hale", + title = "Theory of functional--differential equations", + publisher = "Springer--Verlag, Berlin--Heidelberg--New York", + year = 1977 + } + +@INBOOK{Swa82, + author = "P. N. Swarztrauber", + title = "Vectorizing the {FFTs}", + editor = "G.~Rodrigue", + booktitle = "Parallel Computations", + publisher = "Academic Press, New York", + year = 1982 + } + +@PHDTHESIS{Ta, + author = "S. 
Ta'asan", + title = "Multigrid Methods for Highly Oscillatory Problems", + school = "Weizmann Institute of Science, Rehovot, Israel", + year = "1984" + } \ No newline at end of file diff --git a/test/test.rb b/test/test.rb new file mode 100644 index 0000000..3b7e221 --- /dev/null +++ b/test/test.rb @@ -0,0 +1,5 @@ +require 'bibtex' +require '/Users/ragrawal/personal/Memento/rMemento/export/Bib2OfficeXML2' + +bib = BibTeX.open("mybib.bib") +puts Bib2OfficeXML2.new.export(bib) \ No newline at end of file diff --git a/writer/Bib2OfficeXML.rb.deprecated b/writer/Bib2OfficeXML.rb.deprecated new file mode 100644 index 0000000..6e605ff --- /dev/null +++ b/writer/Bib2OfficeXML.rb.deprecated @@ -0,0 +1,202 @@ +=begin + * @author Ritesh Agrawal + * @version 2.0 + * Takes Bibtex output and returns Office2007 XML String +=end + +class Bib2OfficeXML + + attr_reader :doc_types, :generic, :book , :book_section, :report, :misc, :article_in_preiodical, :conference_proceedings, :journal_article + + def initialize + @doc_types = { + 'article' => {'name' => 'JournalArticle', 'fields' => ['journal_article'] }, + 'book' => {'name' => 'Book', 'fields' => ['book']}, + 'booklet' => {'name' => 'Book', 'fields' => ['book']}, + 'conference'=> {'name' => 'ConferenceProceedings', 'fields' => ['conference_proceedings']}, + 'inbook' => {'name' => 'BookSection', 'fields' => ['book_section']}, + 'incollection' => {'name' => 'ArticleInAPeriodical', 'fields' => ['article_in_preiodical']}, + 'inproceedings' => {'name' => 'ConferenceProceedings', 'fields' => ['conference_proceedings']}, + 'manual' => {'name' => 'Report', 'fields' => ['report']}, + 'masterthesis' => {'name' => 'Report', 'fields' => ['report']}, + 'misc' => {'name' => 'Misc', 'fields' => ['Misc']}, + 'phdthesis' => {'name' => 'Report', 'fields' => ['report']}, + 'proceedings' => {'name' => 'ConferenceProceedings', 'fields' => ['conference_proceedings']}, + 'techreport' => {'name' => 'Report', 'fields' => ['report']}, + 'unpublished' => 
{'name' => 'Misc', 'fields' => ['misc']} + }.freeze + + @generic = { + 'id' => 'b:Tag', + 'title' => 'b:Title', + 'year' => 'b:Year' + }.freeze + + @book = { + 'author' => 'b:Author/b:Author/b:NameList', + 'place' => 'b:CountryRegion', + 'publisher' => 'b:Publisher', + 'editor' => 'b:Author/b:Editor/b:NameList', + 'volume' => 'b:Volume', + 'issn' => 'b:StandardNumber', + 'pages' => 'b:Pages' + }.freeze + + @book_section = { + 'author' => 'b:Author/b:Author/b:NameList', + 'sec_title'=> 'b:BookTitle', + 'pages' => 'b:Pages', + 'place' => 'b:CountryRegion', + 'publisher' => 'b:Publisher', + 'editor' => 'b:Author/b:Editor/b:NameList', + 'volume' => 'b:Volume', + 'issn' => 'b:StandardNumber' + }.freeze + + @journal_article = { + 'author' => 'b:Author/b:Author/b:NameList', + 'journal' => 'b:JournalName', + 'month' => 'b:Month', + 'day' => 'b:Day', + 'pages' => 'b:Pages', + 'editor' => 'b:Author/b:Editor/b:NameList', + 'publisher' => 'b:Publisher', + 'volume' => 'b:Volume', + 'issue' => 'b:Issue', + 'issn' => 'b:StandardNumber' + }.freeze + + @article_in_preiodical = { + 'author' => 'b:Author/b:Author/b:NameList', + 'journal' => 'b:PeriodicalTitle', + 'month' => 'b:Month', + 'day' => 'b:Day', + 'pages' => 'b:Pages', + 'editor' => 'b:Author/b:Editor/b:NameList', + 'publisher' => 'b:Publisher', + 'volume' => 'b:Volume', + 'issue' => 'b:Issue', + 'issn' => 'b:StandardNumber' + }.freeze + + @conference_proceedings = { + 'author' => 'b:Author/b:Author/b:NameList', + 'editor' => 'b:Author/b:Editor/b:NameList', + 'pages' => 'b:Pages', + 'journal' => 'b:ConferenceName', + 'place' => 'b:City', + 'publisher' => 'b:Publisher', + 'volume' => 'b:Volume', + 'issn' => 'b:StandardNumber' + }.freeze + + @report = { + 'author' => 'b:Author/b:Author/b:NameList', + 'publisher' => 'b:Publisher', + 'place' => 'b:City', + 'pages' => 'b:Pages', + 'doctype' => 'b:ThesisType', + 'issn' => 'b:StandardNumber' + }.freeze + + + @misc = { + 'author' => 'b:Author/b:Author/b:NameList', + 'sec_title' => 
'b:PublicationTitle', + 'year' => 'b:Year', + 'month' => 'b:Month', + 'day' => 'b:Day', + 'place' => 'b:CountryRegion', + 'publisher' => 'b:Publisher', + 'editor' => 'b:Author/b:Editor/b:NameList', + 'pages' => 'b:Pages', + 'volume' => 'b:Volume', + 'issue' => 'b:Issue', + 'issn' => 'b:StandardNumber' + }.freeze + + end + + + # Function: EXPORT + # data: Bibtex parsed data - single entity + + def export(data) + + sources = '' + sources = sources + '' + + data.each do |record| + #sanity check + next if record.nil? or record.empty? + + type = @doc_types[record.type.to_s] + next if type.nil? or type.empty? + + source = '' + type["name"] + '' + + #Process Generic Fields + generic.each do |field, tag| + source = source + "<#{tag}>" + record.send(field).to_s + "" + end + + #Process specific fields + fields = type["fields"] + + fields.each do |field_for| + send(field_for).each do |field, tag| + unless record.respond_to?(field) + puts "missing #{field}" + next + end + if(field == "author" and record.author.length > 0 ) + source = source + process_authors(record.author) + elsif(field == "editor" ) + source = source + process_editors(record.editor) + else + source = source + "<#{tag}>" + record.send(field).to_s + "" + end + end #loop field_for + end #loop fields + source = source + '' + sources = sources + source + + end # loop data + sources = sources + ''; + return sources; + + end #function export + + private + def process_authors(authors) + return if authors.nil? or authors.empty? + source = '' + authors.each do |author| + source = source + author_tag(author) + end + source = source + '' + return source + end + + def process_editors(editors) + return if editors.nil? or editors.empty? 
# Serialises parsed BibTeX records as an MS Office bibliography document.
#
# @param data an enumerable of records. Each record is expected to respond
#   to #type, #id and #fields (field name => value). Records that are nil,
#   empty, of another shape, or of a type missing from @DOC_TYPES are
#   skipped.
# @return [String] the XML document.
#
# NOTE(review): in the reviewed copy every markup literal in this method had
# been reduced to '' (the closing tags were missing as well). The element
# names below follow the Office Open XML bibliography schema - confirm them
# against the original file.
def export(data)
  # Field values are free text ('&' occurs in the bundled test fixture),
  # so they must be escaped to keep the document well-formed.
  escape = lambda do |text|
    text.to_s.gsub(/[&<>]/, '&' => '&amp;', '<' => '&lt;', '>' => '&gt;')
  end

  xml = [
    '<?xml version="1.0"?>',
    '<b:Sources SelectedStyle="" ' \
      'xmlns:b="http://schemas.openxmlformats.org/officeDocument/2006/bibliography" ' \
      'xmlns="http://schemas.openxmlformats.org/officeDocument/2006/bibliography">'
  ]

  data.each do |record|
    # sanity checks
    next if record.nil? || (record.respond_to?(:empty?) && record.empty?)
    next unless record.respond_to?(:type) && record.respond_to?(:fields)

    # Unknown entry types are skipped; merging nil raised TypeError before.
    type_name = record.type.to_s.downcase
    _, doc_type = @DOC_TYPES.find { |name, _| name.to_s == type_name }
    next if doc_type.nil?
    fields = @FIELD_MAPPING.merge(doc_type)

    xml << '<b:Source>'
    # fields that require custom handling
    xml << "<b:SourceType>#{fields[:name]}</b:SourceType>"
    xml << "<#{fields[:id]}>#{escape.call(record.id)}</#{fields[:id]}>"

    record.fields.each do |key, value|
      case key
      when :author
        # Handled before the tag lookup: neither mapping has an :author or
        # :editor entry, so the original never reached these branches.
        # to_s because the helpers return nil for empty lists.
        xml << process_authors(value).to_s
      when :editor
        # The original called the undefined method `process_editor`, and
        # also emitted editors a second time before this loop.
        xml << process_editors(value).to_s
      when :name
        next # :name carries the source type, it is not a field tag
      else
        tag = fields[key]
        xml << "<#{tag}>#{escape.call(value)}</#{tag}>" if tag
      end
    end

    xml << '</b:Source>'
  end

  xml << '</b:Sources>'
  xml.join
end #function export
module Memento
  # Registry of output writers, looked up by format name.
  module WriterManager
    # format name => display name and the name of the class implementing it.
    # (The class name is stored under the 'parser' key.)
    WRITERS = {
      :msofficexml => {'name'=>'MS Office XML', 'parser'=>'OfficeXML'}
    }.freeze

    # Creates the writer registered for +format+.
    #
    # @param format [String, Symbol] format name; case and surrounding
    #   whitespace are ignored.
    # @return a new instance of the writer class.
    # @raise [MementoException] when +format+ is nil/blank or unknown.
    def self.get_writer(format)
      wanted = format.to_s.downcase.strip
      raise MementoException, 'Error: Missing required parameter: format' if wanted.empty?

      # Compare as strings rather than turning caller input into a Symbol.
      _, info = WRITERS.find { |name, _| name.to_s == wanted }
      # Double quotes: the original single-quoted literal printed
      # '#{format}' verbatim instead of the offending value.
      raise MementoException, "Error: unsupported writer type: #{format}" if info.nil? || info.empty?

      Object.const_get(info['parser']).new
    end
  end
end