Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

First update

  • Loading branch information...
commit afc595ea30ba3613767ea279e1957a44cfec5483 0 parents
@ragrawal authored
57 Memento.rb
@@ -0,0 +1,57 @@
+require 'curb'
+require 'cgi'
+
+require_relative 'MementoException'
+require_relative 'parser/ParserManager'
+require_relative 'writer/WriterManager'
+
+# Memento converts bibliographic citations between formats. The citation
+# comes either from raw text (e.g. BibTeX) or from the page of a supported
+# website, and is handed to a writer that produces the output format.
+module Memento
+  # Reads citation data in one format and exports it in another.
+  #
+  # input_format  - name of a text format known to ParserManager (e.g.
+  #                 'bibtex'), or 'site' when value is a URL
+  # output_format - name of a writer known to WriterManager
+  # value         - the citation text, or the URL when input_format is 'site'
+  #
+  # Returns whatever the selected writer's export method returns.
+  # Raises MementoException when a parameter is missing or no parser is found.
+  def self.transform(input_format, output_format, value)
+    # Sanity checks
+    raise MementoException, "Error: Missing required parameter: input_format" if input_format.nil? || input_format.empty?
+    raise MementoException, "Error: Missing required parameter: output_format" if output_format.nil? || output_format.empty?
+    raise MementoException, "Error: Missing required parameter: text" if value.nil? || value.empty?
+
+    # 'site' means value is a URL: the parser is then chosen from the URL.
+    parser = if input_format.downcase.strip == 'site'
+               Memento::ParserManager.get_url_parser(value)
+             else
+               Memento::ParserManager.get_text_parser(input_format)
+             end
+    raise MementoException, "Unable to find required parser" if parser.nil?
+
+    parser.value = value
+    data = parser.get_data
+
+    Memento::WriterManager.get_writer(output_format).export(data)
+  end
+
+  # Fetches a page over HTTP and returns its body.
+  #
+  # url        - address to request
+  # parameters - form fields; when non-empty the request is a POST with these
+  #              fields URL-encoded, otherwise it is a GET
+  # referer    - value for the Referer header; omitted when nil or empty
+  #
+  # Returns the response body, or nil when nothing could be retrieved.
+  def self.get_page(url, parameters = {}, referer = nil)
+    c = Curl::Easy.new(url)
+    c.follow_location = true
+    c.header_in_body = false
+    c.useragent = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2 GTB5'
+    c.enable_cookies = true
+    c.headers['Referer'] = referer.to_s unless referer.nil? || referer.to_s.empty?
+
+    # Sometimes, especially for PubMed URLs, the transfer falsely reports a
+    # PartialFileError although the body was retrieved successfully. Errors
+    # are therefore swallowed on purpose; callers must check the body.
+    begin
+      if parameters && !parameters.empty?
+        # http_post performs the request itself, so perform must not be
+        # called afterwards (that would send the request a second time).
+        form = parameters.map { |k, v| "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}" }.join('&')
+        c.http_post(form)
+      else
+        c.perform
+      end
+    rescue StandardError
+      # best effort: fall through and return whatever body was received
+    end
+
+    c.body_str
+  end
+end
8 MementoException.rb
@@ -0,0 +1,8 @@
+# Error type raised by Memento for application-level failures.
+#
+# It subclasses StandardError rather than Exception: StandardError covers
+# application-level errors, whereas Exception also covers environment-level
+# errors that application code should not intercept.
+class MementoException < StandardError; end
82 parser/Parser.rb
@@ -0,0 +1,82 @@
+module Memento
+  module Parser
+    # Base class for parsers that read citations from raw text.
+    # Subclasses must implement get_data.
+    class TextParser
+      # The raw citation text to parse.
+      attr_accessor :value
+
+      # Checks that there is text to parse.
+      # Raises MementoException when value is nil or empty.
+      def validate
+        raise MementoException, "Error: Missing required text" if @value.nil? || @value.empty?
+      end
+
+      # Extracts citation information from the input string and returns
+      # the parsed entries. Abstract: subclasses must override it.
+      def get_data
+        raise MementoException, 'calling abstract method: get_data'
+      end
+    end
+
+    # Base class for parsers that obtain citations from a website. A subclass
+    # tells it where the citation can be downloaded (get_citation_url) and in
+    # which text format it arrives (get_citation_format).
+    class UrlParser
+      # The URL of the page whose citation is wanted.
+      attr_accessor :value
+
+      # Raises MementoException when value is nil or empty.
+      def validate
+        raise MementoException, 'Invalid Url' if @value.nil? || @value.empty?
+      end
+
+      # Downloads the citation for the URL and parses it with the text
+      # parser registered for get_citation_format.
+      #
+      # Returns whatever that text parser's get_data returns.
+      # Raises MementoException when the URL is missing or nothing could be
+      # downloaded.
+      def get_data
+        validate
+        citation = Memento.get_page(get_citation_url, get_form_parameters, get_referrer)
+        raise MementoException, 'Error: Unable to fetch citation details' if citation.to_s.strip.empty?
+
+        text_parser = Memento::ParserManager.get_text_parser(get_citation_format)
+        text_parser.value = citation
+        text_parser.get_data
+      end
+
+      protected
+
+      # Form fields to submit when fetching the citation requires filling in
+      # a form. An empty hash means a plain GET request.
+      def get_form_parameters
+        {}
+      end
+
+      # The referrer sent with the citation request: the page the user gave.
+      def get_referrer
+        @value
+      end
+
+      #============ ABSTRACT METHODS ============#
+      # Subclasses of UrlParser need to implement at least these
+      # two methods.
+      #==========================================
+
+      # Returns the URL from where the citation details can be fetched.
+      def get_citation_url
+        raise MementoException, 'Called abstract method: get_citation_url'
+      end
+
+      # Returns the format of the downloaded citation (e.g. 'bibtex').
+      def get_citation_format
+        raise MementoException, 'Called abstract method: get_citation_format'
+      end
+    end
+  end
+end
+
+
+
45 parser/ParserManager.rb
@@ -0,0 +1,45 @@
+require_relative 'Parser'
+
+#Text Parser
+require_relative 'text/BibTexParser'
+
+#Url Parser
+require_relative 'url/ASMParser'
+require_relative 'url/HubMedParser'
+require_relative 'url/ACMPortalParser'
+require_relative 'url/BlackwellSynergyParser'
+require_relative 'url/PubmedParser'
+
+
+module Memento
+  # Registry that maps text formats and website host names to parser classes.
+  module ParserManager
+    # Supported text formats, keyed by lower-case format name.
+    TEXT_PARSER = {
+      'bibtex' => {'name' => 'BibTeX', 'parser' => 'BibTexParser'}
+    }.freeze
+
+    # Supported websites, keyed by a host-name fragment that must occur in
+    # the URL.
+    WEBSITES = {
+      'asm.org' => {'name' => 'ASM Journals', 'link' => 'http://journals.asm.org/', 'parser' => 'ASMParser'},
+      'hubmed.org' => {'name' => 'Hubmed', 'link' => "http://www.hubmed.org", 'parser' => 'HubMedParser'},
+      'dl.acm.org' => {'name' => 'ACM Digital Library', 'link' => 'http://dl.acm.org/', 'parser' => 'ACMPortalParser'},
+      'ncbi.nlm.nih.gov' => {'name' => 'PubMed', 'link' => 'http://www.pubmed.gov', 'parser' => 'PubmedParser'},
+      'onlinelibrary.wiley.com' => {'name' => 'Wiley Online Library', 'link' => "http://onlinelibrary.wiley.com", 'parser' => 'BlackwellSynergyParser'}
+    }.freeze
+
+    # Returns a new parser for the given text format (case-insensitive,
+    # surrounding whitespace ignored).
+    # Raises MementoException when the format is missing or unsupported.
+    def self.get_text_parser(format)
+      raise MementoException, "Error: Missing required parameter: format" if format.nil? || format.empty?
+      info = TEXT_PARSER[format.to_s.downcase.strip]
+      raise MementoException, "Error: unsupported text format: #{format}" if info.nil?
+      Kernel.const_get(info['parser']).new
+    end
+
+    # Returns a new parser for the first registered website whose key occurs
+    # in the URL.
+    # Raises MementoException when the URL is missing or no website matches.
+    def self.get_url_parser(url)
+      raise MementoException, "Error: Missing required parameter url" if url.nil? || url.empty?
+      # Plain substring test: the keys are literal host names, so the dots in
+      # them must not act as regular-expression wildcards.
+      _domain, info = WEBSITES.find { |domain, _| url.include?(domain) }
+      raise MementoException, "Error: Parsing is not supported for this website" if info.nil?
+      Kernel.const_get(info['parser']).new
+    end
+  end
+end
50 parser/text/AmazonXMLParser.rb
@@ -0,0 +1,50 @@
+require 'libxml'
+
+# Parser meant to turn an Amazon XML response (held in @value) into citation data.
+# NOTE(review): this file is an unfinished port. Only the first lines of
+# get_data are Ruby; everything from the "$item = ..." line onwards is PHP
+# (note the "->" accessors and "throw new Exception"), and the Ruby "if"
+# below is never closed, so the file does not parse as Ruby.
+# NOTE(review): it lives under parser/text/ but inherits from UrlParser —
+# confirm the intended base class before finishing the port.
+class AmazonXMLParser < Memento::Parser::UrlParser
+
+ # Parses @value as an XML string and looks up the ./Items/Item node.
+ def get_data
+ validate
+ doc = XML::Parser.string(@value)
+ item = doc.parse
+ item = item.root.find('./Items/Item')
+ if('Book' != item.attributes)
+
+ # --- PHP from here on, kept as the reference for the remaining port ---
+ # It accepts only items whose ProductGroup is 'Book', copies URL, title,
+ # publisher and page count, downloads the small cover image into
+ # ARTICLE_ICON, splits the publication date into year/month/day and
+ # collects the authors.
+ $item = $xml->Items->Item;
+ if('Book' != (string)$item->ItemAttributes->ProductGroup)
+ throw new Exception("Currently only books can be imported from Amazon");
+
+ $article['doctype'] = 'book';
+ $article['url'] = trim($item->DetailPageURL);
+ $article['title'] = trim($item->ItemAttributes->Title);
+ $article['publisher']=trim($item->ItemAttributes->Publisher);
+ $article['pages'] = trim($item->ItemAttributes->NumberOfPages);
+ $imgUrl = trim($item->SmallImage->URL);
+
+ if(!empty($imgUrl)){
+ $ch = curl_init($imgUrl);
+ $ext = strtolower(end(explode('.', $imgUrl)));
+ uses('neat_string');
+ $neat = new NeatString();
+ $filename = $neat->randomPassword(10) . '.' . $ext;
+ $fp = fopen(ARTICLE_ICON . $filename, 'w');
+ curl_setopt($ch, CURLOPT_FILE, $fp);
+ curl_setopt($ch, CURLOPT_HEADER, 0);
+ curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
+ curl_exec($ch);
+ curl_close($ch);
+ fclose($fp);
+ $article['img'] = ARTICLE_ICON_URL.$filename;
+ }
+
+
+
+ list($article['year'], $article['month'], $article['day']) = DateUtil::getCleanDate((string)$item->ItemAttributes->PublicationDate);
+
+ foreach($item->ItemAttributes->Author as $author)
+ $authors[] = (string) $author;
+
+ $data[0] = array('Article'=>$article, 'Author' => $authors);
+ return $data;
+ }
+ end
+end
7 parser/text/BibTexParser.rb
@@ -0,0 +1,7 @@
+require 'bibtex'
+
+# Text parser for the 'bibtex' format; hands the raw text to the bibtex gem.
+class BibTexParser < Memento::Parser::TextParser
+  # Parses the BibTeX source held in value and returns BibTeX.parse's result.
+  def get_data
+    BibTeX.parse(@value)
+  end
+end
19 parser/url/ACMPortalParser.rb
@@ -0,0 +1,19 @@
+# URL parser for the ACM Digital Library (dl.acm.org).
+class ACMPortalParser < Memento::Parser::UrlParser
+  # The id must be a real query parameter ("?id=" or "&id=") so that
+  # parameters merely ending in "id", such as CFID, are not picked up, and it
+  # must contain at least one digit.
+  # NOTE(review): for ids of the form 505168.505187 only the first number is
+  # captured, as before — confirm which part the export endpoint expects.
+  PATTERNS = [
+    Regexp.new('[?&]id=(\d+)', Regexp::IGNORECASE)
+  ].freeze
+  BASE_URL = 'http://dl.acm.org/exportformats.cfm?expformat=bibtex&id='
+
+  # Returns the BibTeX export URL for the article id found in value.
+  # Raises MementoException when value is missing or contains no id.
+  def get_citation_url
+    validate
+    PATTERNS.each do |pattern|
+      m = @value.match(pattern)
+      return BASE_URL + m[1] if m
+    end
+    raise MementoException, "Error: Unable to find citation information"
+  end
+
+  # The export endpoint delivers BibTeX.
+  def get_citation_format
+    'bibtex'
+  end
+end
24 parser/url/ASMParser.rb
@@ -0,0 +1,24 @@
+require 'uri'
+# URL parser for the ASM journals (*.asm.org).
+class ASMParser < Memento::Parser::UrlParser
+
+  # Capture 1 is the journal sub-domain, capture 2 the article path.
+  # The view names are a real alternation: written as [abstract|full] they
+  # would be a character class matching any single one of those letters.
+  PATTERNS = [
+    Regexp.new('https?:\/\/(.*)\.asm\.org.*(?:abstract|full|reprint)\/(.*)\?', Regexp::IGNORECASE),
+    Regexp.new('https?:\/\/(.*)\.asm\.org\/content\/(.*)\.(?:abstract|full)')
+  ].freeze
+  BASE_URL = 'http://DOMAIN.asm.org/citmgr?type=bibtex&gca='
+
+  # Returns the citation-manager URL for the article in value.
+  # Raises MementoException when value matches none of the known URL shapes.
+  def get_citation_url
+    PATTERNS.each do |pattern|
+      match = @value.match(pattern)
+      next unless match
+
+      # URI.escape no longer exists in Ruby 3; the default parser's escape
+      # is its replacement.
+      gca = URI::DEFAULT_PARSER.escape("#{match[1]};#{match[2]}")
+      return BASE_URL.gsub('DOMAIN', match[1]) + gca
+    end
+    raise MementoException, "Error: Unable to find link to bibtex"
+  end
+
+  # The citation manager is asked for BibTeX (type=bibtex).
+  def get_citation_format
+    "bibtex"
+  end
+end
44 parser/url/AmazonParser.rb
@@ -0,0 +1,44 @@
+# URL parser for Amazon product pages. It derives the product id (ASIN) from
+# the URL and builds an ItemLookup request for it.
+# NOTE(review): this parser is unfinished — there is no registered text
+# parser for the XML the request returns, so get_data cannot produce
+# citations yet.
+class AmazonParser < Memento::Parser::UrlParser
+  # NOTE(review): the access key is hard-coded in this URL; consider moving
+  # it to configuration.
+  BASE_URL = 'http://ecs.amazonaws.com/onca/xml?Service=AWSECommerceService&AWSAccessKeyId=1E6W7G64405195A1J702&Operation=ItemLookup&ResponseGroup=Medium&ItemId='
+
+  # URL shapes that carry the ASIN; capture 1 is the ASIN.
+  PATTERNS = [
+    Regexp.new('/gp/product/(\d+)', Regexp::IGNORECASE),
+    Regexp.new('/ASIN/(\d+)', Regexp::IGNORECASE),
+    Regexp.new('/dp/(\w+)', Regexp::IGNORECASE)
+  ].freeze
+
+  # Not implemented yet: fails explicitly instead of returning nil, which
+  # the writers cannot handle.
+  def get_data
+    raise MementoException, 'Error: Importing from Amazon is not implemented yet'
+  end
+
+  protected
+
+  # Returns the ItemLookup URL for the product in value.
+  # Raises MementoException when no ASIN can be found in the URL.
+  def get_citation_url
+    asin = get_asin
+    raise MementoException, 'Unable to get Amazon Standard Identification Number (ASIN).' if asin.nil?
+    BASE_URL + asin
+  end
+
+  # Returns the ASIN found in value, or nil when no pattern matches.
+  def get_asin
+    PATTERNS.each do |pattern|
+      matches = @value.to_s.match(pattern)
+      return matches[1] if matches
+    end
+    nil
+  end
+
+  # TODO: return the citation format once an Amazon XML text parser exists.
+  def get_citation_format
+    raise MementoException, 'Called abstract method: get_citation_format'
+  end
+end
32 parser/url/BlackwellSynergyParser.rb
@@ -0,0 +1,32 @@
+# URL parser for the Wiley Online Library (formerly Blackwell Synergy). The
+# citation is obtained by posting the article's DOI to the download form.
+class BlackwellSynergyParser < Memento::Parser::UrlParser
+
+  # Capture 1 is the DOI. The view names are a real alternation: written as
+  # [abstract|full] they would be a character class, which cuts the DOI short
+  # whenever one of its segments starts with one of those letters.
+  PATTERNS = [
+    Regexp.new('doi/(.*)/(?:abstract|full)', Regexp::IGNORECASE)
+  ].freeze
+  BASE_URL = 'http://onlinelibrary.wiley.com/documentcitationdownloadformsubmit'
+
+  # Returns the form fields for the citation download, including the DOI
+  # taken from value.
+  # Raises MementoException when value contains no DOI.
+  def get_form_parameters
+    params = {'hasAbstract' => 'CITATION_AND_ABSTRACT', 'fileFormat' => 'BIBTEX', 'submit' => 'Submit'}
+
+    PATTERNS.each do |pattern|
+      match = @value.match(pattern)
+      next unless match
+
+      params['doi'] = match[1]
+      break
+    end
+
+    raise MementoException, "Error: Unable to find DOI" unless params.has_key?('doi')
+
+    params
+  end
+
+  # The download form has a fixed address.
+  def get_citation_url
+    BASE_URL
+  end
+
+  # The form is asked for BibTeX (fileFormat=BIBTEX).
+  def get_citation_format
+    'bibtex'
+  end
+end
20 parser/url/HubMedParser.rb
@@ -0,0 +1,20 @@
+# URL parser for HubMed (hubmed.org).
+class HubMedParser < Memento::Parser::UrlParser
+
+  # Capture 1 is the article id; at least one digit is required so that a
+  # URL without an id is reported instead of yielding an export URL that has
+  # no id.
+  PATTERNS = [
+    Regexp.new('.*uids=([0-9]+)', Regexp::IGNORECASE)
+  ].freeze
+  BASE_URL = "http://www.hubmed.org/export/bibtex.cgi?uids="
+
+  # Returns the BibTeX export URL for the article id found in value.
+  # Raises MementoException when value contains no id.
+  def get_citation_url
+    PATTERNS.each do |pattern|
+      match = pattern.match(@value)
+      return BASE_URL + match[1] if match
+    end
+    raise MementoException, "Error: Unable to find unique identifier"
+  end
+
+  # The export endpoint delivers BibTeX.
+  def get_citation_format
+    "bibtex"
+  end
+
+end
38 parser/url/IngentaConnectParser.rb
@@ -0,0 +1,38 @@
+# URL parser for IngentaConnect (ingentaconnect.com).
+class IngentaConnectParser < Memento::Parser::UrlParser
+  # PATTERNS[0] is matched against the HTML of the article page and captures
+  # the href of its "BibText Export" link; PATTERNS[1] is matched against the
+  # article URL and captures the path after the host name.
+  PATTERNS = [
+    Regexp.new('title="BibText Export" href="([^\"]*)"', Regexp::IGNORECASE),
+    Regexp.new('ingentaconnect\.com[^/]*/(.*)', Regexp::IGNORECASE)
+  ].freeze
+  BASE_URL = 'http://www.ingentaconnect.com'
+
+  # Returns the URL of the BibTeX export for the article in value.
+  #
+  # The export link advertised on the article page is preferred; when the
+  # page has none (or cannot be fetched) the URL is built from the article
+  # path with "?format=bib" appended.
+  # Raises MementoException when neither approach yields a link.
+  def get_citation_url
+    page = Memento.get_page(@value).to_s
+    link = page.match(PATTERNS[0])
+    return BASE_URL + link[1] if link
+
+    path = @value.match(PATTERNS[1])
+    return "#{BASE_URL}/#{path[1]}?format=bib" if path
+
+    raise MementoException, 'Error: Unable to find BibTeX link'
+  end
+
+  # The export link delivers BibTeX.
+  def get_citation_format
+    'bibtex'
+  end
+end
41 parser/url/LeoonlineParser.rb
@@ -0,0 +1,41 @@
+# URL parser for Leoonline articles. Citations are downloaded from
+# tandfonline.com (see BASE_URL).
+# NOTE(review): the original note here read "Bought by Taylor and" and was
+# cut off — presumably "Taylor and Francis".
+# NOTE(review): the class is called LeoonlinePortal although the file is
+# LeoonlineParser.rb and every sibling is named *Parser — confirm the name.
+class LeoonlinePortal < Memento::Parser::UrlParser
+  # Capture 1 is the DOI (everything after "abs/").
+  PATTERNS = [
+    Regexp.new('abs/(.*)', Regexp::IGNORECASE)
+  ].freeze
+  BASE_URL = 'http://www.tandfonline.com/action/downloadCitation'
+
+  # Requests the article page first and then downloads the citation.
+  # NOTE(review): the first request is presumably meant to start a session
+  # before the download form is posted — confirm it is still needed.
+  def get_data
+    Memento.get_page(@value)
+    super
+  end
+
+  # The download form has a fixed address.
+  def get_citation_url
+    BASE_URL
+  end
+
+  # Returns the form fields for the citation download, including the DOI
+  # taken from value.
+  # Raises MementoException when value contains no DOI.
+  def get_form_parameters
+    params = {'downloadFileName'=> 'tandf_rfse206_1', 'include' => 'abs', 'format' => 'bibtex', 'direct' => 'Download+article+metadata'}
+
+    # Query string noted by the author as a sample of the request:
+    #   downloadFileName=tandf_rfse206_1&format=bibtex&direct=true&include=abs
+    # NOTE(review): it uses direct=true while the hash above sends
+    # 'Download+article+metadata' — confirm which value the site expects.
+
+    PATTERNS.each do |pattern|
+      match = @value.match(pattern)
+      next unless match
+
+      params['doi'] = match[1]
+      break
+    end
+
+    raise MementoException, "Error: Unable to find DOI" unless params.has_key?('doi')
+
+    params
+  end
+
+  # The download is requested as BibTeX (format=bibtex).
+  def get_citation_format
+    'bibtex'
+  end
+end
21 parser/url/PubmedParser.rb
@@ -0,0 +1,21 @@
+# URL parser for PubMed (ncbi.nlm.nih.gov). The citation itself is fetched
+# from HubMed's BibTeX export using the PubMed article id.
+class PubmedParser < Memento::Parser::UrlParser
+  # URL shapes that carry the article id (capture 1). At least one digit is
+  # required so that, for example, a "/pubmed/?term=..." search URL is
+  # reported as unsupported instead of producing an export URL without id.
+  PATTERNS = [
+    Regexp.new('.*TermToSearch=([0-9]+)', Regexp::IGNORECASE),
+    Regexp.new('.*list_uids=([0-9]+)', Regexp::IGNORECASE),
+    Regexp.new('/pubmed/([0-9]+)/?', Regexp::IGNORECASE),
+    Regexp.new('uid=([0-9]+)', Regexp::IGNORECASE)
+  ].freeze
+  BASE_URL = 'http://www.hubmed.org/export/bibtex.cgi?uids='
+
+  # Returns the BibTeX export URL for the article id found in value.
+  # Raises MementoException when value contains no id.
+  def get_citation_url
+    PATTERNS.each do |pattern|
+      match = @value.match(pattern)
+      return BASE_URL + match[1] if match
+    end
+    raise MementoException, "Error: Unable to find article identifier number"
+  end
+
+  # The export endpoint delivers BibTeX.
+  def get_citation_format
+    'bibtex'
+  end
+end
18 parser/url/SageParser.rb
@@ -0,0 +1,18 @@
+# URL parser for SAGE journals (sagepub.com).
+class SageParser < Memento::Parser::UrlParser
+  # Capture 1 is the journal sub-domain, capture 2 the volume/issue/page
+  # path. The view names are a real alternation: written as
+  # [abstract|reprint] they would be a character class matching any single
+  # one of those letters.
+  PATTERNS = [
+    Regexp.new('https?:\/\/([^\.]*).*(?:abstract|reprint)\/([\d\/]+)', Regexp::IGNORECASE)
+  ].freeze
+  BASE_URL = 'http://online.sagepub.com/cgi/citmgr?type=bibtex&gca=sp'
+
+  # Returns the citation-manager URL for the article in value.
+  # Raises MementoException when value does not look like an article URL.
+  def get_citation_url
+    PATTERNS.each do |pattern|
+      match = @value.match(pattern)
+      return BASE_URL + match[1] + ';' + match[2] if match
+    end
+    raise MementoException, "Error: Unable to find unique identifier"
+  end
+
+  # The citation manager is asked for BibTeX (type=bibtex).
+  def get_citation_format
+    'bibtex'
+  end
+end
+
+# The class used to be called ACMPortal (a copy-and-paste leftover); the old
+# name is kept as an alias so existing references keep working.
+ACMPortal = SageParser
7 test/ACMPortalParserTest.rb
@@ -0,0 +1,7 @@
+require_relative 'AbstractTest'
+
+# Checks that citations can be imported from the ACM Digital Library.
+# These tests need network access.
+class ACMPortalParserTest < AbstractTest
+  URLS = [
+    'http://dl.acm.org/citation.cfm?id=505168.505187&coll=DL&dl=GUIDE&CFID=74784343&CFTOKEN=20610835'
+  ]
+
+  # Every sample URL must convert to Office XML without raising.
+  def test_to_office_xml
+    URLS.each do |url|
+      result = Memento.transform('site', 'msofficexml', url)
+      assert_not_nil result, "Failed: #{url}"
+    end
+  end
+end
14 test/ASMParserTest.rb
@@ -0,0 +1,14 @@
+require_relative 'AbstractTest'
+
+# Checks that citations can be imported from ASM journals.
+# These tests need network access.
+class ASMParserTest < AbstractTest
+  TEST_URL = [
+    'http://jvi.asm.org/content/85/23/12474.abstract'
+  ]
+
+  # Every sample URL must convert to Office XML.
+  def test_toOfficeXML
+    TEST_URL.each do |sample|
+      converted = Memento.transform('site', 'msofficexml', sample)
+      assert_not_nil(converted, "Failed to retrieve result")
+    end
+  end
+end
6 test/AbstractTest.rb
@@ -0,0 +1,6 @@
+require 'test/unit'
+require_relative "../Memento"
+
+# Common base class of the parser tests; it only pulls in test/unit and
+# Memento for its subclasses and defines no tests of its own.
+class AbstractTest < Test::Unit::TestCase; end
15 test/ParserManagerTest.rb
@@ -0,0 +1,15 @@
+require 'test/unit'
+require_relative "../Memento"
+
+# Tests the parser lookup performed by Memento::ParserManager.
+class ParserManagerTest < Test::Unit::TestCase
+  # The format name must be matched regardless of letter case.
+  def test_bibtex
+    %w[bibtex Bibtex BIBTEX].each do |spelling|
+      found = Memento::ParserManager.get_text_parser(spelling)
+      assert_equal(BibTexParser, found.class)
+    end
+  end
+end
11 test/TextParserTest.rb
@@ -0,0 +1,11 @@
+require 'test/unit'
+require_relative "../Memento"
+
+# Tests the conversion of raw citation text.
+class TextParserTest < Test::Unit::TestCase
+  # The sample bibliography must convert to Office XML.
+  def test_bibtex2office
+    # Locate the fixture next to this file so the test does not depend on
+    # the working directory; File.read also closes the file again.
+    source = File.read(File.expand_path('../mybib.bib', __FILE__))
+    result = Memento.transform("bibtex", "msofficexml", source)
+    assert_not_nil result, "Failed to convert mybib.bib to Office XML"
+  end
+end
29 test/UrlParserTest.rb
@@ -0,0 +1,29 @@
+require_relative 'AbstractTest'
+
+# Runs one sample URL per supported website through the whole conversion.
+# These tests need network access.
+# NOTE(review): the class is called HubMedParserTest although the file is
+# UrlParserTest.rb and it covers every website — confirm the intended name.
+class HubMedParserTest < AbstractTest
+  TEST_URL = [
+    # ACM Portal
+    'http://dl.acm.org/citation.cfm?id=505168.505187&coll=DL&dl=GUIDE',
+    # ASM
+    'http://jvi.asm.org/content/85/23/12474.abstract',
+    # Blackwell Synergy, now Wiley
+    'http://onlinelibrary.wiley.com/doi/10.1002/smr.509/abstract',
+    # HubMed
+    'http://www.hubmed.org/display.cgi?uids=21809171',
+    # PubMed - ncbi.nlm.nih.gov
+    'http://www.ncbi.nlm.nih.gov/pubmed/22454401'
+  ]
+
+  # Every sample URL must convert to Office XML; the XML is also printed.
+  def test_officeXML
+    TEST_URL.each do |sample|
+      converted = Memento.transform('site', 'msofficexml', sample)
+      puts converted
+      assert_not_nil(converted, "Failed: #{sample}")
+    end
+  end
+end
120 test/mybib.bib
@@ -0,0 +1,120 @@
+@InProceedings{ 2008hst..prop11557C,
+ author = "G. {Canalizo}",
+ title = "{The Nature of low-ionization BAL QSOs}",
+ booktitle = "HST Proposal",
+ year = 2008,
+ month = jul,
+ pages = "11557--+",
+ url = "http://adsabs.harvard.edu/abs/2008hst..prop11557C",
+ adsnote = "Provided by the SAO/NASA Astrophysics Data System"
+}
+
+@InProceedings{ 2008AIPC.1053...63C,
+ author = "G. {Canalizo} and M. {Wold} and M. {Lazarova} and M. {Lacy}",
+ title = "{Quasar Black Hole Masses from Velocity Dispersions}",
+ keywords = "Quasars, Galactic nuclei, circumnuclear matter, and bulges, Solid solution hardening, precipitation hardening, and dispersion hardening, aging",
+ booktitle = "American Institute of Physics Conference Series",
+ year = 2008,
+ series = "American Institute of Physics Conference Series",
+ volume = 1053,
+ archivePrefix = "arXiv",
+ eprint = "0807.2433",
+ editor = "{S.~K.~Chakrabarti \& A.~S.~Majumdar}",
+ month = oct,
+ pages = "63--66",
+ doi = "10.1063/1.3009525",
+ url = "http://adsabs.harvard.edu/abs/2008AIPC.1053...63C",
+ adsnote = "Provided by the SAO/NASA Astrophysics Data System"
+}
+
+@InProceedings{ 2008sptz.prop50792C,
+ author = "G. {Canalizo} and M. {Lacy} and M. {Lazarova}",
+ title = "{The nature of low-ionization BAL QSOs}",
+ booktitle = "Spitzer Proposal ID 50792",
+ year = 2008,
+ month = mar,
+ pages = "50792--+",
+ url = "http://adsabs.harvard.edu/abs/2008sptz.prop50792C",
+ adsnote = "Provided by the SAO/NASA Astrophysics Data System"
+}
+
+@Article{ 2007ApJ...669..801C,
+ author = "G. {Canalizo} and N. {Bennert} and B. {Jungwiert} and A. {Stockton} and F. {Schweizer} and M. {Lacy} and C. {Peng}",
+ title = "{Spectacular Shells in the Host Galaxy of the QSO MC2 1635+119}",
+ journal = "ApJ",
+ archivePrefix = "arXiv",
+ eprint = "0707.2951",
+ keywords = "Galaxies: Active, Galaxies: Evolution, Galaxies: Interactions, Galaxies: Quasars: General, quasars: individual (MC2 1635+119)",
+ year = 2007,
+ month = nov,
+ volume = 669,
+ pages = "801--809",
+ doi = "10.1086/521721",
+ url = "http://adsabs.harvard.edu/abs/2007ApJ...669..801C",
+ adsnote = "Provided by the SAO/NASA Astrophysics Data System"
+}
+@ARTICLE{Bailey,
+ author = "D. H. Bailey and P. N. Swarztrauber",
+ title = "The fractional {F}ourier transform and applications",
+ journal = "SIAM Rev.",
+ volume = 33,
+ number = 3,
+ pages = "389--404",
+ year = 1991
+ }
+
+@ARTICLE{Bay1,
+ author = "A. Bayliss and C. I. Goldstein and E. Turkel",
+ title = "An iterative method for the {H}elmholtz equation",
+ journal = "J. Comp. Phys.",
+ volume = 49,
+ pages = "443--457",
+ year = 1983
+ }
+
+@TECHREPORT{Ernst,
+ author = "O. Ernst and G. Golub",
+ title = "A domain decomposition approach to solving the {H}elmholtz
+ equation with a radiation boundary condition",
+ number = "NA-92-08",
+ school = "Stanford University, Computer Science Department",
+ year = "August 1992"
+ }
+
+@TECHREPORT{Fujitsu,
+ organization = "Fujitsu",
+ title = "FACOM OS IV SSL II USER'S GUIDE, 99SP0050E5",
+ year = 1990
+ }
+
+@ARTICLE{Gold3,
+ author = "C. I. Goldstein",
+ title = "Multigrid methods for elliptic problems in unbounded domains",
+ journal = "SIAM J. Numer. Anal.",
+ volume = 30,
+ pages = "159--183",
+ year = 1993
+ }
+
+@BOOK{Hale,
+ author = "J. K. Hale",
+ title = "Theory of functional--differential equations",
+ publisher = "Springer--Verlag, Berlin--Heidelberg--New York",
+ year = 1977
+ }
+
+@INBOOK{Swa82,
+ author = "P. N. Swarztrauber",
+ title = "Vectorizing the {FFTs}",
+ editor = "G.~Rodrigue",
+ booktitle = "Parallel Computations",
+ publisher = "Academic Press, New York",
+ year = 1982
+ }
+
+@PHDTHESIS{Ta,
+ author = "S. Ta'asan",
+ title = "Multigrid Methods for Highly Oscillatory Problems",
+ school = "Weizmann Institute of Science, Rehovot, Israel",
+ year = "1984"
+ }
5 test/test.rb
@@ -0,0 +1,5 @@
+# Ad-hoc script: opens mybib.bib with the bibtex gem and prints what
+# Bib2OfficeXML2#export returns for it.
+# NOTE(review): the second require is an absolute path on the author's
+# machine, and Bib2OfficeXML2 is not defined in the part of this change that
+# is visible here — confirm whether the script is still meant to run.
+require 'bibtex'
+require '/Users/ragrawal/personal/Memento/rMemento/export/Bib2OfficeXML2'
+
+# "mybib.bib" is resolved against the current working directory.
+bib = BibTeX.open("mybib.bib")
+puts Bib2OfficeXML2.new.export(bib)
202 writer/Bib2OfficeXML.rb.deprecated
@@ -0,0 +1,202 @@
+=begin
+ * @author Ritesh Agrawal
+ * @version 2.0
+ * Takes Bibtex output and returns Office2007 XML String
+=end
+
+# Deprecated writer that turns parsed BibTeX records into an Office 2007
+# bibliography XML string.
+#
+# Each BibTeX entry type maps to an Office source type plus the name of one
+# or more field tables (the readers below); a field table maps a record
+# field to the XML tag it is written to.
+class Bib2OfficeXML
+
+  attr_reader :doc_types, :generic, :book, :book_section, :report, :misc, :article_in_preiodical, :conference_proceedings, :journal_article
+
+  def initialize
+    # 'fields' lists reader names of this class, so every entry must match
+    # one of the attr_readers exactly.
+    @doc_types = {
+      'article' => {'name' => 'JournalArticle', 'fields' => ['journal_article']},
+      'book' => {'name' => 'Book', 'fields' => ['book']},
+      'booklet' => {'name' => 'Book', 'fields' => ['book']},
+      'conference' => {'name' => 'ConferenceProceedings', 'fields' => ['conference_proceedings']},
+      'inbook' => {'name' => 'BookSection', 'fields' => ['book_section']},
+      'incollection' => {'name' => 'ArticleInAPeriodical', 'fields' => ['article_in_preiodical']},
+      'inproceedings' => {'name' => 'ConferenceProceedings', 'fields' => ['conference_proceedings']},
+      'manual' => {'name' => 'Report', 'fields' => ['report']},
+      # BibTeX spells the type "mastersthesis"; the misspelt key is kept for
+      # backward compatibility.
+      'masterthesis' => {'name' => 'Report', 'fields' => ['report']},
+      'mastersthesis' => {'name' => 'Report', 'fields' => ['report']},
+      'misc' => {'name' => 'Misc', 'fields' => ['misc']},
+      'phdthesis' => {'name' => 'Report', 'fields' => ['report']},
+      'proceedings' => {'name' => 'ConferenceProceedings', 'fields' => ['conference_proceedings']},
+      'techreport' => {'name' => 'Report', 'fields' => ['report']},
+      'unpublished' => {'name' => 'Misc', 'fields' => ['misc']}
+    }.freeze
+
+    # Fields written for every record.
+    @generic = {
+      'id' => 'b:Tag',
+      'title' => 'b:Title',
+      'year' => 'b:Year'
+    }.freeze
+
+    @book = {
+      'author' => 'b:Author/b:Author/b:NameList',
+      'place' => 'b:CountryRegion',
+      'publisher' => 'b:Publisher',
+      'editor' => 'b:Author/b:Editor/b:NameList',
+      'volume' => 'b:Volume',
+      'issn' => 'b:StandardNumber',
+      'pages' => 'b:Pages'
+    }.freeze
+
+    @book_section = {
+      'author' => 'b:Author/b:Author/b:NameList',
+      'sec_title' => 'b:BookTitle',
+      'pages' => 'b:Pages',
+      'place' => 'b:CountryRegion',
+      'publisher' => 'b:Publisher',
+      'editor' => 'b:Author/b:Editor/b:NameList',
+      'volume' => 'b:Volume',
+      'issn' => 'b:StandardNumber'
+    }.freeze
+
+    @journal_article = {
+      'author' => 'b:Author/b:Author/b:NameList',
+      'journal' => 'b:JournalName',
+      'month' => 'b:Month',
+      'day' => 'b:Day',
+      'pages' => 'b:Pages',
+      'editor' => 'b:Author/b:Editor/b:NameList',
+      'publisher' => 'b:Publisher',
+      'volume' => 'b:Volume',
+      'issue' => 'b:Issue',
+      'issn' => 'b:StandardNumber'
+    }.freeze
+
+    @article_in_preiodical = {
+      'author' => 'b:Author/b:Author/b:NameList',
+      'journal' => 'b:PeriodicalTitle',
+      'month' => 'b:Month',
+      'day' => 'b:Day',
+      'pages' => 'b:Pages',
+      'editor' => 'b:Author/b:Editor/b:NameList',
+      'publisher' => 'b:Publisher',
+      'volume' => 'b:Volume',
+      'issue' => 'b:Issue',
+      'issn' => 'b:StandardNumber'
+    }.freeze
+
+    @conference_proceedings = {
+      'author' => 'b:Author/b:Author/b:NameList',
+      'editor' => 'b:Author/b:Editor/b:NameList',
+      'pages' => 'b:Pages',
+      'journal' => 'b:ConferenceName',
+      'place' => 'b:City',
+      'publisher' => 'b:Publisher',
+      'volume' => 'b:Volume',
+      'issn' => 'b:StandardNumber'
+    }.freeze
+
+    @report = {
+      'author' => 'b:Author/b:Author/b:NameList',
+      'publisher' => 'b:Publisher',
+      'place' => 'b:City',
+      'pages' => 'b:Pages',
+      'doctype' => 'b:ThesisType',
+      'issn' => 'b:StandardNumber'
+    }.freeze
+
+    @misc = {
+      'author' => 'b:Author/b:Author/b:NameList',
+      'sec_title' => 'b:PublicationTitle',
+      'year' => 'b:Year',
+      'month' => 'b:Month',
+      'day' => 'b:Day',
+      'place' => 'b:CountryRegion',
+      'publisher' => 'b:Publisher',
+      'editor' => 'b:Author/b:Editor/b:NameList',
+      'pages' => 'b:Pages',
+      'volume' => 'b:Volume',
+      'issue' => 'b:Issue',
+      'issn' => 'b:StandardNumber'
+    }.freeze
+  end
+
+  # Builds the Office bibliography XML for the given records.
+  #
+  # data - enumerable of parsed BibTeX records; nil or empty records and
+  #        records of an unknown type are skipped
+  #
+  # Returns the XML document as a String. Fields a record does not respond
+  # to are reported on standard output and left out.
+  def export(data)
+    sources = '<?xml version="1.0" encoding="UTF-8" ?>'
+    sources = sources + '<b:Sources SelectedStyle="" xmlns:b="http://schemas.openxmlformats.org/officeDocument/2006/bibliography" xmlns="http://schemas.openxmlformats.org/officeDocument/2006/bibliography">'
+
+    data.each do |record|
+      # sanity check
+      next if record.nil? || record.empty?
+
+      type = @doc_types[record.type.to_s]
+      next if type.nil? || type.empty?
+
+      # The opening tag was missing, which left </b:Source> unbalanced.
+      source = '<b:Source>'
+      source = source + '<b:SourceType>' + type["name"] + '</b:SourceType>'
+
+      # Generic fields
+      generic.each do |field, tag|
+        source = source + "<#{tag}>#{record.send(field)}</#{tag}>"
+      end
+
+      # Type-specific fields
+      type["fields"].each do |field_for|
+        send(field_for).each do |field, tag|
+          unless record.respond_to?(field)
+            puts "missing #{field}"
+            next
+          end
+          # Names are always written as a name list; their "tag" is a path,
+          # not a tag name, and must never be used as an element name.
+          source = source + case field
+                            when "author" then process_authors(record.author)
+                            when "editor" then process_editors(record.editor)
+                            else "<#{tag}>#{record.send(field)}</#{tag}>"
+                            end
+        end
+      end
+
+      source = source + '</b:Source>'
+      sources = sources + source
+    end
+
+    sources + '</b:Sources>'
+  end
+
+  private
+
+  # Returns the author name list, or '' when there are no authors (nil
+  # would break the string concatenation in export).
+  def process_authors(authors)
+    return '' if authors.nil? || authors.empty?
+    people = authors.map { |author| author_tag(author) }.join
+    '<b:Author><b:Author><b:NameList>' + people + '</b:NameList></b:Author></b:Author>'
+  end
+
+  # Returns the editor name list, or '' when there are no editors.
+  def process_editors(editors)
+    return '' if editors.nil? || editors.empty?
+    people = editors.map { |editor| author_tag(editor) }.join
+    '<b:Author><b:Editor><b:NameList>' + people + '</b:NameList></b:Editor></b:Author>'
+  end
+
+  # Splits a "Last, First" name at the first comma. A name without a comma
+  # is written as a last name with an empty first name.
+  def author_tag(author)
+    last, first = author.to_s.split(',', 2)
+    a = '<b:Last>' + last.to_s.strip + '</b:Last>'
+    b = '<b:First>' + first.to_s.strip + '</b:First>'
+    '<b:Person>' + a + b + '</b:Person>'
+  end
+end
+
+
107 writer/OfficeXML.rb
@@ -0,0 +1,107 @@
+require 'cgi'
+
+# Writer that exports parsed BibTeX records as an Office bibliography XML
+# string (the 'msofficexml' output format).
+class OfficeXML < Memento::Writer::AbstractWriter
+  # Default mapping from BibTeX field to XML tag.
+  FIELD_MAPPING = {
+    :id => 'b:Tag',
+    :day => 'b:Day',
+    :issn => 'b:StandardNumber',
+    :issue => 'b:Issue',
+    :journal => 'b:JournalName',
+    :month => 'b:Month',
+    :pages => 'b:Pages',
+    :place => 'b:City',
+    :publisher => 'b:Publisher',
+    :booktitle => 'b:BookTitle',
+    :title => 'b:Title',
+    :volume => 'b:Volume',
+    :year => 'b:Year'
+  }.freeze
+
+  # Per entry type: the Office source type (:name) and any field-to-tag
+  # mappings that differ from FIELD_MAPPING.
+  DOC_TYPES = {
+    :article => {:name => 'JournalArticle', :journal => 'b:JournalName'},
+    :book => {:name => 'Book', :place => 'b:CountryRegion'},
+    :booklet => {:name => 'Book', :place => 'b:CountryRegion'},
+    :conference => {:name => 'ConferenceProceedings'},
+    :inbook => {:name => 'BookSection', :place => 'b:CountryRegion'},
+    :incollection => {:name => 'ArticleInAPeriodical', :journal => 'b:PeriodicalTitle'},
+    :inproceedings => {:name => 'ConferenceProceedings'},
+    :manual => {:name => 'Report'},
+    :mastersthesis => {:name => 'Report'},
+    :misc => {:name => 'Report'},
+    :phdthesis => {:name => 'Report'},
+    :proceedings => {:name => 'ConferenceProceedings'},
+    :techreport => {:name => 'Report'},
+    :unpublished => {:name => 'Misc', :booktitle => 'b:PublicationTitle', :place => 'b:CountryRegion'}
+  }.freeze
+
+  def initialize
+    # Kept for code that still reads the former instance variables.
+    @FIELD_MAPPING = FIELD_MAPPING
+    @DOC_TYPES = DOC_TYPES
+  end
+
+  # Builds the Office bibliography XML for the given records.
+  #
+  # data - enumerable of parsed BibTeX records; nil or empty records and
+  #        records whose type is not in DOC_TYPES are skipped
+  #
+  # Returns the XML document as a String. Field values are XML-escaped.
+  def export(data)
+    sources = '<?xml version="1.0" encoding="UTF-8" ?>'
+    sources = sources + '<b:Sources SelectedStyle="" xmlns:b="http://schemas.openxmlformats.org/officeDocument/2006/bibliography" xmlns="http://schemas.openxmlformats.org/officeDocument/2006/bibliography">'
+
+    data.each do |record|
+      # sanity check
+      next if record.nil? || record.empty?
+
+      doc_type = DOC_TYPES[record.type.to_s.downcase.to_sym]
+      next if doc_type.nil? # unknown entry type: nothing sensible to write
+
+      fields = FIELD_MAPPING.merge(doc_type)
+
+      # fields that require custom handling
+      source = "<b:Source>"
+      source = source + '<b:SourceType>' + fields[:name] + '</b:SourceType>'
+      source = source + element(fields[:id], record.id)
+
+      record.fields.each do |key, value|
+        key = key.to_s.downcase.to_sym
+        # Names have no entry in the field mapping, so they must be handled
+        # before the tag lookup; otherwise they are silently dropped.
+        if key == :author
+          source = source + process_authors(value)
+        elsif key == :editor
+          source = source + process_editors(value)
+        elsif key != :name && fields[key]
+          # :name holds the source type, not a tag, hence the exclusion.
+          source = source + element(fields[key], value)
+        end
+      end
+
+      source = source + '</b:Source>'
+      sources = sources + source
+    end
+
+    sources + '</b:Sources>'
+  end
+
+  private
+
+  # Returns <tag>value</tag> with the value converted to a String and
+  # XML-escaped (BibTeX values frequently contain '&').
+  def element(tag, value)
+    "<#{tag}>#{CGI.escapeHTML(value.to_s)}</#{tag}>"
+  end
+
+  # Returns the author name list, or '' when there are no authors (nil
+  # would break the string concatenation in export).
+  def process_authors(authors)
+    return '' if authors.nil? || authors.empty?
+    '<b:Author><b:Author><b:NameList>' + people(authors) + '</b:NameList></b:Author></b:Author>'
+  end
+
+  # Returns the editor name list, or '' when there are no editors.
+  def process_editors(editors)
+    return '' if editors.nil? || editors.empty?
+    '<b:Author><b:Editor><b:NameList>' + people(editors) + '</b:NameList></b:Editor></b:Author>'
+  end
+
+  # Renders every name as a b:Person element. A value that is not a list
+  # is treated as BibTeX text whose names are separated by "and".
+  def people(names)
+    list = names.respond_to?(:each) ? names : names.to_s.split(/\s+and\s+/i)
+    rendered = ''
+    list.each { |name| rendered = rendered + author_tag(name) }
+    rendered
+  end
+
+  # Splits a "Last, First" name at the first comma. A name without a comma
+  # is written as a last name with an empty first name.
+  def author_tag(author)
+    last, first = author.to_s.split(',', 2)
+    a = '<b:Last>' + CGI.escapeHTML(last.to_s.strip) + '</b:Last>'
+    b = '<b:First>' + CGI.escapeHTML(first.to_s.strip) + '</b:First>'
+    '<b:Person>' + a + b + '</b:Person>'
+  end
+end
11 writer/Writer.rb
@@ -0,0 +1,11 @@
+module Memento
+  module Writer
+    # Common superclass of all writers. A concrete writer overrides export
+    # to turn parsed citation data into its output format.
+    class AbstractWriter
+      # Abstract: always raises MementoException here.
+      def export(data)
+        raise(MementoException, "calling abstract method: export")
+      end
+    end
+  end
+end
17 writer/WriterManager.rb
@@ -0,0 +1,17 @@
+require_relative 'Writer'
+require_relative 'OfficeXML'
+
+module Memento
+  # Registry that maps output format names to writer classes.
+  module WriterManager
+    # Supported output formats, keyed by lower-case format name.
+    WRITERS = {
+      :msofficexml => {'name'=>'MS Office XML', 'parser'=>'OfficeXML'}
+    }.freeze
+
+    # Returns a new writer for the given output format (case-insensitive,
+    # surrounding whitespace ignored).
+    # Raises MementoException when the format is missing or unsupported.
+    def self.get_writer(format)
+      raise MementoException, 'Error: Missing required parameter: format' if format.nil? || format.empty?
+      info = WRITERS[format.to_s.downcase.strip.to_sym]
+      # Double quotes are required here: a single-quoted string would print
+      # the text "#{format}" literally instead of the offending format.
+      raise MementoException, "Error: unsupported writer type: #{format}" if info.nil? || info.empty?
+      Kernel.const_get(info['parser']).new
+    end
+  end
+end
Please sign in to comment.
Something went wrong with that request. Please try again.