From afc595ea30ba3613767ea279e1957a44cfec5483 Mon Sep 17 00:00:00 2001
From: Ritesh Agrawal <ragrawal@gmail.com>
Date: Thu, 29 Mar 2012 18:17:02 -0700
Subject: [PATCH] First update

---
 Memento.rb                           |  57 ++++++++
 MementoException.rb                  |   8 ++
 parser/Parser.rb                     |  82 +++++++++++
 parser/ParserManager.rb              |  45 ++++++
 parser/text/AmazonXMLParser.rb       |  50 +++++++
 parser/text/BibTexParser.rb          |   7 +
 parser/url/ACMPortalParser.rb        |  19 +++
 parser/url/ASMParser.rb              |  24 ++++
 parser/url/AmazonParser.rb           |  44 ++++++
 parser/url/BlackwellSynergyParser.rb |  32 +++++
 parser/url/HubMedParser.rb           |  20 +++
 parser/url/IngentaConnectParser.rb   |  38 +++++
 parser/url/LeoonlineParser.rb        |  41 ++++++
 parser/url/PubmedParser.rb           |  21 +++
 parser/url/SageParser.rb             |  18 +++
 test/ACMPortalParserTest.rb          |   7 +
 test/ASMParserTest.rb                |  14 ++
 test/AbstractTest.rb                 |   6 +
 test/ParserManagerTest.rb            |  15 ++
 test/TextParserTest.rb               |  11 ++
 test/UrlParserTest.rb                |  29 ++++
 test/mybib.bib                       | 120 ++++++++++++++++
 test/test.rb                         |   5 +
 writer/Bib2OfficeXML.rb.deprecated   | 202 +++++++++++++++++++++++++++
 writer/OfficeXML.rb                  | 107 ++++++++++++++
 writer/Writer.rb                     |  11 ++
 writer/WriterManager.rb              |  17 +++
 27 files changed, 1050 insertions(+)
 create mode 100644 Memento.rb
 create mode 100644 MementoException.rb
 create mode 100644 parser/Parser.rb
 create mode 100644 parser/ParserManager.rb
 create mode 100644 parser/text/AmazonXMLParser.rb
 create mode 100644 parser/text/BibTexParser.rb
 create mode 100644 parser/url/ACMPortalParser.rb
 create mode 100644 parser/url/ASMParser.rb
 create mode 100644 parser/url/AmazonParser.rb
 create mode 100644 parser/url/BlackwellSynergyParser.rb
 create mode 100644 parser/url/HubMedParser.rb
 create mode 100644 parser/url/IngentaConnectParser.rb
 create mode 100644 parser/url/LeoonlineParser.rb
 create mode 100644 parser/url/PubmedParser.rb
 create mode 100644 parser/url/SageParser.rb
 create mode 100644 test/ACMPortalParserTest.rb
 create mode 100644 test/ASMParserTest.rb
 create mode 100644 test/AbstractTest.rb
 create mode 100644 test/ParserManagerTest.rb
 create mode 100644 test/TextParserTest.rb
 create mode 100644 test/UrlParserTest.rb
 create mode 100644 test/mybib.bib
 create mode 100644 test/test.rb
 create mode 100644 writer/Bib2OfficeXML.rb.deprecated
 create mode 100644 writer/OfficeXML.rb
 create mode 100644 writer/Writer.rb
 create mode 100644 writer/WriterManager.rb

diff --git a/Memento.rb b/Memento.rb
new file mode 100644
index 0000000..f072930
--- /dev/null
+++ b/Memento.rb
@@ -0,0 +1,72 @@
+require 'curb'
+require 'cgi'
+
+require_relative 'MementoException'
+require_relative 'parser/ParserManager'
+require_relative 'writer/WriterManager'
+
+# Entry point of the library: converts citation data from one format to another.
+module Memento
+  # Parses +value+ with the parser selected by +input_format+ and serialises
+  # the parsed entries with the writer selected by +output_format+.
+  #
+  # input_format  - 'site' (case-insensitive) when +value+ is a URL; any other
+  #                 string is looked up as a text format by ParserManager.
+  # output_format - writer name, resolved by WriterManager.get_writer.
+  # value         - the URL or the raw citation text to convert.
+  #
+  # Returns the result of the writer's #export.
+  # Raises MementoException if an argument is nil/empty or no parser is found.
+  def self.transform(input_format, output_format, value)
+    raise MementoException, "Error: Missing required parameter: input_format" if input_format.nil? || input_format.empty?
+    raise MementoException, "Error: Missing required parameter: output_format" if output_format.nil? || output_format.empty?
+    raise MementoException, "Error: Missing required parameter: text" if value.nil? || value.empty?
+
+    # 'site' means the value is a web page URL, so a URL parser is needed.
+    parser =
+      if input_format.downcase.strip == 'site'
+        Memento::ParserManager.get_url_parser(value)
+      else
+        Memento::ParserManager.get_text_parser(input_format)
+      end
+    raise MementoException, "Unable to find required parser" if parser.nil?
+
+    parser.value = value
+    data = parser.get_data
+
+    writer = Memento::WriterManager.get_writer(output_format)
+    writer.export(data)
+  end
+
+  # Fetches +url+ and returns the response body.
+  #
+  # url        - address to request.
+  # parameters - Hash of form fields; when non-empty the request is a POST
+  #              with these fields URL-encoded, otherwise a GET.
+  # referer    - accepted for interface compatibility; not sent with the request.
+  #
+  # Returns Curl::Easy#body_str after the request (may be nil or empty if it failed).
+  def self.get_page(url, parameters = {}, referer = nil)
+    c = Curl::Easy.new(url)
+    c.follow_location = true
+    c.header_in_body = false
+    c.useragent = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2 GTB5'
+    c.enable_cookies = true
+
+    begin
+      if parameters && !parameters.empty?
+        # http_post performs the request itself, so perform must not be
+        # called afterwards or the form would be submitted twice.
+        form = parameters.map { |k, v| "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}" }
+        c.http_post(form.join('&'))
+      else
+        c.perform
+      end
+    rescue StandardError
+      # Best effort on purpose: for some sites (PubMed in particular) curl
+      # reports PartialFileError even though the body was retrieved.
+    end
+
+    c.body_str
+  end
+end
\ No newline at end of file
diff --git a/MementoException.rb b/MementoException.rb
new file mode 100644
index 0000000..992a6a8
--- /dev/null
+++ b/MementoException.rb
@@ -0,0 +1,8 @@
+# Note: subclass StandardError rather than Exception. StandardError covers
+# application-level errors, whereas Exception also covers environment-level
+# errors (signals, exit requests, out-of-memory) that callers should not rescue.
+
+class MementoException < StandardError
+  # Intentionally empty: the class only gives Memento failures their own
+  # type so that callers can rescue them separately from other errors.
+end
diff --git a/parser/Parser.rb b/parser/Parser.rb
new file mode 100644
index 0000000..16b7b05
--- /dev/null
+++ b/parser/Parser.rb
@@ -0,0 +1,74 @@
+module Memento
+  module Parser
+    # Base class for parsers that turn raw citation text into entries.
+    # Subclasses must override #get_data.
+    class TextParser
+      # The raw citation text to parse.
+      attr_accessor :value
+
+      # Raises MementoException when no text has been assigned to #value.
+      def validate
+        raise MementoException, "Error: Missing required text" if @value.nil? || @value.empty?
+      end
+
+      # Extracts citation information from #value.
+      # Abstract here: concrete parsers return the parsed entries.
+      def get_data
+        raise 'calling abstract method: get_data'
+      end
+    end
+
+    # Base class for parsers that locate the downloadable citation of a web
+    # page, fetch it and hand the text to the matching text parser.
+    class UrlParser
+      # The page URL to process.
+      attr_accessor :value
+
+      # Raises MementoException when no URL has been assigned to #value.
+      def validate
+        raise MementoException, 'Invalid Url' if @value.nil? || @value.empty?
+      end
+
+      # Downloads the citation for #value and parses it.
+      #
+      # Returns whatever the text parser for #get_citation_format returns.
+      # Raises MementoException if the URL is missing or nothing was fetched.
+      def get_data
+        validate
+        citation = Memento.get_page(get_citation_url, get_form_parameters, get_referrer)
+        raise MementoException, 'Error: Unable to fetch citation details' if citation.to_s.strip.empty?
+
+        text_parser = Memento::ParserManager.get_text_parser(get_citation_format)
+        text_parser.value = citation
+        text_parser.get_data
+      end
+
+      protected
+
+      # Form fields to submit when the citation sits behind a form.
+      # The default is an empty Hash, i.e. no form fields.
+      def get_form_parameters
+        {}
+      end
+
+      # Referrer handed to Memento.get_page: the page URL itself.
+      def get_referrer
+        @value
+      end
+
+      #============ ABSTRACT METHODS ============#
+      # Subclasses of UrlParser need to implement at least these two methods.
+      #==========================================
+
+      # Returns the URL from which the citation details can be fetched.
+      def get_citation_url
+        raise MementoException, 'Called abstract method: get_citation_url'
+      end
+
+      # Returns the format name of the fetched citation (e.g. 'bibtex').
+      def get_citation_format
+        raise MementoException, 'Called abstract method: get_citation_format'
+      end
+    end
+  end
+end
diff --git a/parser/ParserManager.rb b/parser/ParserManager.rb
new file mode 100644
index 0000000..ac687f2
--- /dev/null
+++ b/parser/ParserManager.rb
@@ -0,0 +1,53 @@
+require_relative 'Parser'
+
+# Text parsers
+require_relative 'text/BibTexParser'
+
+# URL parsers
+require_relative 'url/ASMParser'
+require_relative 'url/HubMedParser'
+require_relative 'url/ACMPortalParser'
+require_relative 'url/BlackwellSynergyParser'
+require_relative 'url/PubmedParser'
+
+module Memento
+  # Registry that maps text formats and website host names to parser classes.
+  module ParserManager
+    # Supported text formats, keyed by lower-case format name.
+    TEXT_PARSER = {
+      'bibtex' => {'name' => 'BibTeX', 'parser' => 'BibTexParser'}
+    }.freeze
+
+    # Supported websites, keyed by the host-name fragment that identifies them.
+    WEBSITES = {
+      'asm.org' => {'name' => 'ASM Journals', 'link' => 'http://journals.asm.org/', 'parser' => 'ASMParser'},
+      'hubmed.org' => {'name' => 'Hubmed', 'link' => "http://www.hubmed.org", 'parser' => 'HubMedParser'},
+      'dl.acm.org' => {'name' => 'ACM Digital Library', 'link' => 'http://dl.acm.org/', 'parser' => 'ACMPortalParser'},
+      'ncbi.nlm.nih.gov' => {'name' => 'PubMed', 'link' => 'http://www.pubmed.gov', 'parser'=>'PubmedParser'},
+      'onlinelibrary.wiley.com' => {'name' =>'Wiley Online Library', 'link' => "http://onlinelibrary.wiley.com", 'parser' => 'BlackwellSynergyParser'}
+    }.freeze
+
+    # Returns a new text parser for +format+ (case-insensitive, surrounding
+    # whitespace ignored).
+    # Raises MementoException if +format+ is nil/empty or not registered.
+    def self.get_text_parser(format)
+      raise MementoException, "Error: Missing required parameter: format" if format.nil? || format.empty?
+      info = TEXT_PARSER[format.to_s.downcase.strip]
+      raise MementoException, "Error: unsupported text format: #{format}" if info.nil?
+      Object.const_get(info['parser']).new
+    end
+
+    # Returns a new URL parser for the first registered website whose key
+    # occurs in +url+.
+    # Raises MementoException if +url+ is nil/empty or no website matches.
+    def self.get_url_parser(url)
+      raise MementoException, "Error: Missing required parameter url" if url.nil? || url.empty?
+      # Plain substring test: the keys contain dots, which would act as
+      # wildcards if a key were interpolated into a regular expression.
+      WEBSITES.each do |key, value|
+        return Object.const_get(value['parser']).new if url.include?(key)
+      end
+      raise MementoException, "Error: Parsing is not supported for this website"
+    end
+  end
+end
\ No newline at end of file
diff --git a/parser/text/AmazonXMLParser.rb b/parser/text/AmazonXMLParser.rb
new file mode 100644
index 0000000..e7d434c
--- /dev/null
+++ b/parser/text/AmazonXMLParser.rb
@@ -0,0 +1,50 @@
+require 'libxml'
+# NOTE(review): unfinished port - after the first lines of get_data the body is still PHP, so this file does not parse as Ruby.
+class AmazonXMLParser < Memento::Parser::UrlParser
+  # Presumably meant to turn an Amazon ItemLookup XML response held in @value into a book entry - TODO confirm and finish the port.
+  def get_data
+      validate
+      doc = XML::Parser.string(@value)
+      item = doc.parse
+      item = item.root.find('./Items/Item')
+      if('Book' != item.attributes)
+          
+  		$item = $xml->Items->Item;
+  		if('Book' != (string)$item->ItemAttributes->ProductGroup)
+  			throw new Exception("Currently only books can be imported from Amazon");
+
+  		$article['doctype'] = 'book';
+  		$article['url'] = trim($item->DetailPageURL);
+  		$article['title'] = trim($item->ItemAttributes->Title);
+  		$article['publisher']=trim($item->ItemAttributes->Publisher);
+  		$article['pages'] = trim($item->ItemAttributes->NumberOfPages);
+  		$imgUrl = trim($item->SmallImage->URL);
+
+  		if(!empty($imgUrl)){
+  			$ch = curl_init($imgUrl);
+  			$ext = strtolower(end(explode('.', $imgUrl)));
+  			uses('neat_string');
+  			$neat = new NeatString();
+  			$filename = $neat->randomPassword(10) . '.' . $ext;
+  			$fp = fopen(ARTICLE_ICON . $filename, 'w');
+  			curl_setopt($ch, CURLOPT_FILE, $fp);
+  			curl_setopt($ch, CURLOPT_HEADER, 0);
+  			curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
+  			curl_exec($ch);
+  			curl_close($ch);
+  			fclose($fp);
+  			$article['img'] = ARTICLE_ICON_URL.$filename;
+  		}
+
+
+
+  		list($article['year'], $article['month'], $article['day']) = DateUtil::getCleanDate((string)$item->ItemAttributes->PublicationDate);
+
+  		foreach($item->ItemAttributes->Author as $author)
+  			$authors[] = (string) $author;
+
+  		$data[0] = array('Article'=>$article, 'Author' => $authors);
+  		return $data;
+  	}
+  end
+end
\ No newline at end of file
diff --git a/parser/text/BibTexParser.rb b/parser/text/BibTexParser.rb
new file mode 100644
index 0000000..d50ab0c
--- /dev/null
+++ b/parser/text/BibTexParser.rb
@@ -0,0 +1,7 @@
+require 'bibtex'
+# Text parser for BibTeX input; delegates the parsing to the bibtex gem.
+class BibTexParser < Memento::Parser::TextParser
+  def get_data
+    BibTeX.parse(@value)
+  end
+end
diff --git a/parser/url/ACMPortalParser.rb b/parser/url/ACMPortalParser.rb
new file mode 100644
index 0000000..316c61d
--- /dev/null
+++ b/parser/url/ACMPortalParser.rb
@@ -0,0 +1,28 @@
+# Builds the BibTeX export URL for an ACM Digital Library citation page.
+class ACMPortalParser < Memento::Parser::UrlParser
+  # Group 1 is the value of the "id" query parameter. The leading [?&] keeps
+  # parameters that merely end in "id" (such as CFID) from matching, and \d+
+  # rejects an empty id.
+  # NOTE(review): for dotted ids such as 505168.505187 only the digits before
+  # the dot are captured - confirm that is the id the export page expects.
+  PATTERNS = [
+    Regexp.new('[?&]id=(\d+)', Regexp::IGNORECASE)
+  ].freeze
+  BASE_URL = 'http://dl.acm.org/exportformats.cfm?expformat=bibtex&id='
+
+  # Returns the export URL for the id found in #value.
+  # Raises MementoException if #value is nil/empty or carries no id.
+  def get_citation_url
+    validate
+    PATTERNS.each do |pattern|
+      m = @value.match(pattern)
+      return BASE_URL + m[1] if m
+    end
+    raise MementoException, "Error: Unable to find citation information"
+  end
+
+  # The export page is requested with expformat=bibtex.
+  def get_citation_format
+    'bibtex'
+  end
+end
\ No newline at end of file
diff --git a/parser/url/ASMParser.rb b/parser/url/ASMParser.rb
new file mode 100644
index 0000000..2b5f76e
--- /dev/null
+++ b/parser/url/ASMParser.rb
@@ -0,0 +1,34 @@
+require 'uri'
+
+# Builds the BibTeX download URL for articles on the *.asm.org journal sites.
+class ASMParser < Memento::Parser::UrlParser
+  # Group 1 is the journal sub-domain, group 2 the article path.
+  # (?:a|b) is an alternation of whole words; [a|b] would be a character
+  # class matching any single one of those letters.
+  # .short and .long are accepted too, next to .abstract and .full.
+  PATTERNS = [
+    Regexp.new('https?:\/\/(.*)\.asm\.org.*(?:abstract|full|reprint)\/(.*)\?', Regexp::IGNORECASE),
+    Regexp.new('https?:\/\/(.*)\.asm\.org\/content\/(.*)\.(?:abstract|full|short|long)')
+  ].freeze
+  BASE_URL = 'http://DOMAIN.asm.org/citmgr?type=bibtex&gca='
+
+  # Returns the citation-manager URL for the article in #value.
+  # Raises MementoException if #value matches none of PATTERNS.
+  def get_citation_url
+    PATTERNS.each do |pattern|
+      match = @value.match(pattern)
+      next unless match
+
+      journal = match[1]
+      # URI.escape was removed in Ruby 3.0; the default parser still provides it.
+      gca = URI::DEFAULT_PARSER.escape("#{journal};#{match[2]}")
+      return BASE_URL.gsub('DOMAIN', journal) + gca
+    end
+    raise MementoException, "Error: Unable to find link to bibtex"
+  end
+
+  # The citation manager is queried with type=bibtex.
+  def get_citation_format
+    "bibtex"
+  end
+end
\ No newline at end of file
diff --git a/parser/url/AmazonParser.rb b/parser/url/AmazonParser.rb
new file mode 100644
index 0000000..f571909
--- /dev/null
+++ b/parser/url/AmazonParser.rb
@@ -0,0 +1,46 @@
+# Looks up a book on Amazon by the ASIN embedded in a product page URL.
+# NOTE(review): work in progress - get_data is a stub and no citation format
+# is defined yet, so this parser cannot produce entries.
+class AmazonParser < Memento::Parser::UrlParser
+  # A constant rather than a class-level @BASE_URL, which instance methods
+  # cannot see (inside them @BASE_URL is an unset instance variable, i.e. nil).
+  # NOTE(review): the AWS access key id is hard-coded - consider moving it
+  # to configuration.
+  BASE_URL = 'http://ecs.amazonaws.com/onca/xml?Service=AWSECommerceService&AWSAccessKeyId=1E6W7G64405195A1J702&Operation=ItemLookup&ResponseGroup=Medium&ItemId='
+
+  # URL shapes that carry the ASIN in group 1. \w+ because ASINs may contain
+  # letters, and + so that an empty identifier never matches.
+  PATTERNS = [
+    Regexp.new('/gp/product/(\w+)', Regexp::IGNORECASE),
+    Regexp.new('/ASIN/(\w+)', Regexp::IGNORECASE),
+    Regexp.new('/dp/(\w+)', Regexp::IGNORECASE)
+  ].freeze
+
+  # Not implemented yet: overrides the base implementation and returns nil.
+  def get_data
+  end
+
+  protected
+
+  # Returns the ItemLookup URL for the ASIN found in #value.
+  # Raises MementoException if #value contains no ASIN.
+  def get_citation_url
+    asin = get_asin
+    raise MementoException, 'Unable to get Amazon Standard Identification Number (ASIN).' if asin.nil?
+    BASE_URL + asin
+  end
+
+  # Returns the ASIN captured by the first matching pattern, or nil.
+  def get_asin
+    PATTERNS.each do |pattern|
+      matches = @value.match(pattern)
+      return matches[1] if matches
+    end
+    nil
+  end
+
+  # No citation format has been chosen for Amazon responses yet.
+  def get_citation_format
+    raise MementoException, 'Called abstract method: get_citation_format'
+  end
+end
\ No newline at end of file
diff --git a/parser/url/BlackwellSynergyParser.rb b/parser/url/BlackwellSynergyParser.rb
new file mode 100644
index 0000000..8f74679
--- /dev/null
+++ b/parser/url/BlackwellSynergyParser.rb
@@ -0,0 +1,38 @@
+# Requests a BibTeX citation from Wiley Online Library for an article URL.
+class BlackwellSynergyParser < Memento::Parser::UrlParser
+  # Group 1 is the DOI: the text between "doi/" and a trailing "/abstract"
+  # or "/full". (?:...) is an alternation; [abstract|full] would be a
+  # character class matching a single letter.
+  PATTERNS = [
+    Regexp.new('doi/(.*)/(?:abstract|full)', Regexp::IGNORECASE)
+  ].freeze
+  BASE_URL = 'http://onlinelibrary.wiley.com/documentcitationdownloadformsubmit'
+
+  # Returns the download form fields, including the DOI taken from #value.
+  # Raises MementoException if #value contains no DOI.
+  def get_form_parameters
+    params = {'hasAbstract' => 'CITATION_AND_ABSTRACT', 'fileFormat' => 'BIBTEX', 'submit' => 'Submit'}
+
+    PATTERNS.each do |pattern|
+      match = @value.match(pattern)
+      next unless match
+
+      params['doi'] = match[1]
+      break
+    end
+
+    raise MementoException, "Error: Unable to find DOI" unless params.key?('doi')
+
+    params
+  end
+
+  # The form is always submitted to the same endpoint.
+  def get_citation_url
+    BASE_URL
+  end
+
+  # The form asks for fileFormat=BIBTEX.
+  def get_citation_format
+    'bibtex'
+  end
+end
\ No newline at end of file
diff --git a/parser/url/HubMedParser.rb b/parser/url/HubMedParser.rb
new file mode 100644
index 0000000..b2d1287
--- /dev/null
+++ b/parser/url/HubMedParser.rb
@@ -0,0 +1,24 @@
+# Builds the HubMed BibTeX export URL for a HubMed page URL.
+class HubMedParser < Memento::Parser::UrlParser
+  # Group 1 is the article uid. [0-9]+ (not *) so that a "uids=" without
+  # digits is rejected instead of yielding an export URL without an id.
+  PATTERNS = [
+    Regexp.new('.*uids=([0-9]+)', Regexp::IGNORECASE)
+  ].freeze
+  BASE_URL = "http://www.hubmed.org/export/bibtex.cgi?uids="
+
+  # Returns the export URL for the uid found in #value.
+  # Raises MementoException if #value carries no uid.
+  def get_citation_url
+    PATTERNS.each do |pattern|
+      match = pattern.match(@value)
+      return BASE_URL + match[1] if match
+    end
+    raise MementoException, "Error: Unable to find unique identifier"
+  end
+
+  # The export script is bibtex.cgi.
+  def get_citation_format
+    "bibtex"
+  end
+end
\ No newline at end of file
diff --git a/parser/url/IngentaConnectParser.rb b/parser/url/IngentaConnectParser.rb
new file mode 100644
index 0000000..7eaafdd
--- /dev/null
+++ b/parser/url/IngentaConnectParser.rb
@@ -0,0 +1,32 @@
+# Locates the BibTeX export of an IngentaConnect article.
+class IngentaConnectParser < Memento::Parser::UrlParser
+  # PATTERNS[0] finds the export link in the article page's HTML (group 1 is
+  # its href); PATTERNS[1] extracts the path of the article URL (group 1).
+  PATTERNS = [
+    Regexp.new('title="BibText Export" href="([^\"]*)"', Regexp::IGNORECASE),
+    Regexp.new('ingentaconnect\.com[^/]*/(.*)', Regexp::IGNORECASE)
+  ].freeze
+  BASE_URL = 'http://www.ingentaconnect.com'
+
+  # Returns the URL of the BibTeX export for the article in #value.
+  #
+  # The article page is downloaded and searched for its export link. If the
+  # page has none, the export URL is derived from the article URL itself by
+  # appending "?format=bib".
+  # Raises MementoException if neither approach yields a URL.
+  def get_citation_url
+    page = Memento.get_page(@value).to_s
+    link = page.match(PATTERNS[0])
+    return BASE_URL + link[1] if link
+
+    path = @value.match(PATTERNS[1])
+    return "#{BASE_URL}/#{path[1]}?format=bib" if path
+
+    raise MementoException, "Unable to find bibtext link"
+  end
+
+  # Both kinds of export URL are expected to deliver BibTeX.
+  def get_citation_format
+    'bibtex'
+  end
+end
\ No newline at end of file
diff --git a/parser/url/LeoonlineParser.rb b/parser/url/LeoonlineParser.rb
new file mode 100644
index 0000000..0a89f65
--- /dev/null
+++ b/parser/url/LeoonlineParser.rb
@@ -0,0 +1,51 @@
+# Leoonline was bought by Taylor & Francis; citations are downloaded from
+# tandfonline.com.
+class LeoonlineParser < Memento::Parser::UrlParser
+  # Group 1 is the DOI: everything after "abs/" in the article URL.
+  PATTERNS = [
+    Regexp.new('abs/(.*)', Regexp::IGNORECASE)
+  ].freeze
+  BASE_URL = 'http://www.tandfonline.com/action/downloadCitation'
+
+  # Requests the article page, then downloads and parses its citation.
+  # NOTE(review): presumably the first request is meant to establish a
+  # session before the download - confirm it is needed.
+  def get_data
+    validate
+    Memento.get_page(@value)
+    super
+  end
+
+  # The citation is requested from this fixed URL.
+  def get_citation_url
+    BASE_URL
+  end
+
+  # Returns the download form fields, including the DOI taken from #value.
+  # Raises MementoException if #value contains no DOI.
+  def get_form_parameters
+    # NOTE(review): a sample request
+    # (downloadFileName=tandf_rfse206_1&format=bibtex&direct=true&include=abs)
+    # suggests 'direct' should be 'true' - confirm.
+    params = {'downloadFileName'=> 'tandf_rfse206_1', 'include' => 'abs', 'format' => 'bibtex', 'direct' => 'Download+article+metadata'}
+
+    PATTERNS.each do |pattern|
+      match = @value.match(pattern)
+      next unless match
+
+      params['doi'] = match[1]
+      break
+    end
+
+    raise MementoException, "Error: Unable to find DOI" unless params.has_key?('doi')
+
+    params
+  end
+
+  def get_citation_format
+    'bibtex'
+  end
+end
+
+# Former class name, kept so that existing references keep working.
+LeoonlinePortal = LeoonlineParser
\ No newline at end of file
diff --git a/parser/url/PubmedParser.rb b/parser/url/PubmedParser.rb
new file mode 100644
index 0000000..843d8be
--- /dev/null
+++ b/parser/url/PubmedParser.rb
@@ -0,0 +1,29 @@
+# Builds a BibTeX export URL for a PubMed article; the export URL points at
+# HubMed's bibtex.cgi.
+class PubmedParser < Memento::Parser::UrlParser
+  # URL shapes that carry the PubMed id in group 1. [0-9]+ (not *) so that a
+  # URL without digits (e.g. "/pubmed/?term=...") does not match with an
+  # empty id.
+  PATTERNS = [
+    Regexp.new('.*TermToSearch=([0-9]+)', Regexp::IGNORECASE),
+    Regexp.new('.*list_uids=([0-9]+)', Regexp::IGNORECASE),
+    Regexp.new('/pubmed/([0-9]+)/?', Regexp::IGNORECASE),
+    Regexp.new('uid=([0-9]+)', Regexp::IGNORECASE)
+  ].freeze
+  BASE_URL = 'http://www.hubmed.org/export/bibtex.cgi?uids='
+
+  # Returns the export URL for the article id found in #value.
+  # Raises MementoException if #value matches none of PATTERNS.
+  def get_citation_url
+    PATTERNS.each do |pattern|
+      match = @value.match(pattern)
+      return BASE_URL + match[1] if match
+    end
+    raise MementoException, "Error: Unable to find article identifier number"
+  end
+
+  # The export script is bibtex.cgi.
+  def get_citation_format
+    'bibtex'
+  end
+end
\ No newline at end of file
diff --git a/parser/url/SageParser.rb b/parser/url/SageParser.rb
new file mode 100644
index 0000000..ca54687
--- /dev/null
+++ b/parser/url/SageParser.rb
@@ -0,0 +1,29 @@
+# Builds the BibTeX download URL for articles on SAGE journal sites.
+class SageParser < Memento::Parser::UrlParser
+  # Group 1 is the journal sub-domain, group 2 the digits-and-slashes path
+  # that follows "abstract/" or "reprint/". (?:...) is an alternation;
+  # [abstract|reprint] would be a character class matching a single letter.
+  PATTERNS = [
+    Regexp.new('https?:\/\/([^\.]*).*(?:abstract|reprint)\/([\d\/]+)', Regexp::IGNORECASE)
+  ].freeze
+  BASE_URL = 'http://online.sagepub.com/cgi/citmgr?type=bibtex&gca=sp'
+
+  # Returns the citation-manager URL for the article in #value.
+  # Raises MementoException if #value matches none of PATTERNS.
+  def get_citation_url
+    PATTERNS.each do |pattern|
+      match = @value.match(pattern)
+      return BASE_URL + match[1] + ';' + match[2] if match
+    end
+    raise MementoException, "Error: Unable to find unique identifier"
+  end
+
+  # The citation manager is queried with type=bibtex.
+  def get_citation_format
+    'bibtex'
+  end
+end
+
+# The class used to be called ACMPortal (a copy-paste leftover); the old
+# name is kept so that existing references keep working.
+ACMPortal = SageParser
\ No newline at end of file
diff --git a/test/ACMPortalParserTest.rb b/test/ACMPortalParserTest.rb
new file mode 100644
index 0000000..ba0956c
--- /dev/null
+++ b/test/ACMPortalParserTest.rb
@@ -0,0 +1,17 @@
+require_relative 'AbstractTest'
+
+class ACMPortalParserTest < AbstractTest
+  URLS = [
+    'http://dl.acm.org/citation.cfm?id=505168.505187&coll=DL&dl=GUIDE&CFID=74784343&CFTOKEN=20610835'
+  ].freeze
+
+  # The export URL is derived from the page URL alone, so this check needs
+  # no network access.
+  def test_citation_url
+    URLS.each do |url|
+      parser = ACMPortalParser.new
+      parser.value = url
+      assert_match(/expformat=bibtex&id=\d+\z/, parser.get_citation_url, "Failed: #{url}")
+    end
+  end
+end
\ No newline at end of file
diff --git a/test/ASMParserTest.rb b/test/ASMParserTest.rb
new file mode 100644
index 0000000..cf8ae02
--- /dev/null
+++ b/test/ASMParserTest.rb
@@ -0,0 +1,14 @@
+require_relative 'AbstractTest'
+
+class ASMParserTest < AbstractTest
+  TEST_URL = [
+    'http://jvi.asm.org/content/85/23/12474.abstract'
+  ]
+
+  # Each fixture page must convert to a non-nil Office XML result.
+  def test_toOfficeXML
+    TEST_URL.each do |page_url|
+      assert_not_nil Memento.transform('site', 'msofficexml', page_url), "Failed to retrieve result"
+    end
+  end
+end
\ No newline at end of file
diff --git a/test/AbstractTest.rb b/test/AbstractTest.rb
new file mode 100644
index 0000000..e334919
--- /dev/null
+++ b/test/AbstractTest.rb
@@ -0,0 +1,6 @@
+require 'test/unit'
+require_relative "../Memento"
+
+class AbstractTest < Test::Unit::TestCase
+  # Shared base for the parser tests; requiring this file loads test/unit and Memento.
+end
diff --git a/test/ParserManagerTest.rb b/test/ParserManagerTest.rb
new file mode 100644
index 0000000..cae783f
--- /dev/null
+++ b/test/ParserManagerTest.rb
@@ -0,0 +1,15 @@
+require 'test/unit'
+require_relative "../Memento"
+
+class ParserManagerTest < Test::Unit::TestCase
+  # Format names are matched case-insensitively, so every spelling of
+  # "bibtex" below must resolve to a BibTexParser instance.
+  def test_bibtex
+    spellings = %w[bibtex Bibtex BIBTEX]
+
+    spellings.each do |spelling|
+      resolved = Memento::ParserManager.get_text_parser(spelling)
+      assert_equal BibTexParser, resolved.class
+    end
+  end
+end
diff --git a/test/TextParserTest.rb b/test/TextParserTest.rb
new file mode 100644
index 0000000..fde77d5
--- /dev/null
+++ b/test/TextParserTest.rb
@@ -0,0 +1,14 @@
+require 'test/unit'
+require_relative "../Memento"
+
+class TextParserTest < Test::Unit::TestCase
+  # The fixture sits next to this file, so the test works from any directory.
+  FIXTURE = File.expand_path('../mybib.bib', __FILE__)
+
+  # Converting the BibTeX fixture to Office XML must produce a result.
+  def test_bibtex2office
+    # File.read closes the file again; File.open(...).read leaves it open.
+    bib = Memento.transform("bibtex", "msofficexml", File.read(FIXTURE))
+    assert_not_nil bib, "Failed to convert #{FIXTURE}"
+  end
+end
diff --git a/test/UrlParserTest.rb b/test/UrlParserTest.rb
new file mode 100644
index 0000000..4805b9e
--- /dev/null
+++ b/test/UrlParserTest.rb
@@ -0,0 +1,29 @@
+require_relative 'AbstractTest'
+
+# End-to-end check (needs network access) that one page of every supported
+# website can be converted to Office XML.
+class UrlParserTest < AbstractTest
+  TEST_URL = [
+    # ACM Portal
+    'http://dl.acm.org/citation.cfm?id=505168.505187&coll=DL&dl=GUIDE',
+
+    # ASM
+    'http://jvi.asm.org/content/85/23/12474.abstract',
+
+    # Blackwell Synergy, now Wiley Online Library
+    'http://onlinelibrary.wiley.com/doi/10.1002/smr.509/abstract',
+
+    # HubMed
+    'http://www.hubmed.org/display.cgi?uids=21809171',
+
+    # PubMed - ncbi.nlm.nih.gov
+    'http://www.ncbi.nlm.nih.gov/pubmed/22454401'
+  ].freeze
+
+  def test_officeXML
+    TEST_URL.each do |url|
+      result = Memento.transform('site', 'msofficexml', url)
+      assert_not_nil result, "Failed: #{url}"
+    end
+  end
+end
\ No newline at end of file
diff --git a/test/mybib.bib b/test/mybib.bib
new file mode 100644
index 0000000..caf49d2
--- /dev/null
+++ b/test/mybib.bib
@@ -0,0 +1,120 @@
+@InProceedings{ 2008hst..prop11557C,
+	author = "G. {Canalizo}",
+	title = "{The Nature of low-ionization BAL QSOs}",
+	booktitle = "HST Proposal",
+	year = 2008,
+	month = jul,
+	pages = "11557--+",
+	url = "http://adsabs.harvard.edu/abs/2008hst..prop11557C",
+	adsnote = "Provided by the SAO/NASA Astrophysics Data System"
+}
+ 
+@InProceedings{ 2008AIPC.1053...63C,
+	author = "G. {Canalizo} and M. {Wold} and M. {Lazarova} and M. {Lacy}",
+	title = "{Quasar Black Hole Masses from Velocity Dispersions}",
+	keywords = "Quasars, Galactic nuclei, circumnuclear matter, and bulges, Solid solution hardening, precipitation hardening, and dispersion hardening, aging",
+	booktitle = "American Institute of Physics Conference Series",
+	year = 2008,
+	series = "American Institute of Physics Conference Series",
+	volume = 1053,
+	archivePrefix = "arXiv",
+	eprint = "0807.2433",
+	editor = "{S.~K.~Chakrabarti \& A.~S.~Majumdar}",
+	month = oct,
+	pages = "63--66",
+	doi = "10.1063/1.3009525",
+	url = "http://adsabs.harvard.edu/abs/2008AIPC.1053...63C",
+	adsnote = "Provided by the SAO/NASA Astrophysics Data System"
+}
+ 
+@InProceedings{ 2008sptz.prop50792C,
+	author = "G. {Canalizo} and M. {Lacy} and M. {Lazarova}",
+	title = "{The nature of low-ionization BAL QSOs}",
+	booktitle = "Spitzer Proposal ID 50792",
+	year = 2008,
+	month = mar,
+	pages = "50792--+",
+	url = "http://adsabs.harvard.edu/abs/2008sptz.prop50792C",
+	adsnote = "Provided by the SAO/NASA Astrophysics Data System"
+}
+ 
+@Article{ 2007ApJ...669..801C,
+	author = "G. {Canalizo} and N. {Bennert} and B. {Jungwiert} and A. {Stockton} and F. {Schweizer} and M. {Lacy} and C. {Peng}",
+	title = "{Spectacular Shells in the Host Galaxy of the QSO MC2 1635+119}",
+	journal = "ApJ",
+	archivePrefix = "arXiv",
+	eprint = "0707.2951",
+	keywords = "Galaxies: Active, Galaxies: Evolution, Galaxies: Interactions, Galaxies: Quasars: General, quasars: individual (MC2 1635+119)",
+	year = 2007,
+	month = nov,
+	volume = 669,
+	pages = "801--809",
+	doi = "10.1086/521721",
+	url = "http://adsabs.harvard.edu/abs/2007ApJ...669..801C",
+	adsnote = "Provided by the SAO/NASA Astrophysics Data System"
+}
+@ARTICLE{Bailey,
+   author = "D. H. Bailey and P. N. Swarztrauber",
+   title = "The fractional {F}ourier transform and applications",
+   journal = "SIAM Rev.",
+   volume = 33,
+   number = 3,
+   pages = "389--404",
+   year = 1991
+   }
+
+@ARTICLE{Bay1,
+   author = "A. Bayliss and C. I. Goldstein and E. Turkel",
+   title = "An iterative method for the {H}elmholtz equation",
+   journal = "J. Comp. Phys.",
+   volume = 49,
+   pages = "443--457",
+   year = 1983
+   }
+
+@TECHREPORT{Ernst,
+   author = "O. Ernst and G. Golub",
+   title = "A domain decomposition approach to solving the {H}elmholtz
+		equation with a radiation boundary condition",
+   number = "NA-92-08",
+   school = "Stanford University, Computer Science Department",
+   year = "August 1992"
+   }
+
+@TECHREPORT{Fujitsu,
+   organization = "Fujitsu",
+   title = "FACOM OS IV SSL II USER'S GUIDE, 99SP0050E5",
+   year = 1990
+   }
+
+@ARTICLE{Gold3,
+   author = "C. I. Goldstein",
+   title = "Multigrid methods for elliptic problems in unbounded domains",
+   journal = "SIAM J. Numer. Anal.",
+   volume = 30,
+   pages = "159--183",
+   year = 1993
+   }
+
+@BOOK{Hale,
+   author = "J. K. Hale",
+   title = "Theory of functional--differential equations",
+   publisher = "Springer--Verlag, Berlin--Heidelberg--New York",
+   year = 1977
+   }
+
+@INBOOK{Swa82,
+   author = "P. N. Swarztrauber",
+   title = "Vectorizing the {FFTs}",
+   editor = "G.~Rodrigue",
+   booktitle = "Parallel Computations",
+   publisher = "Academic Press, New York",
+   year = 1982
+   }
+
+@PHDTHESIS{Ta,
+   author = "S. Ta'asan",
+   title = "Multigrid Methods for Highly Oscillatory Problems",
+   school = "Weizmann Institute of Science, Rehovot, Israel",
+   year = "1984"
+   }
\ No newline at end of file
diff --git a/test/test.rb b/test/test.rb
new file mode 100644
index 0000000..3b7e221
--- /dev/null
+++ b/test/test.rb
@@ -0,0 +1,5 @@
+require 'bibtex'
+require '/Users/ragrawal/personal/Memento/rMemento/export/Bib2OfficeXML2'
+# NOTE(review): looks like a scratch script - the absolute path above is machine-specific; confirm whether it is still needed.
+bib = BibTeX.open("mybib.bib")
+puts Bib2OfficeXML2.new.export(bib)
\ No newline at end of file
diff --git a/writer/Bib2OfficeXML.rb.deprecated b/writer/Bib2OfficeXML.rb.deprecated
new file mode 100644
index 0000000..6e605ff
--- /dev/null
+++ b/writer/Bib2OfficeXML.rb.deprecated
@@ -0,0 +1,202 @@
+=begin
+ * @author Ritesh Agrawal
+ * @version 2.0
+ * Takes Bibtex output and returns Office2007 XML String
+=end
+
+class Bib2OfficeXML 
+ 
+  attr_reader :doc_types, :generic, :book , :book_section, :report, :misc, :article_in_preiodical, :conference_proceedings, :journal_article
+  
+  def initialize
+    @doc_types = {
+          'article' => {'name' => 'JournalArticle', 'fields' => ['journal_article'] },
+          'book'    => {'name' => 'Book', 'fields' => ['book']},
+          'booklet' => {'name' => 'Book', 'fields' => ['book']}, 
+          'conference'=> {'name' => 'ConferenceProceedings', 'fields' => ['conference_proceedings']},
+          'inbook'  => {'name' => 'BookSection', 'fields' => ['book_section']},
+          'incollection' => {'name' => 'ArticleInAPeriodical', 'fields' => ['article_in_preiodical']},
+          'inproceedings' => {'name' => 'ConferenceProceedings', 'fields' => ['conference_proceedings']},
+          'manual' => {'name' => 'Report', 'fields' => ['report']},
+          'masterthesis' => {'name' => 'Report', 'fields' => ['report']},
+          'misc' => {'name' => 'Misc', 'fields' => ['Misc']},
+          'phdthesis'  => {'name' => 'Report', 'fields' => ['report']},
+          'proceedings' => {'name' => 'ConferenceProceedings', 'fields' => ['conference_proceedings']},
+          'techreport' => {'name' => 'Report', 'fields' => ['report']},
+          'unpublished' => {'name' => 'Misc', 'fields' => ['misc']}
+        }.freeze
+  
+    @generic = {
+              'id' => 'b:Tag', 
+              'title' => 'b:Title', 
+              'year' => 'b:Year'
+            }.freeze
+
+    @book = {
+            'author' => 'b:Author/b:Author/b:NameList',
+            'place' => 'b:CountryRegion',
+            'publisher' => 'b:Publisher',
+            'editor' => 'b:Author/b:Editor/b:NameList',
+            'volume' => 'b:Volume',
+            'issn' => 'b:StandardNumber',
+            'pages' => 'b:Pages'
+        }.freeze
+
+    @book_section = {
+            'author' => 'b:Author/b:Author/b:NameList',
+            'sec_title'=> 'b:BookTitle',
+            'pages' => 'b:Pages',
+            'place' => 'b:CountryRegion',
+            'publisher' => 'b:Publisher',
+            'editor' => 'b:Author/b:Editor/b:NameList',
+            'volume' => 'b:Volume',
+            'issn' => 'b:StandardNumber'
+          }.freeze
+        
+  @journal_article = {
+            'author' => 'b:Author/b:Author/b:NameList',
+            'journal' => 'b:JournalName',
+            'month' => 'b:Month',
+            'day' => 'b:Day',
+            'pages' => 'b:Pages',
+            'editor' => 'b:Author/b:Editor/b:NameList',
+            'publisher' => 'b:Publisher',
+            'volume' => 'b:Volume',
+            'issue' => 'b:Issue',
+            'issn' => 'b:StandardNumber'
+        }.freeze
+
+  @article_in_preiodical = {
+            'author' => 'b:Author/b:Author/b:NameList',
+            'journal' => 'b:PeriodicalTitle',
+            'month' => 'b:Month',
+            'day' => 'b:Day',
+            'pages' => 'b:Pages',
+            'editor' => 'b:Author/b:Editor/b:NameList',
+            'publisher' => 'b:Publisher',
+            'volume' => 'b:Volume',
+            'issue' => 'b:Issue',
+            'issn' => 'b:StandardNumber'
+        }.freeze
+
+  @conference_proceedings = {
+            'author' => 'b:Author/b:Author/b:NameList',
+            'editor' => 'b:Author/b:Editor/b:NameList',
+            'pages' => 'b:Pages',
+            'journal' => 'b:ConferenceName',
+            'place' => 'b:City',
+            'publisher' => 'b:Publisher',
+            'volume' => 'b:Volume',
+            'issn' => 'b:StandardNumber'
+    }.freeze
+    
+  @report = {
+            'author' => 'b:Author/b:Author/b:NameList',
+            'publisher' => 'b:Publisher',
+            'place' => 'b:City',
+            'pages' => 'b:Pages',
+            'doctype' => 'b:ThesisType',
+            'issn' => 'b:StandardNumber'
+        }.freeze
+
+
+  @misc = {
+            'author' => 'b:Author/b:Author/b:NameList',
+            'sec_title' => 'b:PublicationTitle',
+            'year' => 'b:Year',
+            'month' => 'b:Month',
+            'day' => 'b:Day',
+            'place' => 'b:CountryRegion',
+            'publisher' => 'b:Publisher',
+            'editor' => 'b:Author/b:Editor/b:NameList',
+            'pages' => 'b:Pages',
+            'volume' => 'b:Volume',
+            'issue' => 'b:Issue',
+            'issn' => 'b:StandardNumber'
+        }.freeze
+        
+  end
+  
+  
+  # Function: EXPORT
+  # data: Bibtex parsed data - single entity
+  
+	def export(data)
+	
+		sources =  '<?xml version="1.0" encoding="UTF-8" ?>'
+		sources = sources + '<b:Sources SelectedStyle="" xmlns:b="http://schemas.openxmlformats.org/officeDocument/2006/bibliography" xmlns="http://schemas.openxmlformats.org/officeDocument/2006/bibliography">'
+		
+		data.each do |record|
+		  #sanity check
+		  next if record.nil? or record.empty?
+		  
+		  type = @doc_types[record.type.to_s]
+		  next if type.nil? or type.empty?
+		  
+		  source = '<b:SourceType>' + type["name"] + '</b:SourceType>'
+    
+			#Process Generic Fields
+			generic.each do |field, tag|
+				source = source + "<#{tag}>" + record.send(field).to_s + "</#{tag}>"
+			end
+			
+			#Process specific fields
+		  fields = type["fields"]
+		  
+      fields.each do |field_for|
+          send(field_for).each do |field, tag|
+              unless record.respond_to?(field)
+            	  puts "missing #{field}"
+            	  next
+            	end
+            	if(field == "author" and record.author.length > 0 )
+      					source = source + process_authors(record.author)
+      				elsif(field == "editor" )
+      					source = source + process_editors(record.editor)	
+      				else
+      				  source = source + "<#{tag}>" + record.send(field).to_s + "</#{tag}>" 
+      				end
+          end #loop field_for
+      end #loop fields
+      source = source + '</b:Source>'
+			sources = sources + source    
+		  
+		end # loop data
+		sources = sources + '</b:Sources>';
+		return sources;
+		
+	end #function export
+	
+	private 
+	  def process_authors(authors)
+	    return if authors.nil? or authors.empty?
+	    source = '<b:Author><b:Author><b:NameList>'
+	    authors.each do |author|
+	      source = source + author_tag(author)
+	    end
+	    source = source + '</b:NameList></b:Author></b:Author>'
+	    return source
+	  end
+	  
+	  def process_editors(editors)
+	    return if editors.nil? or editors.empty?
+	    source = '<b:Author><b:Editor><b:NameList>'
+	    editors.each do |author|
+	      source = source + author_tag(author)
+	    end
+	    source = source + '</b:NameList></b:Editor></b:Author>'
+	    return source
+	  end
+	  
+	  def author_tag(author)
+	    last, first = author.split(',', 2)
+	    a = '<b:Last>' + last + '</b:Last>'
+	    b = '<b:First>' + first + '</b:First>'
+	    return '<b:Person>' + a + b + '</b:Person>'
+	  end
+	  
+	  
+	
+end 
+		
+	
\ No newline at end of file
diff --git a/writer/OfficeXML.rb b/writer/OfficeXML.rb
new file mode 100644
index 0000000..44949dd
--- /dev/null
+++ b/writer/OfficeXML.rb
@@ -0,0 +1,107 @@
+class OfficeXML < Memento::Writer::AbstractWriter
+  def initialize
+    @FIELD_MAPPING = {
+      :id => 'b:Tag',
+      :day => 'b:Day',
+      :issn => 'b:StandardNumber',
+      :issue => 'b:Issue',
+      :journal => 'b:JournalName',
+      :month => 'b:Month',
+      :pages => 'b:Pages',
+      :pages => 'b:Pages',
+      :place => 'b:City',
+      :publisher => 'b:Publisher',
+      :booktitle => 'b:BookTitle',
+      :title => 'b:Title', 
+      :volume => 'b:Volume',
+      :year => 'b:Year'
+    }.freeze
+    
+    @DOC_TYPES = {
+      :article => {:name => 'JournalArticle', :journal => 'b:JournalName'},
+      :book => {:name =>  'Book', :place => 'b:CountryRegion'},
+      :booklet => {:name =>  'Book', :place => 'b:CountryRegion'},
+      :conference => {:name =>  'ConferenceProceedings'},
+      :inbook => {:name =>  'BookSection', :place => 'b:CountryRegion'},
+      :incollection => {:name =>  'ArticleInAPeriodical', :journal => 'b:PeriodicalTitle'},
+      :inproceedings => {:name =>  'ConferenceProceedings'},
+      :manual => {:name =>  'Report'},
+      :mastersthesis => {:name =>  'Report'},
+      :misc => {:name =>  'Report'},
+      :phdthesis => {:name =>  'Report'},
+      :proceedings => {:name =>  'ConferenceProceedings'},
+      :techreport => {:name =>  'Report'},
+      :unpublished => {:name =>  'Misc', :booktitle => 'b:PublicationTitle', :place => 'b:CountryRegion'}
+    }.freeze
+  end
+  
+  def export(data)
+	
+		sources =  '<?xml version="1.0" encoding="UTF-8" ?>'
+		sources = sources + '<b:Sources SelectedStyle="" xmlns:b="http://schemas.openxmlformats.org/officeDocument/2006/bibliography" xmlns="http://schemas.openxmlformats.org/officeDocument/2006/bibliography">'
+		
+		data.each do |record|
+		  #sanity check
+
+		  next if record.nil? or record.empty?
+		  
+		  source = "<b:Source>"
+		  fields = @FIELD_MAPPING.merge(@DOC_TYPES[record.type])
+		  
+		  #fields that require custom handling
+		  source = source + '<b:SourceType>' + fields[:name] + '</b:SourceType>'
+		  source = source + "<#{fields[:id]}>" + record.id + "</#{fields[:id]}>"
+		 
+		  source = source + process_editors(record.editor) if record.respond_to?("editor")
+		  
+		  
+		  record.fields.each do |key, value|
+		    xml_tag = fields[key]	    
+		    next unless xml_tag
+		    if key == :author
+		       source = source + process_authors(value)
+		    elsif key == :editor
+		       source = source + process_editor(value)
+		    else
+		      source = source + "<#{xml_tag}>" + value + "</#{xml_tag}>"
+		    end
+		  end
+
+		    
+		  source = source + '</b:Source>'
+		  sources = sources + source
+	  end
+		sources = sources + '</b:Sources>';
+		return sources;
+	end #function export
+	
+	private 
+	  def process_authors(authors)
+	    return if authors.nil? or authors.empty?
+	    source = '<b:Author><b:Author><b:NameList>'
+	    authors.each do |author|
+	      source = source + author_tag(author)
+	    end
+	    source = source + '</b:NameList></b:Author></b:Author>'
+	    return source
+	  end
+	  
+	  def process_editors(editors)
+	    return if editors.nil? or editors.empty?
+	    source = '<b:Author><b:Editor><b:NameList>'
+	    editors.each do |author|
+	      source = source + author_tag(author)
+	    end
+	    source = source + '</b:NameList></b:Editor></b:Author>'
+	    return source
+	  end
+	  
+	  def author_tag(author)
+	    last, first = author.split(',', 2)
+	    a = '<b:Last>' + last.to_s + '</b:Last>'
+	    b = '<b:First>' + first.to_s + '</b:First>'
+	    return '<b:Person>' + a + b + '</b:Person>'
+	  end
+  
+  
+end
\ No newline at end of file
diff --git a/writer/Writer.rb b/writer/Writer.rb
new file mode 100644
index 0000000..820d927
--- /dev/null
+++ b/writer/Writer.rb
@@ -0,0 +1,11 @@
+module Memento
+  module Writer
+    
+  	class AbstractWriter
+      def export(data)
+        raise MementoException, "calling abstract method: export"
+      end
+    end
+  	
+  end
+end
diff --git a/writer/WriterManager.rb b/writer/WriterManager.rb
new file mode 100644
index 0000000..cc98ff1
--- /dev/null
+++ b/writer/WriterManager.rb
@@ -0,0 +1,17 @@
+require_relative 'Writer'
+require_relative 'OfficeXML'
+
+module Memento
+  module WriterManager
+      WRITERS =  {
+    		:msofficexml => {'name'=>'MS Office XML', 'parser'=>'OfficeXML'}
+    	}.freeze
+  	
+    	def self.get_writer(format)
+    	  raise MementoException, 'Error: Missing required parameter: format' if format.nil? or format.empty? 
+    	  	info = WRITERS[format.to_s.downcase.strip.to_sym]
+      		raise MementoException, 'Error: unsupported writer type: #{format}' if info.nil? or info.empty?
+      		return Kernel.const_get(info['parser']).new
+    	end
+  end
+end
\ No newline at end of file