#!/usr/bin/env python
"""Extract bibliographic references with ParsCit and convert them to BibTeX.

Pipeline (each stage writes its result into the output directory):

1. ``parseRefStrings.pl`` (ParsCit) parses the input file
   -> ``parscit_temp.xml``
2. Saxon applies ``parscit2mods.xsl`` to ParsCit's XML
   -> ``parscit_mods.xml``
3. ``xml2bib`` (bibutils) converts the MODS XML to BibTeX
   -> ``parscit.bib``

Usage: biblio_script.sh INPUT_FILE OUTPUT_DIR

Author: Matteo Romanello

Provenance: recovered from the patch "Initialized repo and imported files"
(commit 5d83f2360a1162052c4fc35e4c6d4c4c82133d0a, mromanello,
Mon, 30 Aug 2010 10:51:12 +0100), which created biblio_script.sh
(mode 100755, 53 lines) and parscit2mods.xsl (mode 100644, 174 lines).
"""

import os
import subprocess
import sys

# paths to executables
PARSCIT_PATH = "/Applications/ParsCit/bin/"
BIBUTILS_PATH = "/Applications/bibutils_4.8/"
SAXON_PATH = "/56k/Downloads/saxonhe9-2-1-2j/saxon9he.jar"

# paths to resources
XSLT_TRANFORM_PATH = "/56k/Documents/Research/training_parscit/xslt/parscit2mods.xsl"


def run_command(argv):
    """Run an external program and return its standard output as lines.

    The program is started directly from the argument list (no shell), so
    paths containing spaces or shell metacharacters are passed through
    verbatim. The exit status is ignored and stderr is left attached to the
    caller's stderr.

    Args:
        argv: Program name followed by its arguments, as a list of strings.

    Returns:
        A list of output lines, each still carrying its line ending.

    Raises:
        OSError: If the program cannot be started (e.g. it does not exist).
    """
    process = subprocess.Popen(
        argv, stdout=subprocess.PIPE, universal_newlines=True
    )
    try:
        return process.stdout.readlines()
    finally:
        # Always release the pipe and reap the child, even if reading fails.
        process.stdout.close()
        process.wait()


def write_lines(path, lines):
    """Write *lines* to *path*, replacing any existing file."""
    with open(path, "w") as handle:
        handle.writelines(lines)


def parscit_to_mods(parscit_out):
    """Transform a ParsCit XML file into MODS XML using Saxon.

    Args:
        parscit_out: Path of the XML file produced by ParsCit.

    Returns:
        The lines Saxon wrote to standard output.
    """
    saxon_cmd = [
        "java",
        "-jar",
        SAXON_PATH,
        "-xsl:%s" % XSLT_TRANFORM_PATH,
        "-s:%s" % parscit_out,
    ]
    print("Transforming Parscit's output into mods xml...")
    return run_command(saxon_cmd)


def mods_to_bibtex(mods_xml):
    """Convert a MODS XML file to BibTeX using bibutils' ``xml2bib``.

    Args:
        mods_xml: Path of the MODS XML file.

    Returns:
        The lines ``xml2bib`` wrote to standard output.
    """
    return run_command([os.path.join(BIBUTILS_PATH, "xml2bib"), mods_xml])


def main(argv=None):
    """Run the whole extraction pipeline.

    Args:
        argv: Argument vector (program name, input file, output directory).
            Defaults to ``sys.argv``. Arguments after the output directory
            are ignored.

    Returns:
        0 on success; 1 if the arguments are missing, the input file does
        not exist, or the output directory does not exist.
    """
    argv = sys.argv if argv is None else argv

    # Both positional arguments are required; the script name is argv[0].
    if len(argv) < 3:
        prog = os.path.basename(argv[0]) if argv else "biblio_script.sh"
        print("Usage: %s INPUT_FILE OUTPUT_DIR" % prog)
        return 1

    inp_file, out_dir = argv[1], argv[2]
    if not os.path.isfile(inp_file):
        sys.stderr.write("Error: input file not found: %s\n" % inp_file)
        return 1
    if not os.path.isdir(out_dir):
        sys.stderr.write("Error: output directory not found: %s\n" % out_dir)
        return 1

    print("Extracting references from the input file...")
    parscit_xml = os.path.join(out_dir, "parscit_temp.xml")
    parscit_cmd = [os.path.join(PARSCIT_PATH, "parseRefStrings.pl"), inp_file]
    write_lines(parscit_xml, run_command(parscit_cmd))

    # transform parscit's output into mods 3.x
    parscit_mods = os.path.join(out_dir, "parscit_mods.xml")
    write_lines(parscit_mods, parscit_to_mods(parscit_xml))

    # transform mods intermediate xml into bibtex
    parscit_bibtex = os.path.join(out_dir, "parscit.bib")
    print("Transforming intermediate mods xml into Bibtex...")
    write_lines(parscit_bibtex, mods_to_bibtex(parscit_mods))
    return 0


if __name__ == "__main__":
    sys.exit(main())


# ---------------------------------------------------------------------------
# Companion stylesheet: parscit2mods.xsl
#
# The same commit added the stylesheet referenced by XSLT_TRANFORM_PATH. Its
# XML markup is not present in the copy this file was recovered from; only
# the following text fragments survive, listed in their original order:
#
#   Matteo Romanello
#   http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-2.xsd
#   ### JOURNAL ARTICLES ###
#   ### BOOKS ###
#   Here is where most of the interesting stuff happens.
#   host
#   continuing
#   marc
#   journal
#   academic journal
#   monographic
#   citekey
#   Authors of a journal article
#   journal_article
#   Authors of a book
#   book
#   Handles the creation of name elements in mods format. The current mode.
#   personal
#   given
#   family
#   marcrelator
#   text
#   author
#   creator
#   page
#   volume
#   text
# ---------------------------------------------------------------------------