Skip to content

Commit

Permalink
Initialized repo and imported files
Browse files Browse the repository at this point in the history
  • Loading branch information
mromanello committed Aug 30, 2010
0 parents commit 5d83f23
Show file tree
Hide file tree
Showing 2 changed files with 227 additions and 0 deletions.
53 changes: 53 additions & 0 deletions biblio_script.sh
@@ -0,0 +1,53 @@
#!/usr/bin/env python
# Author: Matteo Romanello, <matteo.romanello@gmail.com>

import os,sys

# paths to executables
PARSCIT_PATH="/Applications/ParsCit/bin/"
BIBUTILS_PATH="/Applications/bibutils_4.8/"
SAXON_PATH="/56k/Downloads/saxonhe9-2-1-2j/saxon9he.jar"

# paths to resources
XSLT_TRANFORM_PATH="/56k/Documents/Research/training_parscit/xslt/parscit2mods.xsl"

def parscit_to_mods(parscit_out):
    """Transform a ParsCit XML file into MODS XML by running Saxon.

    Runs ``java -jar SAXON_PATH -xsl:XSLT_TRANFORM_PATH -s:<parscit_out>``
    and captures what the process writes to standard output.

    Args:
        parscit_out: path of the XML file holding ParsCit's output.

    Returns:
        The lines Saxon wrote to stdout, as a list of strings with their
        line endings kept.

    Raises:
        OSError: if the ``java`` executable cannot be started.
    """
    # Imported locally: the module header only imports os and sys.
    import subprocess

    # Announce the step before running it, not after it has finished.
    print("Transforming Parscit's output into mods xml...")
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters from being split or interpreted.
    saxon_cmd = [
        "java",
        "-jar",
        SAXON_PATH,
        "-xsl:%s" % XSLT_TRANFORM_PATH,
        "-s:%s" % parscit_out,
    ]
    proc = subprocess.Popen(saxon_cmd, stdout=subprocess.PIPE,
                            universal_newlines=True)
    out, _ = proc.communicate()
    return out.splitlines(True)

def mods_to_bibtex(mods_xml):
    """Convert a MODS XML file to BibTeX with bibutils' ``xml2bib``.

    Args:
        mods_xml: path of the MODS XML file to convert.

    Returns:
        The lines ``xml2bib`` wrote to stdout, as a list of strings with
        their line endings kept.

    Raises:
        OSError: if the ``xml2bib`` executable cannot be started.
    """
    # Imported locally: the module header only imports os and sys.
    import subprocess

    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters from being split or interpreted.
    bibutils_cmd = [os.path.join(BIBUTILS_PATH, "xml2bib"), mods_xml]
    proc = subprocess.Popen(bibutils_cmd, stdout=subprocess.PIPE,
                            universal_newlines=True)
    out, _ = proc.communicate()
    return out.splitlines(True)

# Script entry: <inputFile> <outDir>.
# Pipeline: ParsCit (reference extraction) -> Saxon/XSLT (MODS) -> xml2bib
# (BibTeX). Each stage's output is written to a file inside <outDir>.
# Both arguments are required, so the guard checks for two of them; checking
# for only one would raise IndexError when <outDir> is missing.
if len(sys.argv) > 2:
    # Imported locally: the module header only imports os and sys.
    import subprocess

    inp_file = sys.argv[1]
    out_dir = sys.argv[2]
    if not os.path.isfile(inp_file):
        sys.exit("Input file not found: %s" % inp_file)
    if not os.path.isdir(out_dir):
        sys.exit("Output directory not found: %s" % out_dir)

    print("Extracting references from the input file...")
    # Argument list instead of a shell string: safe for paths with spaces.
    parscit_proc = subprocess.Popen(
        [os.path.join(PARSCIT_PATH, "parseRefStrings.pl"), inp_file],
        stdout=subprocess.PIPE, universal_newlines=True)
    parscit_out, _ = parscit_proc.communicate()
    # os.path.join works whether or not out_dir ends with a separator.
    parscit_xml = os.path.join(out_dir, 'parscit_temp.xml')
    with open(parscit_xml, 'w') as xml_file:
        xml_file.write(parscit_out)

    # transform parscit's output into mods 3.x
    parscit_mods = os.path.join(out_dir, 'parscit_mods.xml')
    with open(parscit_mods, 'w') as mods_file:
        mods_file.writelines(parscit_to_mods(parscit_xml))

    # transform mods intermediate xml into bibtex
    parscit_bibtex = os.path.join(out_dir, 'parscit.bib')
    print("Transforming intermediate mods xml into Bibtex...")
    with open(parscit_bibtex, 'w') as bib_file:
        bib_file.writelines(mods_to_bibtex(parscit_mods))
else:
    print("Usage: <inputFile> <outDir>")
174 changes: 174 additions & 0 deletions parscit2mods.xsl
@@ -0,0 +1,174 @@
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="2.0" xmlns:mods="http://www.loc.gov/mods/v3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-2.xsd"
xmlns:xd="http://www.pnp-software.com/XSLTdoc">
<xd:author>Matteo Romanello</xd:author>
<xsl:output method="xml" indent="yes"/>
<!-- Wraps the citation list in a MODS modsCollection element and emits journal articles first, then books. -->
<xsl:template match="citationList">
<xsl:element name="modsCollection" namespace="http://www.loc.gov/mods/v3">
<xsl:attribute name="xsi:schemaLocation" namespace="http://www.w3.org/2001/XMLSchema-instance">http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-2.xsd</xsl:attribute>
<xsl:comment> ### JOURNAL ARTICLES ### </xsl:comment>
<!-- Journal articles: every citation that has a journal child. -->
<xsl:apply-templates select="citation[journal]"/>
<xsl:comment> ### BOOKS ### </xsl:comment>
<!-- Books: citations with a title but no booktitle, pages or journal. -->
<!-- NOTE(review): citations matching neither selection are not processed by this template; confirm that is intended. -->
<xsl:apply-templates select="citation[title and not(booktitle) and not(pages) and not(journal)]"/>
</xsl:element>
</xsl:template>
<xd:doc> Here is where most of the interesting stuff happens. </xd:doc>
<!-- Builds one MODS record per citation: an ID attribute, the title, a body chosen by the journal/book heuristic below, and a citekey identifier. -->
<xsl:template match="citation">
<xsl:element name="mods" namespace="http://www.loc.gov/mods/v3">
<xsl:attribute name="ID">
<!-- add compatibility check for saxon or not -->
<xsl:value-of select="generate-id()"/>
</xsl:attribute>
<!-- The title is emitted once here, before the kind of resource is decided. -->
<xsl:apply-templates select="title"/>
<!-- heuristic to determine the kind of resource -->
<xsl:choose>
<!-- CASE 1: Paper in a Journal-->
<xsl:when test="./journal">
<xsl:apply-templates select="authors" mode="journal_article"/>
<!-- The journal is described as the host item of the article. -->
<xsl:element name="relatedItem" namespace="http://www.loc.gov/mods/v3">
<xsl:attribute name="type">host</xsl:attribute>
<xsl:apply-templates select="journal" mode="journal_article"/>
<xsl:element name="originInfo" namespace="http://www.loc.gov/mods/v3">
<xsl:element name="issuance" namespace="http://www.loc.gov/mods/v3">continuing</xsl:element>
</xsl:element>
<xsl:element name="part" namespace="http://www.loc.gov/mods/v3">
<!-- Every citation child except authors and journal is processed in journal_article mode inside part. -->
<xsl:apply-templates select="*[name()!='authors'][name()!='journal']" mode="journal_article"/>
</xsl:element>
<xsl:element name="genre" namespace="http://www.loc.gov/mods/v3">
<xsl:attribute name="authority">marc</xsl:attribute>
<xsl:text>journal</xsl:text>
</xsl:element>
<xsl:element name="genre" namespace="http://www.loc.gov/mods/v3">
<xsl:text>academic journal</xsl:text>
</xsl:element>
</xsl:element>
</xsl:when>
<!-- CASE 2: Book -->
<!-- Same predicate as the book selection in the citationList template: title present, no booktitle, pages or journal. -->
<xsl:when test=".[title and not(booktitle) and not(pages) and not(journal)]">
<xsl:apply-templates select="authors" mode="book"/>
<xsl:element name="originInfo" namespace="http://www.loc.gov/mods/v3">
<xsl:apply-templates select="location | date | publisher" mode="book"/>
<xsl:element name="issuance" namespace="http://www.loc.gov/mods/v3">monographic</xsl:element>
</xsl:element>
</xsl:when>
<xsl:otherwise>
<!-- Fallback: process every child except authors in the default mode. -->
<!-- NOTE(review): title was already applied above, so a title child is processed a second time here; confirm whether that is intended. -->
<xsl:apply-templates select="*[name()!='authors']"/>
</xsl:otherwise>
</xsl:choose>
<!--<xsl:element name="typeOfResource" namespace="http://www.loc.gov/mods/v3">text</xsl:element>-->
<!-- The citekey uses generate-id() on the same citation node as the ID attribute above. -->
<xsl:element name="identifier" namespace="http://www.loc.gov/mods/v3">
<xsl:attribute name="type">citekey</xsl:attribute>
<xsl:value-of select="generate-id()"/>
</xsl:element>
</xsl:element>
</xsl:template>
<xd:doc>Authors of a journal article</xd:doc>
<!-- Processes each author child, passing 'journal_article' as the mode parameter of the author template. -->
<xsl:template match="authors" mode="journal_article">
<xsl:apply-templates select="author">
<xsl:with-param name="mode">journal_article</xsl:with-param>
</xsl:apply-templates>
</xsl:template>
<xd:doc>Authors of a book</xd:doc>
<!-- Processes each author child, passing 'book' as the mode parameter of the author template. -->
<xsl:template match="authors" mode="book">
<xsl:apply-templates select="author">
<xsl:with-param name="mode">book</xsl:with-param>
</xsl:apply-templates>
</xsl:template>
<xd:doc>Handles the creation of name elements in mods format. <xd:param type="string">The current mode.</xd:param>
</xd:doc>
<!-- Emits one MODS personal name for an author: a namePart per whitespace-separated token, followed by a role whose text depends on the mode parameter. -->
<xsl:template match="author">
    <!-- 'journal_article' or 'book', as passed by the authors templates; selects the roleTerm text. -->
    <xsl:param name="mode"/>
    <xsl:element name="name" namespace="http://www.loc.gov/mods/v3">
        <xsl:attribute name="type">personal</xsl:attribute>
        <!-- normalize-space() trims the value and collapses runs of whitespace, so tokenize() never yields empty tokens (which would become empty "family" nameParts). -->
        <xsl:for-each select="tokenize(normalize-space(.),' ')">
            <xsl:element name="namePart" namespace="http://www.loc.gov/mods/v3">
                <xsl:choose>
                    <!-- Heuristic: a one-character token is treated as an initial, i.e. a given name. -->
                    <xsl:when test="string-length(.)=1">
                        <xsl:attribute name="type">given</xsl:attribute>
                    </xsl:when>
                    <!-- Any longer token is treated as (part of) the family name. -->
                    <xsl:otherwise>
                        <xsl:attribute name="type">family</xsl:attribute>
                    </xsl:otherwise>
                </xsl:choose>
                <xsl:value-of select="."/>
            </xsl:element>
        </xsl:for-each>
        <xsl:element name="role" namespace="http://www.loc.gov/mods/v3">
            <xsl:element name="roleTerm" namespace="http://www.loc.gov/mods/v3">
                <xsl:attribute name="authority">marcrelator</xsl:attribute>
                <xsl:attribute name="type">text</xsl:attribute>
                <!-- roleTerm stays empty when mode is neither of the two known values. -->
                <xsl:choose>
                    <xsl:when test="$mode='journal_article'">
                        <xsl:text>author</xsl:text>
                    </xsl:when>
                    <xsl:when test="$mode='book'">
                        <xsl:text>creator</xsl:text>
                    </xsl:when>
                </xsl:choose>
            </xsl:element>
        </xsl:element>
    </xsl:element>
</xsl:template>
<!-- Named template: emits titleInfo/title holding the string value of the context node. Called by the title and journal templates. -->
<xsl:template name="title">
<xsl:element name="titleInfo" namespace="http://www.loc.gov/mods/v3">
<xsl:element name="title" namespace="http://www.loc.gov/mods/v3">
<xsl:value-of select="."/>
</xsl:element>
</xsl:element>
</xsl:template>
<!-- Default-mode rule for title: delegates to the named template "title". -->
<xsl:template match="title">
<xsl:call-template name="title"/>
</xsl:template>
<!-- The journal name is emitted as a titleInfo/title via the named template "title". -->
<xsl:template match="journal" mode="journal_article">
<xsl:call-template name="title"/>
</xsl:template>
<!-- Converts a page range written as "start-\-end" (two hyphens) into a MODS extent with unit="page". -->
<xsl:template match="pages" mode="journal_article">
    <!-- Trim the value once and split it on the double hyphen; a single page yields one token. -->
    <xsl:variable name="range" select="tokenize(normalize-space(.),'--')"/>
    <xsl:element name="extent" namespace="http://www.loc.gov/mods/v3">
        <xsl:attribute name="unit">page</xsl:attribute>
        <xsl:element name="start" namespace="http://www.loc.gov/mods/v3">
            <xsl:value-of select="$range[1]"/>
        </xsl:element>
        <!-- Emit end only when a non-empty second token exists, so a single page does not produce an empty end element. -->
        <xsl:if test="$range[2]">
            <xsl:element name="end" namespace="http://www.loc.gov/mods/v3">
                <xsl:value-of select="$range[2]"/>
            </xsl:element>
        </xsl:if>
    </xsl:element>
</xsl:template>
<!-- Emits the volume as detail type="volume" with the value in a number child. -->
<xsl:template match="volume" mode="journal_article">
<xsl:element name="detail" namespace="http://www.loc.gov/mods/v3">
<xsl:attribute name="type">volume</xsl:attribute>
<xsl:element name="number" namespace="http://www.loc.gov/mods/v3">
<xsl:value-of select="."/>
</xsl:element>
</xsl:element>
</xsl:template>
<!-- In journal_article mode the date is copied into a MODS date element. -->
<xsl:template mode="journal_article" match="date">
<xsl:element name="date" namespace="http://www.loc.gov/mods/v3">
<xsl:value-of select="."/>
</xsl:element>
</xsl:template>
<!-- In book mode the location becomes place/placeTerm with type="text". -->
<xsl:template match="location" mode="book">
<xsl:element name="place" namespace="http://www.loc.gov/mods/v3">
<xsl:element name="placeTerm" namespace="http://www.loc.gov/mods/v3">
<xsl:attribute name="type">text</xsl:attribute>
<xsl:value-of select="."/>
</xsl:element>
</xsl:element>
</xsl:template>
<!-- In book mode the date is copied into a MODS dateIssued element. -->
<xsl:template match="date" mode="book">
<xsl:element name="dateIssued" namespace="http://www.loc.gov/mods/v3">
<xsl:value-of select="."/>
</xsl:element>
</xsl:template>
<!-- In book mode the publisher is copied into a MODS publisher element. -->
<xsl:template match="publisher" mode="book">
<xsl:element name="publisher" namespace="http://www.loc.gov/mods/v3">
<xsl:value-of select="."/>
</xsl:element>
</xsl:template>
<!-- In journal_article mode the text of notes is written to the output as an XML comment rather than as an element. -->
<xsl:template match="notes" mode="journal_article">
<xsl:comment>
<xsl:value-of select="."/>
</xsl:comment>
</xsl:template>
<!-- Citation children that produce no output in journal_article mode. -->
<xsl:template mode="journal_article" match="location"/>
<xsl:template mode="journal_article" match="title"/>
<!-- Catch-all: any other element reached in journal_article mode is dropped. Without it the built-in template rules would copy the element's text into the output. The "*" pattern has a lower default priority than the name patterns used by the other journal_article templates, so those still take precedence. -->
<xsl:template mode="journal_article" match="*"/>
</xsl:stylesheet>

0 comments on commit 5d83f23

Please sign in to comment.