Skip to content

Commit

Permalink
Merge branch 'master' of github.com:okfn/bibserver
Browse files Browse the repository at this point in the history
  • Loading branch information
Mark MacGillivray committed Jan 19, 2012
2 parents 362987e + e650b3c commit f41d25e
Show file tree
Hide file tree
Showing 14 changed files with 4,268 additions and 31 deletions.
14 changes: 7 additions & 7 deletions bibserver/parser.py
Expand Up @@ -3,6 +3,7 @@
from parsers.BibTexParser import BibTexParser
from parsers.JSONParser import JSONParser
from parsers.CSVParser import CSVParser
from parsers.RISParser import RISParser

class Parser(object):

Expand All @@ -13,17 +14,16 @@ def parse(self, fileobj, format):
:return: a python dict json-i-fiable to bibjson.
'''
if format == "bibtex" or format == "bib":
parser = BibTexParser()
data, metadata = parser.parse(fileobj)
parser = BibTexParser(fileobj)
elif format == "json":
parser = JSONParser()
data, metadata = parser.parse(fileobj)
parser = JSONParser(fileobj)
elif format == "csv" or format == "google":
parser = CSVParser()
data, metadata = parser.parse(fileobj)
parser = CSVParser(fileobj)
elif format == "ris":
parser = RISParser(fileobj)
else:
raise Exception('Unable to convert from format: %s' % format)

data, metadata = parser.parse()
return data, metadata


Expand Down
19 changes: 11 additions & 8 deletions bibserver/parsers/BibTexParser.py
Expand Up @@ -4,6 +4,8 @@
import unicodedata
import re

from bibserver.parsers import BaseParser

'''this file can be called as a module or called directly from the command line like so:
python BibTexParser.py /path/to/file.bib
Expand All @@ -23,9 +25,11 @@
Returns a record dict
'''

class BibTexParser(object):
class BibTexParser(BaseParser):

def __init__(self):
def __init__(self, fileobj):
super(BibTexParser, self).__init__(fileobj)

# set which bibjson schema this parser parses to
self.schema = "v0.82"
self.has_metadata = False
Expand All @@ -46,13 +50,13 @@ def __init__(self):
}
self.identifier_types = ["doi","isbn","issn"]

def parse(self, fileobj):
def parse(self):
'''given a fileobject, parse it for bibtex records,
and pass them to the record parser'''
records = []
record = ""
# read each line, bundle them up until they form an object, then send for parsing
for line in fileobj:
for line in self.fileobj:
if '--BREAK--' in line:
break
else:
Expand Down Expand Up @@ -2663,11 +2667,10 @@ def getnames(self,names):

# in case file is run directly
if __name__ == "__main__":
import sys
parser = BibTexParser()
import sys
try:
fileobj = open(sys.argv[1])
print parser.parse(fileobj)
parser = BibTexParser(open(sys.argv[1]))
print parser.parse()
except:
print parser.parse_record(sys.argv[1])

10 changes: 4 additions & 6 deletions bibserver/parsers/CSVParser.py
@@ -1,13 +1,11 @@
import csv
from bibserver.parsers import BaseParser

class CSVParser(object):
class CSVParser(BaseParser):

def __init__(self):
pass

def parse(self, fileobj):
def parse(self):
#dialect = csv.Sniffer().sniff(fileobj.read(1024))
d = csv.DictReader(fileobj)
d = csv.DictReader(self.fileobj)
data = []

# do any required conversions
Expand Down
10 changes: 4 additions & 6 deletions bibserver/parsers/JSONParser.py
@@ -1,12 +1,10 @@
import json
from bibserver.parsers import BaseParser

class JSONParser(object):
class JSONParser(BaseParser):

def __init__(self):
pass

def parse(self, fileobj):
incoming = json.load(fileobj)
def parse(self):
incoming = json.load(self.fileobj)

if 'records' in incoming:
# if the incoming is bibjson, get records and metadata
Expand Down
109 changes: 109 additions & 0 deletions bibserver/parsers/RISParser.py
@@ -0,0 +1,109 @@
'''this file can be called as a module or called directly from the command line like so:
python RISParser.py /path/to/file.txt
Returns a list of record dicts
Details of the RIS format
http://en.wikipedia.org/wiki/RIS_%28file_format%29
'''

# Lookup table from RIS tag to the key stored in the parsed record.
# Tags that are not listed here are kept under their raw tag name by the
# parser (it looks tags up with FIELD_MAP.get(tag, tag)).
# Note that several tags deliberately or accidentally share a target key
# (AU/A2 -> "author", IS/M1 -> "number", N1/RN -> "notes", AB/C1 -> "note").
FIELD_MAP = {
    # authors, abstract, address, accession number
    "A2": "author",
    "A3": "tertiary author",
    "A4": "subsidiary author",
    "AB": "note",
    "AD": "institution",
    "AN": "accession number",
    "AU": "author",
    # custom fields, caption, call number, city
    "C1": "note",
    "C2": "pmcid",
    "C3": "custom 3",
    "C4": "custom 4",
    "C5": "custom 5",
    "C6": "nihmsid",
    "C7": "custom 7",
    "C8": "custom 8",
    "CA": "caption",
    "CN": "call number",
    "CY": "place published",
    # dates, databases, identifiers
    "DA": "date",
    "DB": "name of database",
    "DO": "doi",
    "DP": "database provider",
    "ET": "epub date",
    "IS": "number",
    "J2": "alternate title",
    "KW": "keywords",
    # links, language, label
    "L1": "file attachments",
    "L4": "figure",
    "LA": "language",
    "LB": "label",
    # miscellaneous and notes
    "M1": "number",
    "M2": "start page",
    "M3": "type",
    "N1": "notes",
    "NV": "number of volumes",
    "OP": "original publication",
    # publication details
    "PB": "publisher",
    "PY": "year",
    "RI": "reviewed item",
    "RN": "notes",
    "RP": "reprint edition",
    "SE": "section",
    "SN": "issn",
    "SP": "pages",
    "ST": "short title",
    # titles and record type
    "T2": "secondary title",
    "T3": "tertiary title",
    "TA": "translated author",
    "TI": "title",
    "TT": "translated title",
    "TY": "type ",
    # remaining tags
    "UR": "url",
    "VL": "volume",
    "Y2": "access date",
}

def _decode_authors(values):
    '''Return one {u'name': ...} dict per raw value, each decoded as UTF-8.'''
    return [{u'name': raw.decode('utf8')} for raw in values]


def DEFAULT_VALUE_FUNC(values):
    '''Decode every raw value as UTF-8 and join them with a single space.'''
    decoded = [raw.decode('utf8') for raw in values]
    return u' '.join(decoded)


# Per-tag converters applied to the list of raw values collected for a tag;
# tags without an entry are handled by DEFAULT_VALUE_FUNC.
VALUE_MAP = {
    'AU': _decode_authors,
}

from bibserver.parsers import BaseParser

class RISParser(BaseParser):
    '''Parser for RIS formatted input.

    Each input line is expected to look like ``TAG  - value``.  A ``TY`` tag
    starts a new record; all tag/value pairs up to the next ``TY`` belong to
    that record.  Tags are translated through FIELD_MAP and their values
    through VALUE_MAP (falling back to DEFAULT_VALUE_FUNC).
    '''

    def __init__(self, fileobj):
        '''Store the file object (via BaseParser) and start with no records.'''
        super(RISParser, self).__init__(fileobj)
        # Converted records; parse() appends to and returns this same list.
        self.data = []

    def add_chunk(self, chunk):
        '''Convert one raw record and append it to self.data.

        :param chunk: dict mapping a raw RIS tag to the list of raw values
            collected for that tag.  An empty chunk is ignored.
        '''
        if not chunk:
            return
        record = {}
        for tag, values in chunk.items():
            convert = VALUE_MAP.get(tag, DEFAULT_VALUE_FUNC)
            record[FIELD_MAP.get(tag, tag)] = convert(values)
        self.data.append(record)

    def parse(self):
        '''Read self.fileobj line by line and build the list of records.

        Blank lines and lines without a `` - `` separator are skipped, and
        tags with an empty value are not stored.

        :return: a ``(records, metadata)`` tuple; metadata is always an
            empty dict.
        '''
        chunk = {}
        for line in self.fileobj:
            line = line.strip()
            if not line:
                continue
            # Split on the first separator only; a value may itself
            # contain ' - ' and must be kept intact.
            tag, separator, value = line.partition(' - ')
            if not separator:
                continue
            # Standard RIS pads the tag with two spaces before the dash
            # ("TY  - JOUR"), which leaves a trailing space on the tag.
            # Without stripping it, the 'TY' record boundary and the
            # FIELD_MAP / VALUE_MAP lookups would never match.
            tag = tag.strip()
            if tag == 'TY':
                # A new record starts: flush the one collected so far.
                self.add_chunk(chunk)
                chunk = {}
            if value:
                chunk.setdefault(tag, []).append(value)
        # Flush the final record, which has no following 'TY' line.
        self.add_chunk(chunk)
        return self.data, {}

# in case file is run directly: parse the RIS file named by the first
# command line argument and print the records as indented JSON
if __name__ == "__main__":
    import json
    import sys

    # Use a context manager so the input file is closed even if parsing
    # raises, instead of leaving the handle open until interpreter exit.
    with open(sys.argv[1]) as fileobj:
        parser = RISParser(fileobj)
        data, metadata = parser.parse()
    sys.stdout.write(json.dumps(data, indent=2))
8 changes: 8 additions & 0 deletions bibserver/parsers/__init__.py
@@ -0,0 +1,8 @@
class BaseParser(object):
    '''Common base class for the parsers: holds the input file object.

    When the object offers ``seek``, its first three bytes are inspected.
    If they are a UTF-8 byte-order mark the stream is left positioned just
    after it; otherwise the stream is rewound to offset 0.  Objects without
    ``seek`` are stored untouched.
    '''

    def __init__(self, fileobj):
        can_seek = hasattr(fileobj, 'seek')
        if can_seek:
            # Some files have Byte-order marks inserted at the start
            leading = fileobj.read(3)
            has_bom = (leading == '\xef\xbb\xbf')
            if not has_bom:
                # Not a BOM: put back what was just read.
                fileobj.seek(0)
        self.fileobj = fileobj
153 changes: 153 additions & 0 deletions doc/Makefile
@@ -0,0 +1,153 @@
# Makefile for Sphinx documentation
#

# You can set these variables from the command line.
# SPHINXOPTS: extra options appended to every sphinx-build call (empty here).
SPHINXOPTS =
# SPHINXBUILD: the command used to run Sphinx.
SPHINXBUILD = sphinx-build
# PAPER: selects PAPEROPT_a4 or PAPEROPT_letter through $(PAPEROPT_$(PAPER));
# left empty, neither paper option is added.
PAPER =
# BUILDDIR: root directory under which every target writes its output.
BUILDDIR = _build

# Internal variables.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
# Options shared by most builders; the trailing "." is the source directory.
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .

# Every target in this file names an action, not a file to be produced, so
# all of them are declared phony.  "texinfo" and "info" are included so that
# a stray file or directory with one of those names cannot make the target
# look up to date.
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man texinfo info changes linkcheck doctest gettext

# help: print the list of available targets with a one-line description each.
# It is the first target in the file.
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo " html to make standalone HTML files"
	@echo " dirhtml to make HTML files named index.html in directories"
	@echo " singlehtml to make a single large HTML file"
	@echo " pickle to make pickle files"
	@echo " json to make JSON files"
	@echo " htmlhelp to make HTML files and a HTML help project"
	@echo " qthelp to make HTML files and a qthelp project"
	@echo " devhelp to make HTML files and a Devhelp project"
	@echo " epub to make an epub"
	@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo " latexpdf to make LaTeX files and run them through pdflatex"
	@echo " text to make text files"
	@echo " man to make manual pages"
	@echo " texinfo to make Texinfo files"
	@echo " info to make Texinfo files and run them through makeinfo"
	@echo " gettext to make PO message catalogs"
	@echo " changes to make an overview of all changed/added/deprecated items"
	@echo " linkcheck to check all external links for integrity"
	@echo " doctest to run all doctests embedded in the documentation (if enabled)"

# clean: delete everything below $(BUILDDIR).  The leading "-" tells make to
# carry on even if rm reports an error.
clean:
	-rm -rf $(BUILDDIR)/*

# html: run the "html" builder; output goes to $(BUILDDIR)/html.
html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

# dirhtml: run the "dirhtml" builder; output goes to $(BUILDDIR)/dirhtml.
dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

# singlehtml: run the "singlehtml" builder; output goes to
# $(BUILDDIR)/singlehtml.
singlehtml:
	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

# pickle: run the "pickle" builder; output goes to $(BUILDDIR)/pickle.
pickle:
	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
	@echo
	@echo "Build finished; now you can process the pickle files."

# json: run the "json" builder; output goes to $(BUILDDIR)/json.
json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."

# htmlhelp: run the "htmlhelp" builder; output goes to $(BUILDDIR)/htmlhelp.
htmlhelp:
	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in $(BUILDDIR)/htmlhelp."

# qthelp: run the "qthelp" builder into $(BUILDDIR)/qthelp, then print the
# follow-up commands for the generated BibServer.qhcp project.
qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/BibServer.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/BibServer.qhc"

# devhelp: run the "devhelp" builder into $(BUILDDIR)/devhelp, then print the
# commands for viewing the result.  "$$HOME" is escaped so that make passes a
# literal $HOME on to the shell.
devhelp:
	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/BibServer"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/BibServer"
	@echo "# devhelp"

# epub: run the "epub" builder; output goes to $(BUILDDIR)/epub.
epub:
	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
	@echo
	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."

# latex: run the "latex" builder; output goes to $(BUILDDIR)/latex.  The
# LaTeX sources are only generated here, not compiled (see latexpdf).
latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	      "(use \`make latexpdf' here to do that automatically)."

# latexpdf: run the "latex" builder, then invoke make recursively in
# $(BUILDDIR)/latex on its "all-pdf" target.
latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

# text: run the "text" builder; output goes to $(BUILDDIR)/text.
text:
	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."

# man: run the "man" builder; output goes to $(BUILDDIR)/man.
man:
	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."

# texinfo: run the "texinfo" builder; output goes to $(BUILDDIR)/texinfo.
# The Texinfo sources are only generated here (see the info target).
texinfo:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	      "(use \`make info' here to do that automatically)."

# info: run the "texinfo" builder, then invoke make recursively in
# $(BUILDDIR)/texinfo on its "info" target.
# The recursive call goes through $(MAKE) rather than a literal "make" so the
# sub-make is the same program that is running this file and inherits its
# command line flags (-n, -j, ...), matching the latexpdf target.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."

# gettext: run the "gettext" builder; output goes to $(BUILDDIR)/locale.
# Uses I18NSPHINXOPTS, which omits the shared "-d $(BUILDDIR)/doctrees"
# option that ALLSPHINXOPTS carries.
gettext:
	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

# changes: run the "changes" builder; output goes to $(BUILDDIR)/changes.
changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."

# linkcheck: run the "linkcheck" builder; output goes to
# $(BUILDDIR)/linkcheck.
linkcheck:
	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	      "or in $(BUILDDIR)/linkcheck/output.txt."

# doctest: run the "doctest" builder; output goes to $(BUILDDIR)/doctest.
doctest:
	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
	@echo "Testing of doctests in the sources finished, look at the " \
	      "results in $(BUILDDIR)/doctest/output.txt."

0 comments on commit f41d25e

Please sign in to comment.