Permalink
Browse files

Adding bibtex file iterator.

  • Loading branch information...
odarbelaeze committed Mar 18, 2016
1 parent 996a7df commit 065a4a79816714a14695db661d6915ac18c44183
Showing with 47 additions and 1 deletion.
  1. +8 −0 data/bib/oaa.bib
  2. +23 −1 lsa/record.py
  3. +7 −0 lsa/scripts/populate.py
  4. +9 −0 test/test_scripts.py
View
@@ -28,3 +28,11 @@ @inproceedings{Duque2015j
title = {{Evaluaci\'{o}n de Accesibilidad de Recursos Educativos Digitales Multimedia}},
year = {2015}
}
@inproceedings{Duque2015j,
author = {Duque, N\'{e}stor and Ospina, Alejandra and Londo\~{n}o, Lu\'{\i}s Felipe and Tabares, Valentina},
booktitle = {Congreso Internacional en Ambientes Virtuales de Aprendizajes Accesibles y Adaptativos - CAVA 2015},
file = {:F$\backslash$:/MENDELEY/TodosMendeley/Duque et al. - 2015 - Evaluaci\'{o}n de Accesibilidad de Recursos Educativos Digitales Multimedia.pdf:pdf},
keywords = {accesibilidad,accessibility,automatic,evaluaci\'{o}n autom\'{a}tica,evaluaci\'{o}n manual,evaluation,keyworks,learning object,manual evaluation,material multimedia,multimedia,objetos de aprendizaje},
title = {{Evaluaci\'{o}n de Accesibilidad de Recursos Educativos Digitales Multimedia}},
year = {2015}
}
View
@@ -1,5 +1,6 @@
import hashlib
import functools
import hashlib
import json
import operator
import re
@@ -127,6 +128,11 @@ def _clear_keywords(self, raw):
line = raw.get(self.get_mapping('keywords'), '')
return re.split(r'[,; ]+', line)
def _clear_uuid(self, raw):
sha = hashlib.sha1()
sha.update(json.dumps(raw).encode('utf-8'))
return sha.hexdigest()
def get_mapping(self, field):
    '''
    Translate a field name through ``self.mappings``.

    Returns the entry stored under ``field`` in ``self.mappings`` when
    there is one; otherwise returns ``field`` itself unchanged.
    '''
    lookup = self.mappings.get
    return lookup(field, field)
@@ -184,3 +190,19 @@ def __iter__(self):
if line[:2] == 'ER':
yield parser.parse('\n'.join(buff))
buff = []
class BibtexRecordIterator(RecordIterator):
    '''
    Iterates over bibtex records.

    Every entry of the bibtex file at ``self.filename`` is handed to an
    instance of ``parser_class`` and the parsed result is yielded.
    '''

    parser_class = BibtexRecordParser

    def __iter__(self):
        '''
        Yield one parsed record per entry found in the bibtex file.
        '''
        # The database is fully loaded by bibtexparser.load, so the file is
        # closed here instead of being held open while the generator is
        # suspended between yields (or abandoned by the consumer).
        with open(self.filename, 'r') as bibtex:
            database = bibtexparser.load(bibtex)
        parser = self.parser_class()
        for entry in database.entries:
            yield parser.parse(entry)
View
@@ -7,6 +7,7 @@
from lsa.record import FroacRecordIterator
from lsa.record import IsiRecordIterator
from lsa.record import BibtexRecordIterator
from .dbutil import collection_name
from .dbutil import collection
@@ -17,6 +18,8 @@ def recordset_class(name):
return IsiRecordIterator
elif name == 'xml':
return FroacRecordIterator
elif name == 'bib':
return BibtexRecordIterator
raise NotImplementedError('{} parser is not implemented yet'.format(name))
@@ -26,6 +29,8 @@ def recordset_class(name):
help='Use the xml parser (default)')
@click.option('--isi', 'kind', flag_value='isi',
help='Use the isi plain text parser (default xml)')
@click.option('--bib', 'kind', flag_value='bib',
help='Use the bibtex parser (default xml)')
@click.option('--wipedb/--no-wipedb', default=True,
help='Wipe existing database.')
@click.option('--dbname', default='program',
@@ -63,9 +68,11 @@ def lsapopulate(pattern, kind, wipedb, dbname, verbose):
click.echo('I\'m processing file {}...'.format(filename))
rs = rs_class(filename)
for record in rs:
print('here')
try:
records.insert_one(record)
except DuplicateKeyError:
print('duplicate')
continue
click.echo('And... I\'m done')
View
@@ -18,6 +18,15 @@ def test_basic_usage(runner):
assert 'Usage' in res.output
def test_populate_bibtex(runner):
    '''
    Populating from the sample bibtex files exits cleanly and the verbose
    output reports three records in the database.
    '''
    cli_args = ['--bib', '--verbose', 'data/bib/*.bib']
    res = runner.invoke(lsapopulate, cli_args)
    assert res.exit_code == 0
    assert 'The database contains 3 records' in res.output
def test_basic_usage_model(runner):
res = runner.invoke(lsamodel, ['--help'])
assert res.exit_code == 0

0 comments on commit 065a4a7

Please sign in to comment.