In [None]:
# Run the bibliograph test module with pytest via the IPython `!` shell escape.
!pytest bibliograph/tests.py

In [None]:
from crossref.restful import Works, Etiquette
# Etiquette describing this client. The four positional values look like
# application name, version, project URL and contact e-mail -- confirm the
# order against the crossrefapi Etiquette signature.
my_etiquette = Etiquette(
    'bibliograph',
    '0.0.1',
    'https://github.com/shortorian/bibliograph',
    'short.devin@gmail.com',
)

works = Works(etiquette=my_etiquette)

# Build a query from works.sample(5) narrowed with select('DOI').
result = works.sample(5).select('DOI')

# Send a GET to the query URL with only_headers=True, reusing the custom
# header carried by the query object. The return value is the cell output.
result.do_http_request(
    'get',
    result.url,
    only_headers=True,
    custom_header=result.custom_header,
)

In [None]:
import bibliograph as bg

# Load the NCAR test bibliography with bg.slurp_bibtex. The keyword arguments
# are grouped by apparent role only; their precise effects are defined by
# slurp_bibtex itself and are not visible from this notebook.
tn = bg.slurp_bibtex(
    'bibliograph/test_data/NCAR_papers.bib',
    # Syntax and link-constraint CSVs from bibliograph/resources.
    entry_syntax_fname='bibliograph/resources/default_bibtex_syntax.csv',
    link_constraints_fname='bibliograph/resources/default_link_constraints.csv',
    # Parsing switches.
    syntax_case_sensitive=False,
    allow_redundant_items=True,
    automatic_aliasing=True,
    # Special characters / placeholder values.
    space_char='|',
    na_string_values='!',
    na_node_type='missing',
)

# Display the `strings` attribute of the result as the cell output.
tn.strings

In [3]:
import pandas as pd
from bibliograph.alias_generators import western_surname_alias_generator_serial
from bibliograph.alias_generators import western_surname_alias_generator_vector
from bibtexparser import dumps as _dump_bibtex_string
from bibtexparser.bibdatabase import BibDatabase as _bibtex_db
from bibtexparser.bparser import BibTexParser as _bibtexparser

# Parse the NCAR test bibliography into a DataFrame built from the parser's
# list of entries.
bibtex_fname = 'bibliograph/test_data/NCAR_papers.bib'
bibtex_parser = _bibtexparser(common_strings=True)

with open(bibtex_fname, encoding='utf8') as f:
    data = pd.DataFrame(bibtex_parser.parse_file(f).entries)

# Split each author field on ' and ', flatten to one name per row, casefold,
# drop repeated names and renumber the index from zero.
names = (
    data.author
    .str.split(' and ')
    .explode()
    .str.casefold()
    .drop_duplicates()
    .reset_index(drop=True)
)

# Cell output: the aliases generated for every distinct name.
western_surname_alias_generator_vector(names)

0        thompsonpd
1       schneidersh
2       dickinsonre
3            firorj
4         kasaharaa
           ...     
4595         stowgd
4596          oyera
4597      friedmani
4598         tylers
4599      robbinsde
Length: 4600, dtype: object

In [None]:
# One boolean mask per surname of interest (substring match on each name).
has_sheeley = names.str.contains('sheeley')
has_newkirk = names.str.contains('newkirk')
has_bruner = names.str.contains('bruner')

# Show every name that matches at least one of the three surnames.
has_any_surname = has_sheeley | has_newkirk | has_bruner
names.loc[has_any_surname].array

In [None]:
import pandas as pd
from bibliograph.alias_generators import western_surname_alias_generator_serial
from bibliograph.alias_generators import western_surname_alias_generator_vector


# Hand-written name strings in assorted formats (surname-first, given-first,
# prefixes, suffixes, titles, organisations) used to compare the two alias
# generators.
names = pd.Series([
    'Loon, H. van',
    'van Loon, h.',
    'van Loon, Harry',
    'VAN LOON, H',
    'Van loon, ',
    'some other person',
    'Rodríguez-Silva, Ileana',
    'nasa',
    'Martin Luther King, jr.',
    'King, Martin Luther jr.',
    'King, Jr., Martin Luther',
    'Mr. Martin Luther King, jr.',
    'St. Whatever, Given Name',
    'Whatever, Given Name St.',
    'University of Washington, Seattle',
    'University of Chicago',
    'Ms. Gerould, Joanne',
    'Gerould, Ms. Joanne',
    'Gerould, Joanne, Ms.',
    'Joanne Gerould, Ms.',
    'Surname, Compound Given-Name',
    'Monde, Alice le',
    'le Monde, Alice',
    'Sheeley, Jr., N. R.',
    'Newkirk, Jr., G.',
    'Newkirk, Jr., Gordon',
    'Bruner, Jr., E. C.'
])

# Run the element-wise generator and the whole-Series generator on the same
# input.
serial = names.map(western_surname_alias_generator_serial)
vector = western_surname_alias_generator_vector(names)

# The generators agree on a row when the aliases are equal or both are
# missing. Jupyter only displays the LAST expression of a cell, so a bare
# comparison here would be computed and silently discarded; bind the result
# and print it so the outcome of the check is actually visible.
aliases_agree = ((serial == vector) | (serial.isna() & vector.isna())).all()
print('serial and vector aliases agree:', aliases_agree)

# Cell output: each input name next to its vector-generated alias.
pd.DataFrame({'in': names, 'out': vector})

In [2]:
import pandas as pd
from bibliograph.alias_generators import western_surname_alias_generator_serial
from bibliograph.alias_generators import western_surname_alias_generator_vector

# Casefolded Newkirk / Sheeley / Bruner name variants, with and without the
# 'jr.' suffix, used to compare the two alias generators.
names = pd.Series([
    'newkirk, gordon a.',
    'newkirk, gordon',
    'bruner, e. c.',
    'newkirk, g. a.',
    'sheeley, jr., n. r.',
    'newkirk, jr., g.',
    'sheeley, n. r.',
    'newkirk, g.',
    'newkirk, jr., gordon',
    'newkirk, jr., g. a.',
    'bruner, jr., e. c.'
])

# Run the element-wise generator and the whole-Series generator on the same
# input.
serial = names.map(western_surname_alias_generator_serial)
vector = western_surname_alias_generator_vector(names)

# The generators agree on a row when the aliases are equal or both are
# missing. Jupyter only displays the LAST expression of a cell, so a bare
# comparison here would be computed and silently discarded; bind the result
# and print it so the outcome of the check is actually visible.
aliases_agree = ((serial == vector) | (serial.isna() & vector.isna())).all()
print('serial and vector aliases agree:', aliases_agree)

# Cell output: the vector-generated aliases in sorted order, which places
# equal aliases on adjacent rows.
vector.sort_values()

2        brunerec
10     brunerjrec
1        newkirkg
7        newkirkg
0       newkirkga
3       newkirkga
5      newkirkjrg
8      newkirkjrg
9     newkirkjrga
4     sheeleyjrnr
6       sheeleynr
dtype: object