In [1]:
import pandas as pd
import shorthand as shnd

# Build a parser from the default entry and link syntax files. It gets its
# own name so that `s` (assigned further down) refers to only one kind of
# object within this cell.
shorthand_parser = shnd.Shorthand(
    entry_syntax="shorthand/resources/default_entry_syntax.csv",
    link_syntax="shorthand/resources/default_link_syntax.csv",
    syntax_case_sensitive=False
)

# Disabled alternative input, kept for reference:
# parsed = shorthand_parser.parse_text(
#     'shorthand/test_data/manual_annotation.shnd',
#     skiprows=2,
#     comment_char='#'
# )

# Parse the single-column test file with explicit separator, prefix and
# missing-value settings.
parsed = shorthand_parser.parse_text(
    'shorthand/test_data/single_column.shnd',
    item_separator='__',
    default_entry_prefix='wrk',
    space_char='|',
    na_string_values='!',
    na_node_type='missing',
    skiprows=0,
    comment_char='#',
    drop_na=False
)

links = parsed.links
# NOTE(review): this is not the last expression of the cell, so a notebook
# does not display it; it is kept only so the cell executes the same steps.
links.iloc[60:]

# Disabled memory-usage report, kept for reference:
# print('       strings', parsed.strings.memory_usage(deep=True).sum()/1000, 'kb')
# print('         links', parsed.links.memory_usage(deep=True).sum()/1000, 'kb')
# print('resolved links', parsed.resolve_links().memory_usage(deep=True).sum()/1000, 'kb')

# Select the target string ids of every link whose type id matches the id
# looked up for 'entry' in 'link_types'.
entry_type = parsed.id_lookup('link_types', 'entry')
entry_string_ids = parsed.links.loc[
    parsed.links['link_type_id'] == entry_type,
    'tgt_string_id'
]

# NOTE(review): the two expressions below are evaluated and discarded (they
# are not the cell's last expression). They still run, so they still raise
# if a query or the merge fails.
(
    parsed.resolve_links()
    .query('src_string.str.contains("nasa")')
    .query('src_node_type != "shorthand_text"')
    .query('tgt_node_type != "shorthand_text"')
)

(
    parsed.resolve_links()
    .query('link_type == "cited"')
    .merge(parsed.links, left_index=True, right_index=True)
)

# The name `s` is kept for the synthesized entries so that any later cell
# reading `s` sees the same value as before.
s = parsed.synthesize_shorthand_entries('wrk', fill_spaces=True)

# Expected shorthand strings for the synthesized 'wrk' entries.
check = pd.Series([
    'asmith_bwu__1999__s_bams__101__803__xxx',
    'asmith_bwu__1998__s_bams__100__42__yyy',
    'bjones__1975__s_jats__90__1__!',
    'bwu__1989__t_long|title__!__80__!',
    'Some|Author__1989__t_A|Title|With|\\#__!__!__!',
    'asmith_bwu__2008__s_bams__110__1__zzz'
])

# Cell result: True only if every synthesized entry equals its expected string.
(check == s).all()

True

In [3]:
import pandas as pd
import shorthand as shnd
from bibtexparser.bparser import BibTexParser as _BibTexParser

# Read the short BibTeX test file into a bibtexparser database object.
bibtex_parser = _BibTexParser(common_strings=True)
with open(
    "shorthand/test_data/bibtex_test_data_short.bib",
    encoding='utf8',
) as f:
    bibdatabase = bibtex_parser.parse_file(f)

# Build a DataFrame from the parsed entries.
data = pd.DataFrame(bibdatabase.entries)

# Parser configured with the default BibTeX entry syntax file.
s = shnd.Shorthand(
    entry_syntax="shorthand/resources/default_bibtex_syntax.csv",
    syntax_case_sensitive=False,
)

# Only the first four rows of `data` are handed to the parser.
parsed = s.parse_items(
    data.head(4),
    space_char='|',
    na_string_values='!',
    na_node_type='missing',
)


Unnamed: 0,file,pages,note,year,month,author,journal,urldate,number,language,...,copyright,volume,title,ENTRYTYPE,ID,abstract,editor,publisher,booktitle,address
0,Newkirk_Eddy_1962_Daytime Sky Radiance from Fo...,638--641,Number: 4829\nPublisher: Nature Publishing Group,1962,May,"Newkirk, Gordon A. and Eddy, John A.",Nature,2020-08-19,4829.0,en,...,1962 Nature Publishing Group,194.0,Daytime {Sky} {Radiance} from {Forty} to {Eigh...,article,newkirk_daytime_1962,,,,,
1,Wiin-Nielsen_1962_ON TRANSFORMATION OF KINETIC...,311--323,Publisher: American Meteorological Society,1962,August,"Wiin-Nielsen, A.",Monthly Weather Review,2020-08-19,8.0,en,...,,90.0,{ON} {TRANSFORMATION} {OF} {KINETIC} {ENERGY} ...,article,wiin-nielsen_transformation_1962,,,,,
2,Wiin-Nielsen_1962_On truncation errors due to ...,261--280,Publisher: Taylor \& Francis\n\_eprint: https:...,1962,January,"Wiin-Nielsen, A.",Tellus,2020-08-19,3.0,,...,,14.0,On truncation errors due to vertical differenc...,article,wiin-nielsen_truncation_1962,Some estimates of truncation errors due to ver...,,,,
3,Lally_1962_Meteorological Measurements—The Gen...,451--453,Publisher: American Meteorological Society,1962,September,"Lally, Vincent E.",Bulletin of the American Meteorological Society,2020-08-19,9.0,en,...,,43.0,Meteorological {Measurements}—{The} {Gentle} {...,article,lally_meteorological_1962,,,,,
4,Squires_Turner_1962_An entraining jet model fo...,422--434,Publisher: Taylor \& Francis\n\_eprint: https:...,1962,January,"Squires, P. and Turner, J. S.",Tellus,2020-08-19,4.0,,...,,14.0,An entraining jet model for cumulo-nimbus updr...,article,squires_entraining_1962,A model of a cumulo-nimbus updraught is presen...,,,,
5,,64--77,,1962,July,"London, Julius","Archiv für Meteorologie, Geophysik und Bioklim...",2020-08-19,1.0,en,...,,12.0,The use of satellite observations for atmosphe...,article,london_use_1962,The problem of satellite observation of infrar...,,,,
6,,144--166,,1962,October,"Haurwitz, B.","Archiv für Meteorologie, Geophysik und Bioklim...",2020-08-19,2.0,en,...,,13.0,Wind and pressure oscillations in the upper at...,article,haurwitz_wind_1962,Observations of the diurnal (S1) and semidiurn...,,,,
7,Akasofu et al_1963_The main phase of great mag...,3345--3350,\_eprint: https://onlinelibrary.wiley.com/doi/...,1963,,"Akasofu, S.-I. and Chapman, S. and Venkatesan, B.",Journal of Geophysical Research,2020-08-19,11.0,en,...,,68.0,The main phase of great magnetic storms,article,akasofu_main_1963,It is shown that the main phase of great magne...,,,,
8,Akasofu_Chapman_1963_The enhancement of the eq...,2375--2382,\_eprint: https://onlinelibrary.wiley.com/doi/...,1963,,"Akasofu, Syun-Ichi and Chapman, Sydney",Journal of Geophysical Research,2020-08-19,9.0,en,...,,68.0,The enhancement of the equatorial electrojet d...,article,akasofu_enhancement_1963,It is shown that the growth of auroral electro...,,,,
9,Latham_Mason_1961_Generation of Electric Charg...,537--549,Publisher: The Royal Society,1961,,"Latham, J. and Mason, B. J.",Proceedings of the Royal Society of London. A....,2021-03-09,1303.0,,...,,260.0,Generation of {Electric} {Charge} {Associated}...,article,latham_generation_1961,The electrical charging which results from col...,,,,


In [16]:
from bibtexparser.bibdatabase import BibDatabase as _bibtex_db
from bibtexparser import dumps as _dump_bibtex_string

def btwriter(entry_series):
    """Serialize one bibliography entry to a BibTeX-formatted string.

    Parameters
    ----------
    entry_series : pandas.Series
        Fields of a single entry. Missing values are dropped and every
        remaining value is converted with ``str``.
        NOTE(review): presumably indexed by BibTeX field name, since the
        caller passes rows of ``data`` -- confirm.

    Returns
    -------
    The output of ``bibtexparser.dumps`` for a database holding only this
    entry.
    """
    fields = dict(entry_series.dropna().map(str))
    database = _bibtex_db()
    database.entries = [fields]
    return _dump_bibtex_string(database)

# Serialize only the row labelled 9. The previous form ran `btwriter` over
# every row of `data` and then discarded all results but this one.
print(btwriter(data.loc[9]))

@article{latham_generation_1961,
 abstract = {The electrical charging which results from collisions between ice crystals and a simulated hailstone is measured as a function of their temperature difference, and of the size and impact velocity of the crystals. It is found that the sign of the charging is governed by that of the temperature difference, the hailstone becoming negatively charged if it is warmer than the rebounding crystals. The magnitude of the charging is proportional to the temperature difference but rather insensitive to the size and impact velocity of the crystals. With a temperature difference of 5 ⚬C, a rebounding crystal of diameter about 50 μ produces, on average, a charge of 5 × 10-9 e.s.u. The electrification of an artificial pellet of soft hail growing by the accretion of supercooled water droplets (riming) is also investigated. Freezing of the droplets on the hailstone is accompanied by the ejection of positively charged ice splinters, the hailstone acquiring a 