In [None]:
import bibliograph as bg

# Inputs for this cell: a short BibTeX sample file and the CSV describing
# the BibTeX entry syntax.
entry_syntax_fname = 'bibliograph/resources/default_bibtex_syntax.csv'
bibtex_fname = 'bibliograph/test_data/bibtex_test_data_short.bib'

# Load the BibTeX file through bg.slurp_bibtex and keep the result as `tn`.
tn = bg.slurp_bibtex(
    bibtex_fname,
    entry_syntax_fname,
    # matching behaviour
    syntax_case_sensitive=True,
    aliases_case_sensitive=True,
    aliases_dict=None,
    allow_redundant_items=False,
    # placeholder characters / missing-value handling
    space_char='|',
    na_string_values='!',
    na_node_type='missing',
)

# Last expression of the cell, so its return value is what the cell displays.
tn.resolve_assertions()

In [29]:
# Run the tests in bibliograph/tests.py with pytest via IPython's `!` shell escape.
!pytest bibliograph/tests.py

platform win32 -- Python 3.9.13, pytest-7.1.2, pluggy-1.0.0
rootdir: c:\Users\short\Dropbox\jd\60-69 projects\61 public\61.02 bibliograph
plugins: anyio-3.4.0
collected 28 items

bibliograph\tests.py ............................                        [100%]



In [None]:
    import bibliograph as bg
    import pandas as pd

    # Expected synthesized 'wrk' entries, one string per parsed entry.
    check = pd.Series([
        'asmith_bwu__1999__s_bams__101__803__xxx',
        'asmith_bwu__1998__s_bams__100__42__yyy',
        'bjones__1975__s_jats__90__1__!',
        'bwu__1989__t_long|title__x__80__!',
        'Some|Author__1989__t_Title|With|\\#__x__x__!',
        'asmith_bwu__2008__s_bams__110__1__zzz'
    ])

    # Shorthand parser built from the default entry and link syntax files.
    s = bg.Shorthand(
        syntax_case_sensitive=False,
        entry_syntax="bibliograph/resources/default_entry_syntax.csv",
        link_syntax="bibliograph/resources/default_link_syntax.csv",
    )

    # Parse the manually annotated sample file.
    parsed = s.parse_text(
        'bibliograph/test_data/manual_annotation.shnd',
        # file layout
        skiprows=2,
        comment_char='#',
        # entry structure
        item_separator='__',
        default_entry_prefix='wrk',
        # placeholder characters / missing-value handling
        space_char='|',
        na_string_values=['!', 'x'],
        na_node_type='missing',
    )

    # Rebuild shorthand strings for the 'wrk' entries from the parsed data.
    synthesized = parsed.synthesize_shorthand_entries(
        'wrk',
        fill_spaces=True,
    )

    # Element-wise comparison; as the last expression it is displayed by the cell.
    synthesized == check

In [None]:
import bibliograph as bg
import pandas as pd

# Link-constraint definitions handed to slurp_shorthand below.
constraints_fname = "bibliograph/resources/default_link_constraints.csv"

# Alias files, keyed by 'actor' and 'work'.
aliases_dict = dict(
    actor='bibliograph/test_data/aliases_actor.csv',
    work='bibliograph/test_data/aliases_work.csv',
)

# Load the shorthand sample with the default entry/link syntax files and keep
# the result as `tn`.
tn = bg.slurp_shorthand(
    'bibliograph/test_data/shorthand_for_auto_aliasing.shnd',
    "bibliograph/resources/default_entry_syntax.csv",
    "bibliograph/resources/default_link_syntax.csv",
    # syntax / aliasing
    syntax_case_sensitive=False,
    aliases_dict=aliases_dict,
    aliases_case_sensitive=False,
    automatic_aliasing=True,
    # links
    link_constraints_fname=constraints_fname,
    links_excluded_from_edges=['alias', 'title', 'supertitle'],
    # entry structure and placeholder characters
    default_entry_prefix='wrk',
    item_separator='__',
    space_char='|',
    comment_char='#',
    na_string_values='!',
    na_node_type='missing',
)

# --- Export options -------------------------------------------------------
# Link types whose edges should appear in the output.
output_link_types = ['cited', 'acknowledged']
# Optional row selector applied to tn.edges with .loc; None means "use every
# edge".
# NOTE(review): presumably index labels or a boolean mask aligned with
# tn.edges -- confirm against intended callers.
edge_subset = None
include_prefixes = False
include_references = False
string_type = 'abbr'

# Translate the requested link type names to IDs, silently skipping any name
# that is not present in tn.link_types.
link_type_ids = [
    tn.id_lookup('link_types', t) for t in output_link_types
    if t in tn.link_types['link_type'].array
]

# Start from the requested rows (all edges when no subset was given), then
# keep only edges of the requested link types.  The selector is applied
# *before* the link-type filter so that a user-supplied subset is honoured.
if edge_subset is None:
    edge_subset = tn.edges
else:
    edge_subset = tn.edges.loc[edge_subset]

edge_subset = edge_subset.query('link_type_id.isin(@link_type_ids)')

def get_assertions_by_string_id(tn, string_ids, assertion_component):
    """
    Select the rows of ``tn.assertions`` that reference any of the given
    string IDs in the requested component column(s).

    Parameters
    ----------
    tn : object
        Must expose an ``assertions`` DataFrame with the columns
        ``src_string_id``, ``tgt_string_id`` and ``ref_string_id``.
    string_ids : list-like
        String IDs to search for (anything accepted by ``Series.isin``).
    assertion_component : str or iterable of str
        Which columns to search: any of ``'src'``, ``'tgt'``, ``'ref'``,
        or ``'all'`` to search all three. Unrecognized names are ignored.

    Returns
    -------
    pandas.DataFrame
        The matching rows of ``tn.assertions`` with their original index.
        Empty if nothing matches.
    """
    # Accept a bare string (or any non-iterable scalar) as shorthand for a
    # one-element list.
    if isinstance(assertion_component, str) or not hasattr(
        assertion_component, '__iter__'
    ):
        assertion_component = [assertion_component]

    requested = set(assertion_component)
    search_all = 'all' in requested

    component_columns = {
        'src': 'src_string_id',
        'tgt': 'tgt_string_id',
        'ref': 'ref_string_id',
    }

    # Start with nothing selected and OR in one mask per requested column.
    selection = pd.Series(False, index=tn.assertions.index)
    for component, column in component_columns.items():
        if search_all or component in requested:
            selection = selection | tn.assertions[column].isin(string_ids)

    return tn.assertions.loc[selection]

def get_metadata_by_string_id(tn, string_ids, assertion_component):
    """
    Collect metadata rows for the nodes behind the given string IDs.

    Parameters
    ----------
    tn : object
        Must expose ``node_types`` (with a ``has_metadata`` column),
        ``strings`` (with a ``node_id`` column), ``nodes`` (with a
        ``node_type_id`` column), a ``get_node_types_by_node_id`` method,
        and one attribute per metadata table.
    string_ids : list-like
        Index labels of ``tn.strings`` to look up.
    assertion_component : object
        Currently unused; kept so existing calls keep working.

    Returns
    -------
    dict
        Maps each metadata table name to the rows of that table whose
        ``node_id`` belongs to one of the selected nodes.
    """
    # Node types flagged as having metadata.
    metadata_node_types = tn.node_types.query('has_metadata').index

    # Strings -> nodes, keeping only nodes of a metadata-bearing type.
    node_ids = tn.strings.loc[string_ids, 'node_id']
    nodes = tn.nodes.loc[node_ids].query(
        'node_type_id.isin(@metadata_node_types)'
    )

    # NOTE(review): assumes get_node_types_by_node_id returns a Series of
    # table names with an unnamed index of node IDs, so that reset_index
    # yields the columns 'index' and 'name' -- confirm.
    metadata_table_names = tn.get_node_types_by_node_id(nodes.index)
    metadata_table_names = metadata_table_names.reset_index(name='name')

    # Group node IDs by the table they belong to, in order of first
    # appearance.
    node_id_groups = {}
    for table_name in metadata_table_names['name'].unique():
        rows = metadata_table_names.query('name == @table_name')
        node_id_groups[table_name] = rows['index'].array

    # Use the getattr() builtin rather than calling tn.__getattr__ directly:
    # the dunder is only a fallback hook and does not exist on objects whose
    # class does not define it.
    metadata = {}
    for table_name, table_node_ids in node_id_groups.items():
        table = getattr(tn, table_name)
        metadata[table_name] = table.query('node_id.isin(@table_node_ids)')

    return metadata

# Work-in-progress branch, only entered when include_prefixes is set.
if include_prefixes:

    # Look up the assertions whose target string is a target of one of the
    # selected edges, and keep just their 'inp_string_id' column.
    tgt_inp_string_ids = get_assertions_by_string_id(
        tn, edge_subset['tgt_string_id'], assertion_component='tgt'
    )['inp_string_id']

    if include_references:
        # Placeholder: reference handling has no implementation yet.
        pass

# Re-select the same rows from tn.resolve_edges(), which supplies the
# 'src_string' and 'tgt_string' columns used below.
edge_subset = tn.resolve_edges().loc[edge_subset.index]

# For each source string, build one "    , <target>" line per edge.
output = edge_subset.groupby('src_string').apply(
    lambda grp: '\n'.join('    , {}'.format(tgt) for tgt in grp['tgt_string'])
)

# Prefix each group with its "<source>," header line.  After reset_index the
# joined target lines sit in the column labelled 0.
output = output.reset_index()
output = output.apply(
    lambda row: '{},\n{}'.format(row['src_string'], row[0]),
    axis='columns',
)

output = '\n'.join(output)
print(output)

# Final expression of the cell, so its result is what the cell displays.
get_metadata_by_string_id(tn, tn.strings.index, 'all')