diff --git a/texmf/tex/latex/tagpdf/tagpdf-base.sty b/texmf/tex/latex/tagpdf/tagpdf-base.sty new file mode 100644 index 000000000..a268313ab --- /dev/null +++ b/texmf/tex/latex/tagpdf/tagpdf-base.sty @@ -0,0 +1,112 @@ +%% +%% This is file `tagpdf-base.sty', +%% generated with the docstrip utility. +%% +%% The original source files were: +%% +%% tagpdf.dtx (with options: `base') +%% tagpdf-mc-generic.dtx (with options: `base') +%% tagpdf-mc-shared.dtx (with options: `base') +%% tagpdf-struct.dtx (with options: `base') +%% tagpdf-checks.dtx (with options: `base') +%% tagpdf-user.dtx (with options: `base') +%% +%% Copyright (C) 2019-2022 Ulrike Fischer +%% +%% It may be distributed and/or modified under the conditions of +%% the LaTeX Project Public License (LPPL), either version 1.3c of +%% this license or (at your option) any later version. The latest +%% version of this license is in the file: +%% +%% https://www.latex-project.org/lppl.txt +%% +%% This file is part of the "tagpdf bundle" (The Work in LPPL) +%% and all files in that bundle must be distributed together. 
+%% +%% File: tagpdf.dtx +\ProvidesExplPackage {tagpdf-base} {2022-08-24} {0.97} + {part of tagpdf - provide base, no-op versions of the user commands } +\AddToHook{begindocument} + { + \str_case:VnF \c_sys_backend_str + { + { luatex } { \cs_new_protected:Npn \__tag_whatsits: {} } + { dvisvgm } { \cs_new_protected:Npn \__tag_whatsits: {} } + } + { + \cs_new_protected:Npn \__tag_whatsits: {\tex_special:D {} } + } + } + +\cs_new_protected:Npn \tag_stop:{} +\cs_new_protected:Npn \tag_start:{} +\cs_new_protected:Npn \tag_stop:n{} +\cs_new_protected:Npn \tag_start:n{} + +%% File: tagpdf-mc-generic.dtx +\cs_new_protected:Npn \tag_mc_begin:n #1 { \__tag_whatsits: } +\cs_new_protected:Nn \tag_mc_end:{ \__tag_whatsits: } + +%% File: tagpdf-mc-shared.dtx + +\cs_new_protected:Npn \tag_mc_use:n #1 { \__tag_whatsits: } +\cs_new_protected:Npn \tag_mc_artifact_group_begin:n #1 {} +\cs_new_protected:Npn \tag_mc_artifact_group_end:{} +\cs_new_protected:Npn \tag_mc_end_push: {} +\cs_new_protected:Npn \tag_mc_begin_pop:n #1 {} +%% File: tagpdf-struct.dtx +\newcounter { g__tag_struct_abs_int } +\int_gzero:N \c@g__tag_struct_abs_int +\tl_new:N \g__tag_struct_stack_current_tl +\tl_gset:Nn \g__tag_struct_stack_current_tl {\int_use:N\c@g__tag_struct_abs_int} +\cs_new:Npn \__tag_get_data_struct_num: + { + \g__tag_struct_stack_current_tl + } +\cs_new_protected:Npn \tag_struct_begin:n #1 {\int_gincr:N \c@g__tag_struct_abs_int} +\cs_new_protected:Npn \tag_struct_end:{} +\cs_new_protected:Npn \tag_struct_use:n #1 {} + +%% File: tagpdf-checks.dtx +\cs_new:Npn \tag_get:n #1 { \use:c {__tag_get_data_#1: } } +\prg_new_conditional:Npnn \tag_if_active: { p , T , TF, F } + { \prg_return_false: } +%% File: tagpdf-user.dtx + + +\NewDocumentCommand \tagpdfsetup { m }{} + +\NewDocumentCommand \tagmcbegin { m } + { + \tag_mc_begin:n {#1} + } + +\NewDocumentCommand \tagmcend { } + { + \tag_mc_end: + } + +\NewDocumentCommand \tagmcuse { m } + { + \tag_mc_use:n {#1} + } + +\NewDocumentCommand \tagstructbegin 
{ m } + { + \tag_struct_begin:n {#1} + } + +\NewDocumentCommand \tagstructend { } + { + \tag_struct_end: + } + +\NewDocumentCommand \tagstructuse { m } + { + \tag_struct_use:n {#1} + } +\newcommand\tagpdfparaOn {} +\newcommand\tagpdfparaOff{} +%% +%% +%% End of file `tagpdf-base.sty'. diff --git a/texmf/tex/latex/tagpdf/tagpdf-debug-generic.sty b/texmf/tex/latex/tagpdf/tagpdf-debug-generic.sty new file mode 100644 index 000000000..185bb750b --- /dev/null +++ b/texmf/tex/latex/tagpdf/tagpdf-debug-generic.sty @@ -0,0 +1,79 @@ +%% +%% This is file `tagpdf-debug-generic.sty', +%% generated with the docstrip utility. +%% +%% The original source files were: +%% +%% tagpdf-mc-generic.dtx (with options: `debug') +%% +%% Copyright (C) 2019-2022 Ulrike Fischer +%% +%% It may be distributed and/or modified under the conditions of +%% the LaTeX Project Public License (LPPL), either version 1.3c of +%% this license or (at your option) any later version. The latest +%% version of this license is in the file: +%% +%% https://www.latex-project.org/lppl.txt +%% +%% This file is part of the "tagpdf bundle" (The Work in LPPL) +%% and all files in that bundle must be distributed together. 
+%% +%% File: tagpdf-mc-generic.dtx +\ProvidesExplPackage {tagpdf-debug-generic} {2022-08-24} {0.97} + {part of tagpdf - debugging code related to marking chunks - generic mode} +\cs_set_protected:Npn \tag_mc_begin:n #1 %#1 keyval + { + \__tag_check_if_active_mc:TF + { + \__tag_debug_mc_begin_insert:n { #1 } + \group_begin: %hm + \__tag_check_mc_if_nested: + \bool_gset_true:N \g__tag_in_mc_bool + \keys_set:nn { __tag / mc } {#1} + \bool_if:NTF \l__tag_mc_artifact_bool + { %handle artifact + \__tag_mc_handle_artifact:N \l__tag_mc_artifact_type_tl + \exp_args:NV + \__tag_mc_artifact_begin_marks:n \l__tag_mc_artifact_type_tl + } + { %handle mcid type + \__tag_check_mc_tag:N \l__tag_mc_key_tag_tl + \__tag_mc_handle_mcid:VV + \l__tag_mc_key_tag_tl + \l__tag_mc_key_properties_tl + \__tag_mc_begin_marks:oo{\l__tag_mc_key_tag_tl}{\l__tag_mc_key_label_tl} + \tl_if_empty:NF {\l__tag_mc_key_label_tl} + { + \exp_args:NV + \__tag_mc_handle_mc_label:n \l__tag_mc_key_label_tl + } + \bool_if:NF \l__tag_mc_key_stash_bool + { + \__tag_mc_handle_stash:x { \int_use:N \c@g__tag_MCID_abs_int } + } + } + \group_end: + } + { + \__tag_debug_mc_begin_ignore:n { #1 } + } + } +\cs_set_protected:Nn \tag_mc_end: + { + \__tag_check_if_active_mc:TF + { + \__tag_debug_mc_end_insert: + \__tag_check_mc_if_open: + \bool_gset_false:N \g__tag_in_mc_bool + \tl_gset:Nn \g__tag_mc_key_tag_tl { } + \__tag_mc_emc: + \__tag_mc_end_marks: + } + { + \__tag_debug_mc_end_ignore: + } + } + +%% +%% +%% End of file `tagpdf-debug-generic.sty'. diff --git a/texmf/tex/latex/tagpdf/tagpdf-debug-lua.sty b/texmf/tex/latex/tagpdf/tagpdf-debug-lua.sty new file mode 100644 index 000000000..262e2bdca --- /dev/null +++ b/texmf/tex/latex/tagpdf/tagpdf-debug-lua.sty @@ -0,0 +1,24 @@ +%% +%% This is file `tagpdf-debug-lua.sty', +%% generated with the docstrip utility. 
+%% +%% The original source files were: +%% +%% tagpdf-mc-luacode.dtx (with options: `debug') +%% +%% Copyright (C) 2019-2022 Ulrike Fischer +%% +%% It may be distributed and/or modified under the conditions of +%% the LaTeX Project Public License (LPPL), either version 1.3c of +%% this license or (at your option) any later version. The latest +%% version of this license is in the file: +%% +%% https://www.latex-project.org/lppl.txt +%% +%% This file is part of the "tagpdf bundle" (The Work in LPPL) +%% and all files in that bundle must be distributed together. +%% +%% File: tagpdf-mc-luacode.dtx +%% +%% +%% End of file `tagpdf-debug-lua.sty'. diff --git a/texmf/tex/latex/tagpdf/tagpdf-debug.sty b/texmf/tex/latex/tagpdf/tagpdf-debug.sty new file mode 100644 index 000000000..819600419 --- /dev/null +++ b/texmf/tex/latex/tagpdf/tagpdf-debug.sty @@ -0,0 +1,218 @@ +%% +%% This is file `tagpdf-debug.sty', +%% generated with the docstrip utility. +%% +%% The original source files were: +%% +%% tagpdf.dtx (with options: `debug') +%% tagpdf-checks.dtx (with options: `debug') +%% tagpdf-user.dtx (with options: `debug') +%% tagpdf-mc-shared.dtx (with options: `debug') +%% tagpdf-tree.dtx (with options: `debug') +%% tagpdf-roles.dtx (with options: `debug') +%% tagpdf-struct.dtx (with options: `debug') +%% tagpdf-space.dtx (with options: `debug') +%% +%% Copyright (C) 2019-2022 Ulrike Fischer +%% +%% It may be distributed and/or modified under the conditions of +%% the LaTeX Project Public License (LPPL), either version 1.3c of +%% this license or (at your option) any later version. The latest +%% version of this license is in the file: +%% +%% https://www.latex-project.org/lppl.txt +%% +%% This file is part of the "tagpdf bundle" (The Work in LPPL) +%% and all files in that bundle must be distributed together. 
+%% +%% File: tagpdf.dtx +\ProvidesExplPackage {tagpdf-debug} {2022-08-24} {0.97} + { debug code for tagpdf } +\@ifpackageloaded{tagpdf}{}{\PackageWarning{tagpdf-debug}{tagpdf~not~loaded,~quitting}\endinput} +\prop_gput:Nnn \g_msg_module_type_prop { tag / debug} {} +\prop_gput:Nnn \g_msg_module_name_prop { tag / debug }{tagpdf~DEBUG} + + +\bool_if:NTF \g__tag_mode_lua_bool + { + \RequirePackage {tagpdf-debug-lua} + } + { + \RequirePackage {tagpdf-debug-generic} % + } +%% File: tagpdf-checks.dtx +\msg_new:nnn { tag / debug } {mc-begin} { MC~begin~#1~with~options:~\tl_to_str:n{#2}~[\msg_line_context:] } +\msg_new:nnn { tag / debug } {mc-end} { MC~end~#1~[\msg_line_context:] } + +\cs_new_protected:Npn \__tag_debug_mc_begin_insert:n #1 + { + \int_compare:nNnT { \l__tag_loglevel_int } > {0} + { + \msg_note:nnnn { tag / debug } {mc-begin} {inserted} { #1 } + } + } +\cs_new_protected:Npn \__tag_debug_mc_begin_ignore:n #1 + { + \int_compare:nNnT { \l__tag_loglevel_int } > {0} + { + \msg_note:nnnn { tag / debug } {mc-begin } {ignored} { #1 } + } + } +\cs_new_protected:Npn \__tag_debug_mc_end_insert: + { + \int_compare:nNnT { \l__tag_loglevel_int } > {0} + { + \msg_note:nnn { tag / debug } {mc-end} {inserted} + } + } +\cs_new_protected:Npn \__tag_debug_mc_end_ignore: + { + \int_compare:nNnT { \l__tag_loglevel_int } > {0} + { + \msg_note:nnn { tag / debug } {mc-end } {ignored} + } + } +\msg_new:nnn { tag / debug } {struct-begin} + { + Struct~\tag_get:n{struct_num}~begin~#1~with~options:~\tl_to_str:n{#2}~[\msg_line_context:] + } +\msg_new:nnn { tag / debug } {struct-end} + { + Struct~end~#1~[\msg_line_context:] + } + +\cs_new_protected:Npn \__tag_debug_struct_begin_insert:n #1 + { + \int_compare:nNnT { \l__tag_loglevel_int } > {0} + { + \msg_note:nnnn { tag / debug } {struct-begin} {inserted} { #1 } + \seq_log:N \g__tag_struct_tag_stack_seq + } + } +\cs_new_protected:Npn \__tag_debug_struct_begin_ignore:n #1 + { + \int_compare:nNnT { \l__tag_loglevel_int } > {0} + { + 
\msg_note:nnnn { tag / debug } {struct-begin } {ignored} { #1 } + } + } +\cs_new_protected:Npn \__tag_debug_struct_end_insert: + { + \int_compare:nNnT { \l__tag_loglevel_int } > {0} + { + \msg_note:nnn { tag / debug } {struct-end} {inserted} + \seq_log:N \g__tag_struct_tag_stack_seq + } + } +\cs_new_protected:Npn \__tag_debug_struct_end_ignore: + { + \int_compare:nNnT { \l__tag_loglevel_int } > {0} + { + \msg_note:nnn { tag / debug } {struct-end } {ignored} + } + } +%% File: tagpdf-user.dtx + + + + +%% File: tagpdf-mc-shared.dtx + +%% File: tagpdf-tree.dtx +%% File: tagpdf-roles.dtx +%% File: tagpdf-struct.dtx +\cs_set_protected:Npn \tag_struct_begin:n #1 %#1 key-val + { +\__tag_check_if_active_struct:TF + { + \group_begin: + \int_gincr:N \c@g__tag_struct_abs_int + \__tag_prop_new:c { g__tag_struct_\int_eval:n { \c@g__tag_struct_abs_int }_prop } + \__tag_new_output_prop_handler:n {\int_eval:n { \c@g__tag_struct_abs_int }} + \__tag_seq_new:c { g__tag_struct_kids_\int_eval:n { \c@g__tag_struct_abs_int }_seq} + \exp_args:Ne + \pdf_object_new:n + { __tag/struct/\int_eval:n { \c@g__tag_struct_abs_int } } + \__tag_prop_gput:cno + { g__tag_struct_\int_eval:n { \c@g__tag_struct_abs_int }_prop } + { Type } + { /StructElem } + \tl_set:Nn \l__tag_struct_stack_parent_tmpa_tl {-1} + \keys_set:nn { __tag / struct} { #1 } + \__tag_check_structure_has_tag:n { \int_eval:n {\c@g__tag_struct_abs_int} } + \tl_if_empty:NF + \l__tag_struct_key_label_tl + { + \__tag_ref_label:en{tagpdfstruct-\l__tag_struct_key_label_tl}{struct} + } + \int_compare:nNnT { \l__tag_struct_stack_parent_tmpa_tl } = { -1 } + { + \seq_get:NNF + \g__tag_struct_stack_seq + \l__tag_struct_stack_parent_tmpa_tl + { + \msg_error:nn { tag } { struct-faulty-nesting } + } + } + \seq_gpush:NV \g__tag_struct_stack_seq \c@g__tag_struct_abs_int + \seq_gpush:NV \g__tag_struct_tag_stack_seq \g__tag_struct_tag_tl + \tl_gset:NV \g__tag_struct_stack_current_tl \c@g__tag_struct_abs_int + %\seq_show:N \g__tag_struct_stack_seq + 
\bool_if:NF + \l__tag_struct_elem_stash_bool + {%set the parent + \__tag_prop_gput:cnx + { g__tag_struct_\int_eval:n {\c@g__tag_struct_abs_int}_prop } + { P } + { + \pdf_object_ref:e { __tag/struct/\l__tag_struct_stack_parent_tmpa_tl } + } + %record this structure as kid: + %\tl_show:N \g__tag_struct_stack_current_tl + %\tl_show:N \l__tag_struct_stack_parent_tmpa_tl + \__tag_struct_kid_struct_gput_right:xx + { \l__tag_struct_stack_parent_tmpa_tl } + { \g__tag_struct_stack_current_tl } + %\prop_show:c { g__tag_struct_\g__tag_struct_stack_current_tl _prop } + %\seq_show:c {g__tag_struct_kids_\l__tag_struct_stack_parent_tmpa_tl _seq} + } + %\prop_show:c { g__tag_struct_\g__tag_struct_stack_current_tl _prop } + %\seq_show:c {g__tag_struct_kids_\l__tag_struct_stack_parent_tmpa_tl _seq} + \__tag_debug_struct_begin_insert:n { #1 } + \group_end: + } +{ \__tag_debug_struct_begin_ignore:n { #1 }} + } +\cs_set_protected:Nn \tag_struct_end: + { %take the current structure num from the stack: + %the objects are written later, lua mode hasn't all needed info yet + %\seq_show:N \g__tag_struct_stack_seq +\__tag_check_if_active_struct:TF + { + \seq_gpop:NN \g__tag_struct_tag_stack_seq \l__tag_tmpa_tl + \seq_gpop:NNTF \g__tag_struct_stack_seq \l__tag_tmpa_tl + { + \__tag_check_info_closing_struct:o { \g__tag_struct_stack_current_tl } + } + { \__tag_check_no_open_struct: } + % get the previous one, shouldn't be empty as the root should be there + \seq_get:NNTF \g__tag_struct_stack_seq \l__tag_tmpa_tl + { + \tl_gset:NV \g__tag_struct_stack_current_tl \l__tag_tmpa_tl + } + { + \__tag_check_no_open_struct: + } + \seq_get:NNT \g__tag_struct_tag_stack_seq \l__tag_tmpa_tl + { + \tl_gset:NV \g__tag_struct_tag_tl \l__tag_tmpa_tl + } +\__tag_debug_struct_end_insert: + } +{\__tag_debug_struct_end_ignore:} + } + +%% File: tagpdf-space.dtx +%% +%% +%% End of file `tagpdf-debug.sty'. 
diff --git a/texmf/tex/latex/tagpdf/tagpdf-luatex.def b/texmf/tex/latex/tagpdf/tagpdf-luatex.def new file mode 100644 index 000000000..7fcc6ec60 --- /dev/null +++ b/texmf/tex/latex/tagpdf/tagpdf-luatex.def @@ -0,0 +1,78 @@ +%% +%% This is file `tagpdf-luatex.def', +%% generated with the docstrip utility. +%% +%% The original source files were: +%% +%% tagpdf-backend.dtx (with options: `luatex') +%% +%% Copyright (C) 2019-2022 Ulrike Fischer +%% +%% It may be distributed and/or modified under the conditions of +%% the LaTeX Project Public License (LPPL), either version 1.3c of +%% this license or (at your option) any later version. The latest +%% version of this license is in the file: +%% +%% https://www.latex-project.org/lppl.txt +%% +%% This file is part of the "tagpdf bundle" (The Work in LPPL) +%% and all files in that bundle must be distributed together. +%% +%% File: tagpdf-backend.dtx +\ProvidesExplFile {tagpdf-luatex.def} {2022-08-24} {0.97} + {tagpdf~driver~for~luatex} +{ + \fontencoding{TU}\fontfamily{lmr}\fontseries{m}\fontshape{n}\fontsize{10pt}{10pt}\selectfont +} +\lua_now:e { tagpdf=require('tagpdf.lua') } +\cs_set_protected:Npn \__tag_prop_new:N #1 + { + \prop_new:N #1 + \lua_now:e { ltx.__tag.tables.\cs_to_str:N#1 = {} } + } + +\cs_set_protected:Npn \__tag_seq_new:N #1 + { + \seq_new:N #1 + \lua_now:e { ltx.__tag.tables.\cs_to_str:N#1 = {} } + } + +\cs_set_protected:Npn \__tag_prop_gput:Nnn #1 #2 #3 + { + \prop_gput:Nnn #1 { #2 } { #3 } + \lua_now:e { ltx.__tag.tables.\cs_to_str:N#1 ["#2"] = "#3" } + } + +\cs_set_protected:Npn \__tag_seq_gput_right:Nn #1 #2 + { + \seq_gput_right:Nn #1 { #2 } + \lua_now:e { table.insert(ltx.__tag.tables.\cs_to_str:N#1, "#2") } + } + + +\cs_set:Npn \__tag_seq_item:cn #1 #2 + { + \lua_now:e { tex.print(ltx.__tag.tables.#1[#2]) } + } + +\cs_set:Npn \__tag_prop_item:cn #1 #2 + { + \lua_now:e { tex.print(ltx.__tag.tables.#1["#2"]) } + } + +\cs_set_protected:Npn \__tag_seq_show:N #1 + { + \seq_show:N #1 + \lua_now:e { 
ltx.__tag.trace.log ("lua~sequence~array~\cs_to_str:N#1",1) } + \lua_now:e { ltx.__tag.trace.show_seq (ltx.__tag.tables.\cs_to_str:N#1) } + } + +\cs_set_protected:Npn \__tag_prop_show:N #1 + { + \prop_show:N #1 + \lua_now:e {ltx.__tag.trace.log ("lua~property~table~\cs_to_str:N#1",1) } + \lua_now:e {ltx.__tag.trace.show_prop (ltx.__tag.tables.\cs_to_str:N#1) } + } +%% +%% +%% End of file `tagpdf-luatex.def'. diff --git a/texmf/tex/latex/tagpdf/tagpdf-mc-code-generic.sty b/texmf/tex/latex/tagpdf/tagpdf-mc-code-generic.sty new file mode 100644 index 000000000..38651bdcf --- /dev/null +++ b/texmf/tex/latex/tagpdf/tagpdf-mc-code-generic.sty @@ -0,0 +1,424 @@ +%% +%% This is file `tagpdf-mc-code-generic.sty', +%% generated with the docstrip utility. +%% +%% The original source files were: +%% +%% tagpdf-mc-generic.dtx (with options: `generic') +%% +%% Copyright (C) 2019-2022 Ulrike Fischer +%% +%% It may be distributed and/or modified under the conditions of +%% the LaTeX Project Public License (LPPL), either version 1.3c of +%% this license or (at your option) any later version. The latest +%% version of this license is in the file: +%% +%% https://www.latex-project.org/lppl.txt +%% +%% This file is part of the "tagpdf bundle" (The Work in LPPL) +%% and all files in that bundle must be distributed together. 
+%% +%% File: tagpdf-mc-generic.dtx +\ProvidesExplPackage {tagpdf-mc-code-generic} {2022-08-24} {0.97} + {part of tagpdf - code related to marking chunks - generic mode} +\__tag_prop_new:N \g__tag_MCID_byabspage_prop + +\tl_new:N \l__tag_mc_ref_abspage_tl + +\tl_new:N \l__tag_mc_tmpa_tl + +\newmarks \g__tag_mc_marks +\seq_new:N \g__tag_mc_main_marks_seq +\seq_new:N \g__tag_mc_footnote_marks_seq +\seq_new:N \g__tag_mc_multicol_marks_seq + +\seq_new:N \l__tag_mc_firstmarks_seq +\seq_new:N \l__tag_mc_botmarks_seq +\cs_new_protected:Npn \__tag_mc_begin_marks:nn #1 #2 %#1 tag, #2 label + { + \tex_marks:D \g__tag_mc_marks + { + b-, %first of begin pair + \int_use:N\c@g__tag_MCID_abs_int, %mc-num + \g__tag_struct_stack_current_tl, %structure num + #1, %tag + \bool_if:NT \l__tag_mc_key_stash_bool{stash}, % stash info + #2, %label + } + \tex_marks:D \g__tag_mc_marks + { + b+, % second of begin pair + \int_use:N\c@g__tag_MCID_abs_int, %mc-num + \g__tag_struct_stack_current_tl, %structure num + #1, %tag + \bool_if:NT \l__tag_mc_key_stash_bool{stash}, % stash info + #2, %label + } + } +\cs_generate_variant:Nn \__tag_mc_begin_marks:nn {oo} +\cs_new_protected:Npn \__tag_mc_artifact_begin_marks:n #1 %#1 type + { + \tex_marks:D \g__tag_mc_marks + { + b-, %first of begin pair + \int_use:N\c@g__tag_MCID_abs_int, %mc-num + -1, %structure num (-1 marks an artifact) + #1 %type + } + \tex_marks:D \g__tag_mc_marks + { + b+, %second of begin pair + \int_use:N\c@g__tag_MCID_abs_int, %mc-num + -1, %structure num (-1 marks an artifact) + #1 %type + } + } + +\cs_new_protected:Npn \__tag_mc_end_marks: + { + \tex_marks:D \g__tag_mc_marks + { + e-, %first of end pair + \int_use:N\c@g__tag_MCID_abs_int, %mc-num + \g__tag_struct_stack_current_tl, %structure num + } + \tex_marks:D \g__tag_mc_marks + { + e+, %second of end pair + \int_use:N\c@g__tag_MCID_abs_int, %mc-num + \g__tag_struct_stack_current_tl, %structure num + } + } +\cs_new_protected:Npn \__tag_mc_disable_marks: + { + \cs_set_eq:NN \__tag_mc_begin_marks:nn \use_none:nn +
\cs_set_eq:NN \__tag_mc_artifact_begin_marks:n \use_none:n + \cs_set_eq:NN \__tag_mc_end_marks: \prg_do_nothing: + } +\cs_new_protected:Npn \__tag_mc_get_marks: + { + \exp_args:NNx + \seq_set_from_clist:Nn \l__tag_mc_firstmarks_seq + { \tex_firstmarks:D \g__tag_mc_marks } + \exp_args:NNx + \seq_set_from_clist:Nn \l__tag_mc_botmarks_seq + { \tex_botmarks:D \g__tag_mc_marks } + } +\cs_new_protected:Npn \__tag_mc_store:nnn #1 #2 #3 %#1 mc-prev, #2 mc-num #3 structure-num + { + %\prop_show:N \g__tag_struct_cont_mc_prop + \prop_get:NnNTF \g__tag_struct_cont_mc_prop {#1} \l__tag_tmpa_tl + { + \prop_gput:Nnx \g__tag_struct_cont_mc_prop {#1}{ \l__tag_tmpa_tl \__tag_struct_mcid_dict:n {#2}} + } + { + \prop_gput:Nnx \g__tag_struct_cont_mc_prop {#1}{ \__tag_struct_mcid_dict:n {#2}} + } + \prop_gput:Nxx \g__tag_mc_parenttree_prop + {#2} + {#3} + } +\cs_generate_variant:Nn \__tag_mc_store:nnn {xxx} +\cs_new_protected:Npn \__tag_mc_insert_extra_tmb:n #1 % #1 stream: e.g. main or footnote + { + \__tag_check_typeout_v:n {=>~ first~ \seq_use:Nn \l__tag_mc_firstmarks_seq {,~}} + \__tag_check_typeout_v:n {=>~ bot~ \seq_use:Nn \l__tag_mc_botmarks_seq {,~}} + \__tag_check_if_mc_tmb_missing:TF + { + \__tag_check_typeout_v:n {=>~ TMB~ ~ missing~ --~ inserted} + %test if artifact + \int_compare:nNnTF { \seq_item:cn { g__tag_mc_#1_marks_seq } {3} } = {-1} + { + \tl_set:Nx \l__tag_tmpa_tl { \seq_item:cn { g__tag_mc_#1_marks_seq } {4} } + \__tag_mc_handle_artifact:N \l__tag_tmpa_tl + } + { + \exp_args:Nx + \__tag_mc_bdc_mcid:n + { + \seq_item:cn { g__tag_mc_#1_marks_seq } {4} + } + \str_if_eq:eeTF + { + \seq_item:cn { g__tag_mc_#1_marks_seq } {5} + } + {} + { + %store + \__tag_mc_store:xxx + { + \seq_item:cn { g__tag_mc_#1_marks_seq } {2} + } + { \int_eval:n{\c@g__tag_MCID_abs_int} } + { + \seq_item:cn { g__tag_mc_#1_marks_seq } {3} + } + } + { + %stashed -> warning!! 
+ } + } + } + { + \__tag_check_typeout_v:n {=>~ TMB~ not~ missing} + } + } + +\cs_new_protected:Npn \__tag_mc_insert_extra_tme:n #1 % #1 stream, eg. main or footnote + { + \__tag_check_if_mc_tme_missing:TF + { + \__tag_check_typeout_v:n {=>~ TME~ ~ missing~ --~ inserted} + \__tag_mc_emc: + \seq_gset_eq:cN + { g__tag_mc_#1_marks_seq } + \l__tag_mc_botmarks_seq + } + { + \__tag_check_typeout_v:n {=>~ TME~ not~ missing} + } + } +\cs_new_protected:Npn\__tag_add_missing_mcs:Nn #1 #2 { + \vbadness \@M + \vfuzz \c_max_dim + \vbox_set_to_ht:Nnn #1 { \box_ht:N #1 } { + \hbox_set:Nn \l__tag_tmpa_box { \__tag_mc_insert_extra_tmb:n {#2} } + \hbox_set:Nn \l__tag_tmpb_box { \__tag_mc_insert_extra_tme:n {#2} } + \int_compare:nNnT {\l__tag_loglevel_int} > { 0 } + { + \seq_log:c { g__tag_mc_#2_marks_seq} + } + \box_set_ht:Nn \l__tag_tmpa_box \c_zero_dim + \box_set_dp:Nn \l__tag_tmpa_box \c_zero_dim + \box_set_ht:Nn \l__tag_tmpb_box \c_zero_dim + \box_set_dp:Nn \l__tag_tmpb_box { \box_dp:N #1 } + \boxmaxdepth \@maxdepth + \box_use_drop:N \l__tag_tmpa_box + \vbox_unpack_drop:N #1 + \tex_kern:D -\box_dp:N \l__tag_tmpb_box + \nointerlineskip + \box_use_drop:N \l__tag_tmpb_box + } +} + +\cs_new_protected:Npn \__tag_add_missing_mcs_to_stream:Nn #1#2 + { + \__tag_check_if_active_mc:T { + \vbadness\maxdimen + \box_set_eq:NN \l__tag_tmpa_box #1 + \vbox_set_split_to_ht:NNn \l__tag_tmpa_box \l__tag_tmpa_box \c_max_dim + \exp_args:NNx + \seq_set_from_clist:Nn \l__tag_mc_firstmarks_seq + { \tex_splitfirstmarks:D \g__tag_mc_marks } + \seq_if_empty:NTF \l__tag_mc_firstmarks_seq + { + \__tag_check_typeout_v:n + { + No~ marks~ so~ use~ saved~ bot~ mark:~ + \seq_use:cn {g__tag_mc_#2_marks_seq} {,~} \iow_newline: + } + \seq_set_eq:Nc \l__tag_mc_firstmarks_seq {g__tag_mc_#2_marks_seq} + \seq_set_eq:NN \l__tag_mc_botmarks_seq \l__tag_mc_firstmarks_seq + } + { + \__tag_check_typeout_v:n + { + Pick~ up~ new~ bot~ mark! 
+ } + \exp_args:NNx + \seq_set_from_clist:Nn \l__tag_mc_botmarks_seq + { \tex_splitbotmarks:D \g__tag_mc_marks } + } + \__tag_add_missing_mcs:Nn #1 {#2} +%% + \seq_gset_eq:cN {g__tag_mc_#2_marks_seq} \l__tag_mc_botmarks_seq +%% + } +} +\prg_new_conditional:Nnn \__tag_mc_if_in: {p,T,F,TF} + { + \bool_if:NTF \g__tag_in_mc_bool + { \prg_return_true: } + { \prg_return_false: } + } + +\prg_new_eq_conditional:NNn \tag_mc_if_in: \__tag_mc_if_in: {p,T,F,TF} +\cs_set_eq:NN \__tag_mc_bmc:n \pdf_bmc:n +\cs_set_eq:NN \__tag_mc_emc: \pdf_emc: +\cs_set_eq:NN \__tag_mc_bdc:nn \pdf_bdc:nn +\cs_generate_variant:Nn \__tag_mc_bdc:nn {nx} +\cs_new_protected:Npn \__tag_mc_bdc_mcid:nn #1 #2 + { + \int_gincr:N \c@g__tag_MCID_abs_int + \tl_set:Nx \l__tag_mc_ref_abspage_tl + { + \__tag_ref_value:enn %3 args + { + mcid-\int_use:N \c@g__tag_MCID_abs_int + } + { tagabspage } + {-1} + } + \prop_get:NoNTF + \g__tag_MCID_byabspage_prop + { + \l__tag_mc_ref_abspage_tl + } + \l__tag_mc_tmpa_tl + { + %key already present, use value for MCID and add 1 for the next + \int_gset:Nn \g__tag_MCID_tmp_bypage_int { \l__tag_mc_tmpa_tl } + \__tag_prop_gput:Nxx + \g__tag_MCID_byabspage_prop + { \l__tag_mc_ref_abspage_tl } + { \int_eval:n {\l__tag_mc_tmpa_tl +1} } + } + { + %key not present, set MCID to 0 and insert 1 + \int_gzero:N \g__tag_MCID_tmp_bypage_int + \__tag_prop_gput:Nxx + \g__tag_MCID_byabspage_prop + { \l__tag_mc_ref_abspage_tl } + {1} + } + \__tag_ref_label:en + { + mcid-\int_use:N \c@g__tag_MCID_abs_int + } + { mc } + \__tag_mc_bdc:nx + {#1} + { /MCID~\int_eval:n { \g__tag_MCID_tmp_bypage_int }~ \exp_not:n { #2 } } + } +\cs_new_protected:Npn \__tag_mc_bdc_mcid:n #1 + { + \__tag_mc_bdc_mcid:nn {#1} {} + } + +\cs_new_protected:Npn \__tag_mc_handle_mcid:nn #1 #2 %#1 tag, #2 properties + { + \__tag_mc_bdc_mcid:nn {#1} {#2} + } + +\cs_generate_variant:Nn \__tag_mc_handle_mcid:nn {VV} + +\cs_new_protected:Npn \__tag_mc_handle_stash:n #1 %1 mcidnum + { + \__tag_check_mc_used:n {#1} + 
\__tag_struct_kid_mc_gput_right:nn + { \g__tag_struct_stack_current_tl } + {#1} + \prop_gput:Nxx \g__tag_mc_parenttree_prop + {#1} + { \g__tag_struct_stack_current_tl } + } +\cs_generate_variant:Nn \__tag_mc_handle_stash:n { x } + +\cs_new_protected:Npn \__tag_mc_bmc_artifact: + { + \__tag_mc_bmc:n {Artifact} + } +\cs_new_protected:Npn \__tag_mc_bmc_artifact:n #1 + { + \__tag_mc_bdc:nn {Artifact}{/Type/#1} + } +\cs_new_protected:Npn \__tag_mc_handle_artifact:N #1 + % #1 is a var containing the artifact type + { + \int_gincr:N \c@g__tag_MCID_abs_int + \tl_if_empty:NTF #1 + { \__tag_mc_bmc_artifact: } + { \exp_args:NV\__tag_mc_bmc_artifact:n #1 } + } + +\cs_new:Nn \__tag_get_data_mc_tag: { \g__tag_mc_key_tag_tl } +\cs_set_protected:Npn \tag_mc_begin:n #1 %#1 keyval + { + \__tag_check_if_active_mc:T + { + \group_begin: %hm + \__tag_check_mc_if_nested: + \bool_gset_true:N \g__tag_in_mc_bool + \keys_set:nn { __tag / mc } {#1} + \bool_if:NTF \l__tag_mc_artifact_bool + { %handle artifact + \__tag_mc_handle_artifact:N \l__tag_mc_artifact_type_tl + \exp_args:NV + \__tag_mc_artifact_begin_marks:n \l__tag_mc_artifact_type_tl + } + { %handle mcid type + \__tag_check_mc_tag:N \l__tag_mc_key_tag_tl + \__tag_mc_handle_mcid:VV + \l__tag_mc_key_tag_tl + \l__tag_mc_key_properties_tl + \__tag_mc_begin_marks:oo{\l__tag_mc_key_tag_tl}{\l__tag_mc_key_label_tl} + \tl_if_empty:NF {\l__tag_mc_key_label_tl} + { + \exp_args:NV + \__tag_mc_handle_mc_label:n \l__tag_mc_key_label_tl + } + \bool_if:NF \l__tag_mc_key_stash_bool + { + \__tag_mc_handle_stash:x { \int_use:N \c@g__tag_MCID_abs_int } + } + } + \group_end: + } + } +\cs_set_protected:Nn \tag_mc_end: + { + \__tag_check_if_active_mc:T + { + \__tag_check_mc_if_open: + \bool_gset_false:N \g__tag_in_mc_bool + \tl_gset:Nn \g__tag_mc_key_tag_tl { } + \__tag_mc_emc: + \__tag_mc_end_marks: + } + } + +\keys_define:nn { __tag / mc } + { + tag .code:n = % the name (H,P,Span) etc + { + \tl_set:Nx \l__tag_mc_key_tag_tl { #1 } + \tl_gset:Nx 
\g__tag_mc_key_tag_tl { #1 } + }, + raw .code:n = + { + \tl_put_right:Nx \l__tag_mc_key_properties_tl { #1 } + }, + alt .code:n = % Alt property + { + \str_set_convert:Noon + \l__tag_tmpa_str + { #1 } + { default } + { utf16/hex } + \tl_put_right:Nn \l__tag_mc_key_properties_tl { /Alt~< } + \tl_put_right:No \l__tag_mc_key_properties_tl { \l__tag_tmpa_str>~ } + }, + alttext .meta:n = {alt=#1}, + actualtext .code:n = % ActualText property + { + \str_set_convert:Noon + \l__tag_tmpa_str + { #1 } + { default } + { utf16/hex } + \tl_put_right:Nn \l__tag_mc_key_properties_tl { /ActualText~< } + \tl_put_right:No \l__tag_mc_key_properties_tl { \l__tag_tmpa_str>~ } + }, + label .tl_set:N = \l__tag_mc_key_label_tl, + artifact .code:n = + { + \exp_args:Nnx + \keys_set:nn + { __tag / mc } + { __artifact-bool, __artifact-type=#1 } + }, + artifact .default:n = {notype} + } +%% +%% +%% End of file `tagpdf-mc-code-generic.sty'. diff --git a/texmf/tex/latex/tagpdf/tagpdf-mc-code-lua.sty b/texmf/tex/latex/tagpdf/tagpdf-mc-code-lua.sty new file mode 100644 index 000000000..d109c3acc --- /dev/null +++ b/texmf/tex/latex/tagpdf/tagpdf-mc-code-lua.sty @@ -0,0 +1,309 @@ +%% +%% This is file `tagpdf-mc-code-lua.sty', +%% generated with the docstrip utility. +%% +%% The original source files were: +%% +%% tagpdf-mc-luacode.dtx (with options: `luamode') +%% +%% Copyright (C) 2019-2022 Ulrike Fischer +%% +%% It may be distributed and/or modified under the conditions of +%% the LaTeX Project Public License (LPPL), either version 1.3c of +%% this license or (at your option) any later version. The latest +%% version of this license is in the file: +%% +%% https://www.latex-project.org/lppl.txt +%% +%% This file is part of the "tagpdf bundle" (The Work in LPPL) +%% and all files in that bundle must be distributed together. 
+%% +%% File: tagpdf-mc-luacode.dtx +\ProvidesExplPackage {tagpdf-mc-code-lua} {2022-08-24} {0.97} + {tagpdf - mc code only for the luamode } +\hook_gput_code:nnn{begindocument}{tagpdf/mc} + { + \bool_if:NT\g__tag_active_space_bool + { + \lua_now:e + { + if~luatexbase.callbacktypes.pre_shipout_filter~then~ + luatexbase.add_to_callback("pre_shipout_filter", function(TAGBOX)~ + ltx.__tag.func.space_chars_shipout(TAGBOX)~return~true~ + end, "tagpdf")~ + end + } + \lua_now:e + { + if~luatexbase.callbacktypes.pre_shipout_filter~then~ + token.get_next()~ + end + }\@secondoftwo\@gobble + { + \hook_gput_code:nnn{shipout/before}{tagpdf/lua} + { + \lua_now:e + { ltx.__tag.func.space_chars_shipout (tex.box["ShipoutBox"]) } + } + } + } + \bool_if:NT\g__tag_active_mc_bool + { + \lua_now:e + { + if~luatexbase.callbacktypes.pre_shipout_filter~then~ + luatexbase.add_to_callback("pre_shipout_filter", function(TAGBOX)~ + ltx.__tag.func.mark_shipout(TAGBOX)~return~true~ + end, "tagpdf")~ + end + } + \lua_now:e + { + if~luatexbase.callbacktypes.pre_shipout_filter~then~ + token.get_next()~ + end + }\@secondoftwo\@gobble + { + \hook_gput_code:nnn{shipout/before}{tagpdf/lua} + { + \lua_now:e + { ltx.__tag.func.mark_shipout (tex.box["ShipoutBox"]) } + } + } + } + } +\cs_new_protected:Npn \__tag_add_missing_mcs_to_stream:Nn #1#2 {} +\prg_new_conditional:Nnn \__tag_mc_if_in: {p,T,F,TF} + { + \int_compare:nNnTF + { -2147483647 } + = + {\lua_now:e + { + tex.print(tex.getattribute(luatexbase.attributes.g__tag_mc_type_attr)) + } + } + { \prg_return_false: } + { \prg_return_true: } + } + +\prg_new_eq_conditional:NNn \tag_mc_if_in: \__tag_mc_if_in: {p,T,F,TF} +\cs_new:Nn \__tag_mc_lua_set_mc_type_attr:n % #1 is a tag name + { + %TODO ltx.__tag.func.get_num_from("#1") seems not to return a suitable number?? 
+ \tl_set:Nx\l__tag_tmpa_tl{\lua_now:e{ltx.__tag.func.output_num_from ("#1")} } + \lua_now:e + { + tex.setattribute + ( + "global", + luatexbase.attributes.g__tag_mc_type_attr, + \l__tag_tmpa_tl + ) + } + \lua_now:e + { + tex.setattribute + ( + "global", + luatexbase.attributes.g__tag_mc_cnt_attr, + \__tag_get_mc_abs_cnt: + ) + } + } + +\cs_generate_variant:Nn\__tag_mc_lua_set_mc_type_attr:n { o } + +\cs_new:Nn \__tag_mc_lua_unset_mc_type_attr: + { + \lua_now:e + { + tex.setattribute + ( + "global", + luatexbase.attributes.g__tag_mc_type_attr, + -2147483647 + ) + } + \lua_now:e + { + tex.setattribute + ( + "global", + luatexbase.attributes.g__tag_mc_cnt_attr, + -2147483647 + ) + } + } + +\cs_new:Nn \__tag_mc_insert_mcid_kids:n + { + \lua_now:e { ltx.__tag.func.mc_insert_kids (#1,0) } + } + +\cs_new:Nn \__tag_mc_insert_mcid_single_kids:n + { + \lua_now:e {ltx.__tag.func.mc_insert_kids (#1,1) } + } +\cs_new:Nn \__tag_mc_handle_stash:n %1 mcidnum + { + \__tag_check_mc_used:n { #1 } + \seq_gput_right:cn % Don't fill a lua table due to the command in the item, + % so use the kernel command + { g__tag_struct_kids_\g__tag_struct_stack_current_tl _seq } + { + \__tag_mc_insert_mcid_kids:n {#1}% + } + \lua_now:e + { + ltx.__tag.func.store_struct_mcabs + ( + \g__tag_struct_stack_current_tl,#1 + ) + } + \prop_gput:Nxx + \g__tag_mc_parenttree_prop + { #1 } + { \g__tag_struct_stack_current_tl } + } + +\cs_generate_variant:Nn \__tag_mc_handle_stash:n { x } +\cs_set_protected:Nn \tag_mc_begin:n + { + \__tag_check_if_active_mc:T + { + \group_begin: + %\__tag_check_mc_if_nested: + \bool_gset_true:N \g__tag_in_mc_bool + \bool_set_false:N\l__tag_mc_artifact_bool + \tl_clear:N \l__tag_mc_key_properties_tl + \int_gincr:N \c@g__tag_MCID_abs_int + \keys_set:nn { __tag / mc }{ label={}, #1 } + %check that a tag or artifact has been used + \__tag_check_mc_tag:N \l__tag_mc_key_tag_tl + %set the attributes: + \__tag_mc_lua_set_mc_type_attr:o { \l__tag_mc_key_tag_tl } + \bool_if:NF 
\l__tag_mc_artifact_bool
          { % store the absolute num name in a label:
            \tl_if_empty:NF \l__tag_mc_key_label_tl
              {
                \exp_args:NV
                  \__tag_mc_handle_mc_label:n \l__tag_mc_key_label_tl
              }
            % if not stashed record the absolute number
            \bool_if:NF \l__tag_mc_key_stash_bool
              {
                \__tag_mc_handle_stash:x { \__tag_get_mc_abs_cnt: }
              }
          }
        \group_end:
      }
  }

% \tag_mc_end: (lua mode)
% Ends the current marked-content chunk: clears the "inside mc" flag and the
% artifact flag, resets both mc attributes to the unset value and empties the
% local and global copies of the current tag name.
% Does nothing unless \__tag_check_if_active_mc:T runs its argument.
\cs_set_protected:Nn \tag_mc_end:
  {
    \__tag_check_if_active_mc:T
      {
        %\__tag_check_mc_if_open:
        \bool_gset_false:N \g__tag_in_mc_bool
        \bool_set_false:N \l__tag_mc_artifact_bool
        \__tag_mc_lua_unset_mc_type_attr:
        \tl_set:Nn  \l__tag_mc_key_tag_tl { }
        \tl_gset:Nn \g__tag_mc_key_tag_tl { }
      }
  }

% Data accessor used by \tag_get:n {mc_tag}: expands to the current tag name.
\cs_new:Npn \__tag_get_data_mc_tag: { \g__tag_mc_key_tag_tl }

% Keys accepted by \tag_mc_begin:n in lua mode.
% Every key that carries data also mirrors it into the Lua table
% ltx.__tag.mc[<absolute mc count>] through ltx.__tag.func.store_mc_data.
\keys_define:nn { __tag / mc }
  {
    % tag: name of the tag; stored locally and globally (both x-expanded).
    tag .code:n = %
      {
        \tl_set:Nx  \l__tag_mc_key_tag_tl { #1 }
        \tl_gset:Nx \g__tag_mc_key_tag_tl { #1 }
        \lua_now:e
          {
            ltx.__tag.func.store_mc_data(\__tag_get_mc_abs_cnt:,"tag","#1")
          }
      },
    % raw: material appended to the property list as given (x-expanded).
    raw .code:n =
      {
        \tl_put_right:Nx \l__tag_mc_key_properties_tl { #1 }
        \lua_now:e
          {
            ltx.__tag.func.store_mc_data(\__tag_get_mc_abs_cnt:,"raw","#1")
          }
      },
    % alt: value is converted to a UTF-16 hex string and stored as /Alt <...>.
    alt .code:n = % Alt property
      {
        \str_set_convert:Noon
          \l__tag_tmpa_str
          { #1 }
          { default }
          { utf16/hex }
        \tl_put_right:Nn \l__tag_mc_key_properties_tl { /Alt~< }
        \tl_put_right:No \l__tag_mc_key_properties_tl { \l__tag_tmpa_str>~ }
        \lua_now:e
          {
            ltx.__tag.func.store_mc_data
              (
                \__tag_get_mc_abs_cnt:,"alt","/Alt~<\str_use:N \l__tag_tmpa_str>"
              )
          }
      },
    alttext .meta:n = {alt=#1},
    % actualtext: same conversion as alt, but the PDF key is /ActualText.
    % The property list previously received /Alt here, which disagreed with
    % the /ActualText entry stored on the Lua side just below.
    actualtext .code:n = % ActualText property
      {
        \str_set_convert:Noon
          \l__tag_tmpa_str
          { #1 }
          { default }
          { utf16/hex }
        \tl_put_right:Nn \l__tag_mc_key_properties_tl { /ActualText~< }
        \tl_put_right:No \l__tag_mc_key_properties_tl { \l__tag_tmpa_str>~ }
        \lua_now:e
          {
            ltx.__tag.func.store_mc_data
              (
                \__tag_get_mc_abs_cnt:,
                "actualtext",
                "/ActualText~<\str_use:N \l__tag_tmpa_str>"
              )
          }
      },
    label .code:n =
      {
\tl_set:Nn\l__tag_mc_key_label_tl { #1 } + \lua_now:e + { + ltx.__tag.func.store_mc_data + ( + \__tag_get_mc_abs_cnt:,"label","#1" + ) + } + }, + __artifact-store .code:n = + { + \lua_now:e + { + ltx.__tag.func.store_mc_data + ( + \__tag_get_mc_abs_cnt:,"artifact","#1" + ) + } + }, + artifact .code:n = + { + \exp_args:Nnx + \keys_set:nn + { __tag / mc} + { __artifact-bool, __artifact-type=#1, tag=Artifact } + \exp_args:Nnx + \keys_set:nn + { __tag / mc } + { __artifact-store=\l__tag_mc_artifact_type_tl } + }, + artifact .default:n = { notype } + } + +%% +%% +%% End of file `tagpdf-mc-code-lua.sty'. diff --git a/texmf/tex/latex/tagpdf/tagpdf.lua b/texmf/tex/latex/tagpdf/tagpdf.lua new file mode 100644 index 000000000..1c8d29867 --- /dev/null +++ b/texmf/tex/latex/tagpdf/tagpdf.lua @@ -0,0 +1,707 @@ +-- +-- This is file `tagpdf.lua', +-- generated with the docstrip utility. +-- +-- The original source files were: +-- +-- tagpdf-backend.dtx (with options: `lua') +-- +-- Copyright (C) 2019-2022 Ulrike Fischer +-- +-- It may be distributed and/or modified under the conditions of +-- the LaTeX Project Public License (LPPL), either version 1.3c of +-- this license or (at your option) any later version. The latest +-- version of this license is in the file: +-- +-- https://www.latex-project.org/lppl.txt +-- +-- This file is part of the "tagpdf bundle" (The Work in LPPL) +-- and all files in that bundle must be distributed together. 
+-- +-- File: tagpdf-backend.dtx +-- tagpdf.lua +-- Ulrike Fischer + +local ProvidesLuaModule = { + name = "tagpdf", + version = "0.97", --TAGVERSION + date = "2022-08-24", --TAGDATE + description = "tagpdf lua code", + license = "The LATEX Project Public License 1.3c" +} + +if luatexbase and luatexbase.provides_module then + luatexbase.provides_module (ProvidesLuaModule) +end + +--[[ +The code has quite probably a number of problems + - more variables should be local instead of global + - the naming is not always consistent due to the development of the code + - the traversing of the shipout box must be tested with more complicated setups + - it should probably handle more node types + - +--]] + +--[[ +the main table is named ltx.__tag. It contains the functions and also the data +collected during the compilation. + +ltx.__tag.mc will contain mc connected data. +ltx.__tag.struct will contain structure related data. +ltx.__tag.page will contain page data +ltx.__tag.tables contains also data from mc and struct (from older code). This needs cleaning up. + There are certainly dublettes, but I don't dare yet ... +ltx.__tag.func will contain (public) functions. +ltx.__tag.trace will contain tracing/loging functions. 
local functions start with __
functions meant for users will be in ltx.tag

functions
 ltx.__tag.func.get_num_from (tag): takes a tag (string) and returns the id number
 ltx.__tag.func.output_num_from (tag): takes a tag (string) and prints (to tex) the id number
 ltx.__tag.func.get_tag_from (num): takes a num and returns the tag
 ltx.__tag.func.output_tag_from (num): takes a num and prints (to tex) the tag
 ltx.__tag.func.store_mc_data (num,key,data): stores key=data in ltx.__tag.mc[num]
 ltx.__tag.func.store_mc_label (label,num): stores label=num in ltx.__tag.mc.labels
 ltx.__tag.func.store_mc_kid (mcnum,kid,page): stores the mc-kids of mcnum on page page
 ltx.__tag.func.store_mc_in_page(mcnum,mcpagecnt,page): stores in the page table the number of mcnum on this page
 ltx.__tag.func.store_struct_mcabs (structnum,mcnum): stores relations structnum<->mcnum (abs)
 ltx.__tag.func.mc_insert_kids (mcnum,single): inserts the /K entries for mcnum by wandering through the [kids] table
 ltx.__tag.func.mark_page_elements(box,mcpagecnt,mccntprev,mcopen,name,mctypeprev) : the main function
 ltx.__tag.func.mark_shipout (box): a wrapper around the core function which inserts the last EMC
 ltx.__tag.func.fill_parent_tree_line (page): returns the entries of the parenttree for this page
 ltx.__tag.func.output_parenttree(abspage): outputs the content of the parenttree for pages 1 to abspage
 ltx.__tag.func.pdf_object_ref(name): returns the object reference for the object name
 ltx.__tag.func.markspaceon(), ltx.__tag.func.markspaceoff(): (de)activates the marking of positions for space chars
 ltx.__tag.trace.show_mc_data (num,loglevel): shows ltx.__tag.mc[num] if the current log level is >= loglevel
 ltx.__tag.trace.show_all_mc_data (min,max,loglevel): shows the data of the mc's min to max if the current log level is >= loglevel
 ltx.__tag.trace.show_seq: shows a sequence (array)
 ltx.__tag.trace.show_struct_data (num): shows data of structure num
 ltx.__tag.trace.show_prop: shows a prop
 ltx.__tag.trace.log
ltx.__tag.trace.showspaces : boolean +--]] + +local mctypeattributeid = luatexbase.new_attribute ("g__tag_mc_type_attr") +local mccntattributeid = luatexbase.new_attribute ("g__tag_mc_cnt_attr") +local iwspaceattributeid = luatexbase.new_attribute ("g__tag_interwordspace_attr") +local iwfontattributeid = luatexbase.new_attribute ("g__tag_interwordfont_attr") +local tagunmarkedbool= token.create("g__tag_tagunmarked_bool") +local truebool = token.create("c_true_bool") +local catlatex = luatexbase.registernumber("catcodetable@latex") +local tableinsert = table.insert +local nodeid = node.id +local nodecopy = node.copy +local nodegetattribute = node.get_attribute +local nodesetattribute = node.set_attribute +local nodehasattribute = node.has_attribute +local nodenew = node.new +local nodetail = node.tail +local nodeslide = node.slide +local noderemove = node.remove +local nodetraverseid = node.traverse_id +local nodetraverse = node.traverse +local nodeinsertafter = node.insert_after +local nodeinsertbefore = node.insert_before +local pdfpageref = pdf.pageref + +local HLIST = node.id("hlist") +local VLIST = node.id("vlist") +local RULE = node.id("rule") +local DISC = node.id("disc") +local GLUE = node.id("glue") +local GLYPH = node.id("glyph") +local KERN = node.id("kern") +local PENALTY = node.id("penalty") +local LOCAL_PAR = node.id("local_par") +local MATH = node.id("math") +ltx = ltx or { } +ltx.__tag = ltx.__tag or { } +ltx.__tag.mc = ltx.__tag.mc or { } -- mc data +ltx.__tag.struct = ltx.__tag.struct or { } -- struct data +ltx.__tag.tables = ltx.__tag.tables or { } -- tables created with new prop and new seq. + -- wasn't a so great idea ... + -- g__tag_role_tags_seq used by tag<-> is in this tables! 
ltx.__tag.page     = ltx.__tag.page   or { } -- page data, currently only i->{0->mcnum,1->mcnum,...}
ltx.__tag.trace    = ltx.__tag.trace  or { } -- show commands
ltx.__tag.func     = ltx.__tag.func   or { } -- functions
ltx.__tag.conf     = ltx.__tag.conf   or { } -- configuration variables

-- Write `message` to log and terminal, prefixed with "tagpdf: ", when its
-- level (default 3) does not exceed the TeX counter l__tag_loglevel_int.
local function __tag_log (message,loglevel)
  local level = loglevel or 3
  if level <= tex.count["l__tag_loglevel_int"] then
    texio.write_nl("tagpdf: ".. message)
  end
end

ltx.__tag.trace.log = __tag_log

-- Log every entry of the array `seq` at level 1; complain if it is not a table.
function ltx.__tag.trace.show_seq (seq)
  if type(seq) ~= "table" then
    __tag_log ("sequence " .. tostring(seq) .. " not found",1)
    return
  end
  for idx,item in ipairs(seq) do
    __tag_log ("[" .. idx .. "] => " .. tostring(item),1)
  end
end

-- Iterator factory: walks `prop` in sorted key order, yielding key and value.
local function __tag_pairs_prop (prop)
  local keys = {}
  for key in pairs(prop) do
    tableinsert(keys, key)
  end
  table.sort(keys)
  local pos = 0
  return function ()
    pos = pos + 1
    local key = keys[pos]
    if key == nil then
      return nil
    end
    return key, prop[key]
  end
end

-- Log every key/value pair of the table `prop` (sorted by key) at level 1.
function ltx.__tag.trace.show_prop (prop)
  if type(prop) ~= "table" then
    __tag_log ("prop " .. tostring(prop) .. " not found or not a table",1)
    return
  end
  for key,value in __tag_pairs_prop (prop) do
    __tag_log ("[" .. key .. "] => " .. tostring(value),1)
  end
end

-- Log all stored fields of mc chunk `num`, then its kids (kid number and page).
function ltx.__tag.trace.show_mc_data (num,loglevel)
 if ltx.__tag and ltx.__tag.mc and ltx.__tag.mc[num] then
  local entry = ltx.__tag.mc[num]
  for field,value in pairs(entry) do
   __tag_log ("mc"..num..": "..tostring(field).."=>"..tostring(value),loglevel)
  end
  local kids = entry["kids"]
  if kids then
   __tag_log ("mc" .. num .. " has " .. #kids .. " kids",loglevel)
   for pos,kidentry in ipairs(kids) do
    __tag_log ("mc ".. num .. " kid "..pos.." =>" .. kidentry.kid.." on page " ..kidentry.page,loglevel)
   end
  end
 else
  __tag_log ("mc"..num.."
not found",loglevel)
 end
end

-- Log the stored data of the mc chunks numbered min..max, then end the line.
function ltx.__tag.trace.show_all_mc_data (min,max,loglevel)
  for i = min, max do
    ltx.__tag.trace.show_mc_data (i,loglevel)
  end
  texio.write_nl("")
end

-- Log the fields stored for structure `num` at level 1.
-- Uses pairs(): the entries written by store_struct_mcabs live under string
-- keys ("mc"), which the former ipairs() loop never visited.
function ltx.__tag.trace.show_struct_data (num)
  if ltx.__tag and ltx.__tag.struct and ltx.__tag.struct[num] then
    for k,v in pairs(ltx.__tag.struct[num]) do
      __tag_log ("struct "..num..": "..tostring(k).."=>"..tostring(v),1)
    end
  else
    __tag_log ("struct "..num.." not found ",1)
  end
end

-- Read the mc counter and mc type attributes of node `n` (-1 when unset)
-- and translate the type number into its tag name.
-- Returns: mccnt, mctype, tag
local __tag_get_mc_cnt_type_tag = function (n)
  local mccnt  = nodegetattribute(n,mccntattributeid)  or -1
  local mctype = nodegetattribute(n,mctypeattributeid) or -1
  local tag    = ltx.__tag.func.get_tag_from(mctype)
  return mccnt,mctype,tag
end

-- Name a math node for trace output: subtype 0 is "beginmath", anything
-- else "endmath". The result is kept local instead of leaking a global.
local function __tag_get_mathsubtype (mathnode)
  local subtype
  if mathnode.subtype == 0 then
    subtype = "beginmath"
  else
    subtype = "endmath"
  end
  return subtype
end

-- Map a tag name to its number in g__tag_role_tags_prop; -1 if unknown.
local __tag_get_num_from =
  function (tag)
    return ltx.__tag.tables["g__tag_role_tags_prop"][tag] or -1
  end

ltx.__tag.func.get_num_from = __tag_get_num_from

-- Print the number of `tag` to TeX (LaTeX catcodes); an unknown tag prints
-- -1 and is reported in the log.
function ltx.__tag.func.output_num_from (tag)
  local num = __tag_get_num_from (tag)
  tex.sprint(catlatex,num)
  if num == -1 then
    __tag_log ("Unknown tag "..tag.." used")
  end
end

-- Map a tag number to its name in g__tag_role_tags_seq; "UNKNOWN" if absent.
local __tag_get_tag_from =
  function (num)
    return ltx.__tag.tables["g__tag_role_tags_seq"][num] or "UNKNOWN"
  end

ltx.__tag.func.get_tag_from = __tag_get_tag_from

-- Print the tag name belonging to `num` to TeX (LaTeX catcodes).
function ltx.__tag.func.output_tag_from (num)
  tex.sprint(catlatex,__tag_get_tag_from (num))
end

-- Store key=data for mc chunk `num`, creating its table on first use.
function ltx.__tag.func.store_mc_data (num,key,data)
 ltx.__tag.mc[num] = ltx.__tag.mc[num] or { }
 ltx.__tag.mc[num][key] = data
 __tag_log ("INFO TEX-STORE-MC-DATA: "..num.." => "..tostring(key).."
=> "..tostring(data),3)
end

-- Remember the absolute mc number `num` under `label` in ltx.__tag.mc.labels.
function ltx.__tag.func.store_mc_label (label,num)
  ltx.__tag.mc["labels"] = ltx.__tag.mc["labels"] or { }
  ltx.__tag.mc.labels[label] = num
end

-- Append {kid=kid,page=page} to the kids list of mc chunk `mcnum`.
-- ltx.__tag.mc[mcnum] must already exist; no entry is created here.
function ltx.__tag.func.store_mc_kid (mcnum,kid,page)
  ltx.__tag.trace.log("INFO TAG-STORE-MC-KID: "..mcnum.." => " .. kid.." on page " .. page,3)
  local mcdata = ltx.__tag.mc[mcnum]
  mcdata["kids"] = mcdata["kids"] or { }
  tableinsert(mcdata["kids"], {kid=kid,page=page})
end

-- Return how many kids are recorded for mc chunk `mcnum` (0 if none).
function ltx.__tag.func.mc_num_of_kids (mcnum)
  local num = 0
  if ltx.__tag.mc[mcnum] and ltx.__tag.mc[mcnum]["kids"] then
    num = #ltx.__tag.mc[mcnum]["kids"]
  end
  -- spaces added so that number and text no longer run together in the log
  ltx.__tag.trace.log ("INFO MC-KID-NUMBERS: " .. mcnum .. " has " .. num .. " KIDS",4)
  return num
end

-- Shared worker: create a pdf_literal whatsit (mode 1) holding `data`,
-- insert it before `current` and return the possibly changed list head.
local function __tag_insert_literal_before (head,current,data)
  local literal = nodenew("whatsit","pdf_literal")
  literal.data = data
  literal.mode = 1
  local newhead = nodeinsertbefore(head,current,literal)
  return newhead
end

-- Insert "EMC" (end of marked content) before `current`.
local function __tag_insert_emc_node (head,current)
  return __tag_insert_literal_before (head,current,"EMC")
end

-- Insert "/<tag> BMC" (marked content without properties) before `current`.
local function __tag_insert_bmc_node (head,current,tag)
  return __tag_insert_literal_before (head,current,"/"..tag.." BMC")
end

-- Insert "/<tag><<dict>> BDC" (marked content with properties) before `current`.
local function __tag_insert_bdc_node (head,current,tag,dict)
  return __tag_insert_literal_before (head,current,"/"..tag.."<<"..dict..">> BDC")
end

-- Build "<n> 0 R" for the object `name`; n is the index of the token
-- c__pdf_backend_object_<name>_int.
local function __tag_pdf_object_ref (name)
  local tokenname = 'c__pdf_backend_object_'..name..'_int'
  local object = token.create(tokenname).index..' 0 R'
  return object
end
ltx.__tag.func.pdf_object_ref=__tag_pdf_object_ref

-- Debugging aid: insert a literal after `current` that draws a short vertical
-- stroke. Defaults: color "1 0 0", height 10.
local function __tag_show_spacemark (head,current,color,height)
 local markcolor = color or "1 0 0"
 local markheight = height or 10
 local pdfstring = node.new("whatsit","pdf_literal")
       pdfstring.data =
       string.format("q "..markcolor.." RG "..markcolor.."
rg 0.4 w 0 %g m 0 %g l S Q",-3,markheight) + head = node.insert_after(head,current,pdfstring) + return head +end +local function __tag_fakespace() + tex.setattribute(iwspaceattributeid,1) + tex.setattribute(iwfontattributeid,font.current()) +end +ltx.__tag.func.fakespace = __tag_fakespace +--[[ a function to mark up places where real space chars should be inserted + it only sets an attribute. +--]] + +local function __tag_mark_spaces (head) + local inside_math = false + for n in nodetraverse(head) do + local id = n.id + if id == GLYPH then + local glyph = n + if glyph.next and (glyph.next.id == GLUE) + and not inside_math and (glyph.next.width >0) + then + nodesetattribute(glyph.next,iwspaceattributeid,1) + nodesetattribute(glyph.next,iwfontattributeid,glyph.font) + -- for debugging + if ltx.__tag.trace.showspaces then + __tag_show_spacemark (head,glyph) + end + elseif glyph.next and (glyph.next.id==KERN) and not inside_math then + local kern = glyph.next + if kern.next and (kern.next.id== GLUE) and (kern.next.width >0) + then + nodesetattribute(kern.next,iwspaceattributeid,1) + nodesetattribute(kern.next,iwfontattributeid,glyph.font) + end + end + -- look also back + if glyph.prev and (glyph.prev.id == GLUE) + and not inside_math + and (glyph.prev.width >0) + and not nodehasattribute(glyph.prev,iwspaceattributeid) + then + nodesetattribute(glyph.prev,iwspaceattributeid,1) + nodesetattribute(glyph.prev,iwfontattributeid,glyph.font) + -- for debugging + if ltx.__tag.trace.showspaces then + __tag_show_spacemark (head,glyph) + end + end + elseif id == PENALTY then + local glyph = n + -- ltx.__tag.trace.log ("PENALTY ".. 
n.subtype.."VALUE"..n.penalty,3) + if glyph.next and (glyph.next.id == GLUE) + and not inside_math and (glyph.next.width >0) and n.subtype==0 + then + nodesetattribute(glyph.next,iwspaceattributeid,1) + -- nodesetattribute(glyph.next,iwfontattributeid,glyph.font) + -- for debugging + if ltx.__tag.trace.showspaces then + __tag_show_spacemark (head,glyph) + end + end + elseif id == MATH then + inside_math = (n.subtype == 0) + end + end + return head +end +local function __tag_activate_mark_space () + if not luatexbase.in_callback ("pre_linebreak_filter","markspaces") then + luatexbase.add_to_callback("pre_linebreak_filter",__tag_mark_spaces,"markspaces") + luatexbase.add_to_callback("hpack_filter",__tag_mark_spaces,"markspaces") + end +end + +ltx.__tag.func.markspaceon=__tag_activate_mark_space + +local function __tag_deactivate_mark_space () + if luatexbase.in_callback ("pre_linebreak_filter","markspaces") then + luatexbase.remove_from_callback("pre_linebreak_filter","markspaces") + luatexbase.remove_from_callback("hpack_filter","markspaces") + end +end + +ltx.__tag.func.markspaceoff=__tag_deactivate_mark_space +local default_space_char = node.new(GLYPH) +local default_fontid = font.id("TU/lmr/m/n/10") +default_space_char.char = 32 +default_space_char.font = default_fontid +local function __tag_space_chars_shipout (box) + local head = box.head + if head then + for n in node.traverse(head) do + local spaceattr = nodegetattribute(n,iwspaceattributeid) or -1 + if n.id == HLIST then -- enter the hlist + __tag_space_chars_shipout (n) + elseif n.id == VLIST then -- enter the vlist + __tag_space_chars_shipout (n) + elseif n.id == GLUE then + if ltx.__tag.trace.showspaces and spaceattr==1 then + __tag_show_spacemark (head,n,"0 1 0") + end + if spaceattr==1 then + local space + local space_char = node.copy(default_space_char) + local curfont = nodegetattribute(n,iwfontattributeid) + ltx.__tag.trace.log ("INFO SPACE-FUNCTION-FONT: ".. 
tostring(curfont),3)
          -- use the font recorded on the glue only if luaotfload reports a
          -- slot named "space" for it; otherwise keep the default font
          if curfont and luaotfload.aux.slot_of_name(curfont,"space") then
            space_char.font=curfont
          end
          head, space = node.insert_before(head, n, space_char) --
          -- the glue shrinks by the width of the inserted space glyph
          n.width     = n.width - space.width
          space.attr  = n.attr
        end
      end
    end
    -- a space inserted before the first node changes the list head;
    -- store it back so the box does not lose the new first node
    box.head = head
  end
end

-- Public entry point: insert real space glyphs at all marked glue in `box`.
function ltx.__tag.func.space_chars_shipout (box)
  __tag_space_chars_shipout (box)
end

-- Print to TeX the marked-content references (/K entries) of mc chunk `mcnum`.
--   single == 1: the output must be a single PDF object, so several kids are
--                wrapped in [ ] and a chunk without kids prints "null".
--   otherwise  : the dictionaries are printed one after the other.
-- A missing chunk is only reported in the log.
function ltx.__tag.func.mc_insert_kids (mcnum,single)
  if ltx.__tag.mc[mcnum] then
    ltx.__tag.trace.log("INFO TEX-MC-INSERT-KID-TEST: " .. mcnum,4)
    local kids = ltx.__tag.mc[mcnum]["kids"]
    if kids then
      local wrap = #kids > 1 and single==1
      if wrap then
        tex.sprint("[")
      end
      for i,kidstable in ipairs(kids) do
        local kidnum  = kidstable["kid"]
        local kidpage = kidstable["page"]
        local kidpageobjnum = pdfpageref(kidpage)
        ltx.__tag.trace.log("INFO TEX-MC-INSERT-KID: " .. mcnum ..
                         " insert KID " ..i..
                         " with num " .. kidnum ..
                         " on page " .. kidpage.."/"..kidpageobjnum,3)
        -- marked-content reference: page object plus MCID on that page
        -- (the dictionary had been reduced to "<> ", leaving both values unused)
        tex.sprint(catlatex,"<</Type /MCR /Pg "..kidpageobjnum .. " 0 R /MCID "..kidnum.. ">> " )
      end
      if wrap then
        tex.sprint("]")
      end
    else
      -- this is typically not a problem, e.g. empty hbox in footer/header can
      -- trigger this warning.
      ltx.__tag.trace.log("WARN TEX-MC-INSERT-NO-KIDS: "..mcnum.." has no kids",2)
      if single==1 then
        tex.sprint("null")
      end
    end
  else
    ltx.__tag.trace.log("WARN TEX-MC-INSERT-MISSING: "..mcnum.." doesn't exist",0)
  end
end

-- Record that mc chunk `mcnum` belongs to structure `structnum`
-- (appended to the structure's ordered "mc" list, and stored as the
-- chunk's "parent").
function ltx.__tag.func.store_struct_mcabs (structnum,mcnum)
 ltx.__tag.struct[structnum]=ltx.__tag.struct[structnum] or { }
 ltx.__tag.struct[structnum]["mc"]=ltx.__tag.struct[structnum]["mc"] or { }
 -- a structure can contain more than one mc chunk, the content should be ordered
 tableinsert(ltx.__tag.struct[structnum]["mc"],mcnum)
 ltx.__tag.trace.log("INFO TEX-MC-INTO-STRUCT: "..
                   mcnum.."
inserted in struct "..structnum,3) + -- but every mc can only be in one structure + ltx.__tag.mc[mcnum]= ltx.__tag.mc[mcnum] or { } + ltx.__tag.mc[mcnum]["parent"] = structnum +end + +-- pay attention: lua counts arrays from 1, tex pages from one +-- mcid and arrays in pdf count from 0. +function ltx.__tag.func.store_mc_in_page (mcnum,mcpagecnt,page) + ltx.__tag.page[page] = ltx.__tag.page[page] or {} + ltx.__tag.page[page][mcpagecnt] = mcnum + ltx.__tag.trace.log("INFO TAG-MC-INTO-PAGE: page " .. page .. + ": inserting MCID " .. mcpagecnt .. " => " .. mcnum,3) +end +--[[ + Now follows the core function + It wades through the shipout box and checks the attributes + ARGUMENTS + box: is a box, + mcpagecnt: num, the current page cnt of mc (should start at -1 in shipout box), needed for recursion + mccntprev: num, the attribute cnt of the previous node/whatever - if different we have a chunk border + mcopen: num, records if some bdc/emc is open + These arguments are only needed for log messages, if not present are replaces by fix strings: + name: string to describe the box + mctypeprev: num, the type attribute of the previous node/whatever + + there are lots of logging messages currently. Should be cleaned up in due course. + One should also find ways to make the function shorter. +--]] + +function ltx.__tag.func.mark_page_elements (box,mcpagecnt,mccntprev,mcopen,name,mctypeprev) + local name = name or ("SOMEBOX") + local mctypeprev = mctypeprev or -1 + local abspage = status.total_pages + 1 -- the real counter is increased + -- inside the box so one off + -- if the callback is not used. (???) + ltx.__tag.trace.log ("INFO TAG-ABSPAGE: " .. abspage,3) + ltx.__tag.trace.log ("INFO TAG-ARGS: pagecnt".. mcpagecnt.. + " prev "..mccntprev .. + " type prev "..mctypeprev,4) + ltx.__tag.trace.log ("INFO TAG-TRAVERSING-BOX: ".. tostring(name).. + " TYPE ".. node.type(node.getid(box)),3) + local head = box.head -- ShipoutBox is a vlist? 
+ if head then + mccnthead, mctypehead,taghead = __tag_get_mc_cnt_type_tag (head) + ltx.__tag.trace.log ("INFO TAG-HEAD: " .. + node.type(node.getid(head)).. + " MC"..tostring(mccnthead).. + " => TAG " .. tostring(mctypehead).. + " => ".. tostring(taghead),3) + else + ltx.__tag.trace.log ("INFO TAG-NO-HEAD: head is ".. + tostring(head),3) + end + for n in node.traverse(head) do + local mccnt, mctype, tag = __tag_get_mc_cnt_type_tag (n) + local spaceattr = nodegetattribute(n,iwspaceattributeid) or -1 + ltx.__tag.trace.log ("INFO TAG-NODE: ".. + node.type(node.getid(n)).. + " MC".. tostring(mccnt).. + " => TAG ".. tostring(mctype).. + " => " .. tostring(tag),3) + if n.id == HLIST + then -- enter the hlist + mcopen,mcpagecnt,mccntprev,mctypeprev= + ltx.__tag.func.mark_page_elements (n,mcpagecnt,mccntprev,mcopen,"INTERNAL HLIST",mctypeprev) + elseif n.id == VLIST then -- enter the vlist + mcopen,mcpagecnt,mccntprev,mctypeprev= + ltx.__tag.func.mark_page_elements (n,mcpagecnt,mccntprev,mcopen,"INTERNAL VLIST",mctypeprev) + elseif n.id == GLUE then -- at glue real space chars are inserted, but this has + -- been done if the previous shipout wandering, so here it is ignored + elseif n.id == LOCAL_PAR then -- local_par is ignored + elseif n.id == PENALTY then -- penalty is ignored + elseif n.id == KERN then -- kern is ignored + ltx.__tag.trace.log ("INFO TAG-KERN-SUBTYPE: ".. + node.type(node.getid(n)).." "..n.subtype,4) + else + -- math is currently only logged. + -- we could mark the whole as math + -- for inner processing the mlist_to_hlist callback is probably needed. + if n.id == MATH then + ltx.__tag.trace.log("INFO TAG-MATH-SUBTYPE: ".. + node.type(node.getid(n)).." "..__tag_get_mathsubtype(n),4) + end + -- endmath + ltx.__tag.trace.log("INFO TAG-MC-COMPARE: current ".. + mccnt.." prev "..mccntprev,4) + if mccnt~=mccntprev then -- a new mc chunk + ltx.__tag.trace.log ("INFO TAG-NEW-MC-NODE: ".. + node.type(node.getid(n)).. + " MC"..tostring(mccnt).. 
+ " <=> PREVIOUS "..tostring(mccntprev),4) + if mcopen~=0 then -- there is a chunk open, close it (hope there is only one ... + box.list=__tag_insert_emc_node (box.list,n) + mcopen = mcopen - 1 + ltx.__tag.trace.log ("INFO TAG-INSERT-EMC: " .. + mcpagecnt .. " MCOPEN = " .. mcopen,3) + if mcopen ~=0 then + ltx.__tag.trace.log ("WARN TAG-OPEN-MC: " .. mcopen,1) + end + end + if ltx.__tag.mc[mccnt] then + if ltx.__tag.mc[mccnt]["artifact"] then + ltx.__tag.trace.log("INFO TAG-INSERT-ARTIFACT: ".. + tostring(ltx.__tag.mc[mccnt]["artifact"]),3) + if ltx.__tag.mc[mccnt]["artifact"] == "" then + box.list = __tag_insert_bmc_node (box.list,n,"Artifact") + else + box.list = __tag_insert_bdc_node (box.list,n,"Artifact", "/Type /"..ltx.__tag.mc[mccnt]["artifact"]) + end + else + ltx.__tag.trace.log("INFO TAG-INSERT-TAG: ".. + tostring(tag),3) + mcpagecnt = mcpagecnt +1 + ltx.__tag.trace.log ("INFO TAG-INSERT-BDC: "..mcpagecnt,3) + local dict= "/MCID "..mcpagecnt + if ltx.__tag.mc[mccnt]["raw"] then + ltx.__tag.trace.log("INFO TAG-USE-RAW: ".. + tostring(ltx.__tag.mc[mccnt]["raw"]),3) + dict= dict .. " " .. ltx.__tag.mc[mccnt]["raw"] + end + if ltx.__tag.mc[mccnt]["alt"] then + ltx.__tag.trace.log("INFO TAG-USE-ALT: ".. + tostring(ltx.__tag.mc[mccnt]["alt"]),3) + dict= dict .. " " .. ltx.__tag.mc[mccnt]["alt"] + end + if ltx.__tag.mc[mccnt]["actualtext"] then + ltx.__tag.trace.log("INFO TAG-USE-ACTUALTEXT: ".. + tostring(ltx.__tag.mc[mccnt]["actualtext"]),3) + dict= dict .. " " .. 
ltx.__tag.mc[mccnt]["actualtext"]
      end
      -- open the chunk in the page stream and register it as kid of the
      -- mc chunk and in the page table
      box.list = __tag_insert_bdc_node (box.list,n,tag, dict)
      ltx.__tag.func.store_mc_kid (mccnt,mcpagecnt,abspage)
      ltx.__tag.func.store_mc_in_page(mccnt,mcpagecnt,abspage)
      ltx.__tag.trace.show_mc_data (mccnt,3)
     end
     mcopen = mcopen + 1
    else
     -- no data stored for this mc number: the material was never tagged
     if tagunmarkedbool.mode == truebool.mode then
      ltx.__tag.trace.log("INFO TAG-NOT-TAGGED: this has not been tagged, using artifact",2)
      box.list = __tag_insert_bmc_node (box.list,n,"Artifact")
      mcopen = mcopen + 1
     else
      ltx.__tag.trace.log("WARN TAG-NOT-TAGGED: this has not been tagged",1)
     end
    end
    mccntprev = mccnt
   end
  end -- end if
 end -- end for
 if head then
   -- locals: these values are only needed for the trace message below
   local mccnthead, mctypehead, taghead = __tag_get_mc_cnt_type_tag (head)
   ltx.__tag.trace.log ("INFO TAG-ENDHEAD: " ..
                      node.type(node.getid(head))..
                      " MC"..tostring(mccnthead)..
                      " => TAG "..tostring(mctypehead)..
                      " => "..tostring(taghead),4)
 else
   ltx.__tag.trace.log ("INFO TAG-ENDHEAD: ".. tostring(head),4)
 end
 ltx.__tag.trace.log ("INFO TAG-QUITTING-BOX "..
                   tostring(name)..
                  " TYPE ".. node.type(node.getid(box)),4)
 return mcopen,mcpagecnt,mccntprev,mctypeprev
end

-- Wrapper around mark_page_elements for a complete shipout box: marks the
-- page (page mc counter starting at -1, no chunk open) and, if a chunk is
-- still open afterwards, appends the closing EMC at the end of the box list.
function ltx.__tag.func.mark_shipout (box)
 -- only the first return value (number of open chunks) is needed; it is
 -- kept local so that no global `mcopen` is created
 local mcopen = ltx.__tag.func.mark_page_elements (box,-1,-100,0,"Shipout",-1)
 if mcopen~=0 then -- there is a chunk open, close it (hope there is only one ...
  local emcnode = nodenew("whatsit","pdf_literal")
  local list = box.list
  emcnode.data = "EMC"
  emcnode.mode=1
  if list then
     list = node.insert_after (list,node.tail(list),emcnode)
     mcopen = mcopen - 1
     ltx.__tag.trace.log ("INFO SHIPOUT-INSERT-LAST-EMC: MCOPEN " .. mcopen,3)
  else
     ltx.__tag.trace.log ("WARN SHIPOUT-UPS: this shouldn't happen",0)
  end
  if mcopen ~=0 then
     ltx.__tag.trace.log ("WARN SHIPOUT-MC-OPEN: " ..
mcopen,1)
  end
 end
end

-- Build the parent tree entry for the (1-based) TeX page `page`.
-- The PDF side counts from 0, so the entry is keyed by page-1 and the mc
-- chunks of the page are read from ltx.__tag.page[page][0..n].
-- Returns "<pdfpage> [<objref>]" for a single chunk,
--         "<pdfpage> [ <objref> <objref> ...] " for several chunks,
--         "" when no data is stored for the page.
function ltx.__tag.func.fill_parent_tree_line (page)
  -- we need to get page-> i=kid -> mcnum -> structnum
  -- pay attention: the kid numbers and the page number in the parent tree start with 0!
  local numsentry =""
  local pdfpage = page-1
  local pagedata = ltx.__tag.page[page]
  if pagedata and pagedata[0] then
    -- # counts the entries 1..n; entry 0 comes on top, hence n+1 chunks
    local mcchunks = #pagedata
    ltx.__tag.trace.log("INFO PARENTTREE-NUM: page "..
                  page.." has "..mcchunks.."+1 Elements ",4)
    for i=0,mcchunks do
      -- logs the absolute mc number stored for every MCID of the page
      ltx.__tag.trace.log("INFO PARENTTREE-CHUNKS: "..
         tostring(pagedata[i]),4)
    end
    if mcchunks == 0 then
      -- only one chunk so no need for a loop
      local mcnum     = pagedata[0]
      -- fall back to structure 0 like the multi-chunk branch does
      local structnum = ltx.__tag.mc[mcnum]["parent"] or 0
      local objref    = __tag_pdf_object_ref('__tag/struct/'..structnum)
      ltx.__tag.trace.log("INFO PARENTTREE-STRUCT-OBJREF: =====>"..
        tostring(objref),5)
      numsentry = pdfpage .. " [".. objref .. "]"
      ltx.__tag.trace.log("INFO PARENTTREE-NUMENTRY: page " ..
        page.. " num entry = ".. numsentry,3)
    else
      numsentry = pdfpage .. " ["
      for i=0,mcchunks do
        local mcnum     = pagedata[i]
        local structnum = ltx.__tag.mc[mcnum]["parent"] or 0
        local objref    = __tag_pdf_object_ref('__tag/struct/'..structnum)
        numsentry = numsentry .. " ".. objref
      end
      numsentry = numsentry .. "] "
      ltx.__tag.trace.log("INFO PARENTTREE-NUMENTRY: page " ..
        page.. " num entry = ".. numsentry,3)
    end
  else
    ltx.__tag.trace.log ("INFO PARENTTREE-NO-DATA: page "..page,3)
  end
  return numsentry
end

-- Print the parent tree lines for pages 1..abspage to TeX, one per page.
function ltx.__tag.func.output_parenttree (abspage)
 for i=1,abspage do
  local line = ltx.__tag.func.fill_parent_tree_line (i) ..
"^^J" + tex.sprint(catlatex,line) + end +end +-- +-- End of File `tagpdf.lua'. diff --git a/texmf/tex/latex/tagpdf/tagpdf.sty b/texmf/tex/latex/tagpdf/tagpdf.sty new file mode 100644 index 000000000..064ce5998 --- /dev/null +++ b/texmf/tex/latex/tagpdf/tagpdf.sty @@ -0,0 +1,2701 @@ +%% +%% This is file `tagpdf.sty', +%% generated with the docstrip utility. +%% +%% The original source files were: +%% +%% tagpdf.dtx (with options: `package') +%% tagpdf-checks.dtx (with options: `package') +%% tagpdf-mc-shared.dtx (with options: `shared') +%% tagpdf.dtx (with options: `mcloading') +%% tagpdf-tree.dtx (with options: `package') +%% tagpdf-roles.dtx (with options: `package') +%% tagpdf-struct.dtx (with options: `package') +%% tagpdf-space.dtx (with options: `package') +%% tagpdf-user.dtx (with options: `package') +%% +%% Copyright (C) 2019-2022 Ulrike Fischer +%% +%% It may be distributed and/or modified under the conditions of +%% the LaTeX Project Public License (LPPL), either version 1.3c of +%% this license or (at your option) any later version. The latest +%% version of this license is in the file: +%% +%% https://www.latex-project.org/lppl.txt +%% +%% This file is part of the "tagpdf bundle" (The Work in LPPL) +%% and all files in that bundle must be distributed together. +%% +%% File: tagpdf.dtx +\ProvidesExplPackage {tagpdf} {2022-08-24} {0.97} + { A package to experiment with pdf tagging } + +\bool_if:nF + { + \bool_lazy_and_p:nn + {\cs_if_exist_p:N \pdfmanagement_if_active_p:} + { \pdfmanagement_if_active_p: } + } + { %error for now, perhaps warning later. + \PackageError{tagpdf} + { + PDF~resource~management~is~no~active!\MessageBreak + tagpdf~will~no~work. 
+ } + { + Activate~it~with \MessageBreak + \string\RequirePackage{pdfmanagement-testphase}\MessageBreak + \string\DocumentMetadata{}\MessageBreak + before~\string\documentclass + } + } +\prop_gput:Nnn \g_msg_module_name_prop { tag }{ tagpdf } +\bool_new:N\g__tag_mode_lua_bool +\DeclareOption {luamode} { \sys_if_engine_luatex:T { \bool_gset_true:N \g__tag_mode_lua_bool } } +\DeclareOption {genericmode}{ \bool_gset_false:N\g__tag_mode_lua_bool } +\ExecuteOptions{luamode} +\ProcessOptions +\RequirePackage{l3ref-tmp} +\RequirePackage{tagpdf-base} +\cs_if_free:NT \pdf_object_write:nnn + { + \cs_new_protected:Npn \pdf_object_new:n #1 + { \pdf_object_new:nn{#1}{dict} } + \cs_new_protected:Npn \pdf_object_write:nnn #1#2#3 + { + \pdf_object_write:nn {#1}{#3} + } + \str_if_eq:VnT \c_sys_backend_str {pdftex} + { + \cs_set_protected:Npn \pdf_object_write:nnn #1#2#3 + { + \tex_immediate:D \tex_pdfobj:D + useobjnum ~ + \int_use:c + { c__pdf_backend_object_ \tl_to_str:n {#1} _int } + { << ~ \exp_not:n {#3} ~ >> } + } + } + \str_if_eq:VnT \c_sys_backend_str {luatex} + { + \cs_set_protected:Npn \pdf_object_write:nnn #1#2#3 + { + \tex_immediate:D \tex_pdfextension:D obj ~ + useobjnum ~ + \int_use:c + { c__pdf_backend_object_ \tl_to_str:n {#1} _int } + { << ~ \exp_not:n {#3} ~ >> } + } + } + } +\cs_generate_variant:Nn \pdf_object_write:nnn {nnx} + + + \cs_new_protected:Npn \__tag_lastpagelabel: + { + \legacy_if:nT { @filesw } + { + \exp_args:NNnx \exp_args:NNx\iow_now:Nn \@auxout + { + \token_to_str:N \newlabeldata + {__tag_LastPage} + { + {abspage} { \int_use:N \g_shipout_readonly_int} + {tagmcabs}{ \int_use:N \c@g__tag_MCID_abs_int } + } + } + } + } + + \AddToHook{enddocument/afterlastpage} + {\__tag_lastpagelabel:} + \cs_if_exist:NF \ref_value:nnn + { + \cs_new:Npn \ref_value:nnn #1#2#3 + { + \exp_args:Nee + \__ref_value:nnn + { \tl_to_str:n {#1} } { \tl_to_str:n {#2} } {#3} + } + \cs_new:Npn \__ref_value:nnn #1#2#3 + { + \tl_if_exist:cTF { g__ref_label_ #1 _ #2 _tl } + { 
\tl_use:c { g__ref_label_ #1 _ #2 _tl } } + { + #3 + } + } + } + +\tl_new:N \l__tag_tmpa_tl +\tl_new:N \l__tag_tmpb_tl +\str_new:N \l__tag_tmpa_str +\prop_new:N \l__tag_tmpa_prop +\seq_new:N \l__tag_tmpa_seq +\seq_new:N \l__tag_tmpb_seq +\clist_new:N \l__tag_tmpa_clist +\int_new:N \l__tag_tmpa_int +\box_new:N \l__tag_tmpa_box +\box_new:N \l__tag_tmpb_box +\clist_const:Nn \c__tag_refmc_clist {tagabspage,tagmcabs,tagmcid} +\clist_const:Nn \c__tag_refstruct_clist {tagstruct,tagstructobj} +\int_new:N \l__tag_loglevel_int +\bool_new:N \g__tag_active_space_bool +\bool_new:N \g__tag_active_mc_bool +\bool_new:N \g__tag_active_tree_bool +\bool_new:N \g__tag_active_struct_bool +\bool_new:N \g__tag_active_struct_dest_bool +\bool_gset_true:N \g__tag_active_struct_dest_bool + +\bool_new:N \l__tag_active_mc_bool +\bool_set_true:N \l__tag_active_mc_bool +\bool_new:N \l__tag_active_struct_bool +\bool_set_true:N \l__tag_active_struct_bool +\bool_new:N \g__tag_tagunmarked_bool +\prg_generate_conditional_variant:Nnn \pdf_object_if_exist:n {e}{T,F} +\cs_generate_variant:Nn \pdf_object_ref:n {e} +\cs_generate_variant:Nn \pdfannot_dict_put:nnn {nnx} +\cs_generate_variant:Nn \pdffile_embed_stream:nnn {nxx,oxx} +\cs_generate_variant:Nn \prop_gput:Nnn {Nxx,Nen} +\cs_generate_variant:Nn \prop_put:Nnn {Nxx} +\cs_generate_variant:Nn \prop_item:Nn {No} +\cs_generate_variant:Nn \ref_label:nn { nv } +\cs_generate_variant:Nn \seq_set_split:Nnn{Nne} +\cs_generate_variant:Nn \str_set_convert:Nnnn {Nonn, Noon, Nnon } +\cs_generate_variant:Nn \clist_map_inline:nn {on} + +\ref_attribute_gset:nnnn { tagstruct } {0} { now } + { \int_use:N \c@g__tag_struct_abs_int } +\ref_attribute_gset:nnnn { tagstructobj } {} { now } + { + \pdf_object_if_exist:eT {__tag/struct/\int_use:N \c@g__tag_struct_abs_int} + { + \pdf_object_ref:e{__tag/struct/\int_use:N \c@g__tag_struct_abs_int} + } + } +\ref_attribute_gset:nnnn { tagabspage } {0} { shipout } + { \int_use:N \g_shipout_readonly_int } +\ref_attribute_gset:nnnn { 
tagmcabs } {0} { now } + { \int_use:N \c@g__tag_MCID_abs_int } +\ref_attribute_gset:nnnn {tagmcid } {0} { now } + { \int_use:N \g__tag_MCID_tmp_bypage_int } +\cs_new_protected:Npn \__tag_ref_label:nn #1 #2 %#1 label, #2 name of list mc or struct + { + \@bsphack + \ref_label:nv {#1}{c__tag_ref#2_clist} + \@esphack + } +\cs_generate_variant:Nn \__tag_ref_label:nn {en} +\cs_new:Npn \__tag_ref_value:nnn #1 #2 #3 %#1 label, #2 attribute, #3 default + { + \ref_value:nnn {#1}{#2}{#3} + } +\cs_generate_variant:Nn \__tag_ref_value:nnn {enn} +\cs_new:Npn \__tag_ref_value_lastpage:nn #1 #2 + { + \ref_value:nnn {__tag_LastPage}{#1}{#2} + } + +\cs_set_eq:NN \__tag_prop_new:N \prop_new:N +\cs_set_eq:NN \__tag_seq_new:N \seq_new:N +\cs_set_eq:NN \__tag_prop_gput:Nnn \prop_gput:Nnn +\cs_set_eq:NN \__tag_seq_gput_right:Nn \seq_gput_right:Nn +\cs_set_eq:NN \__tag_seq_item:cn \seq_item:cn +\cs_set_eq:NN \__tag_prop_item:cn \prop_item:cn +\cs_set_eq:NN \__tag_seq_show:N \seq_show:N +\cs_set_eq:NN \__tag_prop_show:N \prop_show:N + +\cs_generate_variant:Nn \__tag_prop_gput:Nnn { Nxn , Nxx, Nnx , cnn, cxn, cnx, cno} +\cs_generate_variant:Nn \__tag_seq_gput_right:Nn { Nx , No, cn, cx } +\cs_generate_variant:Nn \__tag_prop_new:N { c } +\cs_generate_variant:Nn \__tag_seq_new:N { c } +\cs_generate_variant:Nn \__tag_seq_show:N { c } +\cs_generate_variant:Nn \__tag_prop_show:N { c } +\cs_new_protected:Npn \tag_stop_group_begin: + { + \group_begin: + \bool_set_false:N \l__tag_active_struct_bool + \bool_set_false:N \l__tag_active_mc_bool + } +\cs_set_eq:NN \tag_stop_group_end: \group_end: +\cs_set_protected:Npn \tag_stop: + { + \bool_set_false:N \l__tag_active_struct_bool + \bool_set_false:N \l__tag_active_mc_bool + } +\cs_set_protected:Npn \tag_start: + { + \bool_set_true:N \l__tag_active_struct_bool + \bool_set_true:N \l__tag_active_mc_bool + } +\prop_new:N\g__tag_state_prop +\cs_set_protected:Npn \tag_stop:n #1 + { + \tag_if_active:TF + { + \bool_set_false:N \l__tag_active_struct_bool + 
\bool_set_false:N \l__tag_active_mc_bool + \prop_gput:Nnn \g__tag_state_prop { #1 }{ 1 } + } + { + \prop_gremove:Nn \g__tag_state_prop { #1 } + } + } +\cs_set_protected:Npn \tag_start:n #1 + { + \prop_gpop:NnN \g__tag_state_prop {#1}\l__tag_tmpa_tl + \quark_if_no_value:NF \l__tag_tmpa_tl + { + \bool_set_true:N \l__tag_active_struct_bool + \bool_set_true:N \l__tag_active_mc_bool + } + } + +\keys_define:nn { __tag / setup } + { + activate-space .bool_gset:N = \g__tag_active_space_bool, + activate-mc .bool_gset:N = \g__tag_active_mc_bool, + activate-tree .bool_gset:N = \g__tag_active_tree_bool, + activate-struct .bool_gset:N = \g__tag_active_struct_bool, + activate-all .meta:n = + {activate-mc={#1},activate-tree={#1},activate-struct={#1}}, + activate-all .default:n = true, + no-struct-dest .bool_gset_inverse:N = \g__tag_active_struct_dest_bool, + % \end{macrocode} + log .choice:, + log / none .code:n = {\int_set:Nn \l__tag_loglevel_int { 0 }}, + log / v .code:n = + { + \int_set:Nn \l__tag_loglevel_int { 1 } + \cs_set_protected:Nn \__tag_check_typeout_v:n { \iow_term:x {##1} } + }, + log / vv .code:n = {\int_set:Nn \l__tag_loglevel_int { 2 }}, + log / vvv .code:n = {\int_set:Nn \l__tag_loglevel_int { 3 }}, + log / all .code:n = {\int_set:Nn \l__tag_loglevel_int { 10 }}, + tagunmarked .bool_gset:N = \g__tag_tagunmarked_bool, + tagunmarked .initial:n = true, + tabsorder .choice:, + tabsorder / row .code:n = + \pdfmanagement_add:nnn { Page } {Tabs}{/R}, + tabsorder / column .code:n = + \pdfmanagement_add:nnn { Page } {Tabs}{/C}, + tabsorder / structure .code:n = + \pdfmanagement_add:nnn { Page } {Tabs}{/S}, + tabsorder / none .code:n = + \pdfmanagement_remove:nn {Page} {Tabs}, + tabsorder .initial:n = structure, + uncompress .code:n = { \pdf_uncompress: }, + } +\sys_if_engine_luatex:T + { + \file_input:n {tagpdf-luatex.def} + } +%% File: tagpdf-checks.dtx +\msg_new:nnn { tag } {mc-nested} { nested~marked~content~found~-~mcid~#1 } +\msg_new:nnn { tag } {mc-tag-missing} { 
required~tag~missing~-~mcid~#1 } +\msg_new:nnn { tag } {mc-label-unknown} + { label~#1~unknown~or~has~been~already~used.\\ + Either~rerun~or~remove~one~of~the~uses. } +\msg_new:nnn { tag } {mc-used-twice} { mc~#1~has~been~already~used } +\msg_new:nnn { tag } {mc-not-open} { there~is~no~mc~to~end~at~#1 } +\msg_new:nnn { tag } {mc-pushed} { #1~has~been~pushed~to~the~mc~stack} +\msg_new:nnn { tag } {mc-popped} { #1~has~been~removed~from~the~mc~stack } +\msg_new:nnn { tag } {mc-current} % text depends on whether an MC is open (\g__tag_in_mc_bool) + { current~MC:~ + \bool_if:NTF\g__tag_in_mc_bool + {abscnt=\__tag_get_mc_abs_cnt:,~tag=\g__tag_mc_key_tag_tl} + {no~MC~open,~current~abscnt=\__tag_get_mc_abs_cnt:} + } +\msg_new:nnn { tag } {struct-unknown} + { structure~with~number~#1~doesn't~exist\\ #2 } +\msg_new:nnn { tag } {struct-no-objnum} { objnum~missing~for~structure~#1 } +\msg_new:nnn { tag } + {struct-faulty-nesting} + { there~is~no~open~structure~on~the~stack } +\msg_new:nnn { tag } {struct-missing-tag} { a~structure~must~have~a~tag! } +\msg_new:nnn { tag } {struct-used-twice} + { structure~with~label~#1~has~already~been~used} +\msg_new:nnn { tag } {struct-label-unknown} + { structure~with~label~#1~is~unknown~rerun} +\msg_new:nnn { tag } {struct-show-closing} + { closing~structure~#1~tagged~\prop_item:cn{g__tag_struct_#1_prop}{S} } +\msg_new:nnn { tag } {attr-unknown} { attribute~#1~is~unknown} +\msg_new:nnn { tag } {role-missing} { tag~#1~has~no~role~assigned } +\msg_new:nnn { tag } {role-unknown} { role~#1~is~not~known } +\msg_new:nnn { tag } {role-unknown-tag} { tag~#1~is~not~known } +\msg_new:nnn { tag } {role-tag} { mapping~tag~#1~to~role~#2 } +\msg_new:nnn { tag } {new-tag} { adding~new~tag~#1 } +\msg_new:nnn { tag } {tree-mcid-index-wrong} + {something~is~wrong~with~the~mcid--rerun} +\msg_new:nnn { tag } {sys-no-interwordspace} + {engine/output~mode~#1~doesn't~support~the~interword~spaces} +\cs_set_eq:NN \__tag_check_typeout_v:n \use_none:n +\msg_new:nnnn { tag } {para-hook-count-wrong} +
{The~number~of~automatic~begin~(#1)~and~end~(#2)~para~hooks~differ!} % #1 = begin hook count, #2 = end hook count + {This~is~quite~probably~a~coding~error~and~the~structure~will~be~wrong!} +\prg_set_conditional:Npnn \tag_if_active: { p , T , TF, F } + { + \bool_lazy_all:nTF + { + {\g__tag_active_struct_bool} + {\g__tag_active_mc_bool} + {\g__tag_active_tree_bool} + {\l__tag_active_struct_bool} + {\l__tag_active_mc_bool} + } + { + \prg_return_true: + } + { + \prg_return_false: + } + } +\prg_new_conditional:Npnn \__tag_check_if_active_mc: {T,F,TF} + { + \bool_lazy_and:nnTF { \g__tag_active_mc_bool } { \l__tag_active_mc_bool } + { + \prg_return_true: + } + { + \prg_return_false: + } + } +\prg_new_conditional:Npnn \__tag_check_if_active_struct: {T,F,TF} + { + \bool_lazy_and:nnTF { \g__tag_active_struct_bool } { \l__tag_active_struct_bool } + { + \prg_return_true: + } + { + \prg_return_false: + } + } +\cs_new_protected:Npn \__tag_check_structure_has_tag:n #1 %#1 struct num + { + \prop_if_in:cnF { g__tag_struct_#1_prop } + {S} + { + \msg_error:nn { tag } {struct-missing-tag} + } + } +\cs_new_protected:Npn \__tag_check_structure_tag:N #1 + { + \prop_if_in:NoF \g__tag_role_tags_prop {#1} + { + \msg_warning:nnx { tag } {role-unknown-tag} {#1} + } + } +\cs_new_protected:Npn \__tag_check_info_closing_struct:n #1 %#1 struct num + { + \int_compare:nNnT {\l__tag_loglevel_int} > { 0 } + { + \msg_info:nnn { tag } {struct-show-closing} {#1} + } + } + +\cs_generate_variant:Nn \__tag_check_info_closing_struct:n {o,x} +\cs_new_protected:Npn \__tag_check_no_open_struct: + { + \msg_error:nn { tag } {struct-faulty-nesting} + } +\cs_new_protected:Npn \__tag_check_struct_used:n #1 %#1 label + { + \prop_get:cnNT + {g__tag_struct_\__tag_ref_value:enn{tagpdfstruct-#1}{tagstruct}{unknown}_prop} + {P} + \l_tmpa_tl + { + \msg_warning:nnn { tag } {struct-used-twice} {#1} + } + } +\cs_new_protected:Npn \__tag_check_add_tag_role:nn #1 #2 %#1 tag, #2 role + { + \tl_if_empty:nTF {#2} + { + \msg_warning:nnn { tag } {role-missing} {#1} + } + { +
\prop_get:NnNTF \g__tag_role_tags_prop {#2} \l_tmpa_tl + { + \int_compare:nNnT {\l__tag_loglevel_int} > { 0 } + { + \msg_info:nnnn { tag } {role-tag} {#1} {#2} + } + } + { + \msg_warning:nnn { tag } {role-unknown} {#2} + } + } + } +\cs_new_protected:Npn \__tag_check_mc_if_nested: + { + \__tag_mc_if_in:T + { + \msg_warning:nnx { tag } {mc-nested} { \__tag_get_mc_abs_cnt: } + } + } + +\cs_new_protected:Npn \__tag_check_mc_if_open: + { + \__tag_mc_if_in:F + { + \msg_warning:nnx { tag } {mc-not-open} { \__tag_get_mc_abs_cnt: } + } + } +\cs_new_protected:Npn \__tag_check_mc_pushed_popped:nn #1 #2 + { + \int_compare:nNnT + { \l__tag_loglevel_int } ={ 2 } + { \msg_info:nnx {tag}{mc-#1}{#2} } + \int_compare:nNnT + { \l__tag_loglevel_int } > { 2 } + { + \msg_info:nnx {tag}{mc-#1}{#2} + \seq_log:N \g__tag_mc_stack_seq + } + } +\cs_new_protected:Npn \__tag_check_mc_tag:N #1 %#1 is var with a tag name in it + { + \tl_if_empty:NT #1 + { + \msg_error:nnx { tag } {mc-tag-missing} { \__tag_get_mc_abs_cnt: } + } + \prop_if_in:NoF \g__tag_role_tags_NS_prop {#1} + { + \msg_warning:nnx { tag } {role-unknown-tag} {#1} + } + } +\cs_new_protected:Npn \__tag_check_init_mc_used: + { + \intarray_new:Nn \g__tag_check_mc_used_intarray { 65536 } + \cs_gset_eq:NN \__tag_check_init_mc_used: \prg_do_nothing: + } +\cs_new_protected:Npn \__tag_check_mc_used:n #1 %#1 mcid abscnt + { + \int_compare:nNnT {\l__tag_loglevel_int} > { 2 } + { + \__tag_check_init_mc_used: + \intarray_gset:Nnn \g__tag_check_mc_used_intarray + {#1} + { \intarray_item:Nn \g__tag_check_mc_used_intarray {#1} + 1 } + \int_compare:nNnT + { + \intarray_item:Nn \g__tag_check_mc_used_intarray {#1} + } + > + { 1 } + { + \msg_warning:nnn { tag } {mc-used-twice} {#1} + } + } + } +\cs_new_protected:Npn \__tag_check_show_MCID_by_page: + { + \tl_set:Nx \l__tag_tmpa_tl + { + \__tag_ref_value_lastpage:nn + {abspage} + {-1} + } + \int_step_inline:nnnn {1}{1} + { + \l__tag_tmpa_tl + } + { + \seq_clear:N \l_tmpa_seq + \int_step_inline:nnnn + 
{1} + {1} + { + \__tag_ref_value_lastpage:nn + {tagmcabs} + {-1} + } + { + \int_compare:nT + { + \__tag_ref_value:enn + {mcid-####1} + {tagabspage} + {-1} + = + ##1 + } + { + \seq_put_right:Nx \l_tmpa_seq % local assignment: \l_tmpa_seq is a local (l_) scratch variable + { + Page##1-####1- + \__tag_ref_value:enn + {mcid-####1} + {tagmcid} + {-1} + } + } + } + \seq_show:N \l_tmpa_seq + } + } +\prg_new_conditional:Npnn \__tag_check_if_mc_in_galley: { T,F,TF } + { + \tl_if_eq:NNTF \l__tag_mc_firstmarks_seq \l__tag_mc_botmarks_seq + { \prg_return_false: } + { \prg_return_true: } + } + +\prg_new_conditional:Npnn \__tag_check_if_mc_tmb_missing: { T,F,TF } + { + \bool_if:nTF + { + \str_if_eq_p:ee {\seq_item:Nn \l__tag_mc_firstmarks_seq {1}}{e-} + || + \str_if_eq_p:ee {\seq_item:Nn \l__tag_mc_firstmarks_seq {1}}{b+} + } + { \prg_return_true: } + { \prg_return_false: } + } + +\prg_new_conditional:Npnn \__tag_check_if_mc_tme_missing: { T,F,TF } + { + \str_if_eq:eeTF {\seq_item:Nn \l__tag_mc_botmarks_seq {1}}{b+} + { \prg_return_true: } + { \prg_return_false: } + } +%% File: tagpdf-mc-shared.dtx + +\newcounter { g__tag_MCID_abs_int } +\cs_new:Npn \__tag_get_mc_abs_cnt: { \int_use:N \c@g__tag_MCID_abs_int } +\int_new:N \g__tag_MCID_tmp_bypage_int +\bool_new:N \g__tag_in_mc_bool +\__tag_prop_new:N \g__tag_mc_parenttree_prop +\seq_new:N \g__tag_mc_stack_seq +\tl_new:N \l__tag_mc_artifact_type_tl +\bool_new:N \l__tag_mc_key_stash_bool +\bool_new:N \l__tag_mc_artifact_bool +\tl_new:N \l__tag_mc_key_tag_tl +\tl_new:N \g__tag_mc_key_tag_tl +\tl_new:N \l__tag_mc_key_label_tl +\tl_new:N \l__tag_mc_key_properties_tl +\cs_new:Nn \__tag_mc_handle_mc_label:n + { + \__tag_ref_label:en{tagpdf-#1}{mc} + } +\cs_new_protected:Npn \__tag_mc_set_label_used:n #1 %#1 labelname + { + \tl_new:c { g__tag_mc_label_\tl_to_str:n{#1}_used_tl } + } +\cs_set_protected:Npn \tag_mc_use:n #1 %#1: label name + { + \__tag_check_if_active_struct:T + { + \tl_set:Nx \l__tag_tmpa_tl { \__tag_ref_value:nnn{tagpdf-#1}{tagmcabs}{} } + \tl_if_empty:NTF\l__tag_tmpa_tl + { +
\msg_warning:nnn {tag} {mc-label-unknown} {#1} + } + { + \cs_if_free:cTF { g__tag_mc_label_\tl_to_str:n{#1}_used_tl } + { + \__tag_mc_handle_stash:x { \l__tag_tmpa_tl } + \__tag_mc_set_label_used:n {#1} + } + { + \msg_warning:nnn {tag}{mc-used-twice}{#1} + } + } + } + } +\cs_set_protected:Npn \tag_mc_artifact_group_begin:n #1 + { + \tag_mc_end_push: + \tag_mc_begin:n {artifact=#1} + \tag_stop_group_begin: + } + +\cs_set_protected:Npn \tag_mc_artifact_group_end: + { + \tag_stop_group_end: + \tag_mc_end: + \tag_mc_begin_pop:n{} + } +\cs_set_protected:Npn \tag_mc_end_push: + { + \__tag_check_if_active_mc:T + { + \__tag_mc_if_in:TF + { + \seq_gpush:Nx \g__tag_mc_stack_seq { \tag_get:n {mc_tag} } + \__tag_check_mc_pushed_popped:nn + { pushed } + { \tag_get:n {mc_tag} } + \tag_mc_end: + } + { + \seq_gpush:Nn \g__tag_mc_stack_seq {-1} + \__tag_check_mc_pushed_popped:nn { pushed }{-1} + } + } + } + +\cs_set_protected:Npn \tag_mc_begin_pop:n #1 + { + \__tag_check_if_active_mc:T + { + \seq_gpop:NNTF \g__tag_mc_stack_seq \l__tag_tmpa_tl + { + \tl_if_eq:NnTF \l__tag_tmpa_tl {-1} + { + \__tag_check_mc_pushed_popped:nn {popped}{-1} + } + { + \__tag_check_mc_pushed_popped:nn {popped}{\l__tag_tmpa_tl} + \tag_mc_begin:n {tag=\l__tag_tmpa_tl,#1} + } + } + { + \__tag_check_mc_pushed_popped:nn {popped}{empty~stack,~nothing} + } + } + } +\keys_define:nn { __tag / mc } + { + stash .bool_set:N = \l__tag_mc_key_stash_bool, + __artifact-bool .bool_set:N = \l__tag_mc_artifact_bool, + __artifact-type .choice:, + __artifact-type / pagination .code:n = + { + \tl_set:Nn \l__tag_mc_artifact_type_tl { Pagination } + }, + __artifact-type / pagination/header .code:n = + { + \tl_set:Nn \l__tag_mc_artifact_type_tl { Pagination/Subtype/Header } + }, + __artifact-type / pagination/footer .code:n = + { + \tl_set:Nn \l__tag_mc_artifact_type_tl { Pagination/Subtype/Footer } + }, + __artifact-type / layout .code:n = + { + \tl_set:Nn \l__tag_mc_artifact_type_tl { Layout } + }, + __artifact-type / page 
.code:n = + { + \tl_set:Nn \l__tag_mc_artifact_type_tl { Page } + }, + __artifact-type / background .code:n = + { + \tl_set:Nn \l__tag_mc_artifact_type_tl { Background } + }, + __artifact-type / notype .code:n = + { + \tl_set:Nn \l__tag_mc_artifact_type_tl {} + }, + __artifact-type / .code:n = + { + \tl_set:Nn \l__tag_mc_artifact_type_tl {} + }, + } +%% File: tagpdf.dtx + + +\bool_if:NTF \g__tag_mode_lua_bool + { + \RequirePackage {tagpdf-mc-code-lua} + } + { + \RequirePackage {tagpdf-mc-code-generic} % + } +%% File: tagpdf-tree.dtx +\hook_gput_code:nnn{begindocument}{tagpdf} + { + \bool_if:NT \g__tag_active_tree_bool + { + \sys_if_output_pdf:TF + { + \AddToHook{enddocument/end} { \__tag_finish_structure: } + } + { + \AddToHook{shipout/lastpage} { \__tag_finish_structure: } + } + } + } +\pdf_object_new:n { __tag/struct/0 } +\hook_gput_code:nnn{shipout/lastpage}{tagpdf} + { + \bool_if:NT \g__tag_active_tree_bool + { + \pdfmanagement_add:nnn { Catalog / MarkInfo } { Marked } { true } + \pdfmanagement_add:nnx + { Catalog } + { StructTreeRoot } + { \pdf_object_ref:n { __tag/struct/0 } } + } + } +\cs_new_protected:Npn \__tag_tree_write_structtreeroot: + { + \__tag_prop_gput:cnx + { g__tag_struct_0_prop } + { ParentTree } + { \pdf_object_ref:n { __tag/tree/parenttree } } + \__tag_prop_gput:cnx + { g__tag_struct_0_prop } + { RoleMap } + { \pdf_object_ref:n { __tag/tree/rolemap } } + \__tag_struct_write_obj:n { 0 } + } +\cs_new_protected:Npn \__tag_tree_write_structelements: + { + \int_step_inline:nnnn {1}{1}{\c@g__tag_struct_abs_int} + { + \__tag_struct_write_obj:n { ##1 } + } + } +\pdf_object_new:n { __tag/tree/parenttree } +\newcounter { g__tag_parenttree_obj_int } +\hook_gput_code:nnn{begindocument}{tagpdf} + { + \int_gset:Nn + \c@g__tag_parenttree_obj_int + { \__tag_ref_value_lastpage:nn{abspage}{100} } + } +\tl_new:N \g__tag_parenttree_objr_tl +\cs_new_protected:Npn \__tag_parenttree_add_objr:nn #1 #2 %#1 StructParent number, #2 objref + { + \tl_gput_right:Nx 
\g__tag_parenttree_objr_tl + { + #1 \c_space_tl #2 ^^J + } + } +\tl_new:N \l__tag_parenttree_content_tl + +\cs_new_protected:Npn \__tag_tree_fill_parenttree: + { + \int_step_inline:nnnn{1}{1}{\__tag_ref_value_lastpage:nn{abspage}{-1}} %not quite clear if labels are needed. See lua code + { %page ##1 + \prop_clear:N \l__tag_tmpa_prop + \int_step_inline:nnnn{1}{1}{\__tag_ref_value_lastpage:nn{tagmcabs}{-1}} + { + %mcid####1 + \int_compare:nT + {\__tag_ref_value:enn{mcid-####1}{tagabspage}{-1}=##1} %mcid is on current page + {% yes + \prop_put:Nxx + \l__tag_tmpa_prop + {\__tag_ref_value:enn{mcid-####1}{tagmcid}{-1}} + {\prop_item:Nn \g__tag_mc_parenttree_prop {####1}} + } + } + \tl_put_right:Nx\l__tag_parenttree_content_tl + { + \int_eval:n {##1-1}\c_space_tl + [\c_space_tl %] + } + \int_step_inline:nnnn + {0} + {1} + { \prop_count:N \l__tag_tmpa_prop -1 } + { + \prop_get:NnNTF \l__tag_tmpa_prop {####1} \l__tag_tmpa_tl + {% page#1:mcid##1:\l__tag_tmpa_tl :content + \tl_put_right:Nx \l__tag_parenttree_content_tl + { + \pdf_object_if_exist:eT { __tag/struct/\l__tag_tmpa_tl } + { + \pdf_object_ref:e { __tag/struct/\l__tag_tmpa_tl } + } + \c_space_tl + } + } + { + \msg_warning:nn { tag } {tree-mcid-index-wrong} + } + } + \tl_put_right:Nn + \l__tag_parenttree_content_tl + {%[ + ]^^J + } + } + } +\cs_new_protected:Npn \__tag_tree_lua_fill_parenttree: + { + \tl_set:Nn \l__tag_parenttree_content_tl + { + \lua_now:e + { + ltx.__tag.func.output_parenttree + ( + \int_use:N\g_shipout_readonly_int + ) + } + } + } +\cs_new_protected:Npn \__tag_tree_write_parenttree: + { + \bool_if:NTF \g__tag_mode_lua_bool + { + \__tag_tree_lua_fill_parenttree: + } + { + \__tag_tree_fill_parenttree: + } + \tl_put_right:NV \l__tag_parenttree_content_tl\g__tag_parenttree_objr_tl + \pdf_object_write:nnx { __tag/tree/parenttree }{dict} + { + /Nums\c_space_tl [\l__tag_parenttree_content_tl] + } + } +\pdf_object_new:n { __tag/tree/rolemap } +\cs_new_protected:Npn \__tag_tree_write_rolemap: + { + 
\pdf_object_write:nnx { __tag/tree/rolemap }{dict} + { + \pdfdict_use:n{g__tag_role/RoleMap_dict} + } + } +\cs_new_protected:Npn \__tag_tree_write_classmap: + { + \tl_clear:N \l__tag_tmpa_tl + \seq_gremove_duplicates:N \g__tag_attr_class_used_seq + \seq_set_map:NNn \l__tag_tmpa_seq \g__tag_attr_class_used_seq + { + ##1\c_space_tl + << + \prop_item:Nn + \g__tag_attr_entries_prop + {##1} + >> + } + \tl_set:Nx \l__tag_tmpa_tl + { + \seq_use:Nn + \l__tag_tmpa_seq + { \iow_newline: } + } + \tl_if_empty:NF + \l__tag_tmpa_tl + { + \pdf_object_new:n { __tag/tree/classmap } + \pdf_object_write:nnx + { __tag/tree/classmap } + {dict} + { \l__tag_tmpa_tl } + \__tag_prop_gput:cnx + { g__tag_struct_0_prop } + { ClassMap } + { \pdf_object_ref:n { __tag/tree/classmap } } + } + } +\pdf_object_new:nn{ __tag/tree/namespaces }{array} +\cs_new_protected:Npn \__tag_tree_write_namespaces: + { + \prop_map_inline:Nn \g__tag_role_NS_prop + { + \pdfdict_if_empty:nF {g__tag_role/RoleMapNS_##1_dict} + { + \pdf_object_write:nnx {__tag/RoleMapNS/##1}{dict} + { + \pdfdict_use:n {g__tag_role/RoleMapNS_##1_dict} + } + \pdfdict_gput:nnx{g__tag_role/Namespace_##1_dict} + {RoleMapNS}{\pdf_object_ref:n {__tag/RoleMapNS/##1}} + } + \pdf_object_write:nnx{tag/NS/##1}{dict} + { + \pdfdict_use:n {g__tag_role/Namespace_##1_dict} + } + } + \pdf_object_write:nx {__tag/tree/namespaces} %array + { + \prop_map_tokens:Nn \g__tag_role_NS_prop{\use_ii:nn} + } + } +\hook_new:n {tagpdf/finish/before} +\cs_new_protected:Npn \__tag_finish_structure: + { + \bool_if:NT\g__tag_active_tree_bool + { + \hook_use:n {tagpdf/finish/before} + \__tag_tree_write_parenttree: + \__tag_tree_write_rolemap: + \__tag_tree_write_classmap: + \__tag_tree_write_namespaces: + \__tag_tree_write_structelements: %this is rather slow!! 
+ \__tag_tree_write_structtreeroot: + } + } +\hook_gput_code:nnn{begindocument}{tagpdf} + { + \bool_if:NT\g__tag_active_tree_bool + { + \hook_gput_code:nnn{shipout/before} { tagpdf/structparents } + { + \pdfmanagement_add:nnx + { Page } + { StructParents } + { \int_eval:n { \g_shipout_readonly_int} } + } + } + } +%% File: tagpdf-roles.dtx +\__tag_seq_new:N \g__tag_role_tags_seq %to get names (type/NS) from numbers +\__tag_prop_new:N \g__tag_role_tags_prop %to get numbers from names (type/NS) +\prop_new:N \g__tag_role_tags_NS_prop %to namespace info +\prop_new:N \g__tag_role_NS_prop % collect namespaces +\tl_new:N \l__tag_role_tag_tmpa_tl +\tl_new:N \l__tag_role_tag_namespace_tmpa_tl +\tl_new:N \l__tag_role_role_tmpa_tl +\tl_new:N \l__tag_role_role_namespace_tmpa_tl +\cs_new_protected:Npn \__tag_role_NS_new:nnn #1 #2 #3 + { + \pdf_object_new:n {tag/NS/#1} + \pdfdict_new:n {g__tag_role/Namespace_#1_dict} + \pdf_object_new:n {__tag/RoleMapNS/#1} + \pdfdict_new:n {g__tag_role/RoleMapNS_#1_dict} + \pdfdict_gput:nnn + {g__tag_role/Namespace_#1_dict} + {Type} + {/Namespace} + \pdf_string_from_unicode:nnN{utf8/string}{#2}\l_tmpa_str + \tl_if_empty:NF \l_tmpa_str + { + \pdfdict_gput:nnx + {g__tag_role/Namespace_#1_dict} + {NS} + {\l_tmpa_str} + } + %RoleMapNS is added in tree + \tl_if_empty:nF {#3} + { + \pdfdict_gput:nnx{g__tag_role/Namespace_#1_dict} + {Schema}{#3} + } + \prop_gput:Nnx \g__tag_role_NS_prop {#1}{\pdf_object_ref:n{tag/NS/#1}~} + } +\str_const:Nx \c__tag_role_userNS_id_str + { data:, + \int_to_Hex:n{\int_rand:n {65535}} + \int_to_Hex:n{\int_rand:n {65535}} + - + \int_to_Hex:n{\int_rand:n {65535}} + - + \int_to_Hex:n{\int_rand:n {65535}} + - + \int_to_Hex:n{\int_rand:n {65535}} + - + \int_to_Hex:n{\int_rand:n {16777215}} + \int_to_Hex:n{\int_rand:n {16777215}} + } +\pdf_version_compare:NnT > {1.9} + { + \__tag_role_NS_new:nnn {pdf} {http://iso.org/pdf/ssn}{} + \__tag_role_NS_new:nnn {pdf2} {http://iso.org/pdf2/ssn}{} + \__tag_role_NS_new:nnn 
{mathml}{http://www.w3.org/1998/Math/MathML}{} + %\__tag_role_NS_new:nnn {latex} {https://www.latex-project.org/ns/2022}{} + \exp_args:Nnx + \__tag_role_NS_new:nnn {user}{\c__tag_role_userNS_id_str}{} + } +\clist_const:Nn \c__tag_role_sttags_pdf_pdfII_clist + { + Document, %A complete document. This is the root element + %of any structure tree containing + %multiple parts or multiple articles. + Part, %A large-scale division of a document. + Sect, %A container for grouping related content elements. + Div, %A generic block-level element or group of elements + Caption, %A brief portion of text describing a table or figure. + Index, + NonStruct, %probably not needed + H, + H1, + H2, + H3, + H4, + H5, + H6, + P, + L, %list + LI, %list item (around label and list item body) + Lbl, %list label + LBody, %list item body + Table, + TR, %table row + TH, %table header cell + TD, %table data cell + THead, %table header (n rows) + TBody, %table rows + TFoot, %table footer + Span, %generic inline marker + Link, % + Annot, + Figure, + Formula, + Form, + % ruby warichu etc .. + Ruby, + RB, + RT, + Warichu, + WT, + WP, + Artifact % only MC-tag ?... + } + +\clist_const:Nn \c__tag_role_sttags_only_pdf_clist + { + Art, %A relatively self-contained body of text + %constituting a single narrative or exposition + BlockQuote, %A portion of text consisting of one or more paragraphs + %attributed to someone other than the author of the + %surrounding text. + TOC, %A list made up of table of contents item entries + %(structure tag TOCI; see below) and/or other + %nested table of contents entries + TOCI, %An individual member of a table of contents. + %This entry's children can be any of the following structure tags: + %Lbl,Reference,NonStruct,P,TOC + Index, + Private, + Quote, %inline quote + Note, %footnote, endnote. Lbl can be child + Reference, %A citation to content elsewhere in the document. 
+ BibEntry, %bibentry + Code + } + +\clist_const:Nn \c__tag_role_sttags_only_pdfII_clist + { + DocumentFragment + ,Aside + ,H7 + ,H8 + ,H9 + ,H10 + ,Title + ,FENote + ,Sub + ,Em + ,Strong + ,Artifact + } + +\clist_const:Nn \c__tag_role_sttags_mathml_clist + { + abs + ,and + ,annotation + ,apply + ,approx + ,arccos + ,arccosh + ,arccot + ,arccoth + ,arccsc + ,arccsch + ,arcsec + ,arcsech + ,arcsin + ,arcsinh + ,arctan + ,arctanh + ,arg + ,bind + ,bvar + ,card + ,cartesianproduct + ,cbytes + ,ceiling + ,cerror + ,ci + ,cn + ,codomain + ,complexes + ,compose + ,condition + ,conjugate + ,cos + ,cosh + ,cot + ,coth + ,cs + ,csc + ,csch + ,csymbol + ,curl + ,declare + ,degree + ,determinant + ,diff + ,divergence + ,divide + ,domain + ,domainofapplication + ,emptyset + ,eq + ,equivalent + ,eulergamma + ,exists + ,exp + ,exponentiale + ,factorial + ,factorof + ,false + ,floor + ,fn + ,forall + ,gcd + ,geq + ,grad + ,gt + ,ident + ,image + ,imaginary + ,imaginaryi + ,implies + ,in + ,infinity + ,int + ,integers + ,intersect + ,interval + ,inverse + ,lambda + ,laplacian + ,lcm + ,leq + ,limit + ,ln + ,log + ,logbase + ,lowlimit + ,lt + ,maction + ,maligngroup + ,malignmark + ,math + ,matrix + ,matrixrow + ,max + ,mean + ,median + ,menclose + ,merror + ,mfenced + ,mfrac + ,mglyph + ,mi + ,min + ,minus + ,mlabeledtr + ,mlongdiv + ,mmultiscripts + ,mn + ,mo + ,mode + ,moment + ,momentabout + ,mover + ,mpadded + ,mphantom + ,mprescripts + ,mroot + ,mrow + ,ms + ,mscarries + ,mscarry + ,msgroup + ,msline + ,mspace + ,msqrt + ,msrow + ,mstack + ,mstyle + ,msub + ,msubsup + ,msup + ,mtable + ,mtd + ,mtext + ,mtr + ,munder + ,munderover + ,naturalnumbers + ,neq + ,none + ,not + ,notanumber + ,notin + ,notprsubset + ,notsubset + ,or + ,otherwise + ,outerproduct + ,partialdiff + ,pi + ,piece + ,piecewise + ,plus + ,power + ,primes + ,product + ,prsubset + ,quotient + ,rationals + ,real + ,reals + ,reln + ,rem + ,root + ,scalarproduct + ,sdev + ,sec + ,sech + ,selector + ,semantics + 
,sep + ,set + ,setdiff + ,share + ,sin + ,sinh + ,subset + ,sum + ,tan + ,tanh + ,tendsto + ,times + ,transpose + ,true + ,union + ,uplimit + ,variance + ,vector + ,vectorproduct + ,xor + } + +\prop_const_from_keyval:Nn \c__tag_role_sttags_pdfII_to_pdf_prop + { + DocumentFragment = Art, + Aside = Note, + Title = H1, + Sub = Span, + H7 = H6 , + H8 = H6 , + H9 = H6 , + H10 = H6, + FENote= Note, + Em = Span, + Strong= Span, + } + +\clist_map_inline:Nn \c__tag_role_sttags_pdf_pdfII_clist + { + \__tag_seq_gput_right:Nn \g__tag_role_tags_seq { #1 } + \prop_gput:Nnn \g__tag_role_tags_NS_prop { #1 }{ pdf2 } + } +\clist_map_inline:Nn \c__tag_role_sttags_only_pdf_clist + { + \__tag_seq_gput_right:Nn \g__tag_role_tags_seq { #1 } + \prop_gput:Nnn \g__tag_role_tags_NS_prop { #1 }{ pdf } + } +\clist_map_inline:Nn \c__tag_role_sttags_only_pdfII_clist + { + \__tag_seq_gput_right:Nn \g__tag_role_tags_seq { #1 } + \prop_gput:Nnn \g__tag_role_tags_NS_prop { #1 }{ pdf2 } + } +\pdf_version_compare:NnT > {1.9} + { + \clist_map_inline:Nn \c__tag_role_sttags_mathml_clist + { + \__tag_seq_gput_right:Nn \g__tag_role_tags_seq { #1 } + \prop_gput:Nnn \g__tag_role_tags_NS_prop { #1 }{ mathml } + } + } +\int_step_inline:nnnn { 1 }{ 1 }{ \seq_count:N \g__tag_role_tags_seq } + { + \__tag_prop_gput:Nxn \g__tag_role_tags_prop + { + \seq_item:Nn \g__tag_role_tags_seq { #1 } + } + { #1 } + } +\pdfdict_new:n {g__tag_role/RoleMap_dict} +\cs_new_protected:Nn \__tag_role_add_tag:nn %(new) name, reference to old + { + \prop_if_in:NnF \g__tag_role_tags_prop {#1} + { + \int_compare:nNnT {\l__tag_loglevel_int} > { 0 } + { + \msg_info:nnn { tag }{new-tag}{#1} + } + \__tag_seq_gput_right:Nn \g__tag_role_tags_seq { #1 } + \__tag_prop_gput:Nnx \g__tag_role_tags_prop { #1 } + { + \seq_count:N \g__tag_role_tags_seq + } + \prop_gput:Nnn \g__tag_role_tags_NS_prop { #1 }{ user } + } + \__tag_check_add_tag_role:nn {#1}{#2} + \tl_if_empty:nF { #2 } + { + \pdfdict_gput:nnx {g__tag_role/RoleMap_dict} + {#1} + 
{\pdf_name_from_unicode_e:n{#2}}
+      }
+  }
+\cs_generate_variant:Nn \__tag_role_add_tag:nn {VV}
+
+% For PDF versions below 2.0: pass every key/value pair stored in
+% \c__tag_role_sttags_pdfII_to_pdf_prop to \__tag_role_add_tag:nn.
+\pdf_version_compare:NnT < {2.0}
+  {
+    \prop_map_inline:Nn \c__tag_role_sttags_pdfII_to_pdf_prop
+      {
+        \__tag_role_add_tag:nn {#1}{#2}
+      }
+  }
+
+% \__tag_role_add_tag:nnnn {<tag>} {<tag namespace>} {<role>} {<role namespace>}
+% Registers <tag> together with its namespace and its role mapping:
+%  - writes the "new-tag" info message if \l__tag_loglevel_int is above 0,
+%  - appends <tag> to \g__tag_role_tags_seq and stores the resulting length
+%    of that sequence under <tag> in \g__tag_role_tags_prop,
+%  - records <tag namespace> for <tag> in \g__tag_role_tags_NS_prop,
+%  - hands <tag> and <role> to \__tag_check_add_tag_role:nn,
+%  - stores the array [<role as PDF name> <ref to object tag/NS/<role namespace>>]
+%    under <tag> in the dictionary g__tag_role/RoleMapNS_<tag namespace>_dict.
+\cs_new_protected:Nn \__tag_role_add_tag:nnnn %tag/namespace/role/namespace
+  {
+    \int_compare:nNnT {\l__tag_loglevel_int} > { 0 }
+      {
+        \msg_info:nnn { tag }{new-tag}{#1}
+      }
+    \__tag_seq_gput_right:Nn \g__tag_role_tags_seq { #1 }
+    \__tag_prop_gput:Nnx \g__tag_role_tags_prop { #1 }
+      {
+        \seq_count:N \g__tag_role_tags_seq
+      }
+    \prop_gput:Nnn \g__tag_role_tags_NS_prop { #1 }{ #2 }
+    \__tag_check_add_tag_role:nn {#1}{#3}
+    \pdfdict_gput:nnx {g__tag_role/RoleMapNS_#2_dict}{#1}
+      {
+        [
+          \pdf_name_from_unicode_e:n{#3}
+          \c_space_tl
+          \pdf_object_ref:n {tag/NS/#4}
+        ]
+      }
+  }
+\cs_generate_variant:Nn \__tag_role_add_tag:nnnn {VVVV}
+% Helper keys used to parse the value of add-new-tag: each key only stores
+% its value in the corresponding local token list.
+\keys_define:nn { __tag / tag-role }
+  {
+    ,tag            .tl_set:N = \l__tag_role_tag_tmpa_tl
+    ,tag-namespace  .tl_set:N = \l__tag_role_tag_namespace_tmpa_tl
+    ,role           .tl_set:N = \l__tag_role_role_tmpa_tl
+    ,role-namespace .tl_set:N = \l__tag_role_role_namespace_tmpa_tl
+  }
+
+% Setup key add-new-tag.  The value is parsed with the __tag/tag-role keys;
+% tag-namespace is preset to "user" and role-namespace to empty.  Input that
+% is not a known key is collected in \l_tmpa_tl; if that is non-empty it is
+% split at "/" and items 1 and 2 become tag and role (presumably a
+% "tag/role" shorthand -- confirm against the user documentation).
+\keys_define:nn { __tag / setup }
+  {
+    add-new-tag .code:n =
+      {
+        \keys_set_known:nnnN
+          {__tag/tag-role}
+          {
+            tag-namespace=user,
+            role-namespace=, %so that we can test for it.
+            #1
+          }{__tag/tag-role}\l_tmpa_tl
+        \tl_if_empty:NF \l_tmpa_tl
+          {
+            \exp_args:NNno \seq_set_split:Nnn \l_tmpa_seq { / } {\l_tmpa_tl/}
+            \tl_set:Nx \l__tag_role_tag_tmpa_tl { \seq_item:Nn \l_tmpa_seq {1} }
+            \tl_set:Nx \l__tag_role_role_tmpa_tl { \seq_item:Nn \l_tmpa_seq {2} }
+          }
+        % No role-namespace given: look up the namespace recorded for the
+        % role in \g__tag_role_tags_NS_prop.  Fall back to "user" when the
+        % role has no entry or the namespace found is not a key of
+        % \g__tag_role_NS_prop.
+        \tl_if_empty:NT \l__tag_role_role_namespace_tmpa_tl
+          {
+            \prop_get:NVNTF
+              \g__tag_role_tags_NS_prop
+              \l__tag_role_role_tmpa_tl
+              \l__tag_role_role_namespace_tmpa_tl
+              {
+                \prop_if_in:NVF\g__tag_role_NS_prop \l__tag_role_role_namespace_tmpa_tl
+                  {
+                    \tl_set:Nn \l__tag_role_role_namespace_tmpa_tl {user}
+                  }
+              }
+              {
+                \tl_set:Nn \l__tag_role_role_namespace_tmpa_tl {user}
+              }
+          }
+        % Below PDF 2.0 only tag and role are passed on; otherwise both
+        % namespaces are passed as well.
+        \pdf_version_compare:NnTF < {2.0}
+          {
+            %TODO add check for emptiness?
+            \__tag_role_add_tag:VV
+              \l__tag_role_tag_tmpa_tl
+              \l__tag_role_role_tmpa_tl
+          }
+          {
+            \__tag_role_add_tag:VVVV
+              \l__tag_role_tag_tmpa_tl
+              \l__tag_role_tag_namespace_tmpa_tl
+              \l__tag_role_role_tmpa_tl
+              \l__tag_role_role_namespace_tmpa_tl
+          }
+      }
+  }
+%% File: tagpdf-struct.dtx
+\__tag_seq_new:N  \g__tag_struct_objR_seq
+
+\__tag_prop_new:N  \g__tag_struct_cont_mc_prop
+% Stack of structure numbers; starts with 0, the number used below for the
+% StructTreeRoot data.
+\seq_new:N    \g__tag_struct_stack_seq
+\seq_gpush:Nn \g__tag_struct_stack_seq {0}
+% Parallel stack of structure tags; starts with "Root".
+\seq_new:N    \g__tag_struct_tag_stack_seq
+\seq_gpush:Nn \g__tag_struct_tag_stack_seq {Root}
+\tl_new:N     \l__tag_struct_stack_parent_tmpa_tl
+
+% Keys that are written (in this order) into the StructTreeRoot dictionary.
+\seq_const_from_clist:Nn \c__tag_struct_StructTreeRoot_entries_seq
+  {%p. 857/858
+    Type,              % always /StructTreeRoot
+    K,                 % kid, dictionary or array of dictionaries
+    IDTree,            % currently unused
+    ParentTree,        % required,obj ref to the parent tree
+    ParentTreeNextKey, % optional
+    RoleMap,
+    ClassMap,
+    Namespaces,
+    AF                 %pdf 2.0
+  }
+
+% Keys that are written (in this order) into a StructElem dictionary.
+\seq_const_from_clist:Nn \c__tag_struct_StructElem_entries_seq
+  {%p 858 f
+    Type,              %always /StructElem
+    S,                 %tag/type
+    P,                 %parent
+    ID,                %optional
+    Ref,               %optional, pdf 2.0 Use?
+    Pg,                %obj num of starting page, optional
+    K,                 %kids
+    A,                 %attributes, probably unused
+    C,                 %class ""
+    %R,                %attribute revision number, irrelevant for us as we
+                       % don't update/change existing PDF and (probably)
+                       % deprecated in PDF 2.0
+    T,                 %title, value in () or <>
+    Lang,              %language
+    Alt,               % value in () or <>
+    E,                 % abbreviation
+    ActualText,
+    AF,                 %pdf 2.0, array of dict, associated files
+    NS,                 %pdf 2.0, dict, namespace
+    PhoneticAlphabet,   %pdf 2.0
+    Phoneme             %pdf 2.0
+  }
+\tl_new:N   \g__tag_struct_tag_tl
+\tl_new:N   \g__tag_struct_tag_NS_tl
+\tl_new:N   \l__tag_struct_key_label_tl
+\bool_new:N \l__tag_struct_elem_stash_bool
+% \__tag_struct_output_prop_aux:nn {<struct num>} {<key>}
+% Expandable.  If <key> is present in g__tag_struct_<num>_prop this yields
+% " /<key> <value>", otherwise nothing.
+\cs_new:Npn \__tag_struct_output_prop_aux:nn #1 #2 %#1 num, #2 key
+  {
+    \prop_if_in:cnT
+      { g__tag_struct_#1_prop }
+      { #2 }
+      {
+        \c_space_tl/#2~ \prop_item:cn{ g__tag_struct_#1_prop } { #2 }
+      }
+  }
+
+% \__tag_new_output_prop_handler:n {<struct num>}
+% Defines \__tag_struct_output_prop_<num>:n, which forwards its argument as
+% the key to \__tag_struct_output_prop_aux:nn for that structure.
+\cs_new_protected:Npn \__tag_new_output_prop_handler:n #1
+  {
+    \cs_new:cn { __tag_struct_output_prop_#1:n }
+      {
+        \__tag_struct_output_prop_aux:nn {#1}{##1}
+      }
+  }
+% Structure 0 holds the StructTreeRoot data: create its prop, output
+% handler and kids sequence and preset the Type and Namespaces entries.
+\tl_gset:Nn \g__tag_struct_stack_current_tl {0}
+
+\__tag_prop_new:c { g__tag_struct_0_prop }
+\__tag_new_output_prop_handler:n {0}
+\__tag_seq_new:c  { g__tag_struct_kids_0_seq }
+
+\__tag_prop_gput:cnn
+  { g__tag_struct_0_prop }
+  { Type }
+  { /StructTreeRoot }
+
+\__tag_prop_gput:cnx
+  { g__tag_struct_0_prop }
+  { Namespaces }
+  { \pdf_object_ref:n { __tag/tree/namespaces } }
+% \__tag_struct_mcid_dict:n {<MCID absnum>}
+% Expandable.  Yields the dictionary << /Type /MCR /Pg <page ref> /MCID <n> >>.
+% Page and MCID are read with \__tag_ref_value:enn from the label
+% mcid-<absnum> (properties tagabspage and tagmcid, default 1 for both).
+\cs_new:Npn \__tag_struct_mcid_dict:n #1 %#1 MCID absnum
+  {
+     <<
+      /Type \c_space_tl /MCR \c_space_tl
+      /Pg
+        \c_space_tl
+      \pdf_pageobject_ref:n { \__tag_ref_value:enn{mcid-#1}{tagabspage}{1} }
+       /MCID \c_space_tl \__tag_ref_value:enn{mcid-#1}{tagmcid}{1}
+     >>
+  }
+% \__tag_struct_kid_mc_gput_right:nn {<struct num>} {<MCID absnum>}
+% Appends two items to g__tag_struct_kids_<num>_seq: the x-expanded MCR
+% dictionary and, unexpanded, a lookup of <MCID absnum> in
+% \g__tag_struct_cont_mc_prop.
+\cs_new_protected:Npn \__tag_struct_kid_mc_gput_right:nn #1 #2 %#1 structure num, #2 MCID absnum%
+  {
+    \__tag_seq_gput_right:cx
+      { g__tag_struct_kids_#1_seq }
+      {
+        \__tag_struct_mcid_dict:n {#2}
+      }
+    \__tag_seq_gput_right:cn
+      { g__tag_struct_kids_#1_seq }
+      {
+        \prop_item:Nn \g__tag_struct_cont_mc_prop {#2}
+      }
+  }
+\cs_generate_variant:Nn \__tag_struct_kid_mc_gput_right:nn {nx}
+
+% \__tag_struct_kid_struct_gput_right:nn {<parent struct num>} {<kid struct num>}
+% Appends the reference to object __tag/struct/<kid> to the kids sequence
+% of the parent structure.
+\cs_new_protected:Npn\__tag_struct_kid_struct_gput_right:nn #1 #2 %#1 num of parent struct, #2 kid struct
+  {
+    \__tag_seq_gput_right:cx
+      { g__tag_struct_kids_#1_seq }
+      {
+        \pdf_object_ref:n { __tag/struct/#2 }
+      }
+  }
+
+\cs_generate_variant:Nn \__tag_struct_kid_struct_gput_right:nn {xx}
+% \__tag_struct_kid_OBJR_gput_right:nnn {<parent struct num>} {<obj ref>} {<page obj ref>}
+% Writes an unnamed dictionary object /Type/OBJR/Obj <obj ref>/Pg <page ref>
+% and appends the reference to that new object to the parent's kids sequence.
+\cs_new_protected:Npn\__tag_struct_kid_OBJR_gput_right:nnn #1 #2 #3 %#1 num of parent struct,
+                                                                  %#2 obj reference
+                                                                  %#3 page object reference
+  {
+    \pdf_object_unnamed_write:nn
+      { dict }
+      {
+        /Type/OBJR/Obj~#2/Pg~#3
+      }
+    \__tag_seq_gput_right:cx
+      { g__tag_struct_kids_#1_seq }
+      {
+        \pdf_object_ref_last:
+      }
+  }
+
+\cs_generate_variant:Nn\__tag_struct_kid_OBJR_gput_right:nnn { xxx }
+
+% \__tag_struct_exchange_kid_command:N <seq var>
+% Pops the leftmost item of the sequence, replaces the first occurrence of
+% \__tag_mc_insert_mcid_kids:n in it by \__tag_mc_insert_mcid_single_kids:n
+% and puts the item back at the left end.
+\cs_new_protected:Npn\__tag_struct_exchange_kid_command:N #1 %#1 = seq var
+  {
+    \seq_gpop_left:NN #1 \l__tag_tmpa_tl
+    \regex_replace_once:nnN
+      { \c{\__tag_mc_insert_mcid_kids:n} }
+      { \c{\__tag_mc_insert_mcid_single_kids:n} }
+      \l__tag_tmpa_tl
+    \seq_gput_left:NV #1 \l__tag_tmpa_tl
+  }
+
+\cs_generate_variant:Nn\__tag_struct_exchange_kid_command:N { c }
+% \__tag_struct_fill_kid_key:n {<struct num>}
+% Sets the K entry of g__tag_struct_<num>_prop from the kids sequence.
+% Outside lua mode the sequence is first rebuilt from its x-expanded items
+% with all empty items removed.  No kid: K is not set; one kid: K is that
+% item; several kids: K is an array of all items separated by spaces.
+\cs_new_protected:Npn \__tag_struct_fill_kid_key:n #1 %#1 is the struct num
+  {
+    \bool_if:NF\g__tag_mode_lua_bool
+      {
+        \seq_clear:N \l__tag_tmpa_seq
+        \seq_map_inline:cn { g__tag_struct_kids_#1_seq }
+          { \seq_put_right:Nx \l__tag_tmpa_seq { ##1 } }
+        %\seq_show:c { g__tag_struct_kids_#1_seq }
+        %\seq_show:N \l__tag_tmpa_seq
+        \seq_remove_all:Nn \l__tag_tmpa_seq {}
+        %\seq_show:N \l__tag_tmpa_seq
+        \seq_gset_eq:cN { g__tag_struct_kids_#1_seq } \l__tag_tmpa_seq
+      }
+
+    \int_case:nnF
+      {
+        \seq_count:c
+          {
+            g__tag_struct_kids_#1_seq
+          }
+      }
+      {
+        { 0 }
+         { } %no kids, do nothing
+        { 1 } % 1 kid, insert
+         {
+           % in this case we need a special command in
+           % luamode to get the array right. See issue #13
+           \bool_if:NT\g__tag_mode_lua_bool
+             {
+               \__tag_struct_exchange_kid_command:c
+                {g__tag_struct_kids_#1_seq}
+             }
+           \__tag_prop_gput:cnx { g__tag_struct_#1_prop } {K}
+             {
+               \seq_item:cn
+                 {
+                   g__tag_struct_kids_#1_seq
+                 }
+                 {1}
+             }
+         } %
+      }
+      { %many kids, use an array
+        \__tag_prop_gput:cnx { g__tag_struct_#1_prop } {K}
+          {
+            [
+              \seq_use:cn
+                {
+                  g__tag_struct_kids_#1_seq
+                }
+                {
+                  \c_space_tl
+                }
+            ]
+          }
+      }
+  }
+
+% \__tag_struct_get_dict_content:nN {<struct num>} <tl var>
+% Clears <tl var> and then collects " /<key> <value>" for every key of the
+% entries list (StructTreeRoot list for number 0, StructElem list otherwise)
+% that is present in g__tag_struct_<num>_prop.  If a function
+% \__tag_struct_format_<key>:e exists the value is passed through it.
+\cs_new_protected:Npn \__tag_struct_get_dict_content:nN #1 #2 %#1: structure num
+  {
+    \tl_clear:N #2
+    \seq_map_inline:cn
+      {
+        c__tag_struct_
+        \int_compare:nNnTF{#1}={0}{StructTreeRoot}{StructElem}
+        _entries_seq
+      }
+      {
+        \tl_put_right:Nx
+          #2
+          {
+             \prop_if_in:cnT
+               { g__tag_struct_#1_prop }
+               { ##1 }
+               {
+                 \c_space_tl/##1~
+                 \cs_if_exist_use:cTF {__tag_struct_format_##1:e}
+                  {
+                    { \prop_item:cn{ g__tag_struct_#1_prop } { ##1 } }
+                  }
+                  {
+                    \prop_item:cn{ g__tag_struct_#1_prop } { ##1 }
+                  }
+               }
+          }
+      }
+  }
+% Formatter for the Ref entry: wraps the value in square brackets.
+\cs_new:Nn\__tag_struct_format_Ref:n{[#1]}
+\cs_generate_variant:Nn\__tag_struct_format_Ref:n{e}
+% \__tag_struct_write_obj:n {<struct num>}
+% If the object __tag/struct/<num> exists: fills the K entry, collects the
+% dictionary content and writes the object as a dict.  Otherwise raises the
+% error message struct-no-objnum.
+\cs_new_protected:Npn \__tag_struct_write_obj:n #1 % #1 is the struct num
+  {
+    \pdf_object_if_exist:nTF { __tag/struct/#1 }
+      {
+        \__tag_struct_fill_kid_key:n { #1 }
+        \__tag_struct_get_dict_content:nN { #1 } \l__tag_tmpa_tl
+        \exp_args:Nx
+          \pdf_object_write:nnx
+            { __tag/struct/#1 }
+            {dict}
+            {
+              \l__tag_tmpa_tl
+            }
+      }
+      {
+        \msg_error:nnn { tag } { struct-no-objnum } { #1}
+      }
+  }
+% \__tag_struct_insert_annot:nn {<object reference>} {<structparent number>}
+% Does nothing unless \g__tag_active_struct_bool is true.  Otherwise the
+% structure on top of \g__tag_struct_stack_seq is taken as parent, the page
+% is recorded with a label, an OBJR kid is added to the parent, the parent
+% object reference is added to the parent tree and the counter
+% g__tag_parenttree_obj_int is stepped.
+\cs_new_protected:Npn \__tag_struct_insert_annot:nn #1 #2 %#1 object reference to the annotation/xform
+                                                       %#2 structparent number
+  {
+    \bool_if:NT \g__tag_active_struct_bool
+      {
+        %get the number of the parent structure:
+        \seq_get:NNF
+          \g__tag_struct_stack_seq
+          \l__tag_struct_stack_parent_tmpa_tl
+          {
+            \msg_error:nn { tag } { struct-faulty-nesting }
+          }
+        %put the obj number of the annot in the kid entry, this also creates
+        %the OBJR object
+        \ref_label:nn {__tag_objr_page_#2 }{ tagabspage }
+        \__tag_struct_kid_OBJR_gput_right:xxx
+          {
+            \l__tag_struct_stack_parent_tmpa_tl
+          }
+          {
+            #1 %
+          }
+          {
+            \pdf_pageobject_ref:n { \__tag_ref_value:nnn {__tag_objr_page_#2 }{ tagabspage }{1} }
+          }
+        % add the parent obj number to the parent tree:
+        \exp_args:Nnx
+          \__tag_parenttree_add_objr:nn
+            {
+              #2
+            }
+            {
+              \pdf_object_ref:e { __tag/struct/\l__tag_struct_stack_parent_tmpa_tl }
+            }
+        % increase the int:
+        \stepcounter{ g__tag_parenttree_obj_int }
+      }
+  }
+% Expandable.  Yields the S entry of the current structure without its
+% first token (presumably the leading "/" of the PDF name -- the tag key
+% below stores S via \pdf_name_from_unicode_e:n).
+\cs_new:Npn \__tag_get_data_struct_tag:
+  {
+    \exp_args:Ne
+    \tl_tail:n
+     {
+       \prop_item:cn {g__tag_struct_\g__tag_struct_stack_current_tl _prop}{S}
+     }
+  }
+% Keys accepted by \tag_struct_begin:n.  Keys that write into a prop use
+% the prop of the structure whose number is the current value of
+% \c@g__tag_struct_abs_int.
+\keys_define:nn { __tag / struct }
+  {
+    label    .tl_set:N   = \l__tag_struct_key_label_tl,
+    stash    .bool_set:N = \l__tag_struct_elem_stash_bool,
+    % parent: accepted only if a prop for that structure number exists and
+    % the number is smaller than the current structure number; otherwise a
+    % struct-unknown warning is issued and the key is ignored.
+    parent   .code:n     =
+      {
+        \bool_lazy_and:nnTF
+          {
+            \prop_if_exist_p:c { g__tag_struct_\int_eval:n {#1}_prop }
+          }
+          {
+            \int_compare_p:nNn {#1}<{\c@g__tag_struct_abs_int}
+          }
+          { \tl_set:Nx \l__tag_struct_stack_parent_tmpa_tl { \int_eval:n {#1} } }
+          {
+            \msg_warning:nnxx { tag } { struct-unknown }
+              { \int_eval:n {#1} }
+              { parent~key~ignored }
+          }
+      },
+    parent   .default:n  = {-1},
+    % tag: the value is combined with the namespace recorded for it in
+    % \g__tag_role_tags_NS_prop and split at "/" into tag and namespace;
+    % S is set from the tag, and NS is set if the namespace is a key of
+    % \g__tag_role_NS_prop.
+    tag      .code:n     = % S property
+      {
+        \seq_set_split:Nne \l__tag_tmpa_seq { / } {#1/\prop_item:No\g__tag_role_tags_NS_prop{#1}}
+        \tl_gset:Nx \g__tag_struct_tag_tl    { \seq_item:Nn\l__tag_tmpa_seq {1} }
+        \tl_gset:Nx \g__tag_struct_tag_NS_tl { \seq_item:Nn\l__tag_tmpa_seq {2} }
+        \__tag_check_structure_tag:N \g__tag_struct_tag_tl
+        \__tag_prop_gput:cnx
+         { g__tag_struct_\int_eval:n {\c@g__tag_struct_abs_int}_prop }
+         { S }
+         { \pdf_name_from_unicode_e:n{ \g__tag_struct_tag_tl} } %
+        \prop_get:NVNT \g__tag_role_NS_prop\g__tag_struct_tag_NS_tl\l__tag_tmpa_tl
+         {
+           \__tag_prop_gput:cnx
+            { g__tag_struct_\int_eval:n {\c@g__tag_struct_abs_int}_prop }
+            { NS }
+            { \l__tag_tmpa_tl } %
+         }
+      },
+    % title, title-o, alt, actualtext and E convert the value to a
+    % utf16/hex string and store it in angle brackets.
+    title    .code:n       = % T property
+      {
+        \str_set_convert:Nnnn
+          \l__tag_tmpa_str
+          { #1 }
+          { default }
+          { utf16/hex }
+        \__tag_prop_gput:cnx
+          { g__tag_struct_\int_eval:n {\c@g__tag_struct_abs_int}_prop }
+          { T }
+          { <\l__tag_tmpa_str> }
+      },
+    title-o    .code:n       = % T property
+      {
+        \str_set_convert:Nonn
+          \l__tag_tmpa_str
+          { #1 }
+          { default }
+          { utf16/hex }
+        \__tag_prop_gput:cnx
+          { g__tag_struct_\int_eval:n {\c@g__tag_struct_abs_int}_prop }
+          { T }
+          { <\l__tag_tmpa_str> }
+      },
+    alt      .code:n      = % Alt property
+      {
+        \str_set_convert:Noon
+          \l__tag_tmpa_str
+          { #1 }
+          { default }
+          { utf16/hex }
+        \__tag_prop_gput:cnx
+          { g__tag_struct_\int_eval:n {\c@g__tag_struct_abs_int}_prop }
+          { Alt }
+          { <\l__tag_tmpa_str> }
+      },
+    alttext .meta:n = {alt=#1},
+    actualtext  .code:n  = % ActualText property
+      {
+        \str_set_convert:Noon
+          \l__tag_tmpa_str
+          { #1 }
+          { default }
+          { utf16/hex }
+        \__tag_prop_gput:cnx
+          { g__tag_struct_\int_eval:n {\c@g__tag_struct_abs_int}_prop }
+          { ActualText }
+          { <\l__tag_tmpa_str>}
+      },
+    lang .code:n        = % Lang property
+      {
+        \__tag_prop_gput:cnx
+          { g__tag_struct_\int_eval:n {\c@g__tag_struct_abs_int}_prop }
+          { Lang }
+          { (#1) }
+      },
+    % ref: the value is a comma list of labels; for each one the
+    % tagstructobj value of the label tagpdfstruct-<label> is collected and
+    % the list is added to the Ref data of the current structure.
+    ref .code:n        = % ref property
+      {
+        \tl_clear:N\l__tag_tmpa_tl
+        \clist_map_inline:on {#1}
+          {
+            \tl_put_right:Nx \l__tag_tmpa_tl
+              {~\ref_value:nn{tagpdfstruct-##1}{tagstructobj} }
+          }
+        \__tag_struct_gput_data_ref:ee { \int_eval:n {\c@g__tag_struct_abs_int} } {\l__tag_tmpa_tl}
+      },
+    E .code:n        = % E property
+      {
+        \str_set_convert:Nnon
+          \l__tag_tmpa_str
+          { #1 }
+          { default }
+          { utf16/hex }
+        \__tag_prop_gput:cnx
+          { g__tag_struct_\int_eval:n {\c@g__tag_struct_abs_int}_prop }
+          { E }
+          { <\l__tag_tmpa_str> }
+      },
+  }
+% \__tag_struct_add_AF:nn {<struct num>} {<object name>}
+% Collects object references in g__tag_struct_<num>_AF_tl: the variable is
+% created with the first reference, later references are appended after a
+% space.
+\cs_new_protected:Npn \__tag_struct_add_AF:nn #1 #2 % #1 struct num #2 object name
+  {
+    \tl_if_exist:cTF
+      {
+        g__tag_struct_#1_AF_tl
+      }
+      {
+        \tl_gput_right:cx
+          { g__tag_struct_#1_AF_tl }
+          { ~ \pdf_object_ref:n {#2} }
+      }
+      {
+         \tl_new:c
+           { g__tag_struct_#1_AF_tl }
+         \tl_gset:cx
+           { g__tag_struct_#1_AF_tl }
+           { \pdf_object_ref:n {#2} }
+      }
+  }
+\cs_generate_variant:Nn \__tag_struct_add_AF:nn {en,ee}
+% Associated-file keys for structures.  AF takes the name of an existing
+% PDF object (an unknown name is silently ignored).  AFinline and
+% AFinline-o embed the value as a stream object __tag/fileobj<num> with the
+% file name tag-AFfile<num>.txt unless that object already exists.  In all
+% cases the AF entry is set to the array of collected references.
+\keys_define:nn { __tag / struct }
+  {
+    AF .code:n        = % AF property
+      {
+        \pdf_object_if_exist:nTF {#1}
+          {
+            \__tag_struct_add_AF:en { \int_eval:n {\c@g__tag_struct_abs_int} }{#1}
+            \__tag_prop_gput:cnx
+             { g__tag_struct_\int_eval:n {\c@g__tag_struct_abs_int}_prop }
+             { AF }
+             {
+               [
+                 \tl_use:c
+                  { g__tag_struct_\int_eval:n {\c@g__tag_struct_abs_int}_AF_tl }
+               ]
+             }
+          }
+          {
+
+          }
+      },
+   ,AFinline .code:n =
+     {
+       \group_begin:
+       \pdf_object_if_exist:eF {__tag/fileobj\int_use:N\c@g__tag_struct_abs_int}
+         {
+           \pdffile_embed_stream:nxx
+             {#1}
+             {tag-AFfile\int_use:N\c@g__tag_struct_abs_int.txt}
+             {__tag/fileobj\int_use:N\c@g__tag_struct_abs_int}
+            \__tag_struct_add_AF:ee
+              { \int_eval:n {\c@g__tag_struct_abs_int} }
+              { __tag/fileobj\int_use:N\c@g__tag_struct_abs_int }
+            \__tag_prop_gput:cnx
+              { g__tag_struct_\int_use:N\c@g__tag_struct_abs_int _prop }
+              { AF }
+              {
+                [
+                  \tl_use:c
+                   { g__tag_struct_\int_eval:n {\c@g__tag_struct_abs_int}_AF_tl }
+                ]
+              }
+         }
+       \group_end:
+     }
+   ,AFinline-o .code:n =
+     {
+       \group_begin:
+       \pdf_object_if_exist:eF {__tag/fileobj\int_use:N\c@g__tag_struct_abs_int}
+         {
+           \pdffile_embed_stream:oxx
+             {#1}
+             {tag-AFfile\int_use:N\c@g__tag_struct_abs_int.txt}
+             {__tag/fileobj\int_use:N\c@g__tag_struct_abs_int}
+            \__tag_struct_add_AF:ee
+              { \int_eval:n {\c@g__tag_struct_abs_int} }
+              { __tag/fileobj\int_use:N\c@g__tag_struct_abs_int }
+            \__tag_prop_gput:cnx
+              { g__tag_struct_\int_use:N\c@g__tag_struct_abs_int _prop }
+              { AF }
+              {
+                [
+                  \tl_use:c
+                   { g__tag_struct_\int_eval:n {\c@g__tag_struct_abs_int}_AF_tl }
+                ]
+              }
+         }
+       \group_end:
+     }
+  }
+% Setup key root-AF: like AF above, but for structure 0 (the StructTreeRoot
+% data).
+\keys_define:nn { __tag / setup }
+  {
+    root-AF .code:n =
+     {
+        \pdf_object_if_exist:nTF {#1}
+          {
+            \__tag_struct_add_AF:en { 0 }{#1}
+            \__tag_prop_gput:cnx
+             { g__tag_struct_0_prop }
+             { AF }
+             {
+               [
+                 \tl_use:c
+                  { g__tag_struct_0_AF_tl }
+               ]
+             }
+          }
+          {
+
+          }
+      },
+  }
+% \tag_struct_begin:n {<key-val list>}
+% Only acts inside \__tag_check_if_active_struct:T.  Steps the structure
+% counter, creates prop, output handler, kids sequence and PDF object for
+% the new structure, sets Type, processes the __tag/struct keys, optionally
+% sets a label, and pushes number and tag onto the stacks.  The parent is
+% the value of the parent key, or the top of the structure stack if that
+% key was not used.  Unless stash was given, the P entry is set and the
+% structure is recorded as kid of its parent.  All local settings are
+% confined to a group.
+\cs_set_protected:Npn \tag_struct_begin:n #1 %#1 key-val
+  {
+\__tag_check_if_active_struct:T
+      {
+        \group_begin:
+        \int_gincr:N \c@g__tag_struct_abs_int
+        \__tag_prop_new:c  { g__tag_struct_\int_eval:n { \c@g__tag_struct_abs_int }_prop }
+        \__tag_new_output_prop_handler:n {\int_eval:n { \c@g__tag_struct_abs_int }}
+        \__tag_seq_new:c  { g__tag_struct_kids_\int_eval:n { \c@g__tag_struct_abs_int }_seq}
+        \exp_args:Ne
+          \pdf_object_new:n
+            { __tag/struct/\int_eval:n { \c@g__tag_struct_abs_int } }
+        \__tag_prop_gput:cno
+          { g__tag_struct_\int_eval:n { \c@g__tag_struct_abs_int }_prop }
+          { Type }
+          { /StructElem }
+        % -1 marks "no parent key given"; see the comparison below.
+        \tl_set:Nn \l__tag_struct_stack_parent_tmpa_tl {-1}
+        \keys_set:nn { __tag / struct} { #1 }
+        \__tag_check_structure_has_tag:n { \int_eval:n {\c@g__tag_struct_abs_int} }
+        \tl_if_empty:NF
+          \l__tag_struct_key_label_tl
+          {
+            \__tag_ref_label:en{tagpdfstruct-\l__tag_struct_key_label_tl}{struct}
+          }
+        \int_compare:nNnT { \l__tag_struct_stack_parent_tmpa_tl } = { -1 }
+          {
+            \seq_get:NNF
+              \g__tag_struct_stack_seq
+              \l__tag_struct_stack_parent_tmpa_tl
+              {
+                \msg_error:nn { tag } { struct-faulty-nesting }
+              }
+          }
+        \seq_gpush:NV \g__tag_struct_stack_seq        \c@g__tag_struct_abs_int
+        \seq_gpush:NV \g__tag_struct_tag_stack_seq    \g__tag_struct_tag_tl
+        \tl_gset:NV   \g__tag_struct_stack_current_tl \c@g__tag_struct_abs_int
+        %\seq_show:N   \g__tag_struct_stack_seq
+        \bool_if:NF
+          \l__tag_struct_elem_stash_bool
+          {%set the parent
+            \__tag_prop_gput:cnx
+              { g__tag_struct_\int_eval:n {\c@g__tag_struct_abs_int}_prop }
+              { P }
+              {
+                \pdf_object_ref:e { __tag/struct/\l__tag_struct_stack_parent_tmpa_tl }
+              }
+            %record this structure as kid:
+            %\tl_show:N \g__tag_struct_stack_current_tl
+            %\tl_show:N \l__tag_struct_stack_parent_tmpa_tl
+            \__tag_struct_kid_struct_gput_right:xx
+               { \l__tag_struct_stack_parent_tmpa_tl }
+               { \g__tag_struct_stack_current_tl }
+            %\prop_show:c { g__tag_struct_\g__tag_struct_stack_current_tl _prop }
+            %\seq_show:c {g__tag_struct_kids_\l__tag_struct_stack_parent_tmpa_tl _seq}
+          }
+        %\prop_show:c { g__tag_struct_\g__tag_struct_stack_current_tl _prop }
+        %\seq_show:c {g__tag_struct_kids_\l__tag_struct_stack_parent_tmpa_tl _seq}
+        \group_end:
+     }
+  }
+% \tag_struct_end:
+% Only acts inside \__tag_check_if_active_struct:T.  Pops the top entries
+% of the tag stack and the structure stack and then makes the new top
+% entries the current structure number and the current tag.
+\cs_set_protected:Nn \tag_struct_end:
+  { %take the current structure num from the stack:
+    %the objects are written later, lua mode hasn't all needed info yet
+    %\seq_show:N \g__tag_struct_stack_seq
+\__tag_check_if_active_struct:T
+      {
+        \seq_gpop:NN   \g__tag_struct_tag_stack_seq \l__tag_tmpa_tl
+        \seq_gpop:NNTF \g__tag_struct_stack_seq \l__tag_tmpa_tl
+          {
+            \__tag_check_info_closing_struct:o { \g__tag_struct_stack_current_tl }
+          }
+          { \__tag_check_no_open_struct: }
+        % get the previous one, shouldn't be empty as the root should be there
+        \seq_get:NNTF \g__tag_struct_stack_seq \l__tag_tmpa_tl
+          {
+            \tl_gset:NV   \g__tag_struct_stack_current_tl \l__tag_tmpa_tl
+          }
+          {
+            \__tag_check_no_open_struct:
+          }
+        \seq_get:NNT \g__tag_struct_tag_stack_seq \l__tag_tmpa_tl
+          {
+            \tl_gset:NV \g__tag_struct_tag_tl \l__tag_tmpa_tl
+          }
+      }
+  }
+% \tag_struct_use:n {<label>}
+% Only acts inside \__tag_check_if_active_struct:T.  If a prop exists for
+% the structure number stored under the label tagpdfstruct-<label>, that
+% structure is added as kid of the current structure and its P entry is set
+% to the current structure; otherwise the warning struct-label-unknown is
+% issued.
+\cs_set_protected:Npn \tag_struct_use:n #1 %#1 is the label
+  {
+    \__tag_check_if_active_struct:T
+      {
+        \prop_if_exist:cTF
+          { g__tag_struct_\__tag_ref_value:enn{tagpdfstruct-#1}{tagstruct}{unknown}_prop } %
+          {
+            \__tag_check_struct_used:n {#1}
+            %add the label structure as kid to the current structure (can be the root)
+            \__tag_struct_kid_struct_gput_right:xx
+              { \g__tag_struct_stack_current_tl }
+              { \__tag_ref_value:enn{tagpdfstruct-#1}{tagstruct}{0} }
+            %add the current structure to the labeled one as parents
+            \__tag_prop_gput:cnx
+              { g__tag_struct_\__tag_ref_value:enn{tagpdfstruct-#1}{tagstruct}{0}_prop }
+              { P }
+              {
+                \pdf_object_ref:e { __tag/struct/\g__tag_struct_stack_current_tl }
+              }
+          }
+          {
+            \msg_warning:nnn{ tag }{struct-label-unknown}{#1}
+          }
+      }
+  }
+% Expandable.  Yields the reference to the object __tag/struct/<struct num>.
+\cs_new:Npn \tag_struct_object_ref:n #1
+  {
+    \pdf_object_ref:n {__tag/struct/#1}
+  }
+\cs_generate_variant:Nn \tag_struct_object_ref:n {e}
+% \tag_struct_gput:nnn {<struct num>} {<keyword>} {<value>}
+% Calls \__tag_struct_gput_data_<keyword>:nn {<struct num>} {<value>} if
+% that function exists; otherwise both arguments are dropped silently.
+\cs_new_protected:Npn \tag_struct_gput:nnn #1 #2 #3
+  {
+    \cs_if_exist_use:cF {__tag_struct_gput_data_#2:nn}
+      { %warning??
+        \use_none:nn
+      }
+      {#1}{#3}
+  }
+\cs_generate_variant:Nn \tag_struct_gput:nnn {ene,nne}
+% Sets the Ref entry of structure #1 to its previous value (if there was
+% one), followed by a space and #2.
+\cs_new_protected:Npn \__tag_struct_gput_data_ref:nn #1 #2
+  % #1 receiving struct num, #2 list of object ref
+  {
+    \prop_get:cnN
+      { g__tag_struct_#1_prop }
+      {Ref}
+      \l__tag_tmpb_tl
+    \__tag_prop_gput:cnx
+      { g__tag_struct_#1_prop }
+      { Ref }
+      { \quark_if_no_value:NF\l__tag_tmpb_tl { \l__tag_tmpb_tl\c_space_tl }#2 }
+  }
+\cs_generate_variant:Nn \__tag_struct_gput_data_ref:nn {ee}
+
+% Public wrapper: calls \__tag_struct_insert_annot:nn inside
+% \__tag_check_if_active_struct:T.
+\cs_new_protected:Npn \tag_struct_insert_annot:nn #1 #2 %#1 should be an object reference
+                                                       %#2 struct parent num
+  {
+    \__tag_check_if_active_struct:T
+      {
+        \__tag_struct_insert_annot:nn {#1}{#2}
+      }
+  }
+
+\cs_generate_variant:Nn \tag_struct_insert_annot:nn {xx}
+% Expandable.  Yields the current value of the counter
+% g__tag_parenttree_obj_int.
+\cs_new:Npn \tag_struct_parent_int: {\int_use:c { c@g__tag_parenttree_obj_int }}
+
+
+\prop_new:N             \g__tag_attr_entries_prop
+\seq_new:N              \g__tag_attr_class_used_seq
+\tl_new:N               \l__tag_attr_value_tl
+\prop_new:N             \g__tag_attr_objref_prop  %will contain obj num of used attributes
+% Stores attribute content #2 in \g__tag_attr_entries_prop under the PDF
+% name form of #1.
+\cs_new_protected:Npn \__tag_attr_new_entry:nn #1 #2 %#1:name, #2: content
+  {
+    \prop_gput:Nen \g__tag_attr_entries_prop
+      {\pdf_name_from_unicode_e:n{#1}}{#2}
+  }
+
+% Setup key newattribute: the value must consist of the two braced
+% arguments {<name>}{<content>}.
+\keys_define:nn { __tag / setup }
+  {
+    newattribute .code:n =
+      {
+        \__tag_attr_new_entry:nn #1
+      }
+  }
+% Structure key attribute-class: the value is a comma list of attribute
+% names.  Each name is converted to a PDF name, must be known in
+% \g__tag_attr_entries_prop (error attr-unknown otherwise) and is recorded
+% in \g__tag_attr_class_used_seq.  The C entry is set to the single name or,
+% for more than one name, to an array of the names.
+\keys_define:nn { __tag / struct }
+  {
+    attribute-class .code:n =
+     {
+       \clist_set:No \l__tag_tmpa_clist { #1 }
+       \seq_set_from_clist:NN \l__tag_tmpb_seq \l__tag_tmpa_clist
+       \seq_set_map_x:NNn \l__tag_tmpa_seq \l__tag_tmpb_seq
+         {
+           \pdf_name_from_unicode_e:n {##1}
+         }
+       \seq_map_inline:Nn \l__tag_tmpa_seq
+         {
+           \prop_if_in:NnF \g__tag_attr_entries_prop {##1}
+             {
+               \msg_error:nnn { tag } { attr-unknown } { ##1 }
+             }
+           \seq_gput_left:Nn\g__tag_attr_class_used_seq { ##1}
+         }
+       \tl_set:Nx \l__tag_tmpa_tl
+         {
+           \int_compare:nT { \seq_count:N \l__tag_tmpa_seq > 1 }{[}
+           \seq_use:Nn \l__tag_tmpa_seq { \c_space_tl }
+           \int_compare:nT { \seq_count:N \l__tag_tmpa_seq > 1 }{]}
+         }
+       \int_compare:nT { \seq_count:N \l__tag_tmpa_seq > 0 }
+         {
+           \__tag_prop_gput:cnx
+             { g__tag_struct_\int_eval:n {\c@g__tag_struct_abs_int}_prop }
+             { C }
+             { \l__tag_tmpa_tl }
+          %\prop_show:c   { g__tag_struct_\int_eval:n {\c@g__tag_struct_abs_int}_prop }
+         }
+     }
+  }
+% Structure key attribute: the value is a comma list of attribute names.
+% For each name an unnamed dict object with the stored content is written
+% the first time the name is used, and its reference is remembered in
+% \g__tag_attr_objref_prop.  The A entry is set to the reference(s), as an
+% array if there is more than one name.
+\keys_define:nn { __tag / struct }
+  {
+    attribute .code:n  = % A property (attribute, value currently a dictionary)
+      {
+        \clist_set:No          \l__tag_tmpa_clist { #1 }
+        \seq_set_from_clist:NN \l__tag_tmpb_seq \l__tag_tmpa_clist
+        \seq_set_map_x:NNn \l__tag_tmpa_seq \l__tag_tmpb_seq
+          {
+            \pdf_name_from_unicode_e:n {##1}
+          }
+        \tl_set:Nx \l__tag_attr_value_tl
+          {
+            \int_compare:nT { \seq_count:N \l__tag_tmpa_seq > 1 }{[}%]
+          }
+        \seq_map_inline:Nn \l__tag_tmpa_seq
+          {
+            \prop_if_in:NnF \g__tag_attr_entries_prop {##1}
+              {
+                \msg_error:nnn { tag } { attr-unknown } { ##1 }
+              }
+            \prop_if_in:NnF \g__tag_attr_objref_prop {##1}
+              {%\prop_show:N \g__tag_attr_entries_prop
+                \pdf_object_unnamed_write:nx
+                  { dict }
+                  {
+                    \prop_item:Nn\g__tag_attr_entries_prop {##1}
+                  }
+                \prop_gput:Nnx \g__tag_attr_objref_prop {##1} {\pdf_object_ref_last:}
+              }
+            \tl_put_right:Nx \l__tag_attr_value_tl
+              {
+                \c_space_tl
+                \prop_item:Nn \g__tag_attr_objref_prop {##1}
+              }
+     %     \tl_show:N \l__tag_attr_value_tl
+          }
+        \tl_put_right:Nx \l__tag_attr_value_tl
+          { %[
+            \int_compare:nT { \seq_count:N \l__tag_tmpa_seq > 1 }{]}%
+          }
+     %    \tl_show:N \l__tag_attr_value_tl
+        \__tag_prop_gput:cnx
+          { g__tag_struct_\int_eval:n {\c@g__tag_struct_abs_int}_prop }
+          { A }
+          { \l__tag_attr_value_tl }
+      },
+  }
+%% File: tagpdf-space.dtx
+% Generic definition of the interwordspace and show-spaces setup keys:
+% every interwordspace choice only warns that the engine does not support
+% it.  The engine-specific blocks below redefine the keys.
+\keys_define:nn { __tag / setup }
+  {
+    interwordspace .choices:nn = { true, on }
+      { \msg_warning:nnx {tag}{sys-no-interwordspace}{\c_sys_engine_str} },
+    interwordspace .choices:nn = { false, off }
+      { \msg_warning:nnx {tag}{sys-no-interwordspace}{\c_sys_engine_str} },
+    interwordspace .default:n  = true,
+    show-spaces    .bool_set:N = \l__tag_showspaces_bool
+  }
+% pdfTeX: in PDF mode the keys switch the engine's interword space
+% insertion on and off; in DVI mode they only issue a warning.
+\sys_if_engine_pdftex:T
+  {
+    \sys_if_output_pdf:TF
+      {
+        \pdfglyphtounicode{space}{0020}
+        \keys_define:nn { __tag / setup }
+          {
+            interwordspace .choices:nn = { true, on } { \pdfinterwordspaceon },
+            % "false"/"off" must really switch the feature off; the previous
+            % code called \pdfinterwordspaceon here as well, so it could
+            % never be disabled again.
+            interwordspace .choices:nn = { false, off }{ \pdfinterwordspaceoff },
+            interwordspace .default:n  = true,
+            show-spaces    .bool_set:N = \l__tag_showspaces_bool
+          }
+      }
+      {
+        \keys_define:nn { __tag / setup }
+          {
+            interwordspace .choices:nn = { true, on, false, off }
+              { \msg_warning:nnn {tag}{sys-no-interwordspace}{dvi} },
+            interwordspace .default:n  = true,
+            show-spaces    .bool_set:N = \l__tag_showspaces_bool
+          }
+      }
+  }
+
+% LuaTeX: the keys set \g__tag_active_space_bool and call the Lua functions
+% markspaceon/markspaceoff; show-spaces sets ltx.__tag.trace.showspaces.
+\sys_if_engine_luatex:T
+  {
+    \keys_define:nn { __tag / setup }
+      {
+        interwordspace .choices:nn =
+                          { true, on }
+                          {
+                            \bool_gset_true:N \g__tag_active_space_bool
+                            \lua_now:e{ltx.__tag.func.markspaceon()}
+                          },
+        interwordspace .choices:nn =
+                          { false, off }
+                          {
+                           \bool_gset_false:N \g__tag_active_space_bool
+                           \lua_now:e{ltx.__tag.func.markspaceoff()}
+                          },
+        interwordspace .default:n  = true,
+        show-spaces    .choice:,
+        show-spaces / true .code:n =
+                          {\lua_now:e{ltx.__tag.trace.showspaces=true}},
+        show-spaces / false .code:n =
+                          {\lua_now:e{ltx.__tag.trace.showspaces=nil}},
+        show-spaces .default:n = true
+      }
+  }
+% LuaTeX only: \__tag_fakespace: calls the Lua function fakespace() and
+% adds a zero skip, both inside a group.
+\sys_if_engine_luatex:T
+  {
+    \cs_new_protected:Nn \__tag_fakespace:
+      {
+        \group_begin:
+        \lua_now:e{ltx.__tag.func.fakespace()}
+        \skip_horizontal:n{\c_zero_skip}
+        \group_end:
+      }
+  }
+%% File: tagpdf-user.dtx
+
+
+% User command: passes its argument to the __tag/setup keys.
+\RenewDocumentCommand \tagpdfsetup { m }
+  {
+    \keys_set:nn { __tag / setup } { #1 }
+  }
+
+
+\NewDocumentCommand \tagmcifinTF { m m }
+   {
+     \tag_mc_if_in:TF { #1 } { #2 }
+   }
+% User command: passes its argument to the __tag/show keys defined below.
+\NewDocumentCommand\ShowTagging { m }
+  {
+    \keys_set:nn { __tag / show }{ #1}
+
+  }
+% mc-data: LuaTeX only; calls the Lua function show_all_mc_data with the
+% key value (default 1), the current mc counter and 0.
+\keys_define:nn { __tag / show }
+  {
+    mc-data .code:n =
+      {
+        \sys_if_engine_luatex:T
+          {
+            \lua_now:e{ltx.__tag.trace.show_all_mc_data(#1,\__tag_get_mc_abs_cnt:,0)}
+          }
+      }
+    ,mc-data .default:n = 1
+  }
+
+% mc-current: in lua mode (and only with LuaTeX) the attribute
+% g__tag_mc_cnt_attr is compared with -2147483647; if equal a "no MC open"
+% line is logged, otherwise the counter, the tag and the type attribute of
+% the current mc.  Outside lua mode the message mc-current is issued.
+\keys_define:nn { __tag / show }
+  { mc-current .code:n =
+      {
+        \bool_if:NTF \g__tag_mode_lua_bool
+          {
+            \sys_if_engine_luatex:T
+              {
+                \int_compare:nNnTF
+                  { -2147483647 }
+                  =
+                  {
+                    \lua_now:e
+                      {
+                        tex.print
+                         (tex.getattribute
+                           (luatexbase.attributes.g__tag_mc_cnt_attr))
+                      }
+                  }
+                  {
+                    \lua_now:e
+                      {
+                        ltx.__tag.trace.log
+                         (
+                           "mc-current:~no~MC~open,~current~abscnt
+                            =\__tag_get_mc_abs_cnt:"
+                           ,0
+                         )
+                        texio.write_nl("")
+                      }
+                  }
+                  {
+                    \lua_now:e
+                      {
+                        ltx.__tag.trace.log
+                         (
+                           "mc-current:~abscnt=\__tag_get_mc_abs_cnt:=="
+                           ..
+                           tex.getattribute(luatexbase.attributes.g__tag_mc_cnt_attr)
+                           ..
+                           "~=>tag="
+                           ..
+                           tostring
+                            (ltx.__tag.func.get_tag_from
+                              (tex.getattribute
+                                (luatexbase.attributes.g__tag_mc_type_attr)))
+                           ..
+                           "="
+                           ..
+                           tex.getattribute
+                            (luatexbase.attributes.g__tag_mc_type_attr)
+                           ,0
+                         )
+                        texio.write_nl("")
+                      }
+                  }
+              }
+          }
+          {
+            \msg_note:nn{ tag }{ mc-current }
+          }
+      }
+  }
+% mc-marks: "show" writes the mark information to the terminal and shows
+% the two mark sequences; "use" puts the same information into the input
+% stream instead.
+\keys_define:nn { __tag / show }
+  {
+    mc-marks .choice: ,
+    mc-marks / show .code:n =
+      {
+        \__tag_mc_get_marks:
+        \__tag_check_if_mc_in_galley:TF
+         {
+           \iow_term:n {Marks~from~this~page:~}
+         }
+         {
+           \iow_term:n {Marks~from~a~previous~page:~}
+         }
+        \seq_show:N \l__tag_mc_firstmarks_seq
+        \seq_show:N \l__tag_mc_botmarks_seq
+        \__tag_check_if_mc_tmb_missing:T
+          {
+            \iow_term:n {BDC~missing~on~this~page!}
+          }
+        \__tag_check_if_mc_tme_missing:T
+          {
+            \iow_term:n {EMC~missing~on~this~page!}
+          }
+      },
+    mc-marks / use .code:n =
+      {
+        \__tag_mc_get_marks:
+        \__tag_check_if_mc_in_galley:TF
+         { Marks~from~this~page:~}
+         { Marks~from~a~previous~page:~}
+        \seq_use:Nn \l__tag_mc_firstmarks_seq {,~}\quad
+        \seq_use:Nn \l__tag_mc_botmarks_seq {,~}\quad
+        \__tag_check_if_mc_tmb_missing:T
+          {
+            BDC~missing~
+          }
+        \__tag_check_if_mc_tme_missing:T
+          {
+            EMC~missing
+          }
+      },
+     mc-marks .default:n = show
+  }
+% struct-stack: logs or shows \g__tag_struct_tag_stack_seq.
+\keys_define:nn { __tag / show }
+  {
+     struct-stack .choice:
+    ,struct-stack / log .code:n = \seq_log:N \g__tag_struct_tag_stack_seq
+    ,struct-stack / show .code:n = \seq_show:N \g__tag_struct_tag_stack_seq
+    ,struct-stack .default:n = show
+  }
+% Adds code that opens a structure with tag #1 at begindocument and closes
+% it in the hook tagpdf/finish/before.
+\cs_new_protected:Npn \__tag_add_document_structure:n #1
+  {
+    \hook_gput_code:nnn{begindocument}{tagpdf}{\tagstructbegin{tag=#1}}
+    \hook_gput_code:nnn{tagpdf/finish/before}{tagpdf}{\tagstructend}
+  }
+% Setup key activate: sets activate-mc, activate-tree and activate-struct
+% and adds the document structure; the value (default Document) is its tag.
+\keys_define:nn { __tag / setup}
+  {
+    activate .code:n =
+      {
+        \keys_set:nn { __tag / setup }
+          { activate-mc,activate-tree,activate-struct }
+        \__tag_add_document_structure:n {#1}
+      },
+    activate .default:n = Document
+  }
+% Structure destinations are activated only if both booleans are true,
+% \pdf_activate_structure_destination: exists and the PDF version is not
+% below 2.0.
+\AddToHook{begindocument/before}
+  {
+    \bool_lazy_all:nT
+      {
+        { \g__tag_active_struct_dest_bool }
+        { \g__tag_active_struct_bool }
+        { \cs_if_exist_p:N \pdf_activate_structure_destination: }
+        { ! \pdf_version_compare_p:Nn < {2.0} }
+      }
+      {
+        \tl_set:Nn \l_pdf_current_structure_destination_tl { __tag/struct/\g__tag_struct_stack_current_tl }
+        \pdf_activate_structure_destination:
+      }
+  }
+
+\sys_if_engine_luatex:T
+  {
+    \NewDocumentCommand\pdffakespace { }
+      {
+        \__tag_fakespace:
+      }
+  }
+% Paragraph tagging: switches, begin/end counters and the tag used
+% (initially P).
+\bool_new:N \l__tag_para_bool
+\bool_new:N \l__tag_para_show_bool
+\int_new:N  \g__tag_para_begin_int
+\int_new:N  \g__tag_para_end_int
+\tl_new:N   \l__tag_para_tag_tl
+\tl_set:Nn  \l__tag_para_tag_tl { P }
+\keys_define:nn { __tag / setup }
+  {
+    paratagging      .bool_set:N = \l__tag_para_bool,
+    paratagging-show .bool_set:N = \l__tag_para_show_bool,
+    paratag          .tl_set:N   = \l__tag_para_tag_tl
+  }
+
+% With paratagging active every paragraph start opens a structure and an mc
+% chunk with the paragraph tag; with paratagging-show the begin counter is
+% additionally typeset as a red artifact.
+\AddToHook{para/begin}
+  {
+    \bool_if:NT \l__tag_para_bool
+      {
+        \int_gincr:N \g__tag_para_begin_int
+        \tag_struct_begin:n {tag=\l__tag_para_tag_tl}
+        \bool_if:NT \l__tag_para_show_bool
+          { \tag_mc_begin:n{artifact}
+            \llap{\color_select:n{red}\tiny\int_use:N\g__tag_para_begin_int\ }
+            \tag_mc_end:
+          }
+        \tag_mc_begin:n {tag=\l__tag_para_tag_tl}
+      }
+  }
+% Counterpart of the para/begin code: closes the mc chunk and the structure.
+\AddToHook{para/end}
+  {
+    \bool_if:NT \l__tag_para_bool
+      {
+        \int_gincr:N \g__tag_para_end_int
+        \tag_mc_end:
+        \bool_if:NT \l__tag_para_show_bool
+          { \tag_mc_begin:n{artifact}
+            \rlap{\color_select:n{red}\tiny\ \int_use:N\g__tag_para_end_int}
+            \tag_mc_end:
+          }
+        \tag_struct_end:
+      }
+  }
+% Raises the error para-hook-count-wrong if the begin and end counters
+% differ at the end of the document.
+\AddToHook{enddocument/info}
+  {
+    \int_compare:nNnF {\g__tag_para_begin_int}={\g__tag_para_end_int}
+      {
+        \msg_error:nnxx
+          {tag}
+          {para-hook-count-wrong}
+          {\int_use:N\g__tag_para_begin_int}
+          {\int_use:N\g__tag_para_end_int}
+      }
+  }
+% Outside lua mode, and only if \@kernel@before@footins exists: append
+% \__tag_add_missing_mcs_to_stream:Nn calls to the kernel and multicol
+% hooks for the footnote, main and multicol streams.
+\AddToHook{begindocument/before}
+  {
+    \bool_if:NF \g__tag_mode_lua_bool
+      {
+        \cs_if_exist:NT \@kernel@before@footins
+          {
+            \tl_put_right:Nn \@kernel@before@footins
+              { \__tag_add_missing_mcs_to_stream:Nn \footins {footnote} }
+            \tl_put_right:Nn \@kernel@before@cclv
+              {
+                \__tag_check_typeout_v:n {====>~In~\token_to_str:N \@makecol\c_space_tl\the\c@page}
+                \__tag_add_missing_mcs_to_stream:Nn \@cclv {main}
+              }
+            \tl_put_right:Nn \@mult@ptagging@hook
+              {
+                \__tag_check_typeout_v:n {====>~In~\string\page@sofar}
+                \process@cols\mult@firstbox
+                  {
+                    \__tag_add_missing_mcs_to_stream:Nn \count@ {multicol}
+                  }
+                \__tag_add_missing_mcs_to_stream:Nn \mult@rightbox {multicol}
+              }
+          }
+      }
+  }
+% User switches for paragraph tagging (local settings of \l__tag_para_bool).
+\renewcommand\tagpdfparaOn {\bool_set_true:N \l__tag_para_bool}
+\renewcommand\tagpdfparaOff{\bool_set_false:N \l__tag_para_bool}
+% Runs #1 in a group after calling __tag_mc_disable_marks: (via \use:c).
+\NewDocumentCommand\tagpdfsuppressmarks{m}
+  {{\use:c{__tag_mc_disable_marks:} #1}}
+% Hook functions for page head and foot; empty here, (re)defined by the
+% exclude-header-footer key below.
+\cs_new_protected:Npn\__tag_hook_kernel_before_head:{}
+\cs_new_protected:Npn\__tag_hook_kernel_after_head:{}
+\cs_new_protected:Npn\__tag_hook_kernel_before_foot:{}
+\cs_new_protected:Npn\__tag_hook_kernel_after_foot:{}
+
+% If the kernel provides \@kernel@before@head, insert the hook functions
+% into the four kernel head/foot hooks.
+\AddToHook{begindocument}
+  {
+    \cs_if_exist:NT \@kernel@before@head
+      {
+        \tl_put_right:Nn \@kernel@before@head {\__tag_hook_kernel_before_head:}
+        \tl_put_left:Nn  \@kernel@after@head  {\__tag_hook_kernel_after_head:}
+        \tl_put_right:Nn \@kernel@before@foot {\__tag_hook_kernel_before_foot:}
+        \tl_put_left:Nn  \@kernel@after@foot  {\__tag_hook_kernel_after_foot:}
+      }
+  }
+
+\bool_new:N \g__tag_saved_in_mc_bool
+% Starts an artifact mc chunk for head/foot material: paragraph tagging is
+% switched off, the open mc is pushed (lua mode) or the in-mc state is
+% saved and reset (otherwise).
+\cs_new_protected:Npn \__tag_exclude_headfoot_begin:
+  {
+    \bool_set_false:N \l__tag_para_bool
+    \bool_if:NTF \g__tag_mode_lua_bool
+      {
+        \tag_mc_end_push:
+      }
+      {
+        \bool_gset_eq:NN   \g__tag_saved_in_mc_bool \g__tag_in_mc_bool
+        \bool_gset_false:N \g__tag_in_mc_bool
+      }
+    \tag_mc_begin:n {artifact}
+  }
+% Ends the artifact chunk and restores the mc state saved at the begin.
+\cs_new_protected:Npn \__tag_exclude_headfoot_end:
+  {
+    \tag_mc_end:
+    \bool_if:NTF \g__tag_mode_lua_bool
+      {
+        \tag_mc_begin_pop:n{}
+      }
+      {
+        \bool_gset_eq:NN \g__tag_in_mc_bool\g__tag_saved_in_mc_bool
+      }
+  }
+\__tag_attr_new_entry:nn {__tag/attr/pagination}{/O/Artifact/Type/Pagination}
+% Like \__tag_exclude_headfoot_begin:, but additionally opens an Artifact
+% structure with attribute class __tag/attr/#1 and uses artifact=#1 for the
+% mc chunk.
+\cs_new_protected:Npn \__tag_exclude_struct_headfoot_begin:n #1
+  {
+    \bool_set_false:N \l__tag_para_bool
+    \bool_if:NTF \g__tag_mode_lua_bool
+      {
+        \tag_mc_end_push:
+      }
+      {
+        \bool_gset_eq:NN   \g__tag_saved_in_mc_bool \g__tag_in_mc_bool
+        \bool_gset_false:N \g__tag_in_mc_bool
+      }
+    \tag_struct_begin:n{tag=Artifact,attribute-class=__tag/attr/#1}
+    \tag_mc_begin:n {artifact=#1}
+  }
+
+\cs_new_protected:Npn \__tag_exclude_struct_headfoot_end:
+  {
+    \tag_mc_end:
+    \tag_struct_end:
+    \bool_if:NTF \g__tag_mode_lua_bool
+      {
+        \tag_mc_begin_pop:n{}
+      }
+      {
+        \bool_gset_eq:NN \g__tag_in_mc_bool\g__tag_saved_in_mc_bool
+      }
+  }
+% Setup key exclude-header-footer (default and initial value: true):
+% true wraps head and foot in plain artifact chunks, pagination wraps them
+% in an Artifact structure with the pagination attribute, false makes the
+% hook functions do nothing.
+\keys_define:nn { __tag / setup }
+  {
+    exclude-header-footer .choice:,
+    exclude-header-footer / true .code:n =
+      {
+        \cs_set_eq:NN \__tag_hook_kernel_before_head: \__tag_exclude_headfoot_begin:
+        \cs_set_eq:NN \__tag_hook_kernel_before_foot: \__tag_exclude_headfoot_begin:
+        \cs_set_eq:NN \__tag_hook_kernel_after_head:  \__tag_exclude_headfoot_end:
+        \cs_set_eq:NN \__tag_hook_kernel_after_foot:  \__tag_exclude_headfoot_end:
+      },
+    exclude-header-footer / pagination .code:n =
+      {
+        \cs_set:Nn \__tag_hook_kernel_before_head: { \__tag_exclude_struct_headfoot_begin:n {pagination} }
+        \cs_set:Nn \__tag_hook_kernel_before_foot: { \__tag_exclude_struct_headfoot_begin:n {pagination} }
+        \cs_set_eq:NN \__tag_hook_kernel_after_head:  \__tag_exclude_struct_headfoot_end:
+        \cs_set_eq:NN \__tag_hook_kernel_after_foot:  \__tag_exclude_struct_headfoot_end:
+      },
+    exclude-header-footer / false .code:n =
+      {
+        \cs_set_eq:NN \__tag_hook_kernel_before_head: \prg_do_nothing:
+        \cs_set_eq:NN \__tag_hook_kernel_before_foot: \prg_do_nothing:
+        \cs_set_eq:NN \__tag_hook_kernel_after_head:  \prg_do_nothing:
+        \cs_set_eq:NN \__tag_hook_kernel_after_foot:  \prg_do_nothing:
+      },
+    exclude-header-footer .default:n = true,
+    exclude-header-footer .initial:n = true
+  }
+% Link tagging.  Before a URI or GoTo link annotation: the open mc chunk is
+% pushed, a Link structure and a Link mc chunk are opened and StructParent
+% is added to the annotation dictionary.  After the annotation: it is
+% inserted as OBJR kid, then chunk and structure are closed and the pushed
+% mc chunk is reopened.
+\hook_gput_code:nnn
+  {pdfannot/link/URI/before}
+  {tagpdf}
+  {
+    \tag_mc_end_push:
+    \tag_struct_begin:n { tag=Link }
+    \tag_mc_begin:n { tag=Link }
+    \pdfannot_dict_put:nnx
+      { link/URI }
+      { StructParent }
+      { \tag_struct_parent_int: }
+  }
+
+\hook_gput_code:nnn
+  {pdfannot/link/URI/after}
+  {tagpdf}
+  {
+     \tag_struct_insert_annot:xx {\pdfannot_link_ref_last:}{\tag_struct_parent_int:}
+     \tag_mc_end:
+     \tag_struct_end:
+     \tag_mc_begin_pop:n{}
+  }
+
+\hook_gput_code:nnn
+  {pdfannot/link/GoTo/before}
+  {tagpdf}
+  {
+     \tag_mc_end_push:
+     \tag_struct_begin:n{tag=Link}
+     \tag_mc_begin:n{tag=Link}
+     \pdfannot_dict_put:nnx
+       { link/GoTo }
+       { StructParent }
+       { \tag_struct_parent_int: }
+  }
+
+\hook_gput_code:nnn
+  {pdfannot/link/GoTo/after}
+  {tagpdf}
+  {
+    \tag_struct_insert_annot:xx {\pdfannot_link_ref_last:}{\tag_struct_parent_int:}
+    \tag_mc_end:
+    \tag_struct_end:
+    \tag_mc_begin_pop:n{}
+
+  }
+
+% Contents entries added to the URI and GoTo link annotation dictionaries.
+\pdfannot_dict_put:nnn
+  { link/URI }
+  { Contents }
+  { (url) }
+
+\pdfannot_dict_put:nnn
+  { link/GoTo }
+  { Contents }
+  { (ref) }
+
+%%
+%%
+%% End of file `tagpdf.sty'.
diff --git a/texmf/tex/latex/tagpdf/tagpdfdocu-patches.sty b/texmf/tex/latex/tagpdf/tagpdfdocu-patches.sty
new file mode 100644
index 000000000..e01f9fa59
--- /dev/null
+++ b/texmf/tex/latex/tagpdf/tagpdfdocu-patches.sty
@@ -0,0 +1,288 @@
+%\RequirePackage[enable-debug]{expl3}[2018/06/14]
+\ProvidesExplPackage {tagpdfdocu-patches} {2022-08-24} {0.97}
+ {patches/commands for the tagpdf documentation}
+\RequirePackage{etoolbox}
+
+%%%%%
+% tableofcontents
+% no internal patches, but KOMA-only solution
+% TODO it must be checked how best practice is now for the table of contents.
+%%%%%
+
+
+% The table of contents as a whole is a TOC structure: it is opened after
+% the TOC heading and closed by the code given to \AfterStartingTOC.
+\AfterTOCHead
+  {
+    \tag_struct_begin:n { tag=TOC }
+  }
+\AfterStartingTOC
+  {
+    \tag_struct_end:
+  }
+
+% Formatting commands handed to the KOMA tocline style below.
+% A complete entry gets only a structure (TOCI) ...
+\newcommand \tagscrtocentry [1]
+  {
+    \tag_struct_begin:n { tag=TOCI }
+    #1
+    \tag_struct_end:
+  }
+
+% ... while the page number is a leaf and therefore gets a structure and a
+% marked content chunk (Reference).
+\newcommand \tagscrtocpagenumber [1]
+  {
+    \tag_struct_begin:n { tag=Reference }
+    \tag_mc_begin:n { tag=Reference } #1 \tag_mc_end:
+    \tag_struct_end:
+  }
+
+
+% Use both formatting commands for the tocline style of all four levels.
+\clist_map_inline:nn { section, subsection, subsubsection, paragraph }
+  {
+    \DeclareTOCStyleEntry
+      [
+        entryformat=\tagscrtocentry,
+        pagenumberformat=\tagscrtocpagenumber
+      ]
+      { tocline } { #1 }
+  }
+
+
+% Content written to the toc file: #1 is the level, #2 the number and #3
+% the text.  Without a number only the text is written, tagged as P; with a
+% number the number is tagged as Lbl and the text as P.
+% (A test with \Ifstr on #3 is not used: it doesn't like a \label.)
+\renewcommand { \addtocentrydefault } [3]
+  {
+    \Ifstr {#2} {}
+      {
+        \addcontentsline {toc} {#1}
+          {
+            \protect\nonumberline
+            \tagstructbegin{tag=P}
+            \tagmcbegin{tag=P}
+            #3
+            \tagmcend
+            \tagstructend
+          }
+      }
+      {
+        \addcontentsline {toc} {#1}
+          {
+            \tagstructbegin{tag=Lbl}
+            \tagmcbegin{tag=Lbl}
+            \protect\numberline{#2}
+            \tagmcend
+            \tagstructend
+            \tagstructbegin{tag=P}
+            \tagmcbegin{tag=P}
+            #3
+            \tagmcend
+            \tagstructend
+          }
+      }
+  }
+
+% The leader dots have to be marked as well: every dot is an artifact.
+
+\renewcommand* { \TOCLineLeaderFill } [1] [.]
+  {
+    \leaders
+    \hbox
+      {
+        $
+        \m@th
+        \mkern \@dotsep mu
+        \hbox { \tag_mc_begin:n{artifact} #1 \tag_mc_end: }
+        \mkern \@dotsep mu
+        $
+      }
+    \hfill
+  }
+
+%%%%%%%%%
+% Sectioning commands
+% no internal patches, but KOMA-only solution
+%%%%%%%%
+
+% the structure should be opened rather early to catch the refstepcounter!
+% The code put into the heading hooks ends with a function taking one
+% argument; \use_none:n discards that argument where it is not needed.
+% NOTE(review): presumably the argument is the heading name supplied by
+% KOMA's do-hooks - confirm against the KOMA-Script documentation.
+\AddtoDoHook{heading/begingroup}{\tagpdfparaOff\use_none:n}
+% Opens the heading structure; the tag is the value stored for #1 in
+% \g_tag_section_level_prop.
+\cs_new_protected:Npn \__tag_struct_section_begin:n #1
+  { \tagstructbegin{tag=\prop_item:Nn\g_tag_section_level_prop{#1}} }
+\AddtoDoHook{heading/postinit}{\__tag_struct_section_begin:n}
+\AddtoDoHook{heading/endgroup}{\tagstructend\use_none:n}
+
+% Map from heading name to the structure tag used for it.
+\prop_new:N \g_tag_section_level_prop
+\prop_gput:Nnn \g_tag_section_level_prop {section}{H1}
+\prop_gput:Nnn \g_tag_section_level_prop {subsection}{H2}
+\prop_gput:Nnn \g_tag_section_level_prop {subsubsection}{H3}
+\prop_gput:Nnn \g_tag_section_level_prop {paragraph}{H4}
+
+% Heading layout with tagging: #1 selects the tag via
+% \g_tag_section_level_prop, #2 is used as \hskip amount, #3 and #4 are each
+% wrapped in their own mc-chunk.  The chunk around #3 is only produced when
+% \tl_if_in:nnF does not find #3 inside \@empty.
+\renewcommand{\sectionlinesformat}[4]
+  {
+    \@hangfrom
+      {\hskip #2
+        %\tl_if_empty:nF{#3}
+        \tl_if_in:nnF{\@empty}{#3}
+          {
+            \tag_mc_begin:n {tag=\prop_item:Nn\g_tag_section_level_prop{#1}}
+            #3
+            \tag_mc_end:
+          }
+      }
+      {\tag_mc_begin:n {tag=\prop_item:Nn\g_tag_section_level_prop{#1}}
+        #4
+        \tag_mc_end:}%
+  }
+
+% minisec is simply P, so we let paratagging handle this.
+% But we add a Strong structure around the text.
+% TODO: Should be changed to H6 as it is no longer required to have H6 only
+% after H5, but we need to check how to disable paratagging ...
+%
+% The function only performs tagging operations, which are not expandable,
+% so it is declared protected: it must not be expanded when the font
+% setting of the minisec element ends up in an expansion context.
+\cs_new_protected:Nn \__tag_docu_tag_minisec:n
+  {
+    \tag_struct_begin:n {tag=Strong}
+    \tag_mc_begin:n{tag=Strong}
+    #1
+    \tag_mc_end:
+    \tag_struct_end:
+  }
+%
+\addtokomafont{minisec}{\__tag_docu_tag_minisec:n}
+
+
+%%%%
+%% Lists
+%% patches enumitem internals!
+%%%%
+\AddToHook{env/itemize/begin}{\par\tagstructbegin{tag=L}}%\par is needed to close the paragraph before.
+\AddToHook{env/itemize/end}{\par\tagstructend\tagstructend\tagstructend}%LBody,LI,L
+\AddToHook{env/enumerate/begin}{\par\tagstructbegin{tag=L}}
+\AddToHook{env/enumerate/end}{\par\tagstructend\tagstructend\tagstructend}%LBody,LI,L
+\AddToHook{env/description/begin}{\par\tagstructbegin{tag=L}}
+\AddToHook{env/description/end}{\par\tagstructend\tagstructend\tagstructend}%LBody,LI,L
+
+% Replacement for \@firstofone in \enit@preset (see the patch below); #1 is
+% the material that is tagged as label.
+% If the S entry in the property list of the current structure is /L, nothing
+% is closed (begin of the list); otherwise two structures are ended
+% (LBody and LI of the previous item).  Then LI is opened, #1 is wrapped in a
+% Lbl structure with an mc-chunk, and LBody is opened.
+\newcommand\tag@enit@format@preset[1]{%
+  \str_if_eq:eeTF { \prop_item:cn { g__tag_struct_\g__tag_struct_stack_current_tl _prop }{S} }{/L}
+   {
+     %\typeout{BEGIN~OF~LIST}
+   }
+   {%\typeout{NEXT ITEM}
+    \tagstructend\tagstructend} % for the LBody/LI
+  \tagstructbegin{tag=LI}
+  \tagstructbegin{tag=Lbl}
+  \tagmcbegin{tag=Lbl}
+  #1
+  \tagmcend
+  \tagstructend
+  \tagstructbegin{tag=LBody}
+  }
+
+% NOTE(review): this file only requires etoolbox, while \xpatchcmd is an
+% xpatch command - presumably xpatch is loaded by the document; confirm.
+% NOTE(review): \fail is used as failure code - presumably left undefined on
+% purpose so that a patch that does not apply raises an error; confirm.
+\ExplSyntaxOff
+\xpatchcmd\enit@preset{\@firstofone}{\tag@enit@format@preset}{}{\fail}
+%close mc from paratagging and reopen ...
+\xpatchcmd\@item{\box\@labels}{\tagmcend \box\@labels \tagmcbegin{tag=P}}{}{\fail}
+\ExplSyntaxOn
+
+% keys that overwrite \enit@format must be patched, e.g. like the following;
+% the font key should be patched in a similar way
+\enitkv@key{}{format}{% new enumitem doesn't use the first argument
+  \def\enit@format{%
+    \tagstructbegin{tag=LI}%
+    \tagstructbegin{tag=Lbl}%
+    \tagmcbegin{tag=Lbl}%
+    #1\tagmcend}}
+
+
+% listings + verbatim
+% the paragraph code could create spans around the lines ...
+% it is quite unclear if Code is a sensible structure as it no longer exists in pdf 2.0
+
+% Both environments switch paratagging off and put their whole content into
+% one Code structure with one Code mc-chunk.
+\AddToHook{env/lstlisting/begin}{\tagpdfparaOff\tagstructbegin{tag=Code}\tagmcbegin{tag=Code}}
+\AddToHook{env/lstlisting/end}{\tagmcend\tagstructend}
+
+\AddToHook{env/verbatim/begin}{\tagpdfparaOff\tagstructbegin{tag=Code}\tagmcbegin{tag=Code}}
+\AddToHook{env/verbatim/end}{\tagmcend\tagstructend}
+
+
+% ======== marginnote ==========
+% TODO marginnote has a bug (a \par is missing) so it messes up tagging.
+% but currently unneeded as we marked them up as artifacts anyway as they don't contain
+% meaningful contents
+
+% \sidenote{<text>}: a \marginnote inside an artifact group of type notype,
+% with paratagging switched off before the note is set.
+% NOTE(review): \tagpdfparaOff is not undone inside this command - confirm
+% that the surrounding code restores paratagging.
+\NewDocumentCommand\sidenote{m}
+  {
+    \tag_mc_artifact_group_begin:n{notype}\tagpdfparaOff\marginnote{#1}\tag_mc_artifact_group_end:
+  }
+
+% ======== tikzpicture ==========
+% TODO this needs some investigation: it messes up the stack if one adds paraOff
+
+% intentionally empty for now, see the TODO above
+\AddToHook{env/tikzpicture/begin}{}
+
+%======== tcolorbox ========
+% We switch off paratagging at the begin and reenable it locally in before upper.
+% The before upper setting is dangerous as it can be overwritten by
+% users. So a more stable hook is needed.
+%\AddToHook{env/tcolorbox/begin}{\tagpdfparaOff}
+%\AddToHook{env/docCommand/begin}{\tagpdfparaOff}
+%\tcbset{before~upper=\tagpdfparaOn}
+\AddToHook{env/tcolorbox/begin}{\tagpdfparaOff \tcbset{before~upper=\tagpdfparaOn}}
+\AddToHook{env/docCommand/begin}{\tagpdfparaOff \tcbset{before~upper=\tagpdfparaOn}}
+
+ %locally for now
+
+
+% ======= footnote ========
+% TODO
+
+% ======= bibliography ========
+% biblatex. Creates some empty mc-chunks.
+% no internal patches, but redefining begentry/finentry is not safe.
+% better hook is needed.
+%
+% begentry and finentry are existing biblatex macros, so they are redefined
+% with \renewbibmacro: \newbibmacro would issue an "already defined" warning
+% and fall back to \renewbibmacro anyway.
+% begentry opens a BibEntry structure and mc-chunk; finentry runs biblatex's
+% \finentry and then closes both again.
+
+\renewbibmacro*{begentry}{\tagstructbegin{tag=BibEntry}\tagmcbegin{tag=BibEntry}}
+\renewbibmacro*{finentry}{\finentry\tagmcend\tagstructend}
+
+% ====== hyperref ========
+% this should probably go into tagpdf, but it is related to the
+% problem of pdf strings and context ....
+%
+% Only done if hyperref is already loaded when this file is read: inside pdf
+% strings the begin commands gobble their argument and the end commands
+% do nothing.
+
+\@ifpackageloaded{hyperref}{%
+\pdfstringdefDisableCommands{
+  \let\tagstructbegin\@gobble
+  \let\tagmcbegin\@gobble
+  \let\tagmcend\relax
+  \let\tagstructend\relax
+}}{}
+
+
+%====== pictures ========
+% PAC3 complained that the BBox is missing, so we are cheating for now
+% and add a fixed size.
+% Setup keys of this file, set in one call and in the original order.
+\tagpdfsetup
+  {
+    % attribute bbox with a fixed /BBox
+    newattribute = {bbox}{/O /Layout /BBox [0~0~100~100]} ,
+    %%%%%
+    %======== Role maps =========
+    %%%%%
+    add-new-tag = Title/H1 ,
+    add-new-tag = Strong/Span
+  }
+
+
+
+\endinput
+
+% ==========================
+% now unneeded commands (nothing after the \endinput above is read)
+
+% we need a command which simply starts a paragraph \TagP^,
+% a "continuation" variant which closes the previous paragraph (\TagP),
+% and a final end.
+
+\NewDocumentCommand \TagP { t{^} }
+  {
+    \tag_mc_if_in:T { \tag_mc_end: }
+    % without the ^ token the previous structure is closed first
+    \IfBooleanTF {#1}
+      { }
+      { \tag_struct_end: }
+    \tag_struct_begin:n { tag=P }
+    \tag_mc_begin:n { tag=P }
+  }
+
+% close a mc and one structure
+\NewDocumentCommand \TagPend { }
+  {
+    \tag_mc_if_in:T { \tag_mc_end: }
+    \tag_struct_end:
+  }
+
+
+ \ExplSyntaxOn
+% shows the property list of the current structure
+\newcommand \showcurrentstruct
+  { \prop_show:c { g__tag_struct_\g__tag_struct_stack_current_tl _prop } }
+
+
+\ExplSyntaxOff