In [None]:
import sys
import docx
from docx2python import docx2python as dx2py


def ns_tag_name(node, name):
    if node.nsmap and node.prefix:
        return "{{{:s}}}{:s}".format(node.nsmap[node.prefix], name)
    return name


def descendants(node, desc_strs):
    if node is None:
        return []
    if not desc_strs:
        return [node]
    ret = {}
    for child_str in desc_strs[0]:
        for child in node.iterchildren(ns_tag_name(node, child_str)):
            descs = descendants(child, desc_strs[1:])
            if not descs:
                continue
            cd = ret.setdefault(child_str, [])
            if isinstance(descs, list):
                cd.extend(descs)
            else:
                cd.append(descs)
    return ret


def simplified_descendants(desc_dict):
    ret = []
    for vs in desc_dict.values():
        for v in vs:
            if isinstance(v, dict):
                ret.extend(simplified_descendants(v))
            else:
                ret.append(v)
    return ret


def process_list_data(attrs, dx2py_elem):
    #print(simplified_descendants(attrs))
    desc = simplified_descendants(attrs)[0]
    level = int(desc.attrib[ns_tag_name(desc, "val")])
    elem = [i for i in dx2py_elem[0].split("\t") if i][0]#.rstrip(")")
    return "    " * level + elem + " "


def main(*argv):
    fname = "/Users/senthil/Desktop/Senthil/myTesting/Project_Creation/list_testing.docx"
    docd = docx.Document(fname)
    docdpy = dx2py(fname)
    dr = docdpy.docx_reader
    #print(dr.files)  # !!! Check word/numbering.xml !!!
    docdpy_runs = docdpy.document_runs[0][0][0]
    if len(docd.paragraphs) != len(docdpy_runs):
        print("Lengths don't match. Abort")
        return -1
    subnode_tags = (("pPr",), ("numPr",), ("ilvl",))  # (("pPr",), ("numPr",), ("ilvl", "numId"))  # numId is for matching elements from word/numbering.xml
    for idx, (par, l) in enumerate(zip(docd.paragraphs, docdpy_runs)):
        #print(par.text, l)
        numbered_attrs = descendants(par._element, subnode_tags)
        #print(numbered_attrs)
        if numbered_attrs:
            print(process_list_data(numbered_attrs, l) + par.text)
        else:
            print(par.text)


if __name__ == "__main__":
    print("Python {:s} {:03d}bit on {:s}\n".format(" ".join(elem.strip() for elem in sys.version.split("\n")),
                                                   64 if sys.maxsize > 0x100000000 else 32, sys.platform))
    rc = main(*sys.argv[1:])
    print("\nDone.")
    sys.exit(rc)


In [None]:
import docx.package
import docx.parts.document
import docx.parts.numbering

package = docx.package.Package.open("/Users/senthil/Desktop/Senthil/myTesting/Project_Creation/list_testing.docx")

main_document_part = package.main_document_part
assert isinstance(main_document_part, docx.parts.document.DocumentPart)

numbering_part = main_document_part.numbering_part
assert isinstance(numbering_part, docx.parts.numbering.NumberingPart)

ct_numbering = numbering_part._element
print(ct_numbering)  # CT_Numbering
for num in ct_numbering.num_lst:
    print(num)  # CT_Num
    print(num.abstractNumId)  # CT_DecimalNumber


In [19]:
import docx
from lxml import etree

doc_path = "/Users/senthil/Desktop/Senthil/myTesting/Project_Creation/list_testing.docx"
document = docx.Document(doc_path)

numbering_part = document.part.numbering_part
ct_numbering = numbering_part._element

document_tree = etree.fromstring(document._element.xml)

nsmap = {
    "w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main",
}

def get_numbering_format(num_id, level):
    abstract_num_id_elements = document_tree.xpath(f".//w:num[@w:numId='{num_id}']/w:abstractNumId/@w:val", namespaces=nsmap)
    if abstract_num_id_elements:
        abstract_num_id = abstract_num_id_elements[0]
        abstract_num_elements = ct_numbering.xpath(f".//w:abstractNum[@w:abstractNumId='{abstract_num_id}']", namespaces=nsmap)
        if abstract_num_elements:
            abstract_num = abstract_num_elements[0]
            numbering_format_elements = abstract_num.xpath(f".//w:lvl[@w:ilvl='{level}']/w:numFmt/@w:val", namespaces=nsmap)
            if numbering_format_elements:
                return numbering_format_elements[0]

    return None

def get_sequence_number(paragraph):
    element_tree = etree.fromstring(paragraph._element.xml)
    numbering_properties = element_tree.xpath(".//w:numPr", namespaces=nsmap)
    if numbering_properties:
        num_id_element = numbering_properties[0].find(".//w:numId", namespaces=nsmap)
        ilvl_element = numbering_properties[0].find(".//w:ilvl", namespaces=nsmap)
        if num_id_element is not None and ilvl_element is not None:
            num_id = num_id_element.attrib.get("{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val")
            ilvl = ilvl_element.attrib.get("{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val")
            numbering_format = get_numbering_format(num_id, ilvl)
            return numbering_format if numbering_format is not None else "None"

    return "None"

for paragraph in document.paragraphs:
    paragraph_text = paragraph.text
    sequence_number = get_sequence_number(paragraph)

    print(f"Paragraph: {paragraph_text}")
    print(f"Numbering Format: {sequence_number}")
    print()


IndexError: list index out of range