# rst markup checking
https://www.regular-expressions.info/tutorial.html

https://regex101.com/

In [1]:
import re

In [2]:
def readRST(rst_file):
    with open(rst_file,"r", encoding="utf8") as f:
        rst_text = f.read().split("\n\n")
    return rst_text

rst_file1= r"D:\00_Python_Scripts\00_Projects\QGIS328-Documentation\docs\user_manual\introduction\getting_started.rst"
rst_eng = readRST(rst_file1)

rst_file2= r"D:\00_Python_Scripts\00_Projects\QGIS328-Documentation\docs\user_manual\introduction\getting_started_mm.rst"
rst_mm = readRST(rst_file2)


In [3]:
# Regular expressions to extract markups
header1_pattern = re.compile(r'(.*?)(?:=?\n)(^=+$)', re.MULTILINE)  #(r'^(=+)(.*?)\1$', re.MULTILINE)  # Matches section headers
header2_pattern = re.compile(r'(.*?)(?:=?\n)(^-+$)', re.MULTILINE)
header3_pattern = re.compile(r'(.*?)(?:=?\n)(^\.+$)', re.MULTILINE)
header3_pattern = re.compile(r'(.*?)(?:=?\n)(^\^+$)', re.MULTILINE)

toc1_pattern = re.compile(r'(^\.\.\s.+:)', re.MULTILINE)    #.. only:: html
toc2_pattern = re.compile(r'(^\s{3}\.\..+:$)', re.MULTILINE)#   .. contents:: 
toc3_pattern = re.compile(r'(^\s{6}:.+:$)', re.MULTILINE)   #     :local:
toc4_pattern = re.compile(r'(^\s{6}\.\.\s.+:)', re.MULTILINE)

#markup1 = re.compile(r'(`.+`)', re.MULTILINE) # `example`
markup1 = re.compile(r'(:\w[^:]*:)', re.MULTILINE) # :example:

bullet1_pattern = re.compile(r'^\*\s.+', re.MULTILINE) # *example
bullet2_pattern = re.compile(r'^\s{3}\*\s.+', re.MULTILINE) #   *example
bullet3_pattern = re.compile(r'^\s{6}\*\s.+', re.MULTILINE) #   *example

number1_pattern = re.compile(r'^#\s.+', re.MULTILINE) # #example
number2_pattern = re.compile(r'^\s{2}#\s.+', re.MULTILINE) #    #example
number3_pattern = re.compile(r'^\s{4}#\s.+', re.MULTILINE) #    #example
     
italic_pattern = re.compile(r'(\*(.*?)\*)')  # *italic*
bold_pattern = re.compile(r'(\*\*(.*?)\*\*)')  # **bold**
highlight_pattern = re.compile(r'(`.*`)')  # `highlight`

icon_pattern = re.compile(r'(\|.*\|)')


patterns = {
            "headers": [header1_pattern, header2_pattern, header3_pattern],
            "tocs":[toc1_pattern,toc2_pattern,toc3_pattern,toc4_pattern],
            "bullets":[bullet1_pattern,bullet2_pattern,bullet3_pattern,number1_pattern,number2_pattern,number3_pattern],
            "formats":[italic_pattern,bold_pattern],
            "highlights":[highlight_pattern],
            "markups":[markup1],
            "icons":[icon_pattern]
            }

In [4]:
# extract markups in paragraph is both files
from itertools import zip_longest

def matchRSTs(patterns,txt1,txt2,para_index=1):
    para_count=1
    for para1,para2  in zip(txt1,txt2):
        if para_index>para_count:
            para_count+=1
            continue
        print("para: ",para_count)
        for pattern,regexes in patterns.items():
            match1 = []
            match2 = []

            for regex in regexes:
                matches = regex.findall(para1)

                for match in matches:
                    if len(match)==2:
                        match1.append(match[0])
                    else:
                        match1.append(match)
                matches = regex.findall(para2)

                for match in matches:
                    if len(match)==2:
                        match2.append(match[0])
                    else:
                        match2.append(match)

            if len(match1)+len(match2):
                print("######"+pattern)
                for m1,m2 in zip_longest(match1,match2):
                    print(m1)
                    print(f"\x1b[31m{m2}\x1b[0m")
        print("-------------------------------------------------------------------------")

        para_count+=1

In [5]:
rst_eng = readRST(rst_file1)
rst_mm = readRST(rst_file2)
matchRSTs(patterns,rst_eng, rst_mm, para_index=56)

para:  56
######highlights
`Zoom In` tool on the :guilabel:`Navigation`
[31m`Navigation`[0m
`lakes`
[31m`Zoom In`[0m
`Properties`
[31m`lakes`[0m
None
[31m`Properties`[0m
######markups
:sup:
[31m:guilabel:[0m
:guilabel:
[31m:sup:[0m
:file:
[31m:file:[0m
:guilabel:
[31m:guilabel:[0m
######icons
|zoomIn|
[31m|zoomIn|[0m
-------------------------------------------------------------------------
para:  57
######highlights
`Symbology`
[31m`Symbology`[0m
######markups
:guilabel:
[31m:guilabel:[0m
######icons
|symbology|
[31m|symbology|[0m
-------------------------------------------------------------------------
para:  58
######tocs
      .. _figure_selectColor:
[31m      .. _figure_selectColor:[0m
-------------------------------------------------------------------------
para:  59
######tocs
      .. figure::
[31m      .. figure::[0m
######markups
:align:
[31m:align:[0m
-------------------------------------------------------------------------
para:  60
------------