In [126]:
import os
from os.path import expanduser, join, isdir, basename
from pprint import pprint
import re
import sys

In [131]:
def read_all_text(filename, encoding="utf-8"):
    """Return the entire contents of a text file as one string.

    Args:
        filename: Path of the file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8
            so the result does not depend on the platform's locale setting.

    Returns:
        The file contents as a single ``str``.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file's bytes are not valid in ``encoding``.
    """
    with open(filename, encoding=encoding) as fp:
        return fp.read()
    
def read_all_lines(filename, encoding="utf-8"):
    """Return the lines of a text file as a list of strings.

    Each line keeps its trailing newline, exactly as ``readlines`` yields it.

    Args:
        filename: Path of the file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8
            so the result does not depend on the platform's locale setting.

    Returns:
        A list with one ``str`` per line of the file.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file's bytes are not valid in ``encoding``.
    """
    with open(filename, encoding=encoding) as fp:
        return fp.readlines()
    
def print_with_indices(an_iterable):
    """Pretty-print every item of ``an_iterable`` prefixed by its 0-based index.

    The index is right-aligned in a 4-character column. Each line goes
    through ``pprint``, so it is shown as a quoted string repr (a trailing
    newline in an item appears as ``\\n``). The width is effectively
    unlimited so that long items are never wrapped.
    """
    for position, entry in enumerate(an_iterable):
        numbered = f"{position:>4} {entry}"
        pprint(numbered, width=sys.maxsize)

# Compose "read the file" with "print every item with its index".
# NOTE(review): read_all_text returns a single str, so print_all_text
# enumerates it character by character — confirm that is intended.
print_all_text = read_all_text ..> print_with_indices
print_all_lines = read_all_lines ..> print_with_indices

# One or more consecutive newline characters.
NEWLINE_RE = re.compile(r"\n+")
# A run of word characters, or a single hyphen. Written as a raw string so
# the regex escapes \w and \- are not parsed as (invalid) string escapes.
# Note: a later cell in this notebook rebinds WORD_RE to a different pattern.
WORD_RE = re.compile(r"(\w+|\-)")

# Replace each run of newlines in a string with a single space.
replace_newlines_with_space = NEWLINE_RE.sub$(" ", ?)

In [None]:
# Scratch directory, with "~" expanded to the user's home directory.
TEMP_DIR = "~/temp" |> expanduser

# Case-insensitive sort: orders items by their lower-cased form.
sort_ci = sorted$(key=item -> item.lower())

def listdircontents(directory):
    """Return the entry names found in ``directory``.

    A leading ``~`` in ``directory`` is expanded to the user's home
    directory first; the names come back exactly as ``os.listdir`` gives them.
    """
    return os.listdir(expanduser(directory))

def listfolders(directory):
    """Return the paths of the sub-directories of ``directory``, sorted case-insensitively.

    ``directory`` is expanded (leading ``~``) once, and the expanded path is
    used both for listing and for building each entry's path. Joining entries
    onto the unexpanded argument would make every ``isdir`` check fail for a
    ``~/...`` input and silently yield an empty result.

    Args:
        directory: Directory to inspect; may start with ``~``.

    Returns:
        A list of full paths (expanded directory joined with the entry name)
        for the entries that are directories.
    """
    root = directory |> expanduser
    return (root
            |> listdircontents
            |> map$(join$(root, ?))
            |> filter$(isdir)
            |> sort_ci)
    
# Show just the folder names (last path component) found under TEMP_DIR.
pprint([basename(folder) for folder in listfolders(TEMP_DIR)])

In [None]:
# Display the entry names inside the comparisons folder.
listdircontents("~/temp/comparisons")

In [None]:
# The two .tex files to compare, with "~" expanded to the home directory.
FILE1 = expanduser("~/temp/comparisons/00725bf.tex")
FILE2 = expanduser("~/temp/comparisons/7153e19.tex")

In [None]:
# Dump both files line by line, each line prefixed with its 0-based index.
print_all_lines(FILE1)
print_all_lines(FILE2)

In [None]:
def words_in_lines(lines, line_slice):
    """Return the whitespace-separated words of ``lines[line_slice]``, enumerated.

    The selected lines are joined with single spaces and split on whitespace,
    giving ``(index, word)`` pairs. Trailing newlines are removed with
    ``rstrip`` rather than by cutting the last character, so a final line
    that has no newline keeps its last character.

    Args:
        lines: List of text lines.
        line_slice: ``slice`` selecting the lines to extract words from.

    Returns:
        An ``enumerate`` over the words of the selected lines.
    """
    return (lines[line_slice]
            |> map$(line -> line.rstrip("\n"))
            |> " ".join
            |> (joined -> joined.split())
            |> enumerate)

# Line ranges (0-based slice indices) of the passage to compare in each file.
file1_lines = FILE1 |> read_all_lines
slice1 = slice(83, 97)
file1_to_compare = words_in_lines(file1_lines, slice1)

file2_lines = FILE2 |> read_all_lines
slice2 = slice(82, 96)
file2_to_compare = words_in_lines(file2_lines, slice2)

# Print the two word lists side by side; zip_longest pads the shorter one
# with empty (index, word) pairs.
# NOTE(review): the [0:] slices are no-ops as written; presumably they are
# start offsets to adjust when aligning the passages — confirm.
for (i1, w1), (i2, w2) in zip_longest(file1_to_compare[0:], file2_to_compare[0:], fillvalue=("", "")):
    print(f"{i1:>5} {w1:<30} {i2:>5} {w2:<30}")

In [276]:
FILEPATH = "~/Repos/music-and-letters/strauss-in-context-review.tex"
# A backslash command with a braced argument, e.g. \emph{...}; the argument
# is captured as group 1 / "content".
TEX_RE = re.compile(r"\\\w+{(?P<content>.+?)}")
# A hyphenated word, a plain word, or a number (group 1), optionally
# followed by a possessive 's (group 2).
WORD_RE = re.compile(r"\b(\w+-\w+\b|\b\w+|\b\d+\b)('s)?\b")

# Take the file from its 40th line (index 39) up to, but not including, the
# \end{document} line; join the lines, collapse newlines into spaces, and
# replace each TeX command by its braced argument.
text = (
    read_all_lines(expanduser(FILEPATH))[39:]
    |> takewhile$(line -> line.strip() != "\\end{document}")
    |> " ".join
    |> replace_newlines_with_space
    |> (joined -> TEX_RE.sub(r"\1", joined))
)

# Group 1 of every match, i.e. the word without any possessive suffix.
words = [found[1] for found in WORD_RE.finditer(text)]

print(f"There are {len(words)} words")


There are 912 words
  Until a little over three decades ago, Richard Strauss occupied a  position on the margins of German- and English-language music  scholarship, a status at odds with Strauss's towering stature in the  quarter century leading up to the First World War. The English-language  situation started to change as scholars began to re-examine historical  narratives that had emerged with the advent of the European post-war  avant-garde. The best known of these stories had equated an increase in the  dissonance level of compositions with their historical and aesthetic  legitimacy. Strauss, who had continued steadfastly to compose in his  tonal, post-Wagnerian idiom until his death in 1949 --- the year after  Pierre Boulez completed his Piano Sonata No.~2 --- was a casualty of  these narratives. The German-language situation was complicated to a greater  degree that the English by Strauss's activities in service of the National  Socialist regime; combined with his adherence to c

In [171]:
# Keep the values of range(10) up to, but not including, the first 6.
range(10) |> takewhile$(i -> i != 6) |> list

[0, 1, 2, 3, 4, 5]


In [244]:
# Sample text with two \emph{...} commands, and the pattern that matches a
# backslash command with a braced argument (argument captured as "content").
TEST_STRING = "this is a \\emph{dog} and this is a wolf and this is a \\emph{cat}."
PATTERN_STRING = r"\\\w+{(?P<content>.+?)}"

print(PATTERN_STRING)

# The captured argument of every command found in the sample.
print([hit["content"] for hit in re.finditer(PATTERN_STRING, TEST_STRING)])

# Replace each command by its argument (group 1 is the "content" group).
TEX_RE = re.compile(r"\\\w+{(?P<content>.+?)}")
print(TEX_RE.sub(hit -> hit[1], TEST_STRING))

\\\w+{(?P<content>.+?)}
['dog', 'cat']
this is a dog and this is a wolf and this is a cat.


In [203]:
# Drop the parentheses but keep what they enclose: every "(...)" match is
# replaced by its first capture group.
"(This) is (parenthetical) content" |> re.sub$(r"\((.+?)\)", found -> found[1], ?)

'This is parenthetical content'

In [242]:
# The subject is a raw string as well: "\e" is not a valid string escape, so
# the non-raw form only works by accident and triggers an invalid-escape
# warning on current Python versions. The string's value is unchanged.
re.findall(r"\\\w+{(?P<content>.+?)}", r"\emph{cat} and \emph{dog}")

['cat', 'dog']