In [2]:
from bs4 import BeautifulSoup
import sys
from urllib.parse import urlparse
import argparse

In [18]:
# Inside a Jupyter kernel, sys.argv holds the kernel launcher's own arguments,
# so substitute a sample command line to make the cell runnable interactively.
if 'ipykernel' in sys.modules:
    sys.argv = [
        sys.argv[0],
        "ola.html",
        "--element", "div",
        "--id", "bookchapter",
    ]

parser = argparse.ArgumentParser(description="Process a file and extract information based on optional HTML attributes.")

# Positional argument (required)
parser.add_argument("file_name", type=str, help="The path to the input file.")

# Optional arguments
parser.add_argument("--element", type=str, help="Specify the HTML element to search for (e.g., div, p, span).")
parser.add_argument("--id", type=str, help="Specify the HTML id attribute to search for.")
parser.add_argument("--class", type=str, dest="class_", help="Specify the HTML class attribute to search for. (Note: 'class' is a Python keyword, so we use 'dest' to avoid conflict.)")

args = parser.parse_args()

pathOrUrl = args.file_name
parsed_url = urlparse(pathOrUrl)

# Decide by URL scheme rather than by string prefix, so a local file whose
# name merely starts with "http" is still read from disk.
if parsed_url.scheme in ('http', 'https'):
    # Name the output after the last path segment. rstrip() also copes with an
    # empty path (e.g. "https://example.com"), which previously raised
    # IndexError; in that case fall back to the host name.
    path = parsed_url.path.rstrip('/')
    output_file_stem = path.split('/')[-1] or parsed_url.hostname or 'output'

    # Fetch the HTML content. requests is imported lazily because it is only
    # needed for URL input.
    import requests
    response = requests.get(pathOrUrl, timeout=30)
    # Fail loudly on 4xx/5xx instead of processing an error page.
    response.raise_for_status()
    html_content = response.text
else:
    from pathlib import Path
    path = Path(pathOrUrl)
    output_file_stem = path.stem
    print(sys.version)
    print(f'{path.suffix=}')
    if path.suffix.lower() == '.epub':
        # ebooklib reads the archive itself from the path, so no file handle
        # is opened here.
        from ebooklib import epub
        import ebooklib
        book = epub.read_epub(pathOrUrl)
        text = [
            BeautifulSoup(item.get_content(), 'html.parser').get_text()
            for item in book.get_items()
            if item.get_type() == ebooklib.ITEM_DOCUMENT
        ]
        # NOTE(review): the joined result is tag-free text, so element/id/class
        # filters will presumably not match for EPUB input - confirm intent.
        html_content = '\n'.join(text)
    else:
        # Explicit encoding so the result does not depend on the locale.
        html_content = path.read_text(encoding='utf-8')

# Show only a preview; the full document would flood the cell output.
html_content[:1000]

3.12.3 (main, Feb  4 2025, 14:48:35) [GCC 13.3.0]
path.suffix='.html'


'<html><head>\n        <link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">\n    <link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">\n    <link rel="icon" type="image/png" sizes="16x16" href="/favicon-16x16.png">\n    <link rel="manifest" href="/site.webmanifest">\n    <link rel="mask-icon" href="/safari-pinned-tab.svg" color="#5bbad5">\n    <meta name="msapplication-TileColor" content="#2b5797">\n    <meta name="theme-color" content="#ffffff">\n    <meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n    <title>Overlap-Add (OLA)\nSTFT Processing | Spectral Audio Signal Processing</title>\n    <meta name="description" content="">\n\n    <link href="https://fonts.googleapis.com/css?family=Roboto:400,300italic,700,400italic,500,500italic,700italic,900|Open+Sans:300italic,400italic,600italic,700italic,800italic,700,300,600,800,400|Open+Sans+Condensed:700|Roboto+Condensed:400,700" rel="stylesheet" type="text/css" media="all" onlo

In [19]:
# Parse the HTML content with Beautiful Soup
soup = BeautifulSoup(html_content, 'html.parser')

print(f"{len(sys.argv)=}")

# Pass only the attribute filters the user actually supplied. Beautiful Soup
# treats an attribute filter of None as "the attribute must be absent", so
# forwarding an unset --class/--id would wrongly exclude elements that happen
# to carry that attribute.
attr_filters = {}
if args.class_ is not None:
    attr_filters['class_'] = args.class_
if args.id is not None:
    attr_filters['id'] = args.id

if args.element is None and not attr_filters:
    # No selection criteria at all: use the whole document.
    article = soup
else:
    # args.element may be None here, in which case any tag name matches and
    # only the id/class filters apply.
    article = soup.find(args.element, **attr_filters)

if article is None:
    # Nothing matched: dump the body and the arguments to help diagnose the
    # selector, then stop with a non-zero status.
    print("no article element")
    body = soup.body
    print(body)
    print(sys.argv[1:])
    sys.exit(1)

# Remove <pre>, <math> and <code> elements so their contents are left out of
# the extracted text.
for unwanted_tag in article.find_all(['pre', 'math', 'code']):
    unwanted_tag.decompose()

# Extract text and remove leading/trailing whitespace
text_content = article.get_text(separator='\n', strip=True)
print(text_content)

len(sys.argv)=6
Overlap-Add (OLA)
STFT Processing
This chapter discusses use of the
Short-Time Fourier Transform
(
STFT
) to implement
linear
filtering
in the
frequency domain
.
Due to the speed of
FFT
convolution
, the STFT provides the most
efficient single-CPU implementation engine for most
FIR filters
encountered in audio
signal
processing.
Recall from §
7.1
the STFT:
(9.1)
where
We noted that if the window
has the
constant overlap-add
property
at hop-size
,
(9.2)
then the sum of the successive
DTFTs
over time equals the DTFT of the
whole signal
:
(9.3)
Consequently, the inverse-STFT is simply the inverse-DTFT of this sum:
We may now introduce
spectral modifications
by multiplying each
spectral frame
by some filter
frequency response
to get
(9.4)
Note that
can be different for each frame, giving a certain
class of
time-varying filters
.  The filtered output signal
spectrum
is then
(9.5)
so that
(9.6)
where
(9.7)
This chapter discusses practical implementation of the above
relations

In [20]:
from nltk.data import load
from TTS.api import TTS
# Load the TTS model on the GPU and keep the synthesizer's output sample rate,
# which the ffmpeg command later passes as its '-ar' value.
VOICE_MODEL_NAME = "tts_models/en/vctk/vits"
tts = TTS(model_name=VOICE_MODEL_NAME, gpu=True)
sample_rate = tts.synthesizer.output_sample_rate

  from .autonotebook import tqdm as notebook_tqdm


In [21]:
# Load the tokenizer resource through nltk.data.load and split the extracted
# text into the sentences that are synthesized one at a time.
PUNKT_RESOURCE = "tokenizers/punkt/english.pickle"
tokenizer = load(PUNKT_RESOURCE)
sentences = tokenizer.tokenize(text_content)
print(sentences)

['Overlap-Add (OLA)\nSTFT Processing\nThis chapter discusses use of the\nShort-Time Fourier Transform\n(\nSTFT\n) to implement\nlinear\nfiltering\nin the\nfrequency domain\n.', 'Due to the speed of\nFFT\nconvolution\n, the STFT provides the most\nefficient single-CPU implementation engine for most\nFIR filters\nencountered in audio\nsignal\nprocessing.', 'Recall from §\n7.1\nthe STFT:\n(9.1)\nwhere\nWe noted that if the window\nhas the\nconstant overlap-add\nproperty\nat hop-size\n,\n(9.2)\nthen the sum of the successive\nDTFTs\nover time equals the DTFT of the\nwhole signal\n:\n(9.3)\nConsequently, the inverse-STFT is simply the inverse-DTFT of this sum:\nWe may now introduce\nspectral modifications\nby multiplying each\nspectral frame\nby some filter\nfrequency response\nto get\n(9.4)\nNote that\ncan be different for each frame, giving a certain\nclass of\ntime-varying filters\n.', 'The filtered output signal\nspectrum\nis then\n(9.5)\nso that\n(9.6)\nwhere\n(9.7)\nThis chapter discu

In [22]:
import subprocess
from TTS.tts.utils.synthesis import synthesis

# Raw mono 32-bit float little-endian PCM is streamed to ffmpeg on stdin and
# written to "<output_file_stem>.wav".
command = [
            'ffmpeg', '-y',  # overwrite an existing output file
            '-f', 'f32le',  # input is headerless 32-bit float little-endian PCM
            '-ar', str(sample_rate),  # sample rate
            '-ac', '1',  # number of audio channels
            '-i', '-',  # The input comes from stdin
            '-acodec', 'copy',  # store the PCM stream as-is, without re-encoding
            output_file_stem + '.wav',
        ]
print(" ".join(command))

process = subprocess.Popen(command, stdin=subprocess.PIPE)

# Last synthesized waveform, kept only for the diagnostic print at the end.
# (Previously stored in a variable named `format`, shadowing the builtin.)
last_wav = None
try:
    for sentence in sentences:
        wav = synthesis(
                model=tts.synthesizer.tts_model,
                text=sentence,
                CONFIG=tts.synthesizer.tts_config,
                use_cuda=True,
                speaker_id=77, # p307
                use_griffin_lim=tts.synthesizer.vocoder_model is None,
                )["wav"]
        # ffmpeg was told to expect f32le, so force that exact sample layout.
        # copy=False avoids a copy when the array already matches.
        # (assumes `wav` is a NumPy array, as the original .tobytes() call implies)
        process.stdin.write(wav.astype('<f4', copy=False).tobytes())
        last_wav = wav
finally:
    # Always signal end-of-input and reap ffmpeg, even if synthesis failed,
    # so neither the pipe nor the child process is leaked.
    try:
        process.stdin.close()
    finally:
        return_code = process.wait()

# Surface an ffmpeg failure instead of silently leaving a bad output file.
if return_code != 0:
    raise subprocess.CalledProcessError(return_code, command)
print("------------\n", last_wav)

ffmpeg -y -f f32le -ar 22050 -ac 1 -i - -acodec copy ola.wav


ffmpeg version 6.1.1-3ubuntu5 Copyright (c) 2000-2023 the FFmpeg developers
  built with gcc 13 (Ubuntu 13.2.0-23ubuntu3)
  configuration: --prefix=/usr --extra-version=3ubuntu5 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --disable-omx --enable-gnutls --enable-libaom --enable-libass --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libglslang --enable-libgme --enable-libgsm --enable-libharfbuzz --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzimg --ena

------------
 [-0.00083777 -0.00110925 -0.00109444 ...  0.00221328  0.00147218
  0.00090384]


[out#0/wav @ 0x5584ba23dc00] video:0kB audio:169202kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.000066%
size=  169202kB time=00:32:44.40 bitrate= 705.6kbits/s speed=76.4x    


In [19]:
# Debug aid: show the argument vector currently visible to this process.
# NOTE(review): the execution count is out of order relative to the cells
# above, so this looks like a leftover inspection cell - consider removing it.
print(sys.argv)

['/home/a/.local/lib/python3.10/site-packages/ipykernel_launcher.py', '--f=/home/a/.local/share/jupyter/runtime/kernel-v2-17921kGKUpKVYOTJo.json']
