In [2]:
# Recursively deletes ~/.wdm before the main cell runs. This is presumably
# webdriver-manager's download cache (verify for your install), so the next
# ChromeDriverManager().install() call fetches a fresh driver.
rm -rf ~/.wdm #Clear the cache

In [4]:
# IT WORKS

# -------------------------------------------------------------------
# SCRIPT: Extracts and translates MathML equations
#
# DESCRIPTION:
# This script uses Selenium to extract visible text and MathML equations from a MathJax-rendered webpage.
# It replaces each math equation with a placeholder (e.g., [[MATH_0]]), converts the MathML to spoken math
# using the Speech Rule Engine (SRE) in Clearspeak mode, and outputs a final readable script with
# the math translated into natural language.
#
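# OUTPUT FILES:
# - final_script_with_translated_math.txt: Text from the webpage with spoken math inserted
# - multivariatechainrule.txt: The part of that text from the start marker to the end marker (name set in the script)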
#
# REQUIREMENTS:
# - Python packages:
#     pip install selenium webdriver-manager 
#     pip install gtts
# - Google Chrome browser
# - Node.js installed from https://nodejs.org/
# - Speech Rule Engine (SRE):
#     npm install -g speech-rule-engine
#     Confirm SRE is installed using: which sre
# - Adjust path to SRE in the code if it's not globally available
# -------------------------------------------------------------------

# Imports
import time
import re
import os
import subprocess
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

# Set up headless Chrome WebDriver
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")

# Page to extract text and math from
url = "file:///Users/tom/Documents/starmast/docs/studyguides/multivariatechainrule.html" # CHANGE THIS TO THE WEBPAGE OF THE GUIDE YOU WANT 

# Fixed grace period (seconds) after the page loads, before MathJax is queried
page_settle_seconds = 10

# Automatically installs and uses the correct ChromeDriver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# Everything that talks to the browser runs inside try/finally so the headless
# Chrome process is always shut down, even if loading or a script call fails.
try:
    # Loads the page
    print(f"Opening {url}")
    driver.get(url)
    time.sleep(page_settle_seconds)

    # Makes sure all math has rendered before continuing: the script returns
    # MathJax's startup promise, and WebDriver's Execute Script command waits
    # for a returned promise to settle before handing control back.
    driver.execute_script("return MathJax.startup.promise")

    # This replaces each rendered equation in the live DOM with a numbered unique
    # placeholder: [[MATH_0]], [[MATH_1]], etc. and returns the outerHTML of every
    # replaced element, in placeholder order, for later translation.
    mathml_list = driver.execute_script("""
  const mathElements = Array.from(document.querySelectorAll("mjx-container, script[type^='math/']"));
  const mathList = [];

  mathElements.forEach((el, index) => {
    const placeholderText = `[[MATH_${index}]]`;
    const placeholderNode = document.createTextNode(placeholderText);
    el.parentNode.replaceChild(placeholderNode, el);
    mathList.push(el.outerHTML);
  });

  return mathList;
""")

    # This extracts readable content from headings, paragraphs, list items, callouts, and metadata.
    # An element that sits inside another selected element is dropped, so its text
    # is only reported once (through the outer element's innerText).
    script_parts = driver.execute_script("""
  const tags = Array.from(document.querySelectorAll("p, li, h1, h2, h3, div.callout-title-container.flex-fill, div.title, div.quarto-title-meta-heading, div.quarto-title-meta-content, div.abstract, div.summary, div.quarto-category, span.author, div.author, div.abstract-title, div.categories"));

  const uniqueTags = tags.filter((el, i, arr) => {
    return !arr.some(otherEl => otherEl !== el && otherEl.contains(el));
  });

  const output = [];
  uniqueTags.forEach(el => {
    const text = el.innerText.trim();
    if (text) output.push(text);
  });

  return output;
""")
finally:
    # Closes the browser
    driver.quit()

# Location of the Speech Rule Engine (SRE) command-line script.
# Note: May need to adjust this based on system and where `sre` is installed
sre_path = "/Users/tom/node_modules/speech-rule-engine/bin/sre"  # Replace with output from `which sre`

# Adds the directory that holds `sre` to PATH. The directory is derived from
# sre_path so the two can never drift apart, and it is appended only when it is
# not already listed, so re-running this cell does not keep growing PATH.
sre_bin_dir = os.path.dirname(sre_path)
current_path = os.environ.get("PATH", "")
if sre_bin_dir not in current_path.split(os.pathsep):
    os.environ["PATH"] = current_path + os.pathsep + sre_bin_dir if current_path else sre_bin_dir

# This converts each MathML entry into spoken math using SRE (Clearspeak).
# spoken_math[k] is the text for placeholder [[MATH_k]]. When a conversion fails,
# a bracketed note is stored instead so the list stays aligned with mathml_list,
# and a warning is printed so the failure is not hidden inside the final script.
spoken_math = []
for index, mathml in enumerate(mathml_list):
    try:
        # Takes the text between '<mjx-assistive-mml' and its closing tag
        assistive = mathml.split('<mjx-assistive-mml')[1].split('</mjx-assistive-mml>')[0]

        # The split leaves the rest of the opening tag (its attributes and '>')
        # in front of the MathML; start at '<math' so SRE receives well-formed XML.
        math_start = assistive.find('<math')
        mathml_clean = assistive[math_start:] if math_start != -1 else assistive

        # Uses subprocess to run SRE with Clearspeak; the MathML is passed on stdin
        result = subprocess.run(
            [sre_path, "--speech", "--domain", "clearspeak"],
            input=mathml_clean.encode("utf-8"),
            capture_output=True
        )

        # Get the spoken output
        spoken = result.stdout.decode("utf-8").strip()
        if spoken:
            spoken_math.append(spoken)
        else:
            spoken_math.append("(Empty output)")
            sre_stderr = result.stderr.decode("utf-8", errors="replace").strip()
            print(f"Warning: SRE produced no speech for [[MATH_{index}]] (exit code {result.returncode}): {sre_stderr}")
    except Exception as e:
        # Record the error in place of the equation and report which one failed
        spoken_math.append(f"(Error: {e})")
        print(f"Warning: could not translate [[MATH_{index}]]: {e}")

# Replace placeholders with translated spoken math.
# A lookup table plus one regex pass per part replaces the nested loop that ran
# str.replace for every equation on every part. Each placeholder is substituted
# exactly once, and text that has just been inserted is never re-scanned.
placeholder_to_speech = {
    f"[[MATH_{i}]]": spoken for i, spoken in enumerate(spoken_math)
}
placeholder_pattern = re.compile(r"\[\[MATH_\d+\]\]")

# A placeholder with no entry in the table is left in the text unchanged.
final_script = [
    placeholder_pattern.sub(
        lambda found: placeholder_to_speech.get(found.group(0), found.group(0)),
        part,
    )
    for part in script_parts
]

# Write the assembled script to disk as UTF-8, with a blank line between
# consecutive extracted parts.
with open("final_script_with_translated_math.txt", "w", encoding="utf-8") as script_file:
    part_separator = "\n\n"
    script_file.write(part_separator.join(final_script))

print("Done!")

# Name of the text file that receives the extracted guide section.
# Defined once so the write, the status message and the read-back always agree.
guide_txt_path = "multivariatechainrule.txt" # CHANGE THIS ACCORDING TO THE GUIDE NAME

# Open the file to read the script
with open("final_script_with_translated_math.txt", "r", encoding="utf-8") as f:
    script = f.read()

# Define the start and end markers
start_marker = "Multivariate chain rule" # CHANGE THIS TO THE TITLE OF THE GUIDE
end_marker = "Version history" # CHANGE THIS TO THE QUESTIONS AT THE END OF FURTHER READING

# Non-greedy match from the first start marker to the nearest following end marker;
# DOTALL lets the match run across line breaks.
pattern = re.compile(rf"{re.escape(start_marker)}(.*?){re.escape(end_marker)}", re.DOTALL)

# Search for the content between the markers
match = pattern.search(script)

# Check if a match was found
if match:
    # group(0) is the whole match, so the saved text begins with the start
    # marker and ends with the end marker.
    extracted_content = match.group(0)

    # Export to a new txt file
    with open(guide_txt_path, "w", encoding="utf-8") as output_file:
        output_file.write(extracted_content)

    print(f"Content successfully extracted and saved to '{guide_txt_path}'.")

    # Print the saved script to check for errors. This happens only after a
    # successful extraction, so a missing or stale file from an earlier run is
    # never read.
    with open(guide_txt_path, "r", encoding="utf-8") as f:
        script = f.read()

    print(script)
else:
    print("No matching content found.")


Opening file:///Users/tom/Documents/starmast/docs/studyguides/multivariatechainrule.html
Done!
Content successfully extracted and saved to 'multivariatechainrule.txt'.
Multivariate chain rule

CALCULUS

AUTHOR

Donald Campbell

SUMMARY
The multivariate chain rule is used in calculus to differentiate a function when its variables depend on other variables. It shows how the change in one variable affects the whole function by considering how the intermediate variables change. It is useful in modelling systems where one quantity depends on several factors.

Before reading this guide, it is recommended that you read Guide: Introduction to partial differentiation and Guide: The chain rule.

What is the multivariate chain rule?

As seen in Guide: The chain rule, the chain rule tells you how to differentiate a function y equals y of x with respect to another variable t when x depends on t. normal d y over normal d t equals normal d y over normal d x normal d x over normal d t

This rule can b

In [6]:
# INSTALLS GOOGLE TEXT TO SPEECH
!pip install gtts

from gtts import gTTS
print("Processing...") # Just to ensure its running because generating an MP3 file takes time
# Read the text file
with open('multivariatechainrule.txt', 'r') as file:
    text = file.read()

# Convert the text to speech
tts = gTTS(text)

# Save the speech as an MP3 file
tts.save('multivariatechainrule.mp3')

print("MP3 file has been made!")

zsh:1: command not found: pip
Processing...
MP3 file has been made!
