In [1]:
import requests

In [2]:
def save_tex(link: str, path: str, timeout: float = 30.0) -> bool:
    """
    Downloads a .tex file and stores it under its original file name.

    Args:
        link (str): URL of the file to download; must end with '.tex'.
        path (str): Directory to save the file in. It is created if missing.
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        bool: True if the server answered with HTTP 200 and the file was
            written, False for any other status code (nothing is written
            in that case).

    Raises:
        AssertionError: If link does not end with '.tex'.
    """
    import os

    assert link.endswith('.tex'), f"expected a link to a .tex file, got {link!r}"

    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(link, timeout=timeout)
    if response.status_code != 200:
        return False

    # Create the target directory on demand so open() does not fail when the
    # notebook is run on a fresh checkout.
    if path:
        os.makedirs(path, exist_ok=True)

    # The last URL component is reused as the local file name.
    file_path = os.path.join(path, link.split('/')[-1])
    with open(file_path, 'wb') as f:
        f.write(response.content)
    return True

In [4]:
# Fetch the IMO notes one year at a time, beginning with 2002, and stop at
# the first year for which save_tex reports a failure (non-200 response).
# Note that `year` ends up one past the year that failed.
link_format = "https://web.evanchen.cc/exams/IMO-{year}-notes.tex"
path = './evan_chen'
year, success = 2002, True
while success:
    success = save_tex(link_format.format(year=year), path)
    year = year + 1
    

# Splitting

In [7]:
import re
from pylatexenc.latexwalker import LatexWalker, LatexEnvironmentNode

def split_latex_by_regex(latex_content, regex_pattern):
    """
    Splits LaTeX content at every match of a regular expression.

    The text between the end of one match and the start of the next match
    (or the end of the document, for the last match) becomes the content
    filed under the title captured by the earlier match. Text that precedes
    the first match is discarded.

    Args:
        latex_content (str): The LaTeX document as a string.
        regex_pattern (str): Pattern marking where each piece starts. Its
            first capturing group is used as the title of the piece.

    Returns:
        dict: Maps each title to the whitespace-stripped LaTeX content that
            follows it, in document order. If a title occurs more than once,
            the last occurrence wins. Empty if the pattern never matches.

    Raises:
        IndexError: If regex_pattern matches but has no capturing group.
    """
    matches = list(re.finditer(regex_pattern, latex_content))
    # Each piece ends where the next match starts; the last one runs to the
    # end of the document.
    end_positions = [m.start() for m in matches[1:]] + [len(latex_content)]

    splits = {}
    for match, end_idx in zip(matches, end_positions):
        title = match.group(1)
        if title is None:
            # The capturing group did not take part in this match, so there
            # is no title to file the following text under.
            continue
        # Strip every piece, including the last one, so that all values are
        # treated consistently.
        splits[title] = latex_content[match.end():end_idx].strip()

    return splits

def save_files(files, output_dir="unnamed problems"):
    """
    Saves each LaTeX section to a separate .tex file.

    The file name is derived from the section title. If the title contains a
    "<digits>/<digit>" reference such as "2002/1", the file is named after it
    ("2002_P1.tex"). Otherwise the title itself is used, with spaces and path
    separators replaced by underscores.

    Args:
        files (dict): Maps section titles to their LaTeX content.
        output_dir (str): Directory to save section files in; created if
            it does not exist.
    """
    import os
    os.makedirs(output_dir, exist_ok=True)

    # Compiled once; only the first occurrence in a title is ever used.
    reference = re.compile(r"\d+/\d")
    # Characters that would turn a title into a nested path on this platform.
    separators = [sep for sep in ('/', os.sep, os.altsep) if sep]

    for file, content in files.items():
        found = reference.search(file)
        if found:
            stem = found.group(0).replace('/', '_P')
        else:
            stem = file.replace(' ', '_')
            # A separator inside the title would point into a directory that
            # does not exist and make open() fail.
            for sep in separators:
                stem = stem.replace(sep, '_')

        filename = os.path.join(output_dir, stem + '.tex')
        with open(filename, "w", encoding="utf-8") as f:
            f.write(content)
        print(f"Saved section '{file}' to {filename}")

if __name__ == "__main__":
    # The patterns and the file extension are the same for every year, so
    # they are set up once before the loop.
    section_pattern = r"\\section\{(.+?)\}"
    subsection_pattern = r"\\subsection\{(.+?)\}"
    enumerate_item_pattern = r"\\item %% (.+)"
    filepath_tail = ".tex"

    # Process the notes for each year from 2002 through 2024.
    for i in range(2002, 2025):
        # Path of the notes file without its extension; the same string is
        # also used as the output directory for the pieces of that year.
        filepath_header = f"./evan_chen/IMO-{i}-notes"
        with open(filepath_header + filepath_tail, "r", encoding="utf-8") as file:
            latex_content = file.read()

        # Problem statements: the "\item %% <title>" entries found inside
        # the section titled "Problems".
        sections = split_latex_by_regex(latex_content, section_pattern)
        problem_section = sections['Problems']
        problems = split_latex_by_regex(problem_section, enumerate_item_pattern)

        # Solutions: one piece per \subsection of the whole document.
        solutions = split_latex_by_regex(latex_content, subsection_pattern)

        # Write the problems first, then the solutions.
        save_files(problems, filepath_header)
        save_files(solutions, filepath_header)


Saved section 'Problem 1' to ./evan_chen/IMO-2002-notes\Problem_1.tex
Saved section 'Problem 2' to ./evan_chen/IMO-2002-notes\Problem_2.tex
Saved section 'Problem 3' to ./evan_chen/IMO-2002-notes\Problem_3.tex
Saved section 'Problem 4' to ./evan_chen/IMO-2002-notes\Problem_4.tex
Saved section 'Problem 5' to ./evan_chen/IMO-2002-notes\Problem_5.tex
Saved section 'Problem 6' to ./evan_chen/IMO-2002-notes\Problem_6.tex
Saved section 'IMO 2002/1, proposed by Federico Ardila (COL)' to ./evan_chen/IMO-2002-notes\2002_P1.tex
Saved section 'IMO 2002/2, proposed by Hojoo Lee (KOR)' to ./evan_chen/IMO-2002-notes\2002_P2.tex
Saved section 'IMO 2002/3, proposed by Laurentiu Panaitopol (ROU)' to ./evan_chen/IMO-2002-notes\2002_P3.tex
Saved section 'IMO 2002/4, proposed by Mihai Manea (ROU)' to ./evan_chen/IMO-2002-notes\2002_P4.tex
Saved section 'IMO 2002/5, proposed by Belur Jana Venkatachala (IND)' to ./evan_chen/IMO-2002-notes\2002_P5.tex
Saved section 'IMO 2002/6, proposed by Vyacheslav Yasinsk