In [None]:
from os import path
import re
import os
import pandas as pd
import shutil
from lib.utils import BASE_DIR,DATA_DIR,OUTPUT_DIR,get_full_outcomes
from collections.abc import Callable


def load_markdown(filename:str):
    """Return the complete text of the Markdown file at ``filename``.

    The file is decoded explicitly as UTF-8 instead of relying on the
    platform's default encoding, so the same document yields the same text
    on every machine (the default differs between operating systems and
    locales and can mis-decode or reject non-ASCII characters).

    Args:
        filename: Path of the file to read.

    Returns:
        The file content as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(filename, encoding="utf-8") as f:
        return f.read()

def add_title_level(markdown_text:str,level_to_add:int) -> str:
    """Demote every ATX heading in ``markdown_text`` by ``level_to_add`` levels.

    A heading is a line that starts with one or more ``#`` followed by a
    space; ``level_to_add`` extra ``#`` characters are prepended to it.
    Lines inside fenced code blocks (delimited by three or more backticks or
    tildes) are left untouched, so comments such as ``# install`` in a shell
    or Python example are not turned into deeper "headings".

    Args:
        markdown_text: Markdown source to transform.
        level_to_add: Number of heading levels to add. Zero or a negative
            value leaves the text unchanged.

    Returns:
        The transformed Markdown text.
    """
    prefix = "#" * level_to_add
    if not prefix:
        # "#" * n is empty for n <= 0, so there is nothing to prepend.
        return markdown_text

    shifted_lines = []
    # Marker run (e.g. "```") of the fence currently open, "" when outside.
    open_fence = ""
    # Split on "\n" only: this mirrors where a multiline "^" anchor matches.
    for line in markdown_text.split("\n"):
        fence = re.match(r" {0,3}(`{3,}|~{3,})(.*)", line)
        marker, rest = fence.groups() if fence else ("", "")
        if open_fence:
            # A fence is closed by a run of the same character that is at
            # least as long as the opening run and followed only by blanks.
            closes = (
                marker[:1] == open_fence[0]
                and len(marker) >= len(open_fence)
                and not rest.strip()
            )
            if closes:
                open_fence = ""
        elif marker and not (marker[0] == "`" and "`" in rest):
            # A backtick run followed by more backticks on the same line is
            # inline code, not the start of a fenced block.
            open_fence = marker
        else:
            line = re.sub(r"^(#+) ", prefix + r"\1 ", line)
        shifted_lines.append(line)
    return "\n".join(shifted_lines)

def replace_image_path(markdown_text:str,replace_func:Callable[[str],str])->str:
    """Rewrite the target of every Markdown image in ``markdown_text``.

    Each ``![alt](target)`` occurrence (optional spaces are accepted between
    ``]`` and ``(``) is re-emitted as ``![alt](new_target)`` where
    ``new_target`` is ``replace_func(target)``. Any spaces between the
    brackets and the parentheses are dropped in the output.

    Args:
        markdown_text: Markdown source to transform.
        replace_func: Callable that receives the original image target and
            returns the replacement.

    Returns:
        The Markdown text with every image target replaced.
    """
    image_pattern = re.compile(r"!\[(.*?)\] *\((.+?)\)")

    def rebuild_image(found):
        # Group 1 is the alt text, group 2 the original target.
        alt_text, old_target = found.groups()
        return f"![{alt_text}]({replace_func(old_target)})"

    return image_pattern.sub(rebuild_image, markdown_text)


# Make sure the output directory exists before anything is copied into it.
os.makedirs(OUTPUT_DIR,exist_ok=True)

# Mirror every sub-directory of the documents folder into the output
# directory. Only real directories are selected (copytree cannot copy other
# kinds of entries), and dirs_exist_ok lets the copy overwrite a previous run.
data_path = path.join(DATA_DIR,"documents")
dirs = [name for name in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, name))]
# The loop variable is deliberately not called `dir`, which would shadow the
# builtin for the rest of the session.
for dir_name in dirs:
    shutil.copytree(os.path.join(data_path, dir_name),os.path.join(OUTPUT_DIR,dir_name),dirs_exist_ok=True)


# Load the document index; each row provides `filename`, `title` and
# `section` for one Markdown document.
index_csv = path.join(DATA_DIR,"documents_index.csv")
document_index = pd.read_csv(index_csv)
document_index

# Group the documents by section: every document is demoted by one heading
# level, gets its title as a level-1 heading, and is appended to whatever
# text has already been collected for its section.
output_dict = {}
for item in document_index.itertuples():
    filename = path.join(DATA_DIR,"documents",item.filename+".md")
    text = add_title_level(load_markdown(filename),1)
    title = "\n\n# "+item.title +"\n\n"
    prev = output_dict.get(item.section,"")
    output_dict[item.section] = "".join((prev,"\n\n",title,text))

# Assemble the final document. A section key is a "/"-separated path; its
# depth gives the heading level of the section title (the last path
# component), and the section's content is demoted by that same depth.
bundle_parts = []
for key,item in output_dict.items():
    sections = key.split("/")
    depth = len(sections)
    item = add_title_level(item,depth)
    title = "#" * depth +" "+sections[-1]+"\n\n"
    bundle_parts.append(title + item +"\n\n")
bundle = "".join(bundle_parts)

# Prefix every image target with the "output" directory.
bundle = replace_image_path(bundle,lambda p: os.path.join("output",p))

# Write the same bundle under the three file names the script produces.
# The encoding is fixed to UTF-8 so that non-ASCII characters are written
# identically on every platform instead of depending on the locale default
# (which can raise UnicodeEncodeError or emit a legacy code page).
for output_name in ("bundle_documents.md","documents_for_tex.md","documents_for_docx.md"):
    with open(path.join(OUTPUT_DIR,output_name),"w",encoding="utf-8") as f:
        f.write(bundle)


output_dict