In [None]:
import os
import yaml
import re
import pandas as pd
from lib.utils import BASE_DIR,DATA_DIR,OUTPUT_DIR,get_full_outcomes
from lib.dataframe_to_text import dataframe_to_text

# Directory under DATA_DIR that holds the documents; each target's
# meta.yaml is looked up in a sub-directory of this path.
data_path = os.path.join(DATA_DIR,"documents")
def get_target_competencies(target:str)->list[str]:
    """Return the competency identifiers declared for a document target.

    Reads ``<data_path>/<target>/meta.yaml`` and returns the value stored
    under its ``competencies`` key.

    Args:
        target: Name of the target's sub-directory under ``data_path``.

    Returns:
        The value of the ``competencies`` entry. An empty list is returned
        when the file does not exist, is empty, does not contain a mapping
        at its root, or has no (or a null/empty) ``competencies`` entry.
    """
    filename = os.path.join(data_path,target,"meta.yaml")
    if not os.path.isfile(filename):
        return []
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's locale encoding.
    with open(filename, 'r', encoding="utf-8") as yml:
        config = yaml.safe_load(yml)
    # safe_load yields None for an empty file and may yield a list/scalar
    # for unexpected content; only a mapping can carry the key we need.
    if not isinstance(config, dict):
        return []
    # `competencies:` with no value parses to None; normalise it to [] so
    # callers can always take len() of the result.
    return config.get("competencies") or []

def load_competencies_for_uid():
    """Load the four outcome tables and combine them into one UID-indexed frame.

    Each ``outcomes_lN.csv`` under ``DATA_DIR`` is read, then every level from
    the second onwards is merged with its (already merged) parent level on the
    parent's index column, so deeper rows also carry their ancestors' columns.
    The four resulting frames are stacked, missing cells are replaced by empty
    strings, and an ``order`` column records each row's position in the stack.

    Returns:
        The stacked frame with ``UID`` as its index.
    """
    csv_names = ["outcomes_l1.csv", "outcomes_l2.csv", "outcomes_l3.csv", "outcomes_l4.csv"]
    index_columns = ["l1_index", "l2_index", "l3_index", "l4_index"]
    parent_uid_columns = ["l1_UID", "l2_UID", "l3_UID"]

    # Read every level first, giving each table's "index" column a
    # level-specific name so it can serve as a merge key.
    tables = [
        pd.read_csv(os.path.join(DATA_DIR, csv_name)).rename(columns={"index": index_column})
        for csv_name, index_column in zip(csv_names, index_columns)
    ]

    # Chain the merges top-down: each child is joined onto its parent, whose
    # UID column is renamed so it does not clash with the child's own UID.
    for position, uid_column in enumerate(parent_uid_columns):
        parent = tables[position].rename(columns={"UID": uid_column})
        tables[position + 1] = pd.merge(parent, tables[position + 1], on=index_columns[position])

    # Restore the generic "index" name for each level's own index column.
    frames = [
        table.rename(columns={index_column: "index"})
        for table, index_column in zip(tables, index_columns)
    ]

    united = pd.concat(frames).fillna("")
    united["order"] = united.reset_index().index
    return united.set_index("UID")

# Built once at module/cell execution time; select_target_competencies_row
# reads this table for every lookup instead of re-reading the CSV files.
competencies_for_uid = load_competencies_for_uid()
def select_target_competencies_row(target:str):
    """Return the outcome rows for a target's competencies, indented by level.

    Args:
        target: Target name passed on to ``get_target_competencies``.

    Returns:
        An empty ``DataFrame`` when the target declares no competencies.
        Otherwise a frame with the columns ``l1``..``l4`` for every declared
        UID that exists in ``competencies_for_uid``, sorted by the ``order``
        column, where ``l2``/``l3``/``l4`` are prefixed with 4/8/12 spaces.
    """
    declared = get_target_competencies(target)
    if len(declared) == 0:
        return pd.DataFrame([])

    # Ignore UIDs that are not present in the lookup table.
    known_uids = list(set(declared) & set(competencies_for_uid.index))

    matched = competencies_for_uid.loc[known_uids, :]
    ordered = matched.sort_values("order")
    result = ordered.loc[:, ["l1","l2","l3","l4"]]

    # Deeper levels get a wider left margin.
    indents = (("l2", "    "), ("l3", "        "), ("l4", "            "))
    for column, indent in indents:
        result[column] = indent + result[column]
    return result

def get_target_competencies_text(target:str):
    """Render a target's competencies as text with a ``- `` marker on each line.

    Args:
        target: Target name passed on to ``select_target_competencies_row``.

    Returns:
        ``""`` when no rows were selected; otherwise the output of
        ``dataframe_to_text`` with whitespace-only lines removed and ``- ``
        inserted after each remaining line's leading whitespace.
    """
    rows = select_target_competencies_row(target)
    if rows.empty:
        return ""

    rendered = dataframe_to_text(rows)
    # Drop lines that contain nothing but whitespace.
    without_blank_lines = re.sub(r"^\s*\n", "", rendered, flags=re.MULTILINE)
    # Keep the leading whitespace and put the bullet marker right after it.
    return re.sub(r"^(\s*)(.+)", r"\1- \2", without_blank_lines, flags=re.MULTILINE)

#get_target_competencies("case_PR")
#select_target_competencies_row("case_PR")
#print(get_target_competencies_text("case_PR"))


In [None]:
from os import path
import re
import os
import pandas as pd
import shutil
import textwrap
from lib.utils import BASE_DIR,DATA_DIR,OUTPUT_DIR,get_full_outcomes
from collections.abc import Callable


def load_markdown(filename:str)->str:
    """Read a Markdown file and return its full contents.

    Args:
        filename: Path of the file to read.

    Returns:
        The file's text.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode as UTF-8 explicitly: the documents contain non-ASCII text and
    # the locale default encoding differs between platforms.
    with open(filename, encoding="utf-8") as f:
        return f.read()

def add_title_level(markdown_text:str,level_to_add:int):
    """Push every ``#`` heading in ``markdown_text`` down by ``level_to_add`` levels.

    A heading is any line that starts with one or more ``#`` followed by a
    space; ``level_to_add`` extra ``#`` characters are put in front of it.

    Args:
        markdown_text: Markdown source to transform.
        level_to_add: Number of ``#`` characters to prepend to each heading.

    Returns:
        The transformed text; lines that are not headings are unchanged.
    """
    extra_hashes = "#" * level_to_add
    heading_marker = re.compile(r"^(#+) ", flags=re.MULTILINE)
    return heading_marker.sub(extra_hashes + r"\1 ", markdown_text)

def replace_image_path(markdown_text:str,replace_func:Callable[[str],str])->str:
    """Rewrite the path of every Markdown image reference.

    Matches ``![alt](path)`` (spaces between ``]`` and ``(`` are tolerated and
    dropped in the output) and replaces ``path`` with ``replace_func(path)``.

    Args:
        markdown_text: Markdown source to transform.
        replace_func: Maps an original image path to its replacement.

    Returns:
        The text with all image paths rewritten.
    """
    image_pattern = re.compile(r"!\[(.*?)\] *\((.+?)\)")

    def rewrite(found):
        rewritten_path = replace_func(found.group(2))
        alt_text = found.group(1)
        return f"![{alt_text}]({rewritten_path})"

    return image_pattern.sub(rewrite, markdown_text)


# Make sure the output directory exists before anything is copied into it.
os.makedirs(OUTPUT_DIR,exist_ok=True)

data_path = path.join(DATA_DIR,"documents")
# Mirror every sub-directory of the documents folder into OUTPUT_DIR.
# Only real directories are selected, since copytree cannot copy anything
# else; the loop variable avoids the name `dir` so the builtin is not
# shadowed for the rest of the module.
dirs = [f for f in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, f))]
for dir_name in dirs:
    shutil.copytree(os.path.join(data_path, dir_name),os.path.join(OUTPUT_DIR,dir_name),dirs_exist_ok=True)


# documents_index.csv lists the documents to bundle; the loop below reads the
# `filename`, `title` and `section` fields of each row.
document_index = pd.read_csv(path.join(DATA_DIR,"documents_index.csv"))
document_index

# Maps a section key to the concatenated Markdown of all its documents.
output_dict = {}
for item in document_index.itertuples():
    filename = path.join(DATA_DIR,"documents",item.filename+".md")
    # The document's own headings move one level down so that the generated
    # "# title" heading becomes their parent.
    text = add_title_level(load_markdown(filename),1)
    title = "\n\n# "+item.title +"\n\n"
    competencies = get_target_competencies_text(item.filename)
    if competencies != "":
        # Only documents that declare competencies get the extra sub-section.
        competencies = "\n## 学修する資質・能力\n\n"+competencies+"\n\n"
    text = competencies+text
    prev = output_dict.get(item.section,"")
    if prev:
        # Start every document after the first of a section on a new page.
        prev += "\n\\newpage"
    output_dict[item.section] = prev+"\n\n"+title+text

# Assemble all sections into a single Markdown document.
bundle_parts = []
for key,item in output_dict.items():
    # A section key such as "a/b" is nested: its depth decides the heading
    # level and its last component is the heading text.
    sections = key.split("/")
    item = add_title_level(item,len(sections))
    title = "#" * len(sections) +" "+sections[-1]+"\n\n"
    # Each document's competency list is already embedded in `item`; the
    # variable `competencies` left over from the per-document loop must not
    # be appended here, or the last document's list would be repeated under
    # every section heading.
    bundle_parts.append("\n\\newpage\n" + title + item +"\n\n")
bundle = "".join(bundle_parts)

# Image paths in the sources are rewritten to live under "output".
bundle = replace_image_path(bundle,lambda p: os.path.join("output",p))

# Write as UTF-8 explicitly; the bundle contains non-ASCII text and the
# locale default encoding is platform dependent.
with open(path.join(OUTPUT_DIR,"bundle_documents.md"),"w",encoding="utf-8") as f:
    f.write(bundle)

def replace_boxed_paragraph(markdown_text:str)->str:
    """Replace ``{.note}`` fenced blocks with text framed by dashed rules.

    A block starts on a line beginning with two or more colons, a space and
    ``{.note}`` (spaces inside the braces are tolerated) and ends at the next
    line that begins with two or more colons. Inside the block every ``<br>``
    becomes a backslash followed by two newlines, surrounding whitespace is
    stripped, and the result is placed between two lines of 60 dashes.

    Args:
        markdown_text: Markdown source to transform.

    Returns:
        The text with every matching block replaced; text without such
        blocks is returned unchanged.
    """
    before_start = r"^:::* \{ *\.note *\}"
    before_end = r"^:::*"
    # Non-greedy body so that each block ends at its own closing fence.
    before = f"{before_start}(.+?){before_end}"
    rule = "-"*60
    def match_fn(match:re.Match):
        text = match.group(1)
        text = text.replace("<br>","\\\n\n")
        # No diagnostic output here: the function only transforms text.
        return "\n\n"+rule+"\n"+text.strip()+"\n"+rule+"\n\n"
    return re.sub(before,match_fn,markdown_text,flags=(re.MULTILINE | re.DOTALL))

# The TeX variant gets its {.note} blocks turned into dashed boxes; the docx
# variant is the bundle as-is.
documents_for_tex = replace_boxed_paragraph(bundle)
# Both files are written as UTF-8 explicitly: the content is non-ASCII and
# the locale default encoding is platform dependent.
with open(path.join(OUTPUT_DIR,"documents_for_tex.md"),"w",encoding="utf-8") as f:
    f.write(documents_for_tex)

with open(path.join(OUTPUT_DIR,"documents_for_docx.md"),"w",encoding="utf-8") as f:
    f.write(bundle)


# Presumably kept as the cell's final expression so the notebook displays it.
output_dict