In [23]:
import pandas as pd
import glob
import re
import csv
import os
from lib.utils import BASE_DIR,SHEETS_DIR,SHEETS_OUTCOMES_DIR,OUTCOME_TABLE_SOURCE_DIR

# Create the destination directory for the normalised table sources.
os.makedirs(f"{OUTCOME_TABLE_SOURCE_DIR}", exist_ok=True)

# Every "別表-<name>.csv" found under a "*編集用" directory is re-written as
# "<name>.csv" in OUTCOME_TABLE_SOURCE_DIR with all non-numeric fields quoted.
for src_path in glob.glob(f"{SHEETS_OUTCOMES_DIR}/*編集用/別表-*.csv"):
    # The table name is whatever sits between "別表-" and ".csv" in the path.
    name = re.search(r"別表\-(.+)\.csv", src_path).group(1)
    dest_path = f"{OUTCOME_TABLE_SOURCE_DIR}/{name}.csv"
    pd.read_csv(src_path, encoding="utf_8_sig").to_csv(
        dest_path,
        encoding="utf_8_sig",
        quoting=csv.QUOTE_NONNUMERIC,
        index=False,
    )
    print(f"output... {dest_path}")



output... /workspaces/core-curriculum-gdrive/sheets/outcome_tables_source/疾患(救急のみ).csv
output... /workspaces/core-curriculum-gdrive/sheets/outcome_tables_source/知識.csv
output... /workspaces/core-curriculum-gdrive/sheets/outcome_tables_source/疾患.csv
output... /workspaces/core-curriculum-gdrive/sheets/outcome_tables_source/主要な臨床・画像検査.csv
output... /workspaces/core-curriculum-gdrive/sheets/outcome_tables_source/基本診療科.csv
output... /workspaces/core-curriculum-gdrive/sheets/outcome_tables_source/基本的臨床手技.csv
output... /workspaces/core-curriculum-gdrive/sheets/outcome_tables_source/主要症候.csv
output... /workspaces/core-curriculum-gdrive/sheets/outcome_tables_source/身体診察.csv


In [24]:
import re
import os
import pandas as pd
from lib.utils import BASE_DIR,SHEETS_GENERAL_DIR,SHEETS_OUTCOMES_DIR,OUTPUT_DIR,OUTCOME_TABLE_SOURCE_DIR,TABLE_FORMATTED_DIR
from lib.apply_condition_to_dataframe  import apply_condition_to_dataframe

# The output directory must exist before anything is written into it.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Re-export the abbreviation sheet unchanged as definitions.csv
# (read and written as UTF-8 with BOM, without the DataFrame index).
pd.read_csv(
    f"{SHEETS_GENERAL_DIR}/略語集/略語.csv",
    encoding="utf_8_sig",
).to_csv(
    f"{OUTPUT_DIR}/definitions.csv",
    encoding="utf_8_sig",
    index=False,
)
print("output... definitions.csv")

# Catalogue of tables: each row names a source file (データ元), a filter
# condition (条件), the columns to keep (列) and the table id.
table_index = pd.read_csv(
    f"{SHEETS_OUTCOMES_DIR}/別表一覧/別表一覧.csv",
    encoding="utf_8_sig",
)

os.makedirs(TABLE_FORMATTED_DIR, exist_ok=True)
for entry in table_index.itertuples():
    table = pd.read_csv(f"{OUTCOME_TABLE_SOURCE_DIR}/{entry.データ元}.csv")
    table = apply_condition_to_dataframe(table, entry.条件)

    # Number the remaining rows 1..n (by position, after filtering) and turn
    # the number into a zero-padded label of the form "TBL-<id>-001".
    table["index"] = [
        f"TBL-{entry.id}-" + str(position).zfill(3)
        for position in range(1, len(table) + 1)
    ]

    # Keep the catalogue-specified columns followed by the bookkeeping ones.
    kept_columns = re.split(r" *, *", entry.列) + ["index", "UID", "H28対応項目"]
    table = table.loc[:, kept_columns].rename(columns={"H28対応項目": "H28ID"})
    table.to_csv(f"{TABLE_FORMATTED_DIR}/{entry.id}.csv", index=False)

# The published catalogue omits the source-file column.
table_index = table_index.drop(columns="データ元")
table_index.to_csv(f"{OUTPUT_DIR}/table_index.csv", encoding="utf_8_sig", index=False)
print("output... table_index.csv")


output... definitions.csv
output... table_index.csv


In [25]:
import csv
import os
import pandas as pd
import re
import glob
from lib.utils import BASE_DIR,SHEETS_OUTCOMES_DIR,OUTPUT_DIR
from lib.dataframe_to_grouped_numbers import dataframe_to_grouped_numbers

def get_glob_file(glob_path:str)->str:
    """Return one path matching ``glob_path``.

    The pattern is expanded once. When several paths match, the
    lexicographically smallest one is returned so the choice does not depend
    on the order in which the filesystem lists entries.

    Args:
        glob_path: A glob pattern understood by ``glob.glob``.

    Returns:
        The first matching path in sorted order.

    Raises:
        FileNotFoundError: If nothing matches. This is a subclass of
            ``Exception``, so existing ``except Exception`` handlers still
            catch it.
    """
    files = sorted(glob.glob(glob_path))
    if not files:
        raise FileNotFoundError(f"Cannot find {glob_path}")
    return files[0]

os.makedirs(f"{OUTPUT_DIR}", exist_ok=True)


# Layer 1: load the sheet, switch to the short English column names and keep
# only the published columns, in their published order.
r4_l1 = (
    pd.read_csv(f"{SHEETS_OUTCOMES_DIR}/第1層/第1層.csv")
    .rename(
        columns={
            "第1層イニシャル": "index",
            "第1層フルスペル": "l1_spell",
            "第1層": "l1",
            "第1層説明": "l1_desc",
        }
    )
    .loc[:, ["index", "UID", "l1_spell", "l1", "l1_desc"]]
)
r4_l1.to_csv(
    f"{OUTPUT_DIR}/outcomes_l1.csv",
    encoding="utf_8_sig",
    quoting=csv.QUOTE_NONNUMERIC,
    index=False,
)
print("output... ./output/outcomes_l1.csv")


l1_indexes = r4_l1["index"]

# Layer 2: each layer-1 entry has its own "第2層.csv" in a directory whose name
# starts with the layer-1 index. The per-directory frames are collected in a
# list and concatenated once, instead of growing a DataFrame inside the loop.
r4_l2_units = []
for l1_index, l1_UID in zip(r4_l1["index"], r4_l1["UID"]):
    filename = get_glob_file(f"{SHEETS_OUTCOMES_DIR}/{l1_index}*/第2層.csv")
    r4_l2_unit = pd.read_csv(filename, encoding="utf_8_sig")
    r4_l2_unit = r4_l2_unit.rename(columns={"第2層": "l2", "第2層説明": "l2_desc"})
    # Layer-2 ids are "<l1 index>-NN", where NN is the 1-based row position
    # within this file, zero-padded to two digits.
    r4_l2_unit["index"] = l1_index+"-"+(r4_l2_unit.index+1).astype("str").str.zfill(2)
    r4_l2_unit["l1_index"] = l1_index
    r4_l2_unit["l1_UID"] = l1_UID
    r4_l2_units.append(
        r4_l2_unit.loc[:, ["index", "UID", "l2", "l2_desc", "l1_index", "l1_UID"]]
    )

# pd.concat rejects an empty list, so fall back to an empty frame when there
# were no layer-1 rows.
r4_l2 = pd.concat(r4_l2_units) if r4_l2_units else pd.DataFrame(data=[], columns=[])
r4_l2.to_csv(f"{OUTPUT_DIR}/outcomes_l2.csv", encoding="utf_8_sig", quoting=csv.QUOTE_NONNUMERIC, index=False)
print("output... ./output/outcomes_l2.csv")

# reading layer 3 and 4
# Layers 3 and 4: each layer-1 directory holds a "第2から4層.csv" whose rows
# carry the layer-2 title plus the layer-3 / layer-4 texts. Per-directory
# frames are collected and concatenated once after the loop.
r4_l3_units = []
r4_l4_units = []
for l1_index in l1_indexes:
    filename = get_glob_file(f"{SHEETS_OUTCOMES_DIR}/{l1_index}*/第2から4層.csv")
    r4_l234_unit = pd.read_csv(filename)
    r4_l234_unit = r4_l234_unit.rename(columns={"第2層": "l2", "第3層": "l3", "第4層": "l4", "H28対応項目": "H28ID"})

    # Attach the layer-2 index/UID by title. Only the layer-2 rows of the
    # current layer-1 group are used: matching against every group would
    # duplicate rows (or pick the wrong parent) if the same layer-2 title
    # appeared under two different layer-1 groups.
    l2_lookup = (
        r4_l2.loc[r4_l2["l1_index"] == l1_index]
        .rename(columns={"UID": "l2_UID", "index": "l2_index"})
    )
    r4_l234_unit = pd.merge(r4_l234_unit, l2_lookup, how="left", on="l2")
    ids = dataframe_to_grouped_numbers(r4_l234_unit, ["l2", "l3", "l4"])

    # Layer 3: id is "<l2 index>-NN"; one row per distinct layer-3 id.
    r4_l234_unit["l3_index"] = r4_l234_unit["l2_index"]+"-"+ids["l3"].astype("str").str.zfill(2)
    r4_l3_unit = r4_l234_unit.loc[:, ["l3_index", "l3_UID", "l3", "l2_index", "l2_UID"]]
    r4_l3_unit = r4_l3_unit.rename(columns={"l3_index": "index", "l3_UID": "UID"})
    r4_l3_units.append(r4_l3_unit.drop_duplicates(subset=["index"]))

    # Layer 4: id is "<l3 index>-NN"; every row of the sheet is kept.
    r4_l234_unit["l4_index"] = r4_l234_unit["l3_index"]+"-"+ids["l4"].astype("str").str.zfill(2)
    r4_l4_unit = r4_l234_unit.loc[:, ["l4_index", "UID", "l4", "l3_index", "l3_UID", "H28ID"]]
    r4_l4_units.append(r4_l4_unit.rename(columns={"l4_index": "index"}))

# pd.concat rejects an empty list, so fall back to empty frames when there
# were no layer-1 rows.
r4_l3 = pd.concat(r4_l3_units) if r4_l3_units else pd.DataFrame(data=[], columns=[])
r4_l4 = pd.concat(r4_l4_units) if r4_l4_units else pd.DataFrame(data=[], columns=[])

r4_l3.to_csv(f"{OUTPUT_DIR}/outcomes_l3.csv", encoding="utf_8_sig", quoting=csv.QUOTE_NONNUMERIC, index=False)
print("output... ./output/outcomes_l3.csv")

# Table catalogue used to resolve table references in layer-4 texts.
table_index = pd.read_csv(f"{OUTPUT_DIR}/table_index.csv", encoding="utf_8_sig")
def format_table_ref(x:str)->str:
    """Replace ``表[<table name>]`` references in ``x`` with ``[@tbl:<id>]``.

    Table names are looked up in the module-level ``table_index`` frame
    (columns ``表名`` and ``id``). A reference whose name is not listed, or
    whose id is falsy, is left exactly as written.

    Args:
        x: Text that may contain zero or more ``表[...]`` references.

    Returns:
        ``x`` with every resolvable reference rewritten.

    Raises:
        KeyError: If ``table_index`` lacks the ``表名`` or ``id`` column
            (only when ``x`` contains at least one reference).
    """
    def name_to_label(name:str):
        # A boolean mask avoids rebuilding an index for every match and still
        # yields a scalar when a table name is listed more than once (the
        # first listed id wins).
        hits = table_index.loc[table_index["表名"] == name, "id"]
        if hits.empty:
            return ""
        return hits.iloc[0]

    def replace_func(reg:re.Match)->str:
        label = name_to_label(reg.group(1))
        if label:
            return f"[@tbl:{label}]"
        # Unknown table name: keep the original "表[...]" text untouched.
        return reg.group(0)

    return re.sub(r"表\[([^\]]+)\]",replace_func,x)


# Rewrite table references inside every layer-4 text, then publish layer 4.
r4_l4 = r4_l4.assign(l4=r4_l4["l4"].map(format_table_ref))

r4_l4.to_csv(
    f"{OUTPUT_DIR}/outcomes_l4.csv",
    encoding="utf_8_sig",
    quoting=csv.QUOTE_NONNUMERIC,
    index=False,
)
print("output... ./output/outcomes_l4.csv")



output... ./output/outcomes_l1.csv
output... ./output/outcomes_l2.csv
output... ./output/outcomes_l3.csv
output... ./output/outcomes_l4.csv


In [26]:
import pandas as pd
import glob
import re
import csv
import os
from lib.utils import BASE_DIR,SHEETS_OUTCOMES_DIR,OUTPUT_DIR

def get_glob_file(glob_path:str)->str:
    """Return one path matching ``glob_path``.

    The pattern is expanded once. When several paths match, the
    lexicographically smallest one is returned so the choice does not depend
    on the order in which the filesystem lists entries.

    Args:
        glob_path: A glob pattern understood by ``glob.glob``.

    Returns:
        The first matching path in sorted order.

    Raises:
        FileNotFoundError: If nothing matches. This is a subclass of
            ``Exception``, so existing ``except Exception`` handlers still
            catch it.
    """
    files = sorted(glob.glob(glob_path))
    if not files:
        raise FileNotFoundError(f"Cannot find {glob_path}")
    return files[0]


os.makedirs(f"{OUTPUT_DIR}/tables", exist_ok=True)

# For every layer-1 entry, read the "行き先がないID.csv" sheet from the
# directory whose name starts with the layer-1 index, tag the rows with that
# index, and combine everything into one frame.
r4_l1 = pd.read_csv(f"{OUTPUT_DIR}/outcomes_l1.csv")
kept_columns = ["H28ID", "理由・コメント", "l1_index"]
units = []
# Iterate the "index" column directly: on an itertuples() row, `row.index`
# only works because the column field shadows the built-in tuple.index.
for l1_index in r4_l1["index"]:
    filename = get_glob_file(f"{SHEETS_OUTCOMES_DIR}/{l1_index}*/行き先がないID.csv")
    unit = pd.read_csv(filename, encoding="utf_8_sig")
    unit["l1_index"] = l1_index
    units.append(unit.loc[:, kept_columns])

# Concatenate once; pd.concat rejects an empty list, so fall back to an empty
# frame that still has the expected columns.
df = pd.concat(units) if units else pd.DataFrame(columns=kept_columns)

# Rows without an H28 id carry no information for this export.
df = df.dropna(subset=["H28ID"])
df.to_csv(f"{OUTPUT_DIR}/deleted_or_moved.csv", encoding="utf_8_sig", quoting=csv.QUOTE_NONNUMERIC, index=False)
print("output... ./output/deleted_or_moved.csv")



output... ./output/deleted_or_moved.csv


In [27]:
import pandas as pd
import re
import os
from lib.utils import BASE_DIR,SHEETS_GENERAL_DIR,OUTPUT_DIR

def _number_children(parents, texts, clean_text):
    """Assign ids to consecutive child rows that share a parent id.

    Rows are walked in order. The counter restarts whenever the parent id
    differs from the previous row's and advances whenever the text differs
    from the previous row's text, so consecutive rows with the same text
    share an id. A text of exactly "なし" gets the id "<parent>-na" and is
    kept verbatim, but it still advances the counter when it differs from the
    previous text. Any other text gets "<parent>-NN" (counter zero-padded to
    two digits) and is passed through ``clean_text`` after ``str()``.

    Args:
        parents: Iterable of parent ids, one per row.
        texts: Iterable of raw child texts, one per row.
        clean_text: Callable applied to ``str(text)`` of non-"なし" rows.

    Returns:
        Tuple ``(ids, cleaned_texts)`` of two lists, one entry per row.
    """
    ids = []
    cleaned_texts = []
    current_parent = ""
    prev_text = ""
    counter = 0
    for parent, text in zip(parents, texts):
        if parent != current_parent:
            counter = 0
            prev_text = ""
        if prev_text != text:
            counter = counter + 1
        current_parent = parent
        prev_text = text
        if text == "なし":
            ids.append(f"{parent}-na")
            cleaned_texts.append(text)
        else:
            ids.append(f"{parent}-{str(counter).zfill(2)}")
            cleaned_texts.append(clean_text(str(text)))
    return ids, cleaned_texts


def _strip_l4_prefix(text):
    """Remove a leading "X-n-n)-(n) " numbering prefix from a layer-4 text."""
    return re.sub(r"^.\-\d+\-\d+\)\-\(\d+\) ","",text)


def _strip_l5_prefix(text):
    """Remove a leading item number, then a leading circled number, from a layer-5 text."""
    item_text = re.sub(r"^([.０-９0-9]{1,2})( |\.|．)","",text)
    return re.sub(r"^[①②③④⑤⑥⑦⑧⑨⑩⑪⑫⑬⑭⑮⑯⑰⑱⑲⑳㉑㉒㉓㉔㉕㉖]","",item_text)


raw=pd.read_csv(f"{SHEETS_GENERAL_DIR}/H28/H28.csv", index_col=0)

# Layers 1-3 carry their numbering inside the text ("A ...", "A-1 ...",
# "A-1-1) ..."); split each into a zero-padded hierarchical id and the text.
data=pd.DataFrame([])
data["id1"]=raw["第1層（大項目）"].str.extract(r"^(.)")
data["text1"]=raw["第1層（大項目）"].str.extract(r"^. ?(.+)")
data["id2"]=raw["第2層（中項目）"].str.extract(r"^.\-(\d+)")
data["id2"]=data["id1"]+"-"+data["id2"].str.zfill(2)
data["text2"]=raw["第2層（中項目）"].str.extract(r"^.\-\d+ (.+)")
data["id3"]=raw["第3層（小項目）"].str.extract(r"^.\-\d+\-(\d+)")
data["id3"]=data["id2"]+"-"+data["id3"].str.zfill(2)
data["text3"]=raw["第3層（小項目）"].str.extract(r"^.\-\d+\-\d+\) (.+)")
raw["id3"]=data["id3"]

# Layer 4 is numbered within each layer-3 parent.
id4_list, text4_list = _number_children(raw["id3"], raw["第4層（細小項目）"], _strip_l4_prefix)
data["id4"]=id4_list
data["text4"]=text4_list
raw["id4"]=data["id4"]

# Layer 5 is numbered within each layer-4 parent.
id5_list, text5_list = _number_children(raw["id4"], raw["第5層（学修目標）"], _strip_l5_prefix)
data["id5"]=id5_list
data["text5"]=text5_list

distdir=f"{OUTPUT_DIR}/2016"
os.makedirs(distdir,exist_ok=True)
data.to_csv(f"{distdir}/goals.csv", encoding = "utf_8_sig", index=False)
data

Unnamed: 0_level_0,id1,text1,id2,text2,id3,text3,id4,text4,id5,text5
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1.0,A,医師として求められる基本的な資質・能力,A-01,プロフェッショナリズム,A-01-01,医の倫理と生命倫理,A-01-01-na,なし,A-01-01-na-01,医学・医療の歴史的な流れとその意味を概説できる。
2.0,A,医師として求められる基本的な資質・能力,A-01,プロフェッショナリズム,A-01-01,医の倫理と生命倫理,A-01-01-na,なし,A-01-01-na-02,臨床倫理や生と死に関わる倫理的問題を概説できる。
3.0,A,医師として求められる基本的な資質・能力,A-01,プロフェッショナリズム,A-01-01,医の倫理と生命倫理,A-01-01-na,なし,A-01-01-na-03,ヒポクラテスの誓い、ジュネーブ宣言、医師の職業倫理指針、医師憲章等医療の倫理に関する規範を概...
4.0,A,医師として求められる基本的な資質・能力,A-01,プロフェッショナリズム,A-01-02,患者中心の視点,A-01-02-na,なし,A-01-02-na-01,リスボン宣言等に示された患者の基本的権利を説明できる。
5.0,A,医師として求められる基本的な資質・能力,A-01,プロフェッショナリズム,A-01-02,患者中心の視点,A-01-02-na,なし,A-01-02-na-02,患者の自己決定権の意義を説明できる。
...,...,...,...,...,...,...,...,...,...,...
2002.0,G,臨床実習,G-04,診療科臨床実習,G-04-04,シミュレーション教育,G-04-04-na,なし,G-04-04-na-01,シミュレータを用いて反復練習をすることで、臨床技能を磨く。
2003.0,G,臨床実習,G-04,診療科臨床実習,G-04-04,シミュレーション教育,G-04-04-na,なし,G-04-04-na-02,模擬患者の協力を得て、臨床技能（コミュニケーションスキルを含む）や医療者に求められる態度を身...
2004.0,G,臨床実習,G-04,診療科臨床実習,G-04-04,シミュレーション教育,G-04-04-na,なし,G-04-04-na-03,シナリオを用いたトレーニングを通して、状況判断、意思決定能力を獲得する。
2005.0,G,臨床実習,G-04,診療科臨床実習,G-04-04,シミュレーション教育,G-04-04-na,なし,G-04-04-na-04,チームトレーニングによって、チーム医療の実践能力を高める。
