---

### for me

 - [YAML Cheatsheet](https://quickref.me/yaml.html)
 - [YAML Viewer](https://jsonformatter.org/yaml-viewer)

# Notes

 - provide links and URIs wherever possible
 - _extended_ YAML with inline links in Markdown style [text](https://www.example.org)
 - 

# Feedback

 - the more you annotate _what_ the information in the guides _is_ (types, meta-info), the more information we can extract and potentially link together with the Datahub
 - try to keep structures _across_ guides of different levels _and_ pieces of information as uniform as possible
   -> makes info _predictable_ for others and allows to link more into the Datahub
 - 

# Problems

 - which URIs?
   -> definitely the Datahub ones _at some point_ but those aren't ready yet
  - which thesaurus/thesauri?

# Thoughts, Ideas

 - link directly to other search guides (across levels)
 - link directly to example objects that the current guide pertains to (is examplary of)
   -> this serves the "points of acces"/"portals" motivation of the research guides

---

# YAML to Markdown Parsing - custom-built

In [115]:
from datetime import datetime

class ResearchAid:
    levels = [Level0, Level1, Level2, Level3]

    def __init__(self, yml, raise_parsing_error=False):
        self.level_id = int(yml["Level"])
        self.title = yml["Title"]
        self.author = "Wiebe Reints (@wreints)"
        self.time = datetime.today().strftime("%Y-%m-%d")
        try:
            self.level = self.levels[self.level_id](yml)
            self._parsed = True
        except Exception as e:
            self._parsed = False
            self._error_msg = e
            if raise_parsing_error:
                raise e

    def __call__(self):
        if not self._parsed:
            return None
        return f"""
_This is a level {self.level_id} Research Aid_  
_author: {self.author}_  
_last edited: {self.time}_  

# {self.title}

{self.level()}
        """

    
    def get_markdown_content(self, yml):
        if not (yml["content-type"] == 'text/markdown'):
            raise ValueError(f"only Markdown can be interpreted as markdown! got {yml}")
        return yml["content"]

    
    def parse_related_dict(self, yml):
        # key = next(yml.keys())
        # value_dict = next(yml.values())
        item_title, item_value_dict = tuple(yml.items())[0]
        rel_type = item_value_dict["rel_type"]
        if rel_type.lower()  == "see also":
            return f"_see also: [{item_title}]({item_value_dict["link"]})_  \n"
        elif rel_type == "broader":
            return f"_broader: [{item_title}]({item_value_dict["link"]})_  \n"
        elif rel_type == "narrower":
            return f"_narrower: [{item_title}]({item_value_dict["link"]})_  \n"
        else:
            print(rel_type.lower())
            raise ValueError(f"{item_value_dict}")

    
    def parse_anything(self, yml, result_md="", level=0):
        if isinstance(yml, (str, int, float, bool)):
            return result_md + yml
        elif isinstance(yml, list):
            ls_md = ", ".join(self.parse_anything(x) for x in yml)
            return result_md + "\n" + ls_md + "\n"
        elif isinstance(yml, dict):
            dict_md = ",\n  ".join(self.parse_anything(v, 
                                                    result_md=self.parse_anything(k, result_md="")+":\n  "
                                                   ) for k, v in yml.items())
            return result_md + dict_md
    


class Level0(ResearchAid):
    def __init__(self, yml):
        self.sub = yml["Subtitle"]
        self.main_text = self.get_markdown_content(yml["Content"])
        self.breakdown = yml["Breakdown"]


    def parse_topic(self, yml):
        # print( "---", yml, "---")
        if isinstance(yml, list): return ""
        # item_title, item_fields = tuple(yml.items())[0]
        # if item_fields["rel_type"] == "see also":
        #     md = f"""_see also {item_title} ({item_fields["link"]})_
        #     """
        # else:
        #     raise ValueError(f"{item_fields}")
        md = self.parse_related_dict(yml)

        subtopics = item_fields.get("Subtopics", None)
        if subtopics:
            md +=self.parse_topic(subtopics)
        return md
    
        
    def parse_breakdown(self, yml):
        for title, ls in yml.items():
            md = f"""### {title}
            """
            for d in ls:
                md += self.parse_topic(d)
            yield md
                
    
    def __call__(self):
        breakdown = "\n".join(self.parse_breakdown(self.breakdown))
        return f"""
        ## {self.sub}

        {self.main_text}

        {breakdown}
        """
    
class Level1(ResearchAid):
    def __init__(self, yml):
        self.abstract = yml["Abstract"]
        self.main_text = self.get_markdown_content(yml["Main-text"])
        self.related_aids = self.parse_related_aids(yml["RelatedAides"])

    def parse_related_aids(self, yml):
        md = ""
        for d in yml:
            md += self.parse_related_dict(d)
        return md

    def __call__(self):
        return f"""
## Abstract

{self.abstract}

{self.main_text}

## Related Aids

{self.related_aids}
"""
    
class Level2(Level1):
    def __init__(self, yml):
        self.abstract = yml["Abstract"]
        self.main_text = self.get_markdown_content(yml["Main-text"])
        self.related_aids = self.parse_related_aids(yml["RelatedAides"])
        self.relevant_data = self.parse_anything(yml["Relevant data"])
        self.sources = self.parse_sources(yml["Sources"])

    def parse_source_links(self, yml):
        for d in yml:
            for k, v in d.items():
                if k == "ISBN":
                    yield f"[ISBN {v}](https://isbnsearch.org/isbn/{v})"
                elif k == "ISSN":
                    yield f"[ISBN {v}](https://portal.issn.org/resource/ISSN/{v})"
                elif k == "OCLC":
                    yield f"[WorldCat {v}](https://search.worldcat.org/title/{v})"
                elif k == 'Google Books ID':
                    yield f"[Google Books ID {v}](https://books.google.nl/books?id={v})"
                elif k == "DOI":
                    yield f"DOI: {v}"
                elif k == "URL":
                    yield f"[{v}]({v})"
                else:
                    raise ValueError(f"link dict {d} unknown")
            
    
    def parse_sources(self, yml):
        md = ""
        for source_lvl, source_ls in yml.items():
            md += f"## {source_lvl}\n\n"
            for source in source_ls:
                'Type of source', 'Name', 'Link', 'Description and remarks'
                source_md = f"**{source['Type of source']}**: {source['Name']}"
                # links_md = ", ".join([f"{v} (_{k}_)" for d in source['Link'] for k, v in d.items()])
                links_md = ", ".join(self.parse_source_links(source['Link']))
                # links_md = "(" + links_md + ")"
                md += f"{source_md}  \n{links_md}  \n"
                if"Description and remarks" in source:
                    md+= f"_{source["Description and remarks"]}_  \n\n"
        return md
                
    def __call__(self):
        md = super().__call__()
        return md+f"""{self.sources}

---
## Relevant Data 
{self.relevant_data}"""
            

class Level3(Level2):
    """
    Level 3 and Level 2 have the same keys.
    """
    def __init__(self, yml):
        super().__init__(yml)
        # self.abstract = yml["Abstract"]
        # self.main_text = self.get_markdown_content(yml["Main-text"])
        # self.related_aids = self.parse_related_aids(yml["RelatedAides"])

In [116]:
from glob import glob
import yaml
# f0 = "../../research-guides-dev/published/niveau0/English/TopLevel_20240606.yml"
# f1 = "../../research-guides-dev/published/niveau1/English/DoingResearch_20240425.yml"
# f2 = "../../research-guides-dev/published/niveau2/English/Science_20240821.yml"
# f3 = "../../research-guides-dev/published/niveau3/English/WMAmsterdam_20240809.yml"
# yamls = []
# for f in [f0, f1, f2, f3]:
#     with open(f) as handle:
#         yamls.append(yaml.safe_load(handle))


BASE_DIR = "../published"
eng = glob(f"{BASE_DIR}/*/English/*.yml")
dutch = glob(f"{BASE_DIR}/*/Dutch/*.yml")
# top = glob(f"{BASE_DIR}/TopLevel/*.yml")

yaml_files = sorted(dutch + eng)

In [117]:
for filename in yaml_files:
    with open(filename) as handle:
        yml = yaml.safe_load(handle)

        try:
            ResearchAid(yml, raise_parsing_error=True)
        except KeyError as e:
            print(filename, e)

            if "remarks" in str(e):
                print(filename, e)
                raise
            else:
                print(filename, e)

../published/niveau0/Dutch/TopLevel_20240606.yml 'content-type'
../published/niveau0/Dutch/TopLevel_20240606.yml 'content-type'
../published/niveau0/English/TopLevel_20240606.yml 'content-type'
../published/niveau0/English/TopLevel_20240606.yml 'content-type'
../published/niveau2/Dutch/MilitaryAndNavy_20240326.yml 'Link'
../published/niveau2/Dutch/MilitaryAndNavy_20240326.yml 'Link'
../published/niveau2/English/MilitaryAndNavy_20240417.yml 'Link'
../published/niveau2/English/MilitaryAndNavy_20240417.yml 'Link'
../published/niveau3/English/NZG_20240508.yml 'Link'
../published/niveau3/English/NZG_20240508.yml 'Link'


In [113]:
yml

{'Level': 3,
 'Title': 'Groote Koninklijke Bazar',
 'Abstract': 'De Groote Koninklijke Bazar was een warenhuis in Den Haag. De handel werd gerund door Dirk Boer, die in 1825 begon met de verkoop voor Aziatische (kunst)artikelen. In 1843 opende hij zijn winkel aan de Zeestraat.',
 'RelatedAides': [{'Handel': {'link': 'niveau2/Dutch/Handel_20240326.yml',
    'rel_type': 'see also'}},
  {'Koninklijk Kabinet van Zeldzaamheden': {'link': 'niveau3/Dutch/KKZ_20240313.yml',
    'rel_type': 'see also'}},
  {'Wereldmuseum Leiden': {'link': 'niveau3/Dutch/WMLeiden_20240327.yml',
    'rel_type': 'see also'}},
  {'Kunsthandel Van Lier': {'link': 'niveau3/Dutch/KunsthandelVanLier_20240313.yml',
    'rel_type': 'see also'}}],
 'Relevant data': {'Identifiers': ['https://www.wikidata.org/entity/Q13648189',
   'https://hdl.handle.net/20.500.11840/pi56363',
   'https://rkd.nl/nl/explore/artists/349840'],
  'Name variations': ['Grand Bazar Royal',
   'Dirk Aartsz. Boer',
   'D. Boer & Zonen'],
  'Tags': {

In [3]:
for y in yamls:
    print(y.keys())

dict_keys(['Level', 'Title', 'Subtitle', 'Content', 'Breakdown'])
dict_keys(['Level', 'Title', 'Abstract', 'RelatedAides', 'Main-text'])
dict_keys(['Level', 'Title', 'Abstract', 'RelatedAides', 'Relevant data', 'Main-text', 'Sources'])
dict_keys(['Level', 'Title', 'Abstract', 'RelatedAides', 'Relevant data', 'Main-text', 'Sources'])


In [4]:
# print(YAML2MD(yamls[1])())
# yamls[3]["Relevant data"]#["Tags"]
print([s.keys() for s in yamls[2]["Sources"]['Secondary sources']][0], "\n---")
print([s.keys() for s in yamls[3]["Sources"]['Secondary sources']][0])



dict_keys(['Type of source', 'Name', 'Link', 'Description and remarks']) 
---
dict_keys(['Type of source', 'Name', 'Link', 'Description and remarks'])


In [83]:
# print(Level2(yamls[2])())

print(ResearchAid(yamls[0], raise_parsing_error=True)())

KeyError: 'content-type'

In [68]:
yamls[3]

{'Level': 3,
 'Title': 'Wereldmuseum Amsterdam',
 'Abstract': "Today's Wereldmuseum Amsterdam, which before was called the Tropenmuseum, has it's origins in the 19th century. The collection of the Wereldmuseum Amsterdam includes objects from several other museums, including the former Artis Ethnographic Museum.",
 'RelatedAides': [{'Sources': {'link': 'niveau1/English/Sources_20240501.yml',
    'rel_type': 'see also'}},
  {'Trade': {'link': 'niveau2/English/Trade_20240316.yml',
    'rel_type': 'see also'}},
  {'Military and navy': {'link': 'niveau2/English/MilitaryAndNavy_20240417.yml',
    'rel_type': 'see also'}},
  {'Science': {'link': 'niveau2/English/Science_20240821.yml',
    'rel_type': 'see also'}},
  {'Royal Cabinet of Curiosities': {'link': 'niveau3/English/KKZ_20240417.yml',
    'rel_type': 'see also'}},
  {'Artis Ethnographic Museum': {'link': 'niveau3/English/EMArtis_20240712.yml',
    'rel_type': 'see also'}},
  {'Wereldmuseum Berg en Dal': {'link': 'niveau3/English/WMBer