In [1]:
from jinja2 import Template, Environment, FileSystemLoader
import os
import json

# Jinja2 environment that resolves template names against the local
# ./templates directory.
template_loader = FileSystemLoader("./templates")
env = Environment(loader=template_loader)

# 0. Retrieving the raw JSON file for profiles

In [2]:
!wget https://github.com/BioSchemas/bioschemas-dde/raw/main/bioschemas.json 

--2025-02-18 11:36:29--  https://github.com/BioSchemas/bioschemas-dde/raw/main/bioschemas.json
Résolution de github.com (github.com)… 140.82.121.4
Connexion à github.com (github.com)|140.82.121.4|:443… connecté.
requête HTTP transmise, en attente de la réponse… 302 Found
Emplacement : https://raw.githubusercontent.com/BioSchemas/bioschemas-dde/main/bioschemas.json [suivant]
--2025-02-18 11:36:29--  https://raw.githubusercontent.com/BioSchemas/bioschemas-dde/main/bioschemas.json
Résolution de raw.githubusercontent.com (raw.githubusercontent.com)… 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connexion à raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443… connecté.
requête HTTP transmise, en attente de la réponse… 200 OK
Taille : 305680 (299K) [text/plain]
Sauvegarde en : « bioschemas.json.1 »


2025-02-18 11:36:30 (11,5 MB/s) — « bioschemas.json.1 » sauvegardé [305680/305680]



# 1. Generating one Markdown document per profile

In [3]:
# Labels (rdfs:label) of the profiles that list_profiles() keeps.
cross_domain_profiles = [
    "ComputationalTool",
    "ComputationalWorkflow",
    "FormalParameter",
    "Dataset",
    "DataCatalog",
    "Course",
    "CourseInstance",
    "TrainingMaterial",
]

In [4]:
# Parse the downloaded profile dump. JSON text is UTF-8, so the encoding is
# stated explicitly instead of relying on the platform's default locale.
with open("./bioschemas.json", encoding="utf-8") as file:
    profiles_dump = json.load(file)

In [5]:
# Echo the parsed document to inspect its structure (top-level "@context" and
# "@graph" keys). NOTE(review): this prints the whole dump; consider showing
# only a slice of profiles_dump["@graph"] to keep the notebook output short.
profiles_dump

{'@context': {'schema': 'http://schema.org/',
  'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
  'rdfs': 'http://www.w3.org/2000/01/rdf-schema#',
  'owl': 'http://www.w3.org/2002/07/owl/',
  'bioschemas': 'https://discovery.biothings.io/view/bioschemas/',
  'bioschemasdrafts': 'https://discovery.biothings.io/view/bioschemasdrafts/',
  'bioschemastypes': 'https://discovery.biothings.io/view/bioschemastypes/',
  'bioschemastypesdrafts': 'https://discovery.biothings.io/view/bioschemastypesdrafts/',
  'bioschemasdeprecated': 'https://discovery.biothings.io/view/bioschemasdeprecated/',
  'dct': 'http://purl.org/dc/terms/',
  '@dateModified': '05/01/2023, 09:41:23',
  'dwc': 'http://rs.tdwg.org/dwc/terms/'},
 '@graph': [{'@id': 'bioschemas:ChemicalSubstance',
   '@type': 'rdfs:Class',
   'rdfs:comment': "This profile describes a ChemicalSubstance which is 'a portion of matter of constant composition, composed of molecular entities of the same type or of different types' (source: ChEBI

In [6]:
def list_profiles(data, profile_names=None):
    """Extract the selected Bioschemas profiles from a parsed JSON-LD dump.

    Parameters
    ----------
    data : dict
        Parsed document; every entry of ``data["@graph"]`` is inspected.
    profile_names : collection of str, optional
        ``rdfs:label`` values of the profiles to keep. When omitted, the
        notebook-level ``cross_domain_profiles`` list is used.

    Returns
    -------
    list of dict
        One dict per kept profile with the keys ``name``, ``description``,
        ``required``, ``recommended``, ``optional`` and ``properties``.
        ``properties`` is a list of dicts with the keys ``name``, ``url``,
        ``description`` and ``card``. ``card`` is ``None`` when the property
        declares no ``owl:cardinality``; ``url`` is ``None`` for
        ``conformsTo``.

    Raises
    ------
    KeyError
        If ``data`` has no ``"@graph"`` entry, or a kept profile lacks
        ``rdfs:comment`` or ``$validation``.
    """
    if profile_names is None:
        profile_names = cross_domain_profiles

    profiles = []
    for p in data["@graph"]:
        # Graph entries without a label can never match a profile name.
        if p.get("rdfs:label") not in profile_names:
            continue

        validation = p["$validation"]
        my_prof = {
            "name": p["rdfs:label"],
            "description": p["rdfs:comment"],
            "required": validation.get("required", []),
            "recommended": validation.get("recommended", []),
            "optional": validation.get("optional", []),
            "properties": [],
        }

        for prop, spec in validation.get("properties", {}).items():
            # Flatten multi-line descriptions so they fit on a single line.
            desc = spec.get("description", "")
            desc = desc.replace('\n', ' ').replace('\r', '')

            # conformsTo gets no schema.org link (presumably because it is not
            # a schema.org term -- TODO confirm).
            url = None
            if prop not in ["conformsTo"]:
                url = "https://schema.org/" + prop

            # Resolve the cardinality per property: a property without
            # "owl:cardinality" must not inherit the previous property's
            # value (and must not raise NameError when it comes first).
            card = spec.get("owl:cardinality")

            my_prof["properties"].append({"name": prop,
                                          "url": url,
                                          "description": desc,
                                          "card": card})
        profiles.append(my_prof)
    return profiles

In [7]:
# Build the per-profile summaries from the parsed dump.
all_profiles = list_profiles(data=profiles_dump)

In [8]:
# Pretty-print the extracted profiles as JSON (one-space indent) for a visual check.
profiles_json = json.dumps(all_profiles, indent=1)
print(profiles_json)

[
 {
  "name": "FormalParameter",
  "description": "Bioschemas specification for describing a formal parameter in the Life Sciences. Version: 1.0-RELEASE (09 March 2021) ",
  "required": [
   "name",
   "conformsTo"
  ],
  "recommended": [
   "additionalType",
   "description",
   "encodingFormat"
  ],
  "optional": [
   "defaultValue",
   "identifier",
   "valueRequired"
  ],
  "properties": [
   {
    "name": "defaultValue",
    "url": "https://schema.org/defaultValue",
    "description": "The default value for the FormalParameter. This is commonly only used for Inputs.",
    "card": "one"
   },
   {
    "name": "valueRequired",
    "url": "https://schema.org/valueRequired",
    "description": "If the FormalParameter must be specified. This is commonly only used for Inputs",
    "card": "one"
   },
   {
    "name": "encodingFormat",
    "url": "https://schema.org/encodingFormat",
    "description": "URLs to accepted formats.  It is strongly recommented that this be specified. If it i

In [9]:
# Render each extracted profile to its own Markdown page.
output_dir = "../docs/profiles"
# Create the target directory on a fresh checkout instead of failing on open().
os.makedirs(output_dir, exist_ok=True)

# The same template is used for every profile, so it is looked up once.
single_profile_tmpl = env.get_template("single_profile.tmpl")

for profile in all_profiles:
    output = single_profile_tmpl.render(
        name=profile["name"],
        description=profile["description"],
        required=profile["required"],
        recommended=profile["recommended"],
        optional=profile["optional"],
        all_properties=profile["properties"],
    )
    # `output` is a single string: write() emits it in one call, whereas
    # writelines() would iterate over it character by character. The encoding
    # is explicit so non-ASCII text is written the same way on every platform.
    with open(f"{output_dir}/{profile['name']}.md", 'w', encoding="utf-8") as out_file:
        out_file.write(output)