In [2]:
# Create RO Crates from BioDT B2Share records
#
# written for the BioDT project https://doi.org/10.3030/101057437
# Oct 2024

!pip install rocrate
!pip install deims

from rocrate.rocrate import ROCrate # tested with rocrate 0.10.0
from rocrate.model.contextentity import ContextEntity
import json
import deims
from urllib.request import urlopen

# Root RO-Crate that collects every entity created in this notebook.
crate = ROCrate()

# ORCID IRIs of the model authors. The same value is used both as the
# identifier of each person entity and in the model's "creator" references,
# so the references point at entities that actually exist in the crate.
# (Previously the references were "#https://orcid.org/..." while the person
# entities carried "https://orcid.org/...", leaving the references dangling.)
banitz_orcid = "https://orcid.org/0000-0001-8541-789X"
taubert_orcid = "https://orcid.org/0000-0001-7594-8152"

# provide general information about the model
model = crate.add(ContextEntity(crate, "grassmind", properties={
    "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"],
    "name": "GRASSMIND",
    "url": "https://www.ufz.de/index.php?en=48445",
    "version": "2.0",
    # NOTE(review): no entity with id "#c++" is created in the visible code.
    "programmingLanguage": {"@id": "#c++"},
    "creator": [
        {"@id": banitz_orcid},
        {"@id": taubert_orcid},
    ],
    "dateCreated": "2024-07-10",
    # NOTE(review): "tbd" is a placeholder; replace once the license is decided.
    "license": {"@id": "tbd"},
    # Filled with the dataset DOIs after all B2SHARE records were processed.
    "input": []
}))

# Model authors, registered under their ORCID so that the crate's internal
# entity id and the emitted "@id" agree.
model_creator1 = crate.add(ContextEntity(crate, banitz_orcid, properties={
    "@id": banitz_orcid,
    "creator_name": "Thomas Banitz",
    "family_name": "Banitz",
    "given_name": "Thomas"
}))

model_creator2 = crate.add(ContextEntity(crate, taubert_orcid, properties={
    "@id": taubert_orcid,
    "creator_name": "Franziska Taubert",
    "family_name": "Taubert",
    "given_name": "Franziska"
}))

# query B2Share API for LTER, BioDT and Grassland records
# (keyword filter 'BioDT AND Grassland pDT', restricted to one community,
# at most 100 hits per request)
url = "https://b2share.eudat.eu/api/records/?q=keywords.keyword=%27BioDT%20AND%20Grassland%20pDT%27&community=d952913c-451e-4b5c-817e-d578dc8a4469&size=100"

# Use the response as a context manager so the HTTP connection is closed
# as soon as the body has been read, even if decoding fails.
with urlopen(url) as response:
    json_response = json.loads(response.read())

# DOIs of all datasets added to the crate; later used as the model's "input".
list_of_file_ids = []

# Turn every B2SHARE hit into a dataset entity with its site (Place),
# organisation and file entities attached.
for record in json_response['hits']['hits']:
    metadata = record["metadata"]
    doi = metadata["DOI"]

    # deims.id can be used to get any information about site from DEIMS
    deims_id = metadata["community_specific"]["d2f5457f-6318-494a-b363-8098356035b7"]["metadata_url"]

    list_of_keyword_labels = [keyword["keyword"] for keyword in metadata["keywords"]]

    # Collect the file PIDs up front so "hasPart" is complete when the
    # dataset entity is created, instead of relying on later mutation of a
    # list that was already handed to the entity.
    list_of_related_files = [file['ePIC_PID'] for file in record['files']]

    creators = [
        {
            "@id": creator["creator_name"],
            "creator_name": creator["creator_name"],
            "family_name": creator["family_name"],
            "given_name": creator["given_name"]
        }
        for creator in metadata["creators"]
    ]

    deims_site_record = deims.getSiteById(deims_id)

    # Register the dataset under its DOI rather than the constant "dataset":
    # with a shared identifier every record maps to the same crate entity id,
    # so later records would displace earlier ones.
    current_crate = crate.add(ContextEntity(crate, doi, properties={
        "name": metadata["titles"][0]["title"],
        "@id": doi,
        "description": metadata["descriptions"][0]["description"],
        "keywords": list_of_keyword_labels,
        "dateCreated": record["created"],
        "variableMeasured": "Grassland Dynamics",
        "spatialCoverage": {
            "@id": deims_id
        },
        "creators": creators,
        "license": metadata["license"]["license"],
        "hasPart": list_of_related_files
    }))

    list_of_file_ids.append(doi)

    eLTER = crate.add(ContextEntity(crate, "https://elter-ri.eu/", properties={
        "@type": "Organization",
        "name": "eLTER",
        "url": "https://elter-ri.eu/",
    }))
    current_crate.append_to("organization", eLTER)

    # The coordinate string is expected to look like "POINT (lon lat)":
    # keep the text between the parentheses and split it on the blank.
    coordinates = deims_site_record["attributes"]["geographic"]["coordinates"]
    coordinates = coordinates.split("(")[1].split(")")[0].split(" ")

    place = crate.add(ContextEntity(crate, deims_id, properties={
        "@type": "Place",
        "name": deims_site_record["attributes"]["general"]["siteName"],
        "description": deims_site_record["attributes"]["general"]["abstract"],
        "geo": {
            "@type": "GeoCoordinates",
            "@id": deims_id,
            "lat": coordinates[1],
            "lon": coordinates[0],
        }
    }))
    current_crate.append_to("Place", place)

    for file in record['files']:
        current_file = crate.add_file(file['ePIC_PID'], properties={
            "name": file['key'],
            "contentSize": str(file['size']),
        })
        # Link each file to its dataset inside the loop. Doing this once
        # after the loop attached only the last file and failed (or reused
        # the previous record's file) for records without files.
        current_crate.append_to("file", current_file)

# Every dataset DOI gathered from B2SHARE becomes an input of the model.
model["input"] = list_of_file_ids

# Serialise the finished crate into a directory next to the notebook.
output_directory = "grassland_crate"
crate.write(output_directory)


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.2[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.2[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [24]:
# Visualization
