In [None]:
# Load libraries
import getpass
import os

from aiida_openbis.utils import bisutils

In [None]:
# Connect to openBIS
# Connection settings come from the environment so that no password is stored
# in the notebook. OPENBIS_URL and OPENBIS_USER fall back to the values that
# were previously hard-coded; the password is prompted for when
# OPENBIS_PASSWORD is not set (set it for non-interactive runs).
session = bisutils.log_in(
    bisurl=os.environ.get("OPENBIS_URL", "openbis"),
    bisuser=os.environ.get("OPENBIS_USER", "admin"),
    bispasswd=os.environ.get("OPENBIS_PASSWORD") or getpass.getpass("openBIS password: "),
)

In [91]:
# Classes
class OpenBISDatabase:
    """Collects a plain-dict snapshot of an openBIS instance in ``self.schema``.

    Typical use: call ``connect_to_openbis()`` once, then the ``extract_*``
    methods. The hierarchy extractors depend on each other and must run in the
    order spaces -> projects -> experiments -> objects, because each one
    iterates over the entries produced by the previous one.

    Every ``extract_*`` method rebuilds its own section of the schema from
    scratch, so calling a method again refreshes the section instead of
    appending duplicate entries.
    """

    def __init__(self):
        # Session object returned by bisutils.log_in(); None until
        # connect_to_openbis() has been called.
        self.session = None
        self.schema = {
            "spaces": [],
            "property_types": [],
            "object_types": [],
            "experiment_types": [],
            "vocabularies": [],
            "dataSetTypes": [],
        }

    def connect_to_openbis(self, bisurl: str, bisuser: str, bispasswd: str):
        """Log in through ``bisutils.log_in`` and keep the session on the instance."""
        self.session = bisutils.log_in(
            bisurl=bisurl,
            bisuser=bisuser,
            bispasswd=bispasswd,
        )

    def extract_spaces(self):
        """Rebuild ``schema["spaces"]`` with one ``{"code": ...}`` entry per space.

        Entries are created fresh, so nested projects from an earlier run are
        dropped and have to be extracted again.
        """
        self.schema["spaces"] = [
            {"code": space.code} for space in self.session.get_spaces()
        ]

    def extract_projects(self):
        """Attach a ``"projects"`` list to every extracted space."""
        for space in self.schema["spaces"]:
            space["projects"] = [
                {"code": project.code}
                for project in self.session.get_projects(space=space["code"])
            ]

    def extract_experiments(self):
        """Attach an ``"experiments"`` list to every extracted project.

        Requires ``extract_projects()`` to have run: each space entry must
        already carry a ``"projects"`` list.
        """
        for space in self.schema["spaces"]:
            for project in space["projects"]:
                experiments = self.session.get_experiments(
                    space=space["code"],
                    project=project["code"],
                )
                project["experiments"] = [
                    {
                        "code": experiment.code,
                        "type": experiment.type.code,
                        "properties": experiment.props.all(),
                    }
                    for experiment in experiments
                ]

    def extract_objects(
        self,
        download_url: str = "https://openbis/",
        destination: str = "openBIS_datasets",
    ):
        """Attach an ``"objects"`` list to every extracted experiment.

        Every dataset attached to an object is downloaded as a side effect.
        Requires ``extract_experiments()`` to have run.

        Args:
            download_url: Value written into each dataset's
                ``data["dataStore"]["downloadUrl"]`` before downloading.
                Pass ``None`` to leave the URL reported by the server untouched.
            destination: Folder handed to ``dataset.download()``; also used as
                the prefix of the recorded ``"folderpath"``.
        """
        for space in self.schema["spaces"]:
            for project in space["projects"]:
                for experiment in project["experiments"]:
                    samples = self.session.get_samples(
                        space=space["code"],
                        experiment=f"/{space['code']}/{project['code']}/{experiment['code']}",
                    )
                    experiment["objects"] = [
                        self._object_metadata(sample, download_url, destination)
                        for sample in samples
                    ]

    def _object_metadata(self, sample, download_url, destination):
        """Return the metadata dict for one sample, downloading its datasets."""
        # TODO: Correct this easy fix once the bug of getting parents from items from get_objects() is fixed
        openbis_object = self.session.get_object(sample.permId)

        parent_identifiers = [
            parent.identifier for parent in openbis_object.get_parents()
        ]
        child_identifiers = [
            child.identifier for child in openbis_object.get_children()
        ]

        datasets_metadata = []
        for dataset in openbis_object.get_datasets():
            if download_url is not None:
                # Override the data-store URL used for the download.
                dataset.data["dataStore"]["downloadUrl"] = download_url
            dataset.download(destination=destination)
            datasets_metadata.append(
                {
                    "type": dataset.type.code,
                    "folderpath": f"{destination}/{dataset.code}",
                }
            )

        return {
            "code": openbis_object.code,
            "type": openbis_object.type.code,
            "properties": openbis_object.props.all(),
            "parents": parent_identifiers,
            "children": child_identifiers,
            "datasets": datasets_metadata,
        }

    @staticmethod
    def _property_assignments_metadata(entity_type):
        """Return section / mandatory / property_type dicts for an entity type.

        Shared by the object, experiment and dataset type extractors, which
        all read the same three columns of the property-assignment table.
        """
        assignments = entity_type.get_property_assignments().df
        return [
            {
                "section": assignment.section,
                "mandatory": assignment.mandatory,
                "property_type": assignment.propertyType,
            }
            for _, assignment in assignments.iterrows()
        ]

    def extract_property_types(self):
        """Rebuild ``schema["property_types"]`` from the session's property types."""
        self.schema["property_types"] = [
            {
                "code": property_type.code,
                "label": property_type.label,
                "description": property_type.description,
                "data_type": property_type.data_type,
                "vocabulary_id": property_type.vocabulary,
                "metaData": property_type.metaData,
            }
            for property_type in self.session.get_property_types()
        ]

    def extract_object_types(self):
        """Rebuild ``schema["object_types"]``, including property assignments."""
        self.schema["object_types"] = [
            {
                "code": object_type.code,
                "description": object_type.description,
                "prefix": object_type.generatedCodePrefix,
                "generate_code": object_type.autoGeneratedCode,
                "properties": self._property_assignments_metadata(object_type),
            }
            for object_type in self.session.get_object_types()
        ]

    def extract_experiment_types(self):
        """Rebuild ``schema["experiment_types"]``, including property assignments."""
        self.schema["experiment_types"] = [
            {
                "code": experiment_type.code,
                "description": experiment_type.description,
                "propertyAssignments": self._property_assignments_metadata(experiment_type),
            }
            for experiment_type in self.session.get_experiment_types()
        ]

    def extract_vocabularies(self):
        """Rebuild ``schema["vocabularies"]`` with each vocabulary and its terms."""
        vocabularies_metadata = []
        for vocabulary in self.session.get_vocabularies():
            terms = vocabulary.get_terms().df
            terms_metadata = [
                {
                    "code": term.code,
                    "label": term.label,
                    "description": term.description,
                    "official": term.official,
                }
                for _, term in terms.iterrows()
            ]
            vocabularies_metadata.append(
                {
                    "code": vocabulary.code,
                    "description": vocabulary.description,
                    "terms": terms_metadata,
                }
            )
        self.schema["vocabularies"] = vocabularies_metadata

    def extract_dataset_types(self):
        """Rebuild ``schema["dataSetTypes"]``, including property assignments."""
        self.schema["dataSetTypes"] = [
            {
                "code": dataset_type.code,
                "description": dataset_type.description,
                "propertyAssignments": self._property_assignments_metadata(dataset_type),
            }
            for dataset_type in self.session.get_dataset_types()
        ]


In [92]:
# Build the schema snapshot on a fresh OpenBISDatabase instance.
# Connection settings come from the environment so that no password is stored
# in the notebook. OPENBIS_URL and OPENBIS_USER fall back to the values that
# were previously hard-coded; the password is prompted for when
# OPENBIS_PASSWORD is not set (set it for non-interactive runs).
openbis_database = OpenBISDatabase()
openbis_database.connect_to_openbis(
    os.environ.get("OPENBIS_URL", "openbis"),
    os.environ.get("OPENBIS_USER", "admin"),
    os.environ.get("OPENBIS_PASSWORD") or getpass.getpass("openBIS password: "),
)

# Hierarchy first: each step iterates over the entries created by the previous one.
openbis_database.extract_spaces()
openbis_database.extract_projects()
openbis_database.extract_experiments()
openbis_database.extract_objects()

# Type definitions and vocabularies.
openbis_database.extract_property_types()
openbis_database.extract_object_types()
openbis_database.extract_experiment_types()
openbis_database.extract_vocabularies()
openbis_database.extract_dataset_types()

# `x` is read again by a later cell, so the name is kept.
x = openbis_database.schema
x["dataSetTypes"]

[{'code': 'UNKNOWN', 'description': 'Unknown', 'propertyAssignments': []},
 {'code': 'ELN_PREVIEW',
  'description': None,
  'propertyAssignments': [{'section': 'General info',
    'mandatory': False,
    'property_type': '$NAME'},
   {'section': 'Comments', 'mandatory': False, 'property_type': 'NOTES'},
   {'section': None, 'mandatory': False, 'property_type': '$XMLCOMMENTS'}]},
 {'code': 'RAW_DATA',
  'description': None,
  'propertyAssignments': [{'section': 'General info',
    'mandatory': False,
    'property_type': '$NAME'},
   {'section': 'Comments', 'mandatory': False, 'property_type': 'NOTES'},
   {'section': None, 'mandatory': False, 'property_type': '$XMLCOMMENTS'}]},
 {'code': 'PROCESSED_DATA',
  'description': None,
  'propertyAssignments': [{'section': 'General info',
    'mandatory': False,
    'property_type': '$NAME'},
   {'section': 'Comments', 'mandatory': False, 'property_type': 'NOTES'},
   {'section': None, 'mandatory': False, 'property_type': '$XMLCOMMENTS'}]},
 

In [14]:
# Inspect one extracted space together with its nested projects, experiments and objects.
# NOTE(review): the index is positional; in the recorded output entry 10 is the
# INSTITUTIONS space, but that depends on the order in which spaces were extracted — confirm.
x["spaces"][10]

{'code': 'INSTITUTIONS',
 'projects': [{'code': 'INSTITUTIONS',
   'experiments': [{'code': 'INSTITUTIONS_EXP_1',
     'type': 'COLLECTION',
     'properties': {'$name': 'Institutions',
      '$default_object_type': 'INSTITUTION',
      '$default_collection_view': None},
     'objects': [{'code': 'INST5',
       'type': 'INSTITUTION',
       'properties': {'$name': 'EMPA', 'address': None},
       'parents': [],
       'children': ['/PEOPLE/PEOPLE/AUTH65',
        '/PEOPLE/PEOPLE/AUTH66',
        '/INSTITUTIONS/ROOMS/ROOM54',
        '/INSTITUTIONS/ROOMS/ROOM55'],
       'datasets': [{'type': 'ELN_PREVIEW',
         'folderpath': 'openBIS_datasets/20231106215919638-116'}]},
      {'code': 'INST6',
       'type': 'INSTITUTION',
       'properties': {'$name': 'Max Planck Institute for Polymer Research',
        'address': None},
       'parents': [],
       'children': ['/PEOPLE/PEOPLE/AUTH67'],
       'datasets': [{'type': 'ELN_PREVIEW',
         'folderpath': 'openBIS_datasets/20231106

In [None]:
# Scratch check: type code of the first experiment returned by get_experiments().
session.get_experiments()[0].type.code

In [None]:
# Call get_parents() on every sample returned by get_samples(). The results are
# discarded, so this cell only exercises the call.
# NOTE(review): presumably a check of parent lookup on get_samples() items — confirm intent.
for sample in session.get_samples():
    sample.get_parents()

In [None]:
# Table of all spaces visible to the session.
spaces_df = session.get_spaces().df

# One projects DataFrame per space, in the order of the spaces table.
projects = [
    session.get_projects(space = code).df
    for code in spaces_df["code"]
]

# Show the projects table of the sixth space in that listing.
projects[5]

In [None]:
# Scratch cell: both calls are executed, but only the value of the last
# expression (get_projects) is shown as the cell output.
session.get_spaces()
session.get_projects()
# Further listings, currently disabled:
# session.get_experiments()
# session.get_samples()
# session.get_datasets()
# session.get_sample_type("STM_SIMULATION").get_property_assignments()
# session.get_property_types().df