In [1]:
import os

from sapiopylib.rest.User import SapioUser
from sapiopylib.rest.DataRecordManagerService import DataRecordPojo
from sapiopylib.rest.DataRecordManagerService import DataRecordPojoPageCriteria
from sapiopylib.rest.DataMgmtService import DataMgmtServer
from sapiopylib.rest.DataRecordManagerService import DataRecordPojoHierarchyPageCriteria

# Connection settings. Each value can be overridden through an environment
# variable so that real credentials never have to be saved in the notebook;
# the fallbacks are the demo values used throughout this tutorial.
user = SapioUser(url=os.environ.get("SAPIO_URL", "https://linux-vm:8443/webservice/api"),
                 guid=os.environ.get("SAPIO_GUID", "60916c2e-fe12-4d71-b7e6-f5a9e9b4d282"),
                 account_name=os.environ.get("SAPIO_ACCOUNT_NAME", "sapio"),
                 username=os.environ.get("SAPIO_USERNAME", "pyRestTest"),
                 password=os.environ.get("SAPIO_PASSWORD", "password1"),
                 # Verification is off only because the demo server presents
                 # a self-signed certificate.
                 verify_ssl_cert=False)
# Every query in the following cells goes through this manager object.
dataRecordManager = DataMgmtServer.get_data_record_manager(user)

We just created a new connection! In this case, we are not using an API token; instead, we specify the GUID, username, and password.

Now let's try to query for record ID = 1

We set `verify_ssl_cert=False` because the server in this example uses a self-signed certificate.

In [2]:
# Query the "Directory" data type for the record whose record ID is 1.
result = dataRecordManager.query_data_records_by_id("Directory", [1])
result_list: list = result.result_list
if len(result_list) > 0:
    # Only one ID was requested, so the first entry is the record we want.
    element: DataRecordPojo = result_list[0]
    print(element.get_record_id())
    print(element.get_fields())

1
{'CreatedBy': 'exemplar', 'DataRecordName': 'Root Directory', 'RelatedRecord3': None, 'RelatedRecord111': None, 'RelatedRecord110': None, 'VeloxLastModifiedDate': 1660833587844, 'VeloxLastModifiedBy': 'yqiao', 'DateCreated': 1659019678095, 'RecordId': 1, 'DirectoryName': 'Root Directory'}




In the example above, we query a directory whose record ID is 1 (which, by the way, is the root directory). We print the record ID and the fields of the record.
Alternatively, you can print a DataRecordPojo object or a page of results directly. This shows the data record names of the records.


In [3]:
# Query "Sample" records by the value of their "SampleId" field.
sample_ids = ["000007"]
result = dataRecordManager.query_data_records("Sample", "SampleId", sample_ids)
# Printing the result page directly shows the matching records' names.
print(result)

000007 export_COVID19 samples 23_04_20_ST3_COVID19_ICU_003_A...




Paging is available. In the next example we are going to get samples, 10 samples per page for 3 pages.

In [4]:
# Fetch "Sample" records 10 at a time, for at most MAX_PAGES pages.
MAX_PAGES = 3
next_page = DataRecordPojoPageCriteria(page_size=10)
for page_number in range(1, MAX_PAGES + 1):
    result = dataRecordManager.query_all_records_of_type("Sample", next_page)
    # The criteria returned with this page are fed into the next query.
    next_page = result.next_page_criteria
    print(f"Page {page_number}: {result}")
    # Stop early once the result reports that no further page is available.
    if not result.is_next_page_available:
        break

Page 1: 000004, 000005, 000006, 000007 export_COVID19 samples 23_04_20_ST3_COVID19_ICU_003_A..., PyWebDemo-13, PyWebDemo-14, PyWebDemo-15, PyWebDemo-16, PyWebDemo-19, PyWebDemo-20[More Pages...]
Page 2: PyWebDemo-23, PyWebDemo-24, 000008, 000009, 000010, 000011, 000012




`query_system_for_record` returns the DataRecordPojo when it finds a record, or `None` if the record is not found.

In [5]:
# Look up one record ID that exists (1) and one that does not (1000000);
# the lookup that finds nothing prints None.
for record_id in (1, 1000000):
    result = dataRecordManager.query_system_for_record('Directory', record_id)
    print(result)

Root Directory
None




You can get parents and children of the records.
The easiest form is to take a single record and get its parents or children.

In this example, we find one FCS file and try to get its parent sample.

In [6]:
# Grab a single "FCSFile" record (page size 1) to use as the child record.
single_fcs_page = DataRecordPojoPageCriteria(page_size=1)
fcs_result = dataRecordManager.query_all_records_of_type("FCSFile", single_fcs_page)
if len(fcs_result.result_list) > 0:
    fcs_record = fcs_result.result_list[0]
    # Ask for the "Sample" parents of that one FCS file.
    fcs_record_id = fcs_record.get_record_id()
    parents = dataRecordManager.get_parents(fcs_record_id, "FCSFile", "Sample")
    if len(parents.result_list) > 0:
        parent_sample = parents.result_list[0]
        print(parent_sample)

000004




In a slightly more complex case, we will be using a batch call to retrieve parents of multiple FCS children at once.

It is often much faster to use a batch call than to query records one at a time in a loop, because it reduces the HTTP and remote-call overhead.

The result object is slightly different. To demonstrate paging in this situation, we request the parents of the first 100 FCS records in pages of 10, and read at most the first five pages.
Note: Since each FCS record has a parent, five pages of 10 will not cover the whole query when there are 100 FCS records. The remaining pages are simply never computed or returned here.

In [2]:
# Load up to 100 "FCSFile" records, then fetch their "Sample" parents in one
# batched call per page instead of one call per record.
fcs_result = dataRecordManager.query_all_records_of_type("FCSFile", DataRecordPojoPageCriteria(page_size=100))
samples = []
fcs_records = fcs_result.result_list
if len(fcs_records) > 0:
    fcs_record_id_list = [fcs.get_record_id() for fcs in fcs_records]
    # Index the FCS records by record ID so each result-map key can be
    # resolved back to the record it came from.
    fcs_record_by_record_id = {fcs.get_record_id(): fcs for fcs in fcs_records}
    next_parent_page_criteria = DataRecordPojoHierarchyPageCriteria(page_size=10)
    # Read at most five pages of parents.
    for page_number in range(1, 6):
        print(f"Page {page_number}")
        parents = dataRecordManager.get_parents_list(fcs_record_id_list, "FCSFile", "Sample",
                                                     next_parent_page_criteria)
        next_parent_page_criteria = parents.next_page_criteria
        for source_record_id, result_records in parents.result_map.items():
            # Skip source records that have no parents on this page.
            if not result_records:
                continue
            # Collect every parent sample for the data frame built afterwards.
            samples.extend(result_records)
            fcs_record = fcs_record_by_record_id.get(source_record_id)
            if fcs_record is not None:
                print(f"{fcs_record} -> [{', '.join(map(str, result_records))}]")
        if not parents.is_next_page_available:
            break

Page 1
export_COVID19 samples 21_04_20_ST3_COVID19_ICU_031_A ST3 21... -> [000004]
export_COVID19 samples 23_04_20_ST3_COVID19_ICU_025_A ST3 23... -> [000005]
export_COVID19 samples 23_04_20_ST3_COVID19_ICU_003_A ST3 23... -> [000006]
export_COVID19 samples 23_04_20_ST3_COVID19_ICU_003_A ST3 23... -> [000007 export_COVID19 samples 23_04_20_ST3_COVID19_ICU_003_A...]




You can translate data in a list of data records into a pandas data frame, which can be used by external tools!

In [4]:
import pandas as pd
# Let pandas display up to 100 columns so the wide sample table is not cut off.
pd.set_option("display.max_columns", 100)
# Turn the Sample records collected by the batch parent query into a DataFrame.
samples_data_frame = dataRecordManager.get_data_frame(samples)
display(samples_data_frame)

Unnamed: 0,ObservedCondition,MultiParentLink234,OtherSampleId,ActiveWorkflowId,SacrificedDate,RelatedNotebookExperiment,RelatedRecord170,SampleId,StorageLocationBarcode,Organism,CreatedBy,RelatedRecord146,CollectionDateTime,ElnbNumber,IsControl,PercentRecoveries,DataRecordName,Volume,ExemplarSampleType,RelatedChild123,ControlType,RelatedRecord181,TotalMass,ConcentrationUnits,RelatedRecord180,CollectionDate,RelatedRecord112,RelatedRecord111,RelatedRecord199,RelatedRecord110,TubeBarcode,DateCreated,MultiParentLink176,RelatedRecord119,IsPooled,RowPosition,ColPosition,VeloxLastModifiedBy,RecordId,ExemplarSampleStatus,MultiParentLink247,VolumeUnits,RelatedRecord168,RelatedRecord200,Comments,ContainerType,PreviousExemplarStatus,TimePoint,Concentration,Flags,RelatedPlate,MultiParentLink243,RelatedRecord208,StorageUnitPath,SapioSingleTemplateProcess,LastEditedBy,AssayType,VeloxLastModifiedDate,PlateId,RelatedChild149,TatProgressStatus,IsInvalid
0,,,,,,,,4,,,sdmsapi,,,,False,,000004,,,,,,,ng/uL,,,,,,,,1663688244010,,,False,,,sdmsapi,130,,,,,,,,,,,,,,,,,,,1663688244522,,,,False
1,,,,,,,,5,,,sdmsapi,,,,False,,000005,,,,,,,ng/uL,,,,,,,,1663688248670,,,False,,,sdmsapi,132,,,,,,,,,,,,,,,,,,,1663688248737,,,,False
2,,,,,,,,6,,,sdmsapi,,,,False,,000006,,Flow Cytometry,,,,,ng/uL,,,,,,,,1663688707312,,,False,,,sdmsapi,169,,,,,,,,,,,,,,,,,,,1663688707708,,,,False
3,,,export_COVID19 samples 23_04_20_ST3_COVID19_IC...,,,,,7,,,sdmsapi,,,,False,,000007 export_COVID19 samples 23_04_20_ST3_COV...,,Flow Cytometry,,,,,ng/uL,,,,,,,,1663693730249,,,False,,,sdmsapi,171,,,,,,,,,,,,,,,,,,,1663693730582,,,,False
