In [2]:
import sys
!{sys.executable} -m pip install decentriq-platform==0.9.0rc1

## Initialize session

In [1]:
#import uuid
#import os
import sys
import decentriq_platform as dq
import decentriq_platform.container as dqc

# Read the Decentriq credentials from the local "credentials" file.
# Expected layout: user e-mail on the first non-empty line, API token on the second.
with open("credentials", "r") as file:
    # Strip surrounding whitespace and skip blank lines so that a stray newline
    # or indentation in the file cannot end up inside the e-mail or the token.
    lines = [line.strip() for line in file if line.strip()]
if len(lines) < 2:
    # Fail with an actionable message instead of a bare IndexError further down.
    raise ValueError(
        "The 'credentials' file must contain the user e-mail on the first line "
        "and the API token on the second line."
    )
user_email, api_token = lines[:2]

# Create the API client for this user.
client = dq.create_client(user_email, api_token, integrate_with_platform=True)

# Enclave specifications for the exact worker versions this notebook relies on;
# the same `specs` object is reused when the data clean room is defined.
specs = dq.enclave_specifications.versions([
    "decentriq.driver:v2",
    "decentriq.sql-worker:v2",
    "decentriq.python-ml-worker:v1",
])

# Authenticate through the Decentriq PKI and open the session that every later
# cell uses to publish the data room, upload data and run computations.
auth = client.platform.create_auth_using_decentriq_pki()
session = client.create_session(auth, specs)

# Python DCR
## DCR definition and publishing

In [20]:
import decentriq_platform as dq
import decentriq_platform.sql as dqsql
import decentriq_platform.container as dqc
from decentriq_platform.container.proto import MountPoint

# Builder that collects every node and permission of the data clean room (DCR)
# before it is built and published.
python_builder = dq.DataRoomBuilder("pythonDCR_03-05-2022", enclave_specs=specs)

# One tabular data node per party, keyed by node name. Each column is the tuple
# handed to the SDK as-is: (column name, primitive type, boolean flag).
# NOTE(review): the boolean is presumably the nullability flag - confirm against the SDK.
party_schemas = {
    "party_a": [
        ("id", dqsql.PrimitiveType.INT64, False),
        ("mean radius", dqsql.PrimitiveType.FLOAT64, False),
        ("mean texture", dqsql.PrimitiveType.FLOAT64, False),
        ("mean perimeter", dqsql.PrimitiveType.FLOAT64, False),
    ],
    "party_b": [
        ("id", dqsql.PrimitiveType.INT64, False),
        ("y", dqsql.PrimitiveType.FLOAT64, False),
    ],
}

# Create each data node and register it on the DCR builder right away, in the
# order party_a -> party_b. The notebook user is the only listed user of both nodes.
node_builders = []
for party_name, party_columns in party_schemas.items():
    node_builder = dqsql.TabularDataNodeBuilder(party_name, schema=party_columns)
    node_builder.add_to_builder(
        python_builder,
        authentication=client.platform.decentriq_pki_authentication,
        users=[user_email],
    )
    node_builders.append(node_builder)
data_node_builder1, data_node_builder2 = node_builders

# Ship the local training script into the DCR as a static-content node named
# "python_script"; the raw bytes of the file are embedded unchanged.
python_script_filename = "train_script_decentriq.py"
with open(python_script_filename, "rb") as script_file:
    my_script_content_from_file = script_file.read()
script_node1 = dq.StaticContent("python_script", my_script_content_from_file)
python_builder.add_compute_node(script_node1)

# Container computation that runs the script. The script node and both party
# nodes are mounted under /input; the node's output path is /output.
script_mount_path = "/input/train_script_decentriq.py"
training_mounts = [
    MountPoint(path=script_mount_path, dependency="python_script"),
    MountPoint(path="/input/party_a", dependency="party_a"),
    MountPoint(path="/input/party_b", dependency="party_b"),
]
training_node = dqc.StaticContainerCompute(
    name="training_node",
    command=["python", script_mount_path],
    mount_points=training_mounts,
    output_path="/output",
    enclave_type="decentriq.python-ml-worker",
    include_container_logs_on_error=True,
)
python_builder.add_compute_node(training_node)

# Permissions granted to the second participant.
# NOTE(review): leaf_crud permissions for the two tables are deliberately left
# disabled, as in the original cell, which asked "no permissions for tabular
# datasets?" - still an open question:
#   dq.Permissions.leaf_crud("party_a"),
#   dq.Permissions.leaf_crud("party_b"),
analyst_permissions = [
    dq.Permissions.execute_compute("training_node"),
    dq.Permissions.retrieve_published_datasets(),
    dq.Permissions.update_data_room_status(),
    dq.Permissions.retrieve_data_room_status(),
    dq.Permissions.retrieve_data_room(),
    dq.Permissions.retrieve_audit_log(),
]
python_builder.add_user_permission(
    email="alexandros.metsai@ringier.ch",
    authentication_method=client.platform.decentriq_pki_authentication,
    permissions=analyst_permissions,
)

In [21]:
# Publish Data Clean Room.
# Build the data room definition from everything registered on `python_builder`.
data_room = python_builder.build()
# Publish it through the open session; the returned ID is what the later upload
# and computation cells use to address this data clean room.
python_dcr_id = session.publish_data_room(data_room)
print("DCR is successfully published. DCR ID:", python_dcr_id)

DCR is successfully published. DCR ID: 53ba084d5a2e286ef303f2ecfe0619815d317a18fe7c759c6cb411e6d970a523


### Upload data, run the computation and store results

In [22]:
# Party A: read the CSV file, then upload it and publish it to the "party_a"
# table of the data clean room.
party_a_csv_path = "data/data_party_a.csv"
key = dq.Key()  # fresh key object handed to the upload call below
input_data = dqsql.read_input_csv_file(
    party_a_csv_path,
    has_header=True,
    delimiter=",",
)

party_a_upload_options = dict(
    table="party_a",
    session=session,
    description="These are the data of party A",
    validate=True,
)
dataset_id_1 = dqsql.upload_and_publish_tabular_dataset(
    input_data, key, python_dcr_id, **party_a_upload_options
)

# Look up the dataset that was just uploaded. As the last expression of the
# cell, the returned record is displayed as the cell output.
client.get_dataset(dataset_id_1)

{'datasetId': 'ebe4df66680831c84bc2b918339a9afb13e450a1bef0470a9ed445aeb0645c91',
 'name': 'party_a',
 'creationDate': '2022-05-03T15:00:18.831Z'}

In [23]:
# Party B: read the CSV file, then upload it and publish it to the "party_b"
# table of the data clean room.
party_b_csv_path = "data/data_party_b.csv"
key = dq.Key()  # fresh key object handed to the upload call below
input_data = dqsql.read_input_csv_file(
    party_b_csv_path,
    has_header=True,
    delimiter=",",
)

party_b_upload_options = dict(
    table="party_b",
    session=session,
    description="These are the data of party B",
    validate=True,
)
dataset_id_2 = dqsql.upload_and_publish_tabular_dataset(
    input_data, key, python_dcr_id, **party_b_upload_options
)

# Look up the dataset that was just uploaded. As the last expression of the
# cell, the returned record is displayed as the cell output.
client.get_dataset(dataset_id_2)

{'datasetId': '7bbf99dfc207aa603d2e0b2293f3dfcc207b827c676b0e176c361487e2d1e479',
 'name': 'party_b',
 'creationDate': '2022-05-03T15:00:27.421Z'}

In [24]:
# Run the "training_node" computation of the published data clean room and wrap
# the raw result so that it can be handled as a zip archive.
# No new `dq.Key()` is created here: nothing in this cell takes a key, and
# rebinding `key` would only discard the key used for the Party B upload.
raw_result = session.run_computation_and_get_results(python_dcr_id, "training_node")
zip_result = dqc.read_result_as_zipfile(raw_result)


In [52]:
# Unpack the result archive into the current working directory.
# NOTE(review): `zip_result` is presumably a `zipfile.ZipFile`; extracting into "."
# can overwrite same-named files next to the notebook - confirm the archive's
# member names before re-running.
zip_result.extractall(".")