In [None]:
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.resources import SERVICE_NAME, Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from secretnote.instrumentation import ProfilingInstrumentor, MermaidExporter

# Exporter kept in a variable because its graph() is printed in the last cell.
mermaid = MermaidExporter()

# Tracer provider tagged with the service name "simulation".
resource = Resource(attributes={SERVICE_NAME: "simulation"})
provider = TracerProvider(resource=resource)

# Spans are sent both to an OTLP gRPC endpoint on localhost:4317 (no TLS)
# and to the Mermaid exporter, each through its own batch processor.
otlp = OTLPSpanExporter(endpoint="localhost:4317", insecure=True)
for exporter in (otlp, mermaid):
    provider.add_span_processor(BatchSpanProcessor(exporter))

# Install the provider as the global tracer provider.
trace.set_tracer_provider(provider)

In [None]:
# Start the profiling instrumentation before secretflow is imported and used;
# it is stopped near the end of the notebook with instrumentor.stop().
instrumentor = ProfilingInstrumentor()
instrumentor.start()

In [None]:
import secretflow as sf

# Shut down any secretflow runtime that is already running before
# initialising a fresh one.
sf.shutdown()

# Start a local runtime with three parties and create one PYU device per party.
parties = ["alice", "bob", "carol"]
sf.init(parties, address="local")
alice, bob, carol = (sf.PYU(name) for name in parties)

In [None]:
import pandas as pd
from sklearn.datasets import load_iris

# Load iris as pandas objects and join features and target column-wise
# into a single DataFrame.
iris = load_iris(as_frame=True)
data = pd.concat((iris.data, iris.target), axis="columns")
data

In [None]:
# Horizontal partitioning: each party gets a disjoint range of rows
# (first 40, next 60, the rest) with every column.
h_alice = data.iloc[:40, :]
h_bob = data.iloc[40:100, :]
h_carol = data.iloc[100:, :]

# Save to temporary files.
import os
import tempfile

# NOTE: mkdtemp() does not delete the directory; nothing in this notebook removes it.
temp_dir = tempfile.mkdtemp()

h_alice_path = os.path.join(temp_dir, "h_alice.csv")
h_bob_path = os.path.join(temp_dir, "h_bob.csv")
h_carol_path = os.path.join(temp_dir, "h_carol.csv")

# Write each partition without the pandas index column.
for part, path in (
    (h_alice, h_alice_path),
    (h_bob, h_bob_path),
    (h_carol, h_carol_path),
):
    part.to_csv(path, index=False)

In [None]:
# Preview the first rows of each party's horizontal partition.
h_alice.head(), h_bob.head(), h_carol.head()

In [None]:
# Vertical partitioning: each party gets every row but a different
# slice of columns (first two, next two, the rest).
v_alice = data.iloc[:, :2]
v_bob = data.iloc[:, 2:4]
v_carol = data.iloc[:, 4:]

# Save to temporary files (same directory as the horizontal partitions).
v_alice_path = os.path.join(temp_dir, "v_alice.csv")
v_bob_path = os.path.join(temp_dir, "v_bob.csv")
v_carol_path = os.path.join(temp_dir, "v_carol.csv")

# Write each partition without the pandas index column.
for part, path in (
    (v_alice, v_alice_path),
    (v_bob, v_bob_path),
    (v_carol, v_carol_path),
):
    part.to_csv(path, index=False)

In [None]:
# Display each party's vertical partition (a column subset of `data`).
v_alice, v_bob, v_carol

In [None]:
from secretflow.data.horizontal import read_csv as h_read_csv
from secretflow.security.aggregation import SecureAggregator
from secretflow.security.compare import SPUComparator

# Later analysis calls on the horizontal DataFrame use the aggregator to
# aggregate data and the comparator to compare data across parties.
aggr = SecureAggregator(device=alice, participants=[alice, bob, carol])

# SPU device over the three parties, built from the testing cluster definition.
cluster = sf.utils.testing.cluster_def(parties=["alice", "bob", "carol"])
spu = sf.SPU(cluster)
comp = SPUComparator(spu)

# Mapping of party device -> that party's horizontal partition file.
h_paths = {alice: h_alice_path, bob: h_bob_path, carol: h_carol_path}
hdf = h_read_csv(h_paths, aggregator=aggr, comparator=comp)

In [None]:
from secretflow.data.vertical import read_csv as v_read_csv

# Build the vertical DataFrame from a mapping of party device -> that
# party's vertical partition file.
v_paths = {alice: v_alice_path, bob: v_bob_path, carol: v_carol_path}
vdf = v_read_csv(v_paths)

In [None]:
# Column labels of the horizontal DataFrame.
hdf.columns

In [None]:
# Column labels of the vertical DataFrame.
vdf.columns

In [None]:
# Print min() of the horizontal, vertical and plain pandas frames one after
# another so the three results can be compared.
for heading, frame in (
    ("Horizontal df:\n", hdf),
    ("\nVertical df:\n", vdf),
    ("\nPandas:\n", data),
):
    print(heading, frame.min())

In [None]:
# Maximum of the horizontal DataFrame.
hdf.max()

In [None]:
# Maximum of the vertical DataFrame.
vdf.max()

In [None]:
# Mean of the horizontal DataFrame, restricted to numeric columns.
hdf.mean(numeric_only=True)

In [None]:
# Mean of the vertical DataFrame, restricted to numeric columns.
vdf.mean(numeric_only=True)

In [None]:
# Count on the horizontal DataFrame.
hdf.count()

In [None]:
# Count on the vertical DataFrame.
vdf.count()

In [None]:
# Select two columns of the horizontal DataFrame and take their mean.
hdf_part = hdf[["sepal length (cm)", "target"]]
hdf_part.mean(numeric_only=True)

In [None]:
# Select two columns of the vertical DataFrame and take their mean.
# The selection must index `vdf`; indexing `hdf` also runs (it has the same
# column names) but would report the horizontal result under a vertical name.
vdf_part = vdf[["sepal width (cm)", "target"]]
vdf_part.mean(numeric_only=True)

In [None]:
# Take a copy of the horizontal DataFrame (modified in the next cell) and
# print the min and max of its "target" column before the change.
# The [0] picks the first entry of the min()/max() result.
hdf_copy = hdf.copy()
print("Min of target: ", hdf_copy["target"].min()[0])
print("Max of target: ", hdf_copy["target"].max()[0])

In [None]:
# Set target to 1.
hdf_copy["target"] = 1

# You can see that the value of target has become 1.
print("Min of target: ", hdf_copy["target"].min()[0])
print("Max of target: ", hdf_copy["target"].max()[0])

In [None]:
# Take a copy of the vertical DataFrame (modified in the next cell) and
# print the min and max of its "sepal width (cm)" column before the change.
# The [0] picks the first entry of the min()/max() result.
vdf_copy = vdf.copy()
print("Min of sepal width (cm): ", vdf_copy["sepal width (cm)"].min()[0])
print("Max of sepal width (cm): ", vdf_copy["sepal width (cm)"].max()[0])

In [None]:
# Set sepal width (cm) to 20.
vdf_copy["sepal width (cm)"] = 20

# You can see that the value of sepal width (cm) has become 20.
print("Min of sepal width (cm): ", vdf_copy["sepal width (cm)"].min()[0])
print("Max of sepal width (cm): ", vdf_copy["sepal width (cm)"].max()[0])

In [None]:
# Stop the profiling instrumentation started near the top of the notebook.
instrumentor.stop()

In [None]:
# Print the graph built by the Mermaid exporter registered on the tracer provider.
# NOTE(review): spans reach this exporter through a BatchSpanProcessor; confirm
# they have all been flushed by the time this cell runs.
print(mermaid.graph())