In [15]:
%pip install "syft>=0.8.2.b0,<0.9" -q

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3.1 -> 24.1.2
[notice] To update, run: C:\Users\smart\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import train_test_split
import syft as sy

In [3]:
# Set the host and port for the server
host = "127.0.0.1"  # Localhost
port = 8081  # Choose an appropriate port number

In [4]:
node = sy.orchestra.launch(name="test-domain-1", host=host, port=port, dev_mode=False, reset=True)
domain_client = node.login(email="info@openmined.org", password="changethis")

Starting test-domain-1 server on 127.0.0.1:8081
 Done.


Logged into <test-domain-1: High-side Datasite> as GUEST
Logged into <test-domain-1: High side Datasite> as <info@openmined.org>


In [18]:
print(domain_client)

<DatasiteClient: test-domain-1>


In [5]:
# Set the random seed for reproducibility
torch.manual_seed(42)

<torch._C.Generator at 0x1402e8cdef0>

In [6]:
# Load the Breast Cancer dataset
data = load_breast_cancer()
X, y = data.data, data.target

# Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Convert the data to PyTorch tensors
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.long)

In [7]:
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# Convert training data to a Syft ActionObject and send it to the domain
train_data = sy.ActionObject.from_obj(X_train)
train_labels = sy.ActionObject.from_obj(y_train)
train_data_domain_obj = train_data.send(domain_client)
train_labels_domain_obj = train_labels.send(domain_client)

# Ensure that the sum of the training data is as expected (sum value will be dataset specific)
assert torch.round(X_train.sum()) == torch.round(train_data_domain_obj.syft_action_data.sum())

In [9]:
# Define the MLP model
class MLP(nn.Module):
    def __init__(self, input_dims, out_dims):
        super().__init__()
        self.out_dims = out_dims
        self.linear1 = nn.Linear(input_dims, 128)
        self.linear2 = nn.Linear(128, out_dims)

    def forward(self, x):
        x = self.linear1(x)
        x = F.relu(x)
        x = self.linear2(x)
        return x

model = MLP(input_dims=X_train.shape[1], out_dims=2)
weights = model.state_dict()

In [10]:
# Convert model weights to a Syft ActionObject and send it to the domain
weights_obj = sy.ActionObject.from_obj(weights)
weights_domain_obj = weights_obj.send(domain_client)

print("Weights domain object:", type(weights_domain_obj), getattr(weights_domain_obj, 'id', 'No ID'))
print("Train data domain object:", type(train_data_domain_obj), getattr(train_data_domain_obj, 'id', 'No ID'))
print("Train labels domain object:", type(train_labels_domain_obj), getattr(train_labels_domain_obj, 'id', 'No ID'))

Weights domain object: <class 'syft.service.action.action_object.AnyActionObject'> b9b74e606c484bfd9a6bed3c0ae57192
Train data domain object: <class 'syft.service.action.action_object.AnyActionObject'> 65b88c1220be467ea27610ff1e362adb
Train labels domain object: <class 'syft.service.action.action_object.AnyActionObject'> ebb7dc93ca8e425db1e6dd8e05577bda


In [11]:
# Define the training function
@sy.syft_function(
    input_policy=sy.ExactMatch(weights=weights_domain_obj.id, data=train_data_domain_obj.id, labels=train_labels_domain_obj.id),
    output_policy=sy.SingleExecutionExactOutput(),
)
def train_mlp(weights, data, labels):
    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    from torch.utils.data import TensorDataset, DataLoader

    class MLP(nn.Module):
        def __init__(self, input_dims, out_dims):
            super().__init__()
            self.out_dims = out_dims
            self.linear1 = nn.Linear(input_dims, 128)
            self.linear2 = nn.Linear(128, out_dims)

        def forward(self, x):
            x = self.linear1(x)
            x = F.relu(x)
            x = self.linear2(x)
            return x

    # Initialize the model
    model = MLP(input_dims=data.shape[1], out_dims=2)

    # Load weights into the model
    model.load_state_dict(weights)

    # Prepare the dataset and data loader
    dataset = TensorDataset(data, labels)
    loader = DataLoader(dataset, batch_size=4, shuffle=True)

    # Define the optimizer and loss function
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    # Train the model
    model.train()
    for epoch in range(10):  # 10 epochs for demonstration purposes
        for batch_data, batch_labels in loader:
            optimizer.zero_grad()
            outputs = model(batch_data)
            loss = criterion(outputs, batch_labels)
            loss.backward()
            optimizer.step()

    # Perform a forward pass on the entire dataset to get the output
    model.eval()
    with torch.no_grad():
        output = model(data)

    return model.state_dict(), output

In [12]:
# Ensure that the domain objects are properly created
assert isinstance(weights_domain_obj, sy.ActionObject), "weights_domain_obj is not an ActionObject"
assert isinstance(train_data_domain_obj, sy.ActionObject), "train_data_domain_obj is not an ActionObject"
assert isinstance(train_labels_domain_obj, sy.ActionObject), "train_labels_domain_obj is not an ActionObject"

# Ensure the domain objects have valid IDs
print("weights_domain_obj ID:", getattr(weights_domain_obj, 'id', 'No ID'))
print("train_data_domain_obj ID:", getattr(train_data_domain_obj, 'id', 'No ID'))
print("train_labels_domain_obj ID:", getattr(train_labels_domain_obj, 'id', 'No ID'))

# Register and execute the training function
request = domain_client.code.request_code_execution(train_mlp)
request.approve()

result_ptr = domain_client.code.train_mlp(weights=weights_domain_obj.id, data=train_data_domain_obj.id, labels=train_labels_domain_obj.id)
trained_weights, train_output = result_ptr.get()

weights_domain_obj ID: b9b74e606c484bfd9a6bed3c0ae57192
train_data_domain_obj ID: 65b88c1220be467ea27610ff1e362adb
train_labels_domain_obj ID: ebb7dc93ca8e425db1e6dd8e05577bda
Approving request on change train_mlp for datasite test-domain-1


In [13]:
# Update local model with trained weights
model.load_state_dict(trained_weights)

# Evaluate the model on the test set
model.eval()
with torch.no_grad():
    test_output = model(X_test)
    test_predictions = torch.argmax(test_output, dim=1)

# Calculate evaluation metrics
accuracy = accuracy_score(y_test, test_predictions)
auc = roc_auc_score(y_test, test_output[:, 1])
precision = precision_score(y_test, test_predictions)
recall = recall_score(y_test, test_predictions)
f1 = f1_score(y_test, test_predictions)
conf_matrix = confusion_matrix(y_test, test_predictions)

print(f"Accuracy: {accuracy}")
print(f"AUC: {auc}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")
print(f"Confusion Matrix:\n{conf_matrix}")

Accuracy: 0.9824561403508771
AUC: 0.996069439895185
Precision: 0.9859154929577465
Recall: 0.9859154929577465
F1 Score: 0.9859154929577465
Confusion Matrix:
[[42  1]
 [ 1 70]]


In [14]:
# Convert metrics to Syft ActionObjects and send them to the domain
accuracy_obj = sy.ActionObject.from_obj(accuracy).send(domain_client)
auc_obj = sy.ActionObject.from_obj(auc).send(domain_client)
precision_obj = sy.ActionObject.from_obj(precision).send(domain_client)
recall_obj = sy.ActionObject.from_obj(recall).send(domain_client)
f1_obj = sy.ActionObject.from_obj(f1).send(domain_client)

# Print the IDs of the sent metrics
print("Accuracy Object ID:", accuracy_obj.id)
print("AUC Object ID:", auc_obj.id)
print("Precision Object ID:", precision_obj.id)
print("Recall Object ID:", recall_obj.id)
print("F1 Score Object ID:", f1_obj.id)

Accuracy Object ID: 488f679faccb4a5aa24ad7f54fa5eabf
AUC Object ID: ab25e0f063164eddbd148d0df1703f4f
Precision Object ID: 5184384143ca4e9fb53f798721766f15
Recall Object ID: 4403ffff1718450bbcc21e7ac0f7a386
F1 Score Object ID: 5884b8c0e5a942f59d6506933b91a41a


In [16]:
# Define a function to print the metrics on the server
@sy.syft_function(
    input_policy=sy.ExactMatch(accuracy=accuracy_obj.id, auc=auc_obj.id, precision=precision_obj.id, recall=recall_obj.id, f1=f1_obj.id),
    output_policy=sy.SingleExecutionExactOutput(),
)
def print_metrics(accuracy, auc, precision, recall, f1):
    print(f"Accuracy: {accuracy}")
    print(f"AUC: {auc}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1 Score: {f1}")

# Register and execute the print function
request = domain_client.code.request_code_execution(print_metrics)
request.approve()

domain_client.code.print_metrics(
    accuracy=accuracy_obj.id,
    auc=auc_obj.id,
    precision=precision_obj.id,
    recall=recall_obj.id,
    f1=f1_obj.id,
)

Approving request on change print_metrics for datasite test-domain-1



**Pointer**

None
