# Data Scientist (DS)

Set `LOCAL_TEST` to `True` to run the clients locally for testing.   
With `LOCAL_TEST = False`, make sure your SyftBox client is running. Installation instructions: https://www.syftbox.net/.

In [None]:
# True: the login cell sets up a local RDS server stack and opens sessions through it.
# False: the login cell opens sessions with `syft_rds.init_session` using the email from `Client.load()`.
LOCAL_TEST = True

## Some paths and constants 

In [None]:
from pathlib import Path

# Name of the dataset that is fetched from every data owner's session and
# passed as `dataset_name` when jobs are submitted.
SYFTBOX_DATASET_NAME = "pima-indians-diabetes-database"

## Log into the data owners' datasites

In [None]:
# Open one session per data owner (DO) and collect the sessions and the DO
# emails into lists for the cells that follow.
if not LOCAL_TEST:
    # Remote mode: the DS identity is the email returned by `Client.load()`.
    import syft_rds as sy
    from syft_core import Client

    DS = Client.load().email
    print("DS email: ", DS)

    DO1, DO2 = (
        "flower-test-group-1@openmined.org",
        "flower-test-group-2@openmined.org",
    )

    do_client_1 = sy.init_session(host=DO1)
    print("Logged into: ", do_client_1.host)

    do_client_2 = sy.init_session(host=DO2)
    print("Logged into: ", do_client_2.host)
else:
    # Local mode: build an RDS server stack for the DS in the current folder
    # and open both DO sessions through it.
    from syft_rds.orchestra import setup_rds_server

    print("Running locally!")

    DS = "ds@openmined.org"
    print("DS email: ", DS)

    DO1, DO2 = "do1@openmined.org", "do2@openmined.org"

    ds_stack = setup_rds_server(email=DS, key="flwr", root_dir=Path("."))
    do_client_1 = ds_stack.init_session(host=DO1)
    do_client_2 = ds_stack.init_session(host=DO2)

# Both lists are ordered DO1 first, DO2 second.
do_emails = [DO1, DO2]
do_clients = [do_client_1, do_client_2]

## Explore the datasets

In [None]:
# Echo the dataset name that is looked up on each data owner's session below.
SYFTBOX_DATASET_NAME

In [None]:
# For every data owner session: fetch the dataset by name, remember the path
# returned by `get_mock_path()` (used later by the syft_flwr simulation), and
# print the dataset's representation.
mock_paths = []
for client in do_clients:
    dataset = client.dataset.get(name=SYFTBOX_DATASET_NAME)
    mock_paths.append(dataset.get_mock_path())
    print(f"Client {client.host}'s dataset: \n{dataset}\n")

## Bootstrapping and running the `syft_flwr` simulation

In [None]:
# Folder holding the Flower project that the following cells bootstrap,
# simulate, and submit as job code.
SYFT_FLWR_PROJECT_PATH = Path("./EXPERIMENT_NAME")

# Fail fast with the resolved location so a wrong working directory or a
# missing project folder is obvious here rather than in a later cell.
assert SYFT_FLWR_PROJECT_PATH.exists(), (
    f"syft_flwr project folder not found: {SYFT_FLWR_PROJECT_PATH.resolve()}"
)

In [None]:
import syft_flwr

try:
    !rm -rf {SYFT_FLWR_PROJECT_PATH / "main.py"}
    syft_flwr.bootstrap(SYFT_FLWR_PROJECT_PATH, aggregator=DS, datasites=do_emails)
    print("Bootstrapped project successfully ✅")
except Exception as e:
    print(e)

## Run `flwr` and `syft_flwr` simulations (optional)

In [None]:
# Gates the optional `flwr run` and `syft_flwr.run` cells below; set to False to skip both.
RUN_SIMULATION = True

In [None]:
# Optional: run the project folder through the `flwr run` command line.
# Skipped when RUN_SIMULATION is False.
if RUN_SIMULATION:
    !flwr run {SYFT_FLWR_PROJECT_PATH}

In [None]:
# Clean up: delete the `__pycache__` folder inside the project's
# `EXPERIMENT_NAME` subfolder and the `weights/` folder in the current working
# directory (presumably leftovers of the simulation run above).
!rm -rf {SYFT_FLWR_PROJECT_PATH / "EXPERIMENT_NAME" / "__pycache__"}
!rm -rf weights/

In [None]:
# Echo the mock paths collected from the data owners' datasets.
mock_paths

In [None]:
# Optional: run the syft_flwr simulation for the project, passing it the mock
# paths collected from the data owners' datasets. Skipped when RUN_SIMULATION
# is False.
if RUN_SIMULATION:
    print(f"running syft_flwr simulation with mock paths: {mock_paths}")
    syft_flwr.run(SYFT_FLWR_PROJECT_PATH, mock_paths)

## Submit jobs

<img src="./images/dsSendsJobs.png" width="80%" alt="DS Submits Jobs">

In [None]:
# Clean up before submitting jobs: delete the `__pycache__` folder inside the
# project's `EXPERIMENT_NAME` subfolder, the project's `simulation_logs`
# folder, and the `weights/` folder in the current working directory.
# NOTE(review): presumably so these artifacts are not sent along with the
# project folder passed as `user_code_path` below — confirm.
!rm -rf {SYFT_FLWR_PROJECT_PATH / "EXPERIMENT_NAME" / "__pycache__"}
!rm -rf {SYFT_FLWR_PROJECT_PATH / "simulation_logs"}
!rm -rf weights/

In [None]:
# The same job description goes to every data owner: the project folder as the
# code, the shared dataset name, and `main.py` as the entrypoint.
job_request = {
    "name": "Syft Flower Experiment",
    "description": "Syft Flower Federated Learning Experiment",
    "user_code_path": SYFT_FLWR_PROJECT_PATH,
    "dataset_name": SYFTBOX_DATASET_NAME,
    "tags": ["federated learning", "fl", "syft_flwr", "flwr"],
    "entrypoint": "main.py",
}

# Submit one job per data owner session and print what `submit` returns.
for client in do_clients:
    print(f"sending job to {client.host}")
    job = client.jobs.submit(**job_request)
    print(job)

<img src="./images/dsDoneSubmittingJobs.png" width="40%" alt="DS waits for jobs to be approved">

## DS starts the FL server code

In [None]:
import os

if LOCAL_TEST:
    os.environ["SYFTBOX_CLIENT_CONFIG_PATH"] = str(ds_stack.client.config_path)

os.environ["LOGURU_LEVEL"] = "DEBUG"
os.environ["SYFT_FLWR_MSG_TIMEOUT"] = "60"

!uv run {str(SYFT_FLWR_PROJECT_PATH / "main.py")} --active

By running the FL server code, the DS aggregates the models trained on the DOs' private local data into an improved global model.

<img src="./images/dsAggregateModels.png" width="30%" alt="DS Aggregates Models">

## DS Observes the Results

Now the DS can monitor the aggregated models, trained on the DOs' private datasets, in the `weights` folder.