In [1]:
from p2pfl.node import Node
from p2pfl.learning.pytorch.mnist_examples.mnistfederated_dm import MnistFederatedDM
from p2pfl.learning.pytorch.mnist_examples.models.mlp import MLP
from collections import OrderedDict
import torch
import time


def test_convergence(x):
    """Check that every node finishes federated learning with the same model.

    Starts ``n`` nodes, connects each node to the one created just before it,
    asserts that every node reports ``n - 1`` neighbours, launches learning
    from the first node and, once every node's ``round`` is ``None``, compares
    the parameters of all nodes layer by layer after rounding to two decimals.

    Every node that was started is stopped before the function exits, even
    when an assertion fails, so a failed run does not leave nodes running.

    Args:
        x: Pair ``(n, r)``: number of nodes to start and number of learning
            rounds to request.

    Returns:
        The list of created nodes; they have already been stopped.

    Raises:
        AssertionError: If some node does not report ``n - 1`` neighbours, or
            if two nodes end up with different (rounded) parameters.
    """
    n, r = x

    nodes = []
    try:
        for _ in range(n):
            node = Node(MLP(), MnistFederatedDM(), simulation=False)
            node.start()
            nodes.append(node)

        # Node connection: each node dials only its predecessor.
        for previous, current in zip(nodes, nodes[1:]):
            current.connect_to(previous.host, previous.port)
            time.sleep(.1)

        # Give the network time to settle before counting neighbours.
        time.sleep(3)

        # Check that every node sees all the others.
        for node in nodes:
            assert len(node.neightboors) == n - 1

        # Start learning
        nodes[0].set_start_learning(rounds=r, epochs=0)

        # Wait for results: poll once per second until no node has a round set.
        while True:
            time.sleep(1)
            if all(node.round is None for node in nodes):
                break

        # Validate that the resulting models are equal, using the first node's
        # parameters as the reference.
        reference = nodes[0].learner.get_parameters()
        for node in nodes[1:]:
            candidate = node.learner.get_parameters()
            for layer in reference:
                a = torch.round(reference[layer], decimals=2)
                b = torch.round(candidate[layer], decimals=2)
                assert torch.eq(a, b).all()
    finally:
        # Shut down every node that was started, whether or not the checks
        # above passed.
        for node in nodes:
            node.stop()
            time.sleep(.1)  # Wait for the asynchronous shutdown

    return nodes
        
# Run the convergence check with 4 nodes and 2 learning rounds; the returned
# nodes have already been stopped by test_convergence.
nodes = test_convergence((4,2))

Train: 54000 Val:6000 Test:10000
INFO:root:Nodo listening at 127.0.0.1:39085
Train: 54000 Val:6000 Test:10000
INFO:root:Nodo listening at 127.0.0.1:52381
Train: 54000 Val:6000 Test:10000
INFO:root:Nodo listening at 127.0.0.1:35047
Train: 54000 Val:6000 Test:10000
INFO:root:Nodo listening at 127.0.0.1:58765
INFO:root:('127.0.0.1', 39085) Conexión aceptada con 127.0.0.1:52381
INFO:root:('127.0.0.1', 52381) Connected to 127.0.0.1:39085
INFO:root:('127.0.0.1', 35047) Connected to 127.0.0.1:52381
INFO:root:('127.0.0.1', 52381) Conexión aceptada con 127.0.0.1:35047
INFO:root:('127.0.0.1', 35047) Conexión aceptada con 127.0.0.1:39085
INFO:root:('127.0.0.1', 39085) Connected to 127.0.0.1:35047
INFO:root:('127.0.0.1', 35047) Conexión aceptada con 127.0.0.1:58765
INFO:root:('127.0.0.1', 58765) Connected to 127.0.0.1:35047
INFO:root:('127.0.0.1', 58765) Conexión aceptada con 127.0.0.1:39085
INFO:root:('127.0.0.1', 39085) Connected to 127.0.0.1:58765
INFO:root:('127.0.0.1', 52381) Connected to 127

DEBUG:root:Closed connection: ('127.0.0.1', 39085)
DEBUG:root:Closed connection: ('127.0.0.1', 39085)
DEBUG:root:Closed connection: ('127.0.0.1', 39085)
DEBUG:root:Closed connection: ('127.0.0.1', 52381)
DEBUG:root:Closed connection: ('127.0.0.1', 58765)
DEBUG:root:Closed connection: ('127.0.0.1', 35047)
INFO:root:Bajando el nodo, dejando de escuchar en 127.0.0.1 52381 y desconectándose de 2 nodos
DEBUG:root:Closed connection: ('127.0.0.1', 52381)
DEBUG:root:Closed connection: ('127.0.0.1', 52381)
DEBUG:root:Closed connection: ('127.0.0.1', 35047)
DEBUG:root:Closed connection: ('127.0.0.1', 58765)
INFO:root:Bajando el nodo, dejando de escuchar en 127.0.0.1 35047 y desconectándose de 1 nodos
DEBUG:root:Closed connection: ('127.0.0.1', 35047)
DEBUG:root:Closed connection: ('127.0.0.1', 58765)
INFO:root:Bajando el nodo, dejando de escuchar en 127.0.0.1 58765 y desconectándose de 0 nodos


In [2]:
from p2pfl.learning.pytorch.mnist_examples.mnistfederated_dm import MnistFederatedDM
from p2pfl.learning.pytorch.mnist_examples.models.cnn import CNN
from p2pfl.learning.pytorch.mnist_examples.models.mlp import MLP
from p2pfl.node import Node
import pytest
import time
import threading

# Module-level list. test_node_down_on_learning builds its own local `nodes`,
# so this one is not filled by the test.
nodes = []
        
def test_node_down_on_learning(n):
    """Run a two-round learning session on ``n`` nodes and wait for it to end.

    Starts ``n`` nodes, connects each node to the one created just before it,
    asserts that every node reports ``n - 1`` neighbours, starts learning from
    the first node and polls until every node's ``round`` is ``None``.

    The step that would stop a node in the middle of learning is currently
    commented out, so as written no node is taken down during the session.

    Every node that was started is stopped before the function exits, even
    when an assertion fails, so a failed run does not leave nodes running.

    Args:
        n: Number of nodes to start.

    Raises:
        AssertionError: If some node does not report ``n - 1`` neighbours.
    """
    # Node Creation
    nodes = []
    try:
        for _ in range(n):
            node = Node(MLP(), MnistFederatedDM())
            node.start()
            nodes.append(node)

        # Node Connection: each node dials only its predecessor.
        for previous, current in zip(nodes, nodes[1:]):
            current.connect_to(previous.host, previous.port)
            time.sleep(0.1)

        # Check that every node sees all the others.
        for node in nodes:
            assert len(node.neightboors) == n - 1

        # Start Learning
        nodes[0].set_start_learning(rounds=2, epochs=0)

        # Stopping node (the actual stop call is disabled below)
        time.sleep(0.3)
        print("STPIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIING")
        #nodes[1].stop()

        # Wait for results: poll once per second until no node has a round set.
        while True:
            time.sleep(1)
            if all(node.round is None for node in nodes):
                break
    finally:
        # Shut down every node that was started, whether or not the checks
        # above passed.
        for node in nodes:
            node.stop()


# Driver: the unconditional `break` ends the loop after its first pass, so the
# test runs once with 4 nodes even though range(6) is requested.
nodes = []
for _ in range(6):
    test_node_down_on_learning(4)
    break


Train: 54000 Val:6000 Test:10000
INFO:root:Nodo listening at 127.0.0.1:48939
Train: 54000 Val:6000 Test:10000
INFO:root:Nodo listening at 127.0.0.1:56855
Train: 54000 Val:6000 Test:10000
INFO:root:Nodo listening at 127.0.0.1:57255
Train: 54000 Val:6000 Test:10000
INFO:root:Nodo listening at 127.0.0.1:38971
INFO:root:('127.0.0.1', 56855) Connected to 127.0.0.1:48939
INFO:root:('127.0.0.1', 48939) Conexión aceptada con 127.0.0.1:56855
INFO:root:('127.0.0.1', 57255) Connected to 127.0.0.1:56855
INFO:root:('127.0.0.1', 56855) Conexión aceptada con 127.0.0.1:57255
INFO:root:('127.0.0.1', 48939) Connected to 127.0.0.1:57255
INFO:root:('127.0.0.1', 57255) Conexión aceptada con 127.0.0.1:48939
INFO:root:('127.0.0.1', 38971) Connected to 127.0.0.1:57255
INFO:root:('127.0.0.1', 57255) Conexión aceptada con 127.0.0.1:38971
INFO:root:('127.0.0.1', 56855) Connected to 127.0.0.1:38971
INFO:root:('127.0.0.1', 48939) Connected to 127.0.0.1:38971
INFO:root:('127.0.0.1', 38971) Conexión aceptada con 127

DEBUG:root:Closed connection: ('127.0.0.1', 56855)
DEBUG:root:Closed connection: ('127.0.0.1', 56855)
DEBUG:root:Closed connection: ('127.0.0.1', 48939)
INFO:root:Bajando el nodo, dejando de escuchar en 127.0.0.1 38971 y desconectándose de 3 nodos
DEBUG:root:Closed connection: ('127.0.0.1', 57255)
DEBUG:root:Closed connection: ('127.0.0.1', 38971)
DEBUG:root:Closed connection: ('127.0.0.1', 57255)
DEBUG:root:Closed connection: ('127.0.0.1', 56855)
DEBUG:root:Closed connection: ('127.0.0.1', 57255)
DEBUG:root:Closed connection: ('127.0.0.1', 38971)
DEBUG:root:Closed connection: ('127.0.0.1', 38971)


In [3]:
import pytest
from p2pfl.base_node import BaseNode
from p2pfl.communication_protocol import CommunicationProtocol
import time
import pytest
from p2pfl.learning.pytorch.mnist_examples.mnistfederated_dm import MnistFederatedDM
from p2pfl.learning.pytorch.mnist_examples.models.mlp import MLP
from p2pfl.node import Node
from p2pfl.settings import Settings


def four_nodes():
    """Create and start four MLP nodes backed by the MNIST federated data module.

    All four nodes are constructed first and only then started, in creation
    order.

    Returns:
        Tuple with the four started nodes.
    """
    created = tuple(Node(MLP(), MnistFederatedDM()) for _ in range(4))
    for node in created:
        node.start()
    return created

def test_gossip_heartbeat():
    """Check the heartbeater's node lists on a chain of four nodes.

    Reads the module-level nodes ``n1``..``n4``. Connects them as a chain
    (n1 to n2, n2 to n3, n3 to n4) with ``full=False``, prints each node's
    heartbeater node count after 1 second and again after 2 more seconds, and
    asserts that all four counts equal 3. Then stops ``n1``, sleeps for
    ``Settings.NODE_TIMEOUT + 1`` seconds and asserts that the three remaining
    nodes each report 2 nodes, before stopping them.

    Raises:
        AssertionError: If the reported node counts differ from the expected
            values.
    """
    def known(node):
        # Number of nodes currently reported by this node's heartbeater.
        return len(node.heartbeater.get_nodes())

    # Build the chain n1 -> n2 -> n3 -> n4.
    for source, target in ((n1, n2), (n2, n3), (n3, n4)):
        source.connect_to(target.host, target.port, full=False)

    time.sleep(1)  # Wait for asynchrony
    print(known(n1), known(n2), known(n3), known(n4))

    time.sleep(2)  # Wait for asynchrony

    print(known(n1), known(n2), known(n3), known(n4))
    assert known(n1) == known(n2) == known(n3) == known(n4) == 3

    n1.stop()

    # Sleep past the node timeout before counting again.
    time.sleep(Settings.NODE_TIMEOUT + 1)

    assert known(n2) == known(n3) == known(n4) == 2

    # Stop the remaining nodes
    for node in (n2, n3, n4):
        node.stop()



# test_gossip_heartbeat() takes no arguments and reads n1..n4 as globals, so
# they must be bound here before it is called.
n1, n2, n3, n4 = four_nodes()
test_gossip_heartbeat()

Train: 54000 Val:6000 Test:10000
Train: 54000 Val:6000 Test:10000
Train: 54000 Val:6000 Test:10000
Train: 54000 Val:6000 Test:10000
INFO:root:Nodo listening at 127.0.0.1:36049
INFO:root:Nodo listening at 127.0.0.1:44643
INFO:root:Nodo listening at 127.0.0.1:33047
INFO:root:Nodo listening at 127.0.0.1:55151
INFO:root:127.0.0.1:36049 Connected to 127.0.0.1:44643
Nodo conectado---------------------------------------------------------
INFO:root:127.0.0.1:44643 Conexión aceptada con 127.0.0.1:36049
Nodo conectado---------------------------------------------------------
--------Beat received127.0.0.1:44643
--------Beat received127.0.0.1:36049
INFO:root:127.0.0.1:44643 Connected to 127.0.0.1:33047
Nodo conectado---------------------------------------------------------
INFO:root:127.0.0.1:33047 Conexión aceptada con 127.0.0.1:44643
Nodo conectado---------------------------------------------------------
--------Beat received127.0.0.1:33047
--------Beat received127.0.0.1:44643
INFO:root:127.0.0.

In [3]:
# Same three-node check that test_gossip_heartbeat() makes after stopping n1;
# it relies on the n2..n4 globals bound by the earlier cell.
assert len(n2.heartbeater.get_nodes()) == len(n3.heartbeater.get_nodes()) == len(n4.heartbeater.get_nodes()) == 2
