In [1]:
%cd ../

/Users/sarchey1/paper/FederatedLearning


In [2]:
import warnings
warnings.simplefilter('ignore')

## Chain graph Neural Network Experiment

Before geting into the experiment details, let's review algorithm 1 and the primal and dual updates.

### Algorithm 1

![title](../algorithm1.png)

In [12]:
# %load algorithm/main.py
from sklearn.metrics import mean_squared_error

# from algorithm.penalty import *


from abc import ABC
from joblib import Parallel, delayed
import copy



class OptVar(ABC):
    def __init__(self, vectors):
        self.vectors = vectors
        
    def get_node_vectors(self, node):
        node_vectors = []
        for vec in self.vectors:
            node_vectors.append(vec[node])
        return node_vectors
    
    def set_node_vectors(self, node, node_vectors):
        for i, node_vec in enumerate(node_vectors):
            self.vectors[i][node] = node_vec
            
    def diff(self, var2):
        diff_vec = []
        for i, sub_w in enumerate(self.vectors):
            diff_vec.append(sub_w - var2.vectors[i])
        return OptVar(diff_vec)
    
    def mult(self, num):
        for i, sub_vec in enumerate(self.vectors):
            self.vectors[i] = num * sub_vec
    

class PrimalVar(OptVar):
    def __init__(self, primal_vectors, T_matrix, D):
        super(PrimalVar, self).__init__(primal_vectors)
        self.T_matrix = T_matrix
        self.D = D
            
    def update(self, new_u):
        hat_w = []
        for i, sub_w in enumerate(self.vectors):
            hat_w.append(sub_w - np.dot(self.T_matrix, np.dot(self.D.T, new_u.vectors[i])))
        return OptVar(hat_w)
    
class DualVar(OptVar):
    def __init__(self, dual_vectors, Sigma, D):
        super(DualVar, self).__init__(dual_vectors)
        self.Sigma = Sigma
        self.D = D
            
    def update(self, tilde_w):
        for i, sub_vec in enumerate(self.vectors):
            self.vectors[i] = sub_vec + np.dot(self.Sigma, np.dot(self.D, tilde_w.vectors[i]))

            
def primal_update(hat_w_i, datapoints, i):
    optimizer = datapoints[i]['optimizer']
    res = optimizer.optimize(
        datapoints[i]['features'], 
        datapoints[i]['label'], 
        hat_w_i, 
        datapoints[i]['degree'],
    )
    return res, i
    

            
def algorithm_1(K, D, weight_vec, datapoints, true_labels, samplingset, lambda_lasso, penalty_func_name='norm1', calculate_score=False):
    '''
    :param K: the number of iterations
    :param D: the block incidence matrix
    :param weight_vec: a list containing the edges's weights of the graph
    :param datapoints: a dictionary containing the data of each node in the graph needed for the algorithm 1
    :param true_labels: a list containing the true labels of the nodes
    :param samplingset: the sampling set
    :param lambda_lasso: the parameter lambda
    :param penalty_func_name: the name of the penalty function used in the algorithm

    :return iteration_scores: the mean squared error of the predicted weight vectors in each iteration
    :return new_w: the predicted weigh vectors for each node
    '''

    Sigma = np.diag(np.full(weight_vec.shape, 0.9 / 2))
    '''
    Sigma: the block diagonal matrix Sigma
    '''
    T_matrix = np.diag(np.array((1.0 / (np.sum(abs(D), 0)))).ravel())
    '''
    T_matrix: the block diagonal matrix T
    '''

    if np.linalg.norm(np.dot(Sigma ** 0.5, D).dot(T_matrix ** 0.5), 2) > 1:
        print ('product norm', np.linalg.norm(np.dot(Sigma ** 0.5, D).dot(T_matrix ** 0.5), 2))

    E, N = D.shape
#     m, n = datapoints[0]['features'].shape
    m = len(datapoints[0]['features'])
    n = np.sum(datapoints[0]['features'][0].shape)

    # define the penalty function
    if penalty_func_name == 'norm1':
#         penalty_func = Norm1Pelanty(lambda_lasso, weight_vec, Sigma, n)
        penalty_func = Norm2Pelanty(lambda_lasso, weight_vec, Sigma, n)

    elif penalty_func_name == 'norm2':
        penalty_func = Norm2Pelanty(lambda_lasso, weight_vec, Sigma, n)

    elif penalty_func_name == 'mocha':
        penalty_func = MOCHAPelanty(lambda_lasso, weight_vec, Sigma, n)
   
    elif penalty_func_name == 'sq_norm2':
        penalty_func = SquaredNorm2Pelanty(lambda_lasso, weight_vec, Sigma, n)

    else:
        raise Exception('Invalid penalty name')

    # starting algorithm 1
    model = datapoints[0]['optimizer'].model
    out1, in1 = model.linear1.in_features, model.linear1.out_features
    out2, in2 = model.linear2.in_features, model.linear2.out_features
    
    l1 = np.random.random(in1*out1)
    l2 = np.random.random(in2*out2)
    new_w = PrimalVar([
        np.array([l1 for i in range(N)]), 
        np.array([l2 for i in range(N)]),
    ], T_matrix, D)
    hat_w = np.copy(new_w)
    '''
    new_w: the primal variable of the algorithm 1
    '''
    new_u = DualVar([
        np.array([l1 for i in range(E)]),
        np.array([l2 for i in range(E)]),
    ], Sigma, D)
    '''
    new_u: the dual variable of the algorithm 1
    '''

    iteration_scores = []
    for iterk in range(K):
        if iterk % 2 == 0:
            alg1_score = get_algorithm1_error(datapoints, new_w, samplingset)
            print ('iter:', iterk, alg1_score)
        prev_w = copy.deepcopy(new_w)

        # algorithm 1, line 2
        hat_w = new_w.update(new_u)
        
#         results = Parallel(n_jobs=10)(delayed(primal_update)(hat_w.get_node_vectors(i), datapoints, i)
#                                              for i in samplingset)  
        
#         for res, i in results:
#             new_w.set_node_vectors(i, res)

        for i in samplingset:
            res, i = primal_update(hat_w.get_node_vectors(i), datapoints, i)
            new_w.set_node_vectors(i, res)
 
        for i in range(N):
            if i in samplingset:  # algorithm 1, line 6
#                 optimizer = datapoints[i]['optimizer']
#                 new_w.set_node_vectors(i, optimizer.optimize(
#                     datapoints[i]['features'], 
#                     datapoints[i]['label'], 
#                     hat_w.get_node_vectors(i), 
#                     datapoints[i]['degree'],
#                 ))
                pass
            else:
                new_w.vectors[0][i] = hat_w.vectors[0][i]
                new_w.vectors[1][i] = hat_w.vectors[1][i]

        # algorithm 1, line 9
        tilde_w = new_w.diff(prev_w)
        tilde_w.mult(2)
        new_u.update(tilde_w)
        
        # algorithm 1, line 10
        new_u.vectors[0] = penalty_func.update(new_u.vectors[0])
        new_u.vectors[1] = penalty_func.update(new_u.vectors[1])
        

        # calculate the MSE of the predicted weight vectors
        if calculate_score:
            pass
#             Y_pred = []
#             for i in range(N):
#                 Y_pred.append(np.dot(datapoints[i]['features'], new_w[i]))

#             iteration_scores.append(mean_squared_error(true_labels.reshape(N, m), Y_pred))

    # print (np.max(abs(new_w - prev_w)))
    
    for i in range(N):
        optimizer = datapoints[i]['optimizer']
        
        optimizer.model.linear1.weight.data = torch.from_numpy(
            np.array(new_w.vectors[0][i].reshape(in1, out1), dtype=np.float32))
        optimizer.model.linear2.weight.data = torch.from_numpy(
            np.array(new_w.vectors[1][i].reshape(in2, out2), dtype=np.float32))


    return iteration_scores, new_w, new_u


### Primal Update 

As you see in the algorithm picture, the primal update needs a optimizer operator for the sampling set (line 6). We have implemented the optimizers discussed in the paper, both the logistic loss and squared error loss optimizers implementations with pytorch is available, also we have implemented the squared error loss optimizer using the fixed point equation in the `Networked Linear Regression` section of the paper.  

In [6]:
# %load algorithm/optimizer.py 
import torch
import abc
import numpy as np

from abc import ABC
import torch.nn.functional as F

# The linear model which is implemented by pytorch
class TorchLinearModel(torch.nn.Module):
    def __init__(self, n):
        super(TorchLinearModel, self).__init__()
        self.linear = torch.nn.Linear(n, 1, bias=False)

    def forward(self, x):
        y_pred = self.linear(x)
        return y_pred

# The SimpleLinear which is for MNIST experiment
class SimpleLinear(torch.nn.Module):

    def __init__(self, h1=2048):
        super().__init__()
        self.linear1 = torch.nn.Linear(28*28, h1)
        self.linear2 = torch.nn.Linear(h1, 2)

    def forward(self, x):
        x = x.view(-1, 28 * 28)
        x = self.linear1(x)
        x = F.relu(x)
        x = self.linear2(x)
        return F.log_softmax(x, dim=1)


# The abstract optimizer model which should have model, optimizer, and criterion as the input
class Optimizer(ABC):
    def __init__(self, model, optimizer, criterion):
        self.model = model
        self.optimizer = optimizer
        self.criterion = criterion

    @abc.abstractmethod
    def optimize(self, x_data, y_data, old_weight, regularizer_term):
        out1, in1 = self.model.linear1.in_features, self.model.linear1.out_features
        out2, in2 = self.model.linear2.in_features, self.model.linear2.out_features
        
        torch_old_weight0 = torch.from_numpy(np.array(old_weight[0], dtype=np.float32).reshape(in1, out1))
        torch_old_weight1 = torch.from_numpy(np.array(old_weight[1], dtype=np.float32).reshape(in2, out2))
        
        if np.sum(old_weight[0]) == 0:
#             print('hereee ', np.sum(old_weight[0]))
            self.model.linear1.weight.data = torch.tensor(np.array(np.random.rand(in1*out1), dtype=np.float32).reshape(in1, out1))
            self.model.linear2.weight.data = torch.tensor(np.array(np.random.rand(in2*out2), dtype=np.float32).reshape(in2, out2))
        else:
            self.model.linear1.weight.data = torch.tensor(np.array(old_weight[0], dtype=np.float32).reshape(in1, out1))
            self.model.linear2.weight.data = torch.tensor(np.array(old_weight[1], dtype=np.float32).reshape(in2, out2))
            
        for iterinner in range(40):
#         for iterinner in range(30):
            y_pred = self.model(x_data)
#             y_pred = torch.argmax(y_pred, axis=1)
#             loss1 = self.criterion(y_pred.ravel(), y_data.ravel())
            loss1 = self.criterion(y_pred, y_data)
            loss2 = 1 / (2 * regularizer_term) * torch.mean((self.model.linear1.weight.data - torch_old_weight0) ** 2)  # + 10000*torch.mean((model.linear.bias+0.5)**2)#model.linear.weight.norm(2)
            loss3 = 1 / (2 * regularizer_term) * torch.mean((self.model.linear2.weight.data - torch_old_weight1) ** 2)
            loss = loss1 + loss2 + loss3
            loss.backward()
            self.optimizer.step()
            self.optimizer.zero_grad()

#         print('old_weight', old_weight)
#         print('model', self.model.linear1.weight.data.numpy().ravel(), self.model.linear2.weight.data.numpy().ravel())
        return [self.model.linear1.weight.data.numpy().ravel(), 
                self.model.linear2.weight.data.numpy().ravel()]


# The linear model in Networked Linear Regression section of the paper
class LinearModel:
    def __init__(self, degree, features, label):
        mtx1 = 2 * degree * np.dot(features.T, features).astype('float64')
        mtx1 += 1 * np.eye(mtx1.shape[0])
        mtx1_inv = np.linalg.inv(mtx1)

        mtx2 = 2 * degree * np.dot(features.T, label).T

        self.mtx1_inv = mtx1_inv
        self.mtx2 = mtx2

    def forward(self, x):
        mtx2 = x + self.mtx2
        mtx_inv = self.mtx1_inv

        return np.dot(mtx_inv, mtx2)


# The Linear optimizer in Networked Linear Regression section of the paper
class LinearOptimizer(Optimizer):

    def __init__(self, model):
        super(LinearOptimizer, self).__init__(model, None, None)

    def optimize(self, x_data, y_data, old_weight, regularizer_term):
        return self.model.forward(old_weight)


# The Linear optimizer model which is implemented by pytorch
class TorchLinearOptimizer(Optimizer):
    def __init__(self, model, criterion):
#         criterion = torch.nn.MSELoss(reduction='mean')
        optimizer = torch.optim.RMSprop(model.parameters())
        super(TorchLinearOptimizer, self).__init__(model, optimizer, criterion)

    def optimize(self, x_data, y_data, old_weight, regularizer_term):
        return super(TorchLinearOptimizer, self).optimize(x_data, y_data, old_weight, regularizer_term)


# The Logistic optimizer model which is implemented by pytorch
class TorchLogisticOptimizer(Optimizer):
    def __init__(self, model):
        criterion = torch.nn.BCELoss(reduction='mean')
        optimizer = torch.optim.RMSprop(model.parameters())
        super(TorchLogisticOptimizer, self).__init__(model, optimizer, criterion)

    def optimize(self, x_data, y_data, old_weight, regularizer_term):
        return super(TorchLogisticOptimizer, self).optimize(x_data, y_data, old_weight, regularizer_term)


### Dual Update 

As mentioned in the paper, the dual update has a penalty function(line 10) which is either norm1, norm2, or mocha.

In [7]:
# %load algorithm/penalty.py
import abc
import numpy as np

from abc import ABC


# The abstract penalty function which has a function update
class Penalty(ABC):
    def __init__(self, lambda_lasso, weight_vec, Sigma, n):
        self.lambda_lasso = lambda_lasso
        self.weight_vec = weight_vec
        self.Sigma = Sigma

    @abc.abstractmethod
    def update(self, new_u):
        pass


# The norm2 penalty function
class Norm2Pelanty(Penalty):
    def __init__(self, lambda_lasso, weight_vec, Sigma, n):
        super(Norm2Pelanty, self).__init__(lambda_lasso, weight_vec, Sigma, n)
        self.limit = np.array(lambda_lasso * weight_vec)

    def update(self, new_u):
        normalized_u = np.where(np.linalg.norm(new_u, axis=1) >= self.limit)
        new_u[normalized_u] = (new_u[normalized_u].T * self.limit[normalized_u] / np.linalg.norm(new_u[normalized_u], axis=1)).T
        return new_u


# The squared norm2 penalty function
class SquaredNorm2Pelanty(Penalty):
    def __init__(self, lambda_lasso, weight_vec, Sigma, n):
        super(SquaredNorm2Pelanty, self).__init__(lambda_lasso, weight_vec, Sigma, n)
        self.normalize_factor = 1 + np.dot(2 * self.Sigma, 1/(self.lambda_lasso * self.weight_vec))

    def update(self, new_u):
        for i in range(new_u.shape[1]):
            new_u[:, i] /= self.normalize_factor

        return new_u
    
    

# The MOCHA penalty function
class MOCHAPelanty(Penalty):
    def __init__(self, lambda_lasso, weight_vec, Sigma, n):
        super(MOCHAPelanty, self).__init__(lambda_lasso, weight_vec, Sigma, n)
        self.normalize_factor = 1 + np.dot(self.Sigma, 1/(self.lambda_lasso * self.weight_vec))

    def update(self, new_u):
        for i in range(new_u.shape[1]):
            new_u[:, i] /= self.normalize_factor

        return new_u


# The norm1 penalty function
class Norm1Pelanty(Penalty):
    def __init__(self, lambda_lasso, weight_vec, Sigma, n):
        super(Norm1Pelanty, self).__init__(lambda_lasso, weight_vec, Sigma, n)
        self.limit = np.array([np.zeros(n) for i in range(len(weight_vec))])
        for i in range(n):
            self.limit[:, i] = lambda_lasso * weight_vec

    def update(self, new_u):
        normalized_u = np.where(abs(new_u) >= self.limit)
        new_u[normalized_u] = self.limit[normalized_u] * new_u[normalized_u] / abs(new_u[normalized_u])
        return new_u


## Create Chain Graph

Each node $i \in V$ represents a local dataset consisting of $m$ feature vectors $x^{(i,1)}, ... , x^{(i,m)} \in R^n$. The feature vectors are i.i.d. realizations of a standard Gaussian random vector x ∼ N(0,I). The labels $y_1^{(i)}, . . . , y_m^{(i)} \in R$ of the nodes $i \in V$ are generated according to the linear model $y_r^{(i)} = (x^{(i, r)})^T w^{(i)} + \epsilon$, with $\epsilon ∼ N(0,\sigma)$. To learn the weight $w^{(i)}$ ,we apply Algorithm 1 to a training set M obtained by randomly selecting 40% of the nodes.

In [40]:
# from algorithm.optimizer import *
from torch.autograd import Variable
import torchvision
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Dataset, TensorDataset


def get_chain_data(cluster_sizes, eps, W, m=5, n=2, noise_sd=0):
    '''
    :param W: a list containing the weight vectors for each cluster
    :param m, n: shape of features vector for each node
    :param pin: the probability of edges inside each cluster
    :param pout: the probability of edges between the clusters
    :param noise_sd: the standard deviation of the noise for calculating the labels
    
    :return B: adjacency matrix of the graph
    :return weight_vec: a list containing the edges's weights of the graph
    :return true_labels: a list containing the true labels of the nodes
    :return datapoints: a dictionary containing the data of each node in the graph needed for the algorithm 1 
    '''

    N = np.sum(cluster_sizes)
    E = N-1
    '''
    N: total number of nodes
    E: total number of edges
    '''
    
    
    # create B(adjacency matrix) and edges's weights vector(weight_vec) based on the graph G
    B = np.zeros((E, N))
    '''
    B: adjacency matrix of the graph with the shape of E*N
    '''
    weight_vec = np.zeros(E)
    '''
    weight_vec: a list containing the edges's weights of the graph with the shape of E
    '''
    
    cnt = 0
    for i in range(E):
        B[cnt, i] = 1
        B[cnt, i+1] = -1

        weight_vec[cnt] = 1
        if i == cluster_sizes[0] - 1:
            weight_vec[cnt] = eps
        cnt += 1
    
    weight_vec = weight_vec[:cnt]
    B = B[:cnt, :]
    
    # create the data of each node needed for the algorithm 1 
    
    node_degrees = np.array((1.0 / (np.sum(abs(B), 0)))).ravel()
    '''
    node_degrees: a list containing the nodes degree for the alg1 (1/N_i)
    '''
    
    
    transforms = torchvision.transforms.Compose([
                       torchvision.transforms.ToTensor(),
                     ])
    mnist_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transforms)

    dl = DataLoader(mnist_dataset)

    X = dl.dataset.data # (60000,28, 28)
    y = dl.dataset.targets #(60000)

    # normalize to have 0 ~ 1 range in each pixel

    X = X / 255.0
    
    cluster_nums = [[0, 1], [2, 3]]
    
    cluster_data0 = np.concatenate((np.where(y==cluster_nums[0][0])[0], np.where(y==cluster_nums[0][1])[0]))
    random.shuffle(cluster_data0)
    
    cluster_data1 = np.concatenate((np.where(y==cluster_nums[1][0])[0], np.where(y==cluster_nums[1][1])[0]))
    random.shuffle(cluster_data1)
    cluster_data = [
        cluster_data0,
        cluster_data1, 
    ] 
    feature_size = int(10000//cluster_sizes[0])
    train_size = int(feature_size*0.8)
    
    
    datapoints = {}
    '''
    datapoints: a dictionary containing the data of each node in the graph needed for the algorithm 1,
    which are features, label, degree, and also the optimizer model for each node
    '''
    true_labels = []
    '''
    true_labels: the true labels for the nodes of the graph
    '''
    
    
    cnt = 0
    for i, cluster_size in enumerate(cluster_sizes):
        for j in range(cluster_size):
            features = X[cluster_data[i][j*feature_size:(j+1)*feature_size]][:train_size]
            test_features = X[cluster_data[i][j*feature_size:(j+1)*feature_size]][train_size:]
            '''
            features: the feature vector of node i 
            '''
            label = y[cluster_data[i][j*train_size:(j+1)*feature_size]][:train_size]
            test_label = y[cluster_data[i][j*feature_size:(j+1)*feature_size]][train_size:]
            
            label[label == cluster_nums[i][0]] = 0
            label[label == cluster_nums[i][1]] = 1
            
            test_label[test_label == cluster_nums[i][0]] = 0
            test_label[test_label == cluster_nums[i][1]] = 1
            '''
            label: the label of the node i
            '''
            
            true_labels.append(label)

            model = SimpleLinear(h1=200) 
            criterion = torch.nn.CrossEntropyLoss()
            optimizer = TorchLinearOptimizer(model, criterion)
#             optimizer = TorchLogisticOptimizer(model)
#             features = Variable(torch.from_numpy(features)).to(torch.float32)
#             label = Variable(torch.from_numpy(label)).to(torch.float32) 
      
            '''
            model : the linear model for the node i 
            optimizer : the optimizer model for the node i 
            ''' 
            
            datapoints[cnt] = {
                'features': features,
                'test_features': test_features,
                'degree': node_degrees[cnt],
                'label': label,
                'test_label': test_label,
                'optimizer': optimizer
            }
            cnt += 1

    return B, weight_vec, np.array(true_labels), datapoints




### Compare Results

As the result we compare the MSE of Algorithm 1 with plain linear regression 
and decision tree regression

In [41]:
# %load results/compare_results.py
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error



def get_algorithm1_error(datapoints, predicted_w, samplingset):
    '''
    :param datapoints:  a dictionary containing the data of each node in the graph needed for the algorithm 1
    :param predicted_w: the predicted weigh vectors for each node
    :param samplingset: the sampling set for the algorithm 1

    :return alg1_MSE: the MSE of the algorithm 1 for all the nodes, the samplingset and other nodes (test set)
    '''
    
    true_labels = []
    test_true_labels = []
    pred_labels = []
    test_pred_labels = []
    for i in range(len(datapoints)):
        features = datapoints[i]['features']
        label = datapoints[i]['label'].ravel().detach().numpy()
        true_labels.append(label)
        
        pred_label = datapoints[i]['optimizer'].model(features)
        pred_label = torch.argmax(pred_label, axis=1).ravel().detach().numpy() 
        pred_labels.append(pred_label)
        
        
        test_features = datapoints[i]['test_features']
        test_label = datapoints[i]['test_label'].ravel().detach().numpy()
        test_true_labels.append(test_label)
        
        test_pred_label = datapoints[i]['optimizer'].model(test_features)
        test_pred_label = torch.argmax(test_pred_label, axis=1).ravel().detach().numpy() 
        test_pred_labels.append(test_pred_label)

    pred_labels = np.array(pred_labels).ravel()
    true_labels = np.array(true_labels).ravel()
    
    test_pred_labels = np.array(test_pred_labels).ravel()
    test_true_labels = np.array(test_true_labels).ravel()
    
    all_labels = np.concatenate((true_labels, test_true_labels))
    all_pred_labels = np.concatenate((pred_labels, test_pred_labels))
    
    alg1_MSE = {'total': len(np.where(all_labels == all_pred_labels)[0]) / len(all_pred_labels),
                'train': len(np.where(true_labels == pred_labels)[0]) / len(pred_labels),
                'test': len(np.where(test_true_labels == test_pred_labels)[0]) / len(test_pred_labels),
               }

    return alg1_MSE


### Chain graph with Two Clusters

This chain graph has two clusters $|C_1| = |C_2| = 100$.
Each node $i \in V$ represents a local dataset consisting of feature vectors $x^{(i,1)}, ... , x^{(i,5)} \in R^2$.
The feature vectors are i.i.d. realizations of a standard Gaussian random vector x ~ N(0,I).
The labels $y_1^{(i)}, . . . , y_5^{(i)} \in R$ for each node $i \in V$
are selected from the MNIST dataset in away that we have two clusters $C_1$ and $C_2$ and for the first cluster we selected the images accosiated with number 0 and 1 and for the other cluster we selected the images accosiated with number 2 and 3 and we splited the selected dataset for each cluster randomly to each node within that cluster, and the model for each node is a simple 2 layers NN.
 
The tuning parameter $\lambda$ in algorithm1 
is manually chosen, guided by the resulting MSE, as $\lambda=0.01$ for norm1 and norm2 and also $\lambda=0.05$ for mocha penalty function. 
To learn the weight $w^{(i)}$ ,we apply Algorithm 1 to a training set M obtained by randomly selecting 90% of the nodes and use the rest as test set. As the result we ploted the mean accuracy achived by each choise of $\epsilon$.

In [42]:
%%capture
# from sparsebm import generate_SBM_dataset
import networkx as nx


def get_chain_graph_data(eps=0.1, m=5, n=2, pin=0.5, pout=0.01, noise_sd=0):
    '''
    :param m, n: shape of features vector for each node
    :param pin: the probability of edges inside each cluster
    :param pout: the probability of edges between the clusters
    :param noise_sd: the standard deviation of the noise for calculating the labels
    
    :return B: adjacency matrix of the graph
    :return weight_vec: a list containing the edges's weights of the graph
    :return true_labels: a list containing the true labels of the nodes
    :return datapoints: a dictionary containing the data of each node in the graph needed for the algorithm 1 
    '''
    cluster_sizes = [100, 100]
    cluster_sizes = [20, 20]
    probs = np.array([[pin, pout],[pout, pin]])

    '''
    G: generated SBM graph with 2 clusters
    ''' 
    
    # define weight vectors for each cluster of the graph
    
    W1 = np.array([2, 2])
    '''
    W1: the weigh vector for the first cluster
    '''
    W2 = np.array([-2, 2])
    '''
    W2: the weigh vector for the second cluster
    '''
    
    W = [W1, W2]
    
    
    return get_chain_data(cluster_sizes, eps, W, m, n, noise_sd)



Plot the MSE with respect to the different epsilons (0.1, 0.5, 0.8) for each penalty function.

In [49]:

for lambda_lasso in [0.001, 0.01, 0.1, 1.0, 5.0, 10.0, 20.0, 30.0, 40.0, 50.0, 100.0]:
    print(lambda_lasso)
    B, weight_vec, true_labels, datapoints = get_chain_graph_data(0.01, pin=0.5, pout=0.01)
    E, N = B.shape
    samplingset = [i for i in range(N-1)]
    _, predicted_w, predicted_u = algorithm_1(70, B, weight_vec, datapoints, true_labels, samplingset, 
                             lambda_lasso, penalty_func)
#     for i in range(10):
#         features = datapoints[i]['test_features']
#         print(datapoints[i]['test_label'].detach().numpy())
#         pred_label = datapoints[i]['optimizer'].model(features)
#         print(torch.argmax(pred_label, axis=1).ravel().detach().numpy())
#         print()
    print()

0.001
iter: 0 {'total': 0.4884, 'train': 0.4894375, 'test': 0.48425}
iter: 2 {'total': 0.5792, 'train': 0.594, 'test': 0.52}
iter: 4 {'total': 0.6056, 'train': 0.6239375, 'test': 0.53225}
iter: 6 {'total': 0.6313, 'train': 0.656375, 'test': 0.531}
iter: 8 {'total': 0.6315, 'train': 0.6551875, 'test': 0.53675}
iter: 10 {'total': 0.6568, 'train': 0.6876875, 'test': 0.53325}
iter: 12 {'total': 0.6627, 'train': 0.6939375, 'test': 0.53775}
iter: 14 {'total': 0.7238, 'train': 0.7724375, 'test': 0.52925}
iter: 16 {'total': 0.7945, 'train': 0.8605625, 'test': 0.53025}
iter: 18 {'total': 0.82295, 'train': 0.8970625, 'test': 0.5265}
iter: 20 {'total': 0.845, 'train': 0.924875, 'test': 0.5255}
iter: 22 {'total': 0.87725, 'train': 0.9659375, 'test': 0.5225}
iter: 24 {'total': 0.89205, 'train': 0.983, 'test': 0.52825}
iter: 26 {'total': 0.89415, 'train': 0.9861875, 'test': 0.526}
iter: 28 {'total': 0.89415, 'train': 0.9865, 'test': 0.52475}
iter: 30 {'total': 0.8939, 'train': 0.9865, 'test': 0.5235

iter: 54 {'total': 0.896, 'train': 0.9885, 'test': 0.526}
iter: 56 {'total': 0.89525, 'train': 0.9885, 'test': 0.52225}
iter: 58 {'total': 0.89585, 'train': 0.9885, 'test': 0.52525}
iter: 60 {'total': 0.8965, 'train': 0.9885, 'test': 0.5285}
iter: 62 {'total': 0.89675, 'train': 0.9885, 'test': 0.52975}
iter: 64 {'total': 0.89595, 'train': 0.9885, 'test': 0.52575}
iter: 66 {'total': 0.8961, 'train': 0.9885, 'test': 0.5265}
iter: 68 {'total': 0.89645, 'train': 0.9885, 'test': 0.52825}

5.0
iter: 0 {'total': 0.49415, 'train': 0.503875, 'test': 0.45525}
iter: 2 {'total': 0.57135, 'train': 0.5849375, 'test': 0.517}
iter: 4 {'total': 0.59165, 'train': 0.6126875, 'test': 0.5075}
iter: 6 {'total': 0.60045, 'train': 0.6225, 'test': 0.51225}
iter: 8 {'total': 0.62565, 'train': 0.65475, 'test': 0.50925}
iter: 10 {'total': 0.63775, 'train': 0.67025, 'test': 0.50775}
iter: 12 {'total': 0.6627, 'train': 0.698625, 'test': 0.519}
iter: 14 {'total': 0.68905, 'train': 0.73275, 'test': 0.51425}
iter: 16 

iter: 36 {'total': 0.841, 'train': 0.9178125, 'test': 0.53375}
iter: 38 {'total': 0.8517, 'train': 0.9324375, 'test': 0.52875}
iter: 40 {'total': 0.8625, 'train': 0.946625, 'test': 0.526}
iter: 42 {'total': 0.86485, 'train': 0.9505, 'test': 0.52225}
iter: 44 {'total': 0.88065, 'train': 0.966375, 'test': 0.53775}
iter: 46 {'total': 0.87715, 'train': 0.9634375, 'test': 0.532}
iter: 48 {'total': 0.8839, 'train': 0.9709375, 'test': 0.53575}
iter: 50 {'total': 0.87955, 'train': 0.9658125, 'test': 0.5345}
iter: 52 {'total': 0.88115, 'train': 0.9673125, 'test': 0.5365}
iter: 54 {'total': 0.87885, 'train': 0.9678125, 'test': 0.523}
iter: 56 {'total': 0.88575, 'train': 0.975625, 'test': 0.52625}
iter: 58 {'total': 0.88425, 'train': 0.97325, 'test': 0.52825}
iter: 60 {'total': 0.87965, 'train': 0.968, 'test': 0.52625}
iter: 62 {'total': 0.8826, 'train': 0.972875, 'test': 0.5215}
iter: 64 {'total': 0.8817, 'train': 0.971125, 'test': 0.524}
iter: 66 {'total': 0.88175, 'train': 0.9700625, 'test': 0

In [50]:
lambda_lasso = 10.0
B, weight_vec, true_labels, datapoints = get_chain_graph_data(0.01, pin=0.5, pout=0.01)
E, N = B.shape
samplingset = [i for i in range(N)]
_, predicted_w, predicted_u = algorithm_1(50, B, weight_vec, datapoints, true_labels, samplingset, 
                             lambda_lasso, penalty_func)
# # model = datapoints[0]['optimizer'].model
# for i in range(10):
#     features = datapoints[i]['test_features']
#     print(datapoints[i]['test_label'].detach().numpy())
# #     pred_label = datapoints[i]['optimizer'].model(features)
#     pred_label = model(features)
#     print(torch.argmax(pred_label, axis=1).ravel().detach().numpy())
#     print()

iter: 0 {'total': 0.4923, 'train': 0.4891875, 'test': 0.50475}
iter: 2 {'total': 0.5758, 'train': 0.5870625, 'test': 0.53075}
iter: 4 {'total': 0.59995, 'train': 0.617375, 'test': 0.53025}
iter: 6 {'total': 0.6095, 'train': 0.6285, 'test': 0.5335}
iter: 8 {'total': 0.62385, 'train': 0.6465625, 'test': 0.533}
iter: 10 {'total': 0.6204, 'train': 0.641875, 'test': 0.5345}
iter: 12 {'total': 0.63575, 'train': 0.663, 'test': 0.52675}
iter: 14 {'total': 0.6735, 'train': 0.7071875, 'test': 0.53875}
iter: 16 {'total': 0.7102, 'train': 0.7523125, 'test': 0.54175}
iter: 18 {'total': 0.72845, 'train': 0.7758125, 'test': 0.539}
iter: 20 {'total': 0.8073, 'train': 0.8721875, 'test': 0.54775}
iter: 22 {'total': 0.79825, 'train': 0.862125, 'test': 0.54275}
iter: 24 {'total': 0.83865, 'train': 0.9119375, 'test': 0.5455}
iter: 26 {'total': 0.8707, 'train': 0.9501875, 'test': 0.55275}
iter: 28 {'total': 0.85415, 'train': 0.93075, 'test': 0.54775}
iter: 30 {'total': 0.8604, 'train': 0.9395625, 'test': 0.

In [80]:
true_labels = []
test_true_labels = []
pred_labels = []
test_pred_labels = []
for i in range(len(datapoints)):
    features = datapoints[i]['features']
    label = datapoints[i]['label'].ravel().detach().numpy()
    true_labels.append(label)

    pred_label = datapoints[i]['optimizer'].model(features)
    pred_label = torch.argmax(pred_label, axis=1).ravel().detach().numpy() 
    pred_labels.append(pred_label)

    test_features = datapoints[i]['test_features']
    test_label = datapoints[i]['test_label'].ravel().detach().numpy()
    test_true_labels.append(test_label)

    test_pred_label = datapoints[i]['optimizer'].model(test_features)
    test_pred_label = torch.argmax(test_pred_label, axis=1).ravel().detach().numpy() 
    test_pred_labels.append(test_pred_label)
    
    cluster = 0 if i < N//2 else 1

    print('node:', i, ', cluster:', cluster, ', accuracy:', len(np.where(test_label == test_pred_label)[0]) / len(test_label))
    
    
pred_labels = np.array(pred_labels).ravel()
true_labels = np.array(true_labels).ravel()

test_pred_labels = np.array(test_pred_labels).ravel()
test_true_labels = np.array(test_true_labels).ravel()

all_labels = np.concatenate((true_labels, test_true_labels))
all_pred_labels = np.concatenate((pred_labels, test_pred_labels))

alg1_MSE = {'total': len(np.where(all_labels == all_pred_labels)[0]) / len(all_pred_labels),
            'train': len(np.where(true_labels == pred_labels)[0]) / len(pred_labels),
            'test': len(np.where(test_true_labels == test_pred_labels)[0]) / len(test_pred_labels),
           }
print(alg1_MSE)

node: 0 , cluster: 0 , accuracy: 1.0
node: 1 , cluster: 0 , accuracy: 0.47
node: 2 , cluster: 0 , accuracy: 0.46
node: 3 , cluster: 0 , accuracy: 0.55
node: 4 , cluster: 0 , accuracy: 0.55
node: 5 , cluster: 0 , accuracy: 0.53
node: 6 , cluster: 0 , accuracy: 0.56
node: 7 , cluster: 0 , accuracy: 0.49
node: 8 , cluster: 0 , accuracy: 0.58
node: 9 , cluster: 0 , accuracy: 0.49
node: 10 , cluster: 0 , accuracy: 0.5
node: 11 , cluster: 0 , accuracy: 0.46
node: 12 , cluster: 0 , accuracy: 0.44
node: 13 , cluster: 0 , accuracy: 0.53
node: 14 , cluster: 0 , accuracy: 0.58
node: 15 , cluster: 0 , accuracy: 0.5
node: 16 , cluster: 0 , accuracy: 0.55
node: 17 , cluster: 0 , accuracy: 0.58
node: 18 , cluster: 0 , accuracy: 0.53
node: 19 , cluster: 0 , accuracy: 0.5
node: 20 , cluster: 1 , accuracy: 0.98
node: 21 , cluster: 1 , accuracy: 0.48
node: 22 , cluster: 1 , accuracy: 0.42
node: 23 , cluster: 1 , accuracy: 0.42
node: 24 , cluster: 1 , accuracy: 0.41
node: 25 , cluster: 1 , accuracy: 0.39
