In [33]:
# Creates a graph file for METIS from a given distance file and a data-size file for each node,
# using a distance threshold to decide which edges are kept.

from sklearn.neighbors import kneighbors_graph
from sklearn.neighbors import NearestNeighbors
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sys
import os
import math

In [34]:
class Graph():
    """Dense adjacency-matrix graph with an O(V^2) Dijkstra search.

    ``self.graph[u][v]`` holds the weight of the edge ``u -> v``; only values
    ``> 0`` are treated as edges. ``self.graph`` starts as a ``V x V`` list of
    zeros and may be replaced by anything that supports ``graph[u][v]``
    indexing (other code in this notebook assigns a 2-D numpy array to it).
    """

    def __init__(self, vertices):
        """Create a graph with ``vertices`` nodes and no edges."""
        self.V = vertices
        self.graph = [[0 for column in range(vertices)]
                      for row in range(vertices)]

    def printSolution(self, dist):
        """Print one ``vertex <tab> distance`` line per vertex, after a header."""
        # The tab escapes had been lost ("Vertex tDistance", "t"); restored.
        print("Vertex \tDistance from Source")
        for node in range(self.V):
            print(node, "\t", dist[node])

    def minDistance(self, dist, sptSet):
        """Return the unprocessed vertex with the smallest tentative distance.

        Args:
            dist: tentative distance per vertex.
            sptSet: per-vertex flags, True once the vertex has been processed.

        Returns:
            The index of the closest vertex whose flag is False, or -1 when no
            such vertex has a distance below infinity.
        """
        mind = float("Inf")
        min_index = -1
        for v in range(self.V):
            if (dist[v] < mind) and not sptSet[v]:
                mind = dist[v]
                min_index = v

        return min_index

    def dijkstra(self, src):
        """Single-source shortest paths from ``src`` (Dijkstra's algorithm).

        Args:
            src: index of the source vertex.

        Returns:
            A list with the shortest distance from ``src`` to every vertex;
            vertices that cannot be reached are reported as -1.
        """
        # sys.maxsize marks "not reached yet".
        dist = [sys.maxsize] * self.V
        dist[src] = 0
        sptSet = [False] * self.V

        for _ in range(self.V):
            # Closest vertex not processed yet (src on the first pass).
            u = self.minDistance(dist, sptSet)
            if u == -1:
                # Nothing left to process; never index the lists with -1.
                break
            sptSet[u] = True

            # Relax every edge u -> v that leads to an unprocessed vertex.
            for v in range(self.V):
                weight = self.graph[u][v]
                if weight > 0 and not sptSet[v] and dist[v] > dist[u] + weight:
                    dist[v] = dist[u] + weight

        return [-1 if reached == sys.maxsize else reached for reached in dist]
    

def _zero_at_or_below(matrix, th):
    """Return a copy of ``matrix`` in which every entry ``<= th`` is set to 0."""
    pruned = np.array(matrix, copy=True)
    pruned[pruned <= th] = 0
    return pruned


def _is_strongly_connected(adj):
    """Return True when every vertex reaches every other via entries ``> 0``.

    A graph is strongly connected exactly when one vertex reaches all others
    both in the graph and in its transpose, so two traversals from vertex 0
    replace a shortest-path search from every vertex.
    """
    edges = np.asarray(adj) > 0
    n_nodes = edges.shape[0]
    if n_nodes == 0:
        return True

    def reaches_all(mask):
        seen = np.zeros(n_nodes, dtype=bool)
        seen[0] = True
        frontier = [0]
        while frontier:
            u = frontier.pop()
            fresh = np.flatnonzero(mask[u] & ~seen)
            seen[fresh] = True
            frontier.extend(fresh.tolist())
        return bool(seen.all())

    return reaches_all(edges) and reaches_all(edges.T)


def _prune_until_connected(weights, th, stop_below):
    """Halve ``th`` until pruning ``weights`` at ``th`` gives a connected graph.

    Returns the pruned matrix, or the ``np.array([[0]])`` sentinel when ``th``
    falls below ``stop_below`` first. The search also stops once ``th`` is no
    longer positive: halving cannot change it any more, which used to loop
    forever (e.g. for an all-zero distance matrix).
    """
    while True:
        adj = _zero_at_or_below(weights, th)
        if _is_strongly_connected(adj):
            return adj
        th = th / 2
        if th < stop_below or not th > 0:
            break
    return np.array([[0]])


def get_adj_matrix_of_graph_by_threshold(d, th):
    """Build an adjacency matrix from distance matrix ``d`` and threshold ``th``.

    Entries ``d[i][j] <= th`` become 0 (no edge); larger entries are kept and
    serve as the edge weight. ``d`` itself is not modified.

    Author's note (reviewed Dec 26, 2022): this is the intended rule for the
    diversity partitioning approach -- a big distance between i and j means
    they are connected with weight = distance, a small distance means they are
    not connected. The note expects METIS to then group vertices that are far
    from each other, since the min-cut minimizes the weight of the cut edges.

    Args:
        d: 2-D array-like distance matrix.
        th: distance threshold.

    Returns:
        A new numpy array with the same shape as ``d``.
    """
    return _zero_at_or_below(d, th)


def get_adj_matrix_of_graph_by_threshold_sim(sim, th):
    """Build an adjacency matrix from similarity matrix ``sim`` and ``th``.

    Entries ``sim[i][j] <= th`` become 0 (not connected); larger entries are
    kept and serve as the edge weight. ``sim`` itself is not modified.
    Per the author's note (reviewed Dec 26, 2022) ``sim`` is expected to lie
    in the range [0..1]; this function does not check that.

    Args:
        sim: 2-D array-like similarity matrix.
        th: similarity threshold.

    Returns:
        A new numpy array with the same shape as ``sim``.
    """
    return _zero_at_or_below(sim, th)


def get_adj_matrix_of_graph_by_sim(d):
    """Return a connected adjacency matrix weighted by similarity.

    Distances are converted with ``sim = 1 - d / max(d)`` and the similarity
    matrix is pruned with a threshold that starts at ``min(sim) * 0.95`` and is
    halved until the graph is connected.

    NOTE(review): for non-negative ``d`` the minimum similarity is 0, so the
    starting threshold is 0 and the search is effectively a single pass; the
    stop value ``mean(d) * 0.2`` is in distance units although it is compared
    with a similarity threshold. Both are kept as in the original -- confirm
    the intended search range.

    Args:
        d: 2-D numpy array, the distance matrix.

    Returns:
        The pruned similarity matrix, or ``np.array([[0]])`` when no connected
        graph was found (including when the largest distance is 0, where the
        similarity is undefined).
    """
    d = np.asarray(d)
    max_distance = np.max(d)
    if max_distance == 0:
        # 1 - d / 0 would be NaN everywhere; report "no graph" instead.
        return np.array([[0]])
    sim = 1 - d / max_distance
    return _prune_until_connected(sim, np.min(sim) * 0.95, np.mean(d) * 0.2)


def get_adj_matrix_of_graph(d):
    """Return a connected adjacency matrix weighted by distance.

    The threshold starts at ``max(d) * 0.95`` and is halved until the graph
    that keeps only distances above the threshold is connected, or until the
    threshold drops below ``mean(d) * 0.2``.

    Args:
        d: 2-D array-like distance matrix.

    Returns:
        The pruned distance matrix, or ``np.array([[0]])`` when no connected
        graph was found.
    """
    d = np.asarray(d)
    return _prune_until_connected(d, np.max(d) * 0.95, np.mean(d) * 0.2)
            
        
def create_METISgraph_file(z_dir, d_file, cl_data_size_fname, q_file):
    """Write a METIS graph file whose edge weights combine distance and ``q``.

    Loads ``q`` (``np.load``), the distance matrix (``pd.read_csv`` without a
    header) and the data-size array (``np.load``) from ``z_dir``, prunes the
    distances with ``get_adj_matrix_of_graph`` and writes
    ``z_dir + "g_nw_q_" + d_file``.

    File layout: a header ``"<n_nodes> <num_edges> 011"``, then one line per
    vertex holding the vertex weight ``1`` followed by
    ``<neighbour> <weight>`` pairs (1-based ids) with
    ``weight = 1 + int(1000 * A[i][j] * q[i] * q[j])``. The header's edge
    count is half the number of pairs written, which equals the previous
    nonzero-count formula whenever the diagonal of ``A`` is zero.

    Args:
        z_dir: directory prefix; it is concatenated with the file names as-is.
        d_file: file name of the distance matrix.
        cl_data_size_fname: file name of the data-size array (only its length
            is printed).
        q_file: file name of the ``q`` array.

    Returns:
        The adjacency matrix ``A`` used for the file.

    Raises:
        SystemExit: when no connected graph could be built.
    """
    print("creating a graph file for METIS...")

    os.makedirs(z_dir, exist_ok=True)

    q = np.load(z_dir + q_file)
    d = pd.read_csv(z_dir + d_file, index_col=False, header=None).to_numpy()
    with open(z_dir + cl_data_size_fname, 'rb') as f:
        cl_data_size = np.load(f)

    n_nodes = len(d)
    print ("n_nodes, n_clients: ", n_nodes, ", ", len(cl_data_size))

    A = get_adj_matrix_of_graph(d)

    if A.shape[0] <= 1:
        print("Cannot create a connected graph!")
        # sys.exit instead of the interactive-only exit(); non-zero = failure.
        sys.exit(1)

    rows = []
    n_entries = 0
    for i in range(n_nodes):
        fields = [str(1)]  # vertex weight (cl_data_size[i] is not used)
        for j in range(n_nodes):
            if j != i and A[i][j] != 0:
                fields.append(str(j + 1))
                fields.append(str(1 + int(1000 * A[i][j] * q[i] * q[j])))
                n_entries += 1
        rows.append(" ".join(fields) + "\n")

    # Every undirected edge is listed once from each endpoint.
    num_edges = int(n_entries / 2)
    header = str(n_nodes) + " " + str(num_edges) + " 011\n"

    fname = "g_nw_q_" + d_file

    # Single open/write instead of re-opening the file in append mode per row.
    with open(z_dir + fname, 'w') as the_file:
        the_file.write(header)
        the_file.writelines(rows)

    print("saved graph in file: " + z_dir + fname)
    return A


def create_METISgraph_file_v2(z_dir, d_file, q_file):
    """Write a METIS graph file with ``q`` as vertex weight.

    Loads ``q`` (``np.load``) and the distance matrix (``pd.read_csv`` without
    a header) from ``z_dir``, prunes the distances with
    ``get_adj_matrix_of_graph`` and writes ``z_dir + "g_qw_" + d_file``.

    File layout: a header ``"<n_nodes> <num_edges> 011"``, then one line per
    vertex holding the vertex weight ``int(q[i] * 1000)`` followed by
    ``<neighbour> <weight>`` pairs (1-based ids) with
    ``weight = 1 + int(1000 * A[i][j])``. The header's edge count is half the
    number of pairs written, which equals the previous nonzero-count formula
    whenever the diagonal of ``A`` is zero.

    Args:
        z_dir: directory prefix; it is concatenated with the file names as-is.
        d_file: file name of the distance matrix.
        q_file: file name of the ``q`` array.

    Returns:
        The adjacency matrix ``A`` used for the file.

    Raises:
        SystemExit: when no connected graph could be built.
    """
    print("creating a graph file for METIS...")

    os.makedirs(z_dir, exist_ok=True)

    q = np.load(z_dir + q_file)
    d = pd.read_csv(z_dir + d_file, index_col=False, header=None).to_numpy()

    n_nodes = len(d)
    print ("n_nodes, n_clients: ", n_nodes)

    A = get_adj_matrix_of_graph(d)

    if A.shape[0] <= 1:
        print("Cannot create a connected graph!")
        # sys.exit instead of the interactive-only exit(); non-zero = failure.
        sys.exit(1)

    rows = []
    n_entries = 0
    for i in range(n_nodes):
        fields = [str(int(q[i] * 1000))]  # vertex weight
        for j in range(n_nodes):
            if j != i and A[i][j] != 0:
                fields.append(str(j + 1))
                # q is deliberately not folded into the edge weight here.
                fields.append(str(1 + int(1000 * A[i][j])))
                n_entries += 1
        rows.append(" ".join(fields) + "\n")

    # Every undirected edge is listed once from each endpoint.
    num_edges = int(n_entries / 2)
    header = str(n_nodes) + " " + str(num_edges) + " 011\n"

    fname = "g_qw_" + d_file

    # Single open/write instead of re-opening the file in append mode per row.
    with open(z_dir + fname, 'w') as the_file:
        the_file.write(header)
        the_file.writelines(rows)

    print("saved graph in file: " + z_dir + fname)
    return A

def create_METISgraph_file_noq(z_dir, d_file, cl_data_size_fname):
    """Write a METIS graph file from distances only (no ``q`` array).

    Loads the distance matrix (``pd.read_csv`` without a header) and the
    data-size array (``np.load``) from ``z_dir``, prunes the distances with
    ``get_adj_matrix_of_graph`` and writes ``z_dir + "g_nw_" + d_file``.

    File layout: a header ``"<n_nodes> <num_edges> 011"``, then one line per
    vertex holding the vertex weight ``1`` followed by
    ``<neighbour> <weight>`` pairs (1-based ids) with
    ``weight = 1 + int(1000 * A[i][j])``. The header's edge count is half the
    number of pairs written, which equals the previous nonzero-count formula
    whenever the diagonal of ``A`` is zero.

    Args:
        z_dir: directory prefix; it is concatenated with the file names as-is.
        d_file: file name of the distance matrix.
        cl_data_size_fname: file name of the data-size array (only its length
            is printed).

    Returns:
        The adjacency matrix ``A`` used for the file.

    Raises:
        SystemExit: when no connected graph could be built.
    """
    print("creating a graph file for METIS...")

    os.makedirs(z_dir, exist_ok=True)

    d = pd.read_csv(z_dir + d_file, index_col=False, header=None).to_numpy()
    with open(z_dir + cl_data_size_fname, 'rb') as f:
        cl_data_size = np.load(f)

    n_nodes = len(d)
    print ("n_nodes, n_clients: ", n_nodes, ", ", len(cl_data_size))

    A = get_adj_matrix_of_graph(d)

    if A.shape[0] <= 1:
        print("Cannot create a connected graph!")
        # sys.exit instead of the interactive-only exit(); non-zero = failure.
        sys.exit(1)

    rows = []
    n_entries = 0
    for i in range(n_nodes):
        fields = [str(1)]  # vertex weight (cl_data_size[i] is not used)
        for j in range(n_nodes):
            if j != i and A[i][j] != 0:
                fields.append(str(j + 1))
                fields.append(str(1 + int(1000 * A[i][j])))
                n_entries += 1
        rows.append(" ".join(fields) + "\n")

    # Every undirected edge is listed once from each endpoint.
    num_edges = int(n_entries / 2)
    header = str(n_nodes) + " " + str(num_edges) + " 011\n"

    fname = "g_nw_" + d_file

    # Single open/write instead of re-opening the file in append mode per row.
    with open(z_dir + fname, 'w') as the_file:
        the_file.write(header)
        the_file.writelines(rows)

    print("saved graph in file: " + z_dir + fname)
    return A


def create_METISgraph_file_v3(z_dir, q_file):
    """Write a METIS file for a complete graph weighted only by ``q``.

    Loads ``q`` with ``np.load`` from ``z_dir + q_file`` and writes
    ``z_dir + "g_vw_q_no_d.csv"``: a header
    ``"<n_nodes> <n_nodes*(n_nodes-1)/2> 011"``, then one line per vertex
    holding the vertex weight ``int(q[i] * 1000)`` followed by a
    ``<neighbour> 1`` pair (1-based ids) for every other vertex. No distance
    matrix is involved.

    Args:
        z_dir: directory prefix; it is concatenated with the file names as-is.
        q_file: file name of the ``q`` array.

    Returns:
        None.
    """
    print("creating a graph file for METIS...")

    os.makedirs(z_dir, exist_ok=True)

    q = np.load(z_dir + q_file)

    n_nodes = len(q)
    print ("n_nodes: ", n_nodes)

    # Complete graph: every pair of distinct vertices is an edge.
    num_edges = int(n_nodes * (n_nodes - 1) / 2)
    header = str(n_nodes) + " " + str(int(num_edges)) + " 011\n"

    fname = "g_vw_q_no_d.csv"

    rows = []
    for i in range(n_nodes):
        fields = [str(int(q[i] * 1000))]  # vertex weight
        for j in range(n_nodes):
            if j != i:
                fields.append(str(j + 1))
                fields.append(str(1))  # uniform edge weight
        rows.append(" ".join(fields) + "\n")

    # Single open/write instead of re-opening the file in append mode per row.
    with open(z_dir + fname, 'w') as the_file:
        the_file.write(header)
        the_file.writelines(rows)

    print("saved graph in file: " + z_dir + fname)
    return


def create_METISgraph_file_v4(z_dir, d_file):
    """Write a METIS graph file from a distance matrix alone.

    Loads the distance matrix (``pd.read_csv`` without a header) from
    ``z_dir``, prunes it with ``get_adj_matrix_of_graph`` and writes
    ``z_dir + "g_nw_" + d_file``.

    File layout: a header ``"<n_nodes> <num_edges> 011"``, then one line per
    vertex holding the vertex weight ``1`` followed by
    ``<neighbour> <weight>`` pairs (1-based ids) with
    ``weight = 1 + int(1000 * A[i][j])``. The header's edge count is half the
    number of pairs written, which equals the previous nonzero-count formula
    whenever the diagonal of ``A`` is zero.

    Args:
        z_dir: directory prefix; it is concatenated with the file names as-is.
        d_file: file name of the distance matrix.

    Returns:
        The adjacency matrix ``A`` used for the file.

    Raises:
        SystemExit: when no connected graph could be built.
    """
    print("creating a graph file for METIS...")

    os.makedirs(z_dir, exist_ok=True)

    d = pd.read_csv(z_dir + d_file, index_col=False, header=None).to_numpy()

    n_nodes = len(d)
    print ("n_nodes: ", n_nodes, ", ")

    A = get_adj_matrix_of_graph(d)

    if A.shape[0] <= 1:
        print("Cannot create a connected graph!")
        # sys.exit instead of the interactive-only exit(); non-zero = failure.
        sys.exit(1)

    rows = []
    n_entries = 0
    for i in range(n_nodes):
        fields = [str(1)]  # vertex weight
        for j in range(n_nodes):
            if j != i and A[i][j] != 0:
                fields.append(str(j + 1))
                # Scaling used for euclidean distances. The original inline
                # note mentions 1 + int(A[i][j]/100000) for minkowski p = 0.5.
                fields.append(str(1 + int(1000 * A[i][j])))
                n_entries += 1
        rows.append(" ".join(fields) + "\n")

    # Every undirected edge is listed once from each endpoint.
    num_edges = int(n_entries / 2)
    header = str(n_nodes) + " " + str(num_edges) + " 011\n"

    fname = "g_nw_" + d_file

    # Single open/write instead of re-opening the file in append mode per row.
    with open(z_dir + fname, 'w') as the_file:
        the_file.write(header)
        the_file.writelines(rows)

    print("saved graph in file: " + z_dir + fname)
    return A


In [35]:
def main_create_distance_graph_file(
        z_dir="./output/cifar10/z_ass/",
        d_fname="d_minkowski_p1.0_CNN2_G100_partition_zipfz0.5_noniid90_nclient300.npy"):
    """Build the distance-based METIS graph file for one dataset.

    Calls ``create_METISgraph_file_v4(z_dir, d_fname)`` and prints the
    element-wise comparison ``A == 1`` of the returned adjacency matrix.
    The defaults are the values the previous hard-coded version ended up
    using (its last assignments), so calling it without arguments behaves as
    before.

    Args:
        z_dir: directory prefix that holds the distance file.
        d_fname: file name of the distance matrix.
    """
    # Inputs from earlier runs, kept for reference (pass them as arguments):
    #   ./output/femnist/z_ass/
    #     d_euclidean_partition_noniid-labeldir0.175_nclient1000.npy
    #     (with data_size_partition_noniid-labeldir0.175_nclient1000.npy and
    #      q_lognorm_part300.npy, through create_METISgraph_file)
    #   ./output/mnist/z_ass/
    #     d_euclidean_MLP2_G10_partition_noniid90_nclient300.npy
    #     d_wPCA_MLP2_G10_partition_noniid90_nclient300.npy
    #     d_euclidean_MLP2_G10_partition_noniid82_#label2_nclient100.npy
    #     d_minkowski_p1.0_MLP2_G100_partition_noniid90_label2_nclient300.npy
    #     d_minkowski_p1.0_MLP2_G100_partition_noniid90_label3_nclient300.npy
    #   ./output/cifar10/z_ass/
    #     d_euclidean_CNN2_G10_partition_noniid90_nclient300.npy
    #     d_euclidean_CNN2_G100_partition_iid_nclient300.npy
    #     d_minkowski_p1.0_CNN2_G10_partition_noniid90_nclient300.npy
    #     d_minkowski_p0.5_CNN2_G10_partition_noniid90_nclient300.npy
    #     d_minkowski_p1.0_CNN2_G100_partition_noniid90_label2_nclient300.npy
    #   ./output/synthetic/z_ass/
    #     d_minkowski_p1.0_LR_G200_partition_noniid_synthetic_nclient300.npy
    A = create_METISgraph_file_v4(z_dir, d_fname)

    print(A == 1)

# usage: running this cell writes the distance-based METIS graph file through
# create_METISgraph_file_v4 and prints the element-wise result of A == 1.

main_create_distance_graph_file()


creating a graph file for METIS...
n_nodes:  300 , 
saved graph in file: ./output/cifar10/z_ass/g_nw_d_minkowski_p1.0_CNN2_G100_partition_zipfz0.5_noniid90_nclient300.npy
[[False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]


In [36]:
# Create a graph file for METIS using similarity instead of distance.
# Distances are first converted to similarities:

# sim = 1 - distance / max_distance


def create_METISgraph_file_by_sim(z_dir, d_file):
    """Write a METIS graph file whose edge weights are similarities.

    Loads the distance matrix (``pd.read_csv`` without a header) from
    ``z_dir``, converts and prunes it with ``get_adj_matrix_of_graph_by_sim``
    and writes ``z_dir + "g_nw_sim_" + d_file``.

    File layout: a header ``"<n_nodes> <num_edges> 011"``, then one line per
    vertex holding the vertex weight ``1`` followed by
    ``<neighbour> <weight>`` pairs (1-based ids) for every ``A[i][j] > 0``
    with ``weight = 1 + int(1000 * A[i][j])``. The header's edge count is half
    the number of pairs written, which equals the previous
    ``(count(A > 0) - n_nodes) / 2`` whenever every diagonal entry of ``A`` is
    positive.

    Args:
        z_dir: directory prefix; it is concatenated with the file names as-is.
        d_file: file name of the distance matrix.

    Returns:
        The adjacency matrix ``A`` used for the file.

    Raises:
        SystemExit: when no connected graph could be built.
    """
    print("creating a graph file for METIS...")

    os.makedirs(z_dir, exist_ok=True)

    d = pd.read_csv(z_dir + d_file, index_col=False, header=None).to_numpy()

    n_nodes = len(d)
    print ("n_nodes: ", n_nodes, ", ")

    A = get_adj_matrix_of_graph_by_sim(d)

    if A.shape[0] <= 1:
        print("Cannot create a connected graph!")
        # sys.exit instead of the interactive-only exit(); non-zero = failure.
        sys.exit(1)

    rows = []
    n_entries = 0
    for i in range(n_nodes):
        fields = [str(1)]  # vertex weight
        for j in range(n_nodes):
            if j != i and A[i][j] > 0:
                fields.append(str(j + 1))
                fields.append(str(1 + int(1000 * A[i][j])))
                n_entries += 1
        rows.append(" ".join(fields) + "\n")

    # Every undirected edge is listed once from each endpoint.
    num_edges = int(n_entries / 2)
    header = str(n_nodes) + " " + str(num_edges) + " 011\n"

    fname = "g_nw_sim_" + d_file

    # Single open/write instead of re-opening the file in append mode per row.
    with open(z_dir + fname, 'w') as the_file:
        the_file.write(header)
        the_file.writelines(rows)

    print("saved graph in file: " + z_dir + fname)
    return A

def main_create_sim_graph_file(
        z_dir="./output/synthetic/z_ass/",
        d_fname="d_minkowski_p1.0_LR_G200_partition_noniid_synthetic_nclient300.npy"):
    """Build the similarity-based METIS graph file for one dataset.

    Calls ``create_METISgraph_file_by_sim(z_dir, d_fname)`` and prints
    ``sum(A < 0)`` for the returned adjacency matrix. The defaults are the
    values the previous hard-coded version ended up using (its last
    assignments), so calling it without arguments behaves as before.

    Args:
        z_dir: directory prefix that holds the distance file.
        d_fname: file name of the distance matrix.
    """
    # Inputs from earlier runs, kept for reference (pass them as arguments):
    #   ./output/mnist/z_ass/
    #     d_minkowski_p1.0_MLP2_G10_partition_noniid90_nclient300.npy
    #     d_minkowski_p1.0_MLP2_G100_partition_noniid90_label2_nclient300.npy
    #     d_minkowski_p1.0_MLP2_G100_partition_noniid90_label3_nclient300.npy
    #   ./output/cifar10/z_ass/
    #     d_minkowski_p1.0_CNN2_G10_partition_noniid90_nclient300.npy
    #     d_minkowski_p1.0_CNN2_G100_partition_noniid90_label2_nclient300.npy
    #     d_minkowski_p1.0_CNN2_G100_partition_noniid90_label3_nclient300.npy
    A = create_METISgraph_file_by_sim(z_dir, d_fname)

    # Built-in sum over the rows of the boolean matrix.
    print(sum(A < 0))

#usage
#main_create_sim_graph_file()