In [1]:
import pickle
import os

In [3]:
print(os.getcwd())

C:\Users\user\Desktop\project\benchmarking_gnns


In [4]:
class DotDict(dict):
    def __init__(self, **kwds):
        self.update(kwds)
        self.__dict__ = self

In [5]:

import time
import os
import pickle
import numpy as np

import dgl
import torch




class load_SBMsDataSetDGL(torch.utils.data.Dataset):

#     def __init__(self,
#                  data_dir,
#                  name,
#                  split):

#         self.split = split
#         self.is_test = split.lower() in ['test', 'val'] 
#         with open(os.path.join(data_dir, name + '_%s.pkl' % self.split), 'rb') as f:
#             self.dataset = pickle.load(f)
#         self.node_labels = []
#         self.graph_lists = []
#         self.n_samples = len(self.dataset)
#         self._prepare()

    def __init__(self,
                 data_dir,
                 name,
                 split):

        self.split = split
        self.is_test = split.lower() in ['test', 'val'] 
        with open(os.path.join(data_dir, name + '.pkl'), 'rb') as f:
            self.dataset = pickle.load(f)
        self.node_labels = []
        self.graph_lists = []
        self.n_samples = len(self.dataset)
        self._prepare()

    def _prepare(self):

        print("preparing %d graphs for the %s set..." % (self.n_samples, self.split.upper()))

        for data in self.dataset:

            node_features = data.node_feat
            edge_list = (data.W != 0).nonzero()  # converting adj matrix to edge_list

            # Create the DGL Graph
            g = dgl.DGLGraph()
            g.add_nodes(node_features.size(0))
            g.ndata['feat'] = node_features.long()
            for src, dst in edge_list:
                g.add_edges(src.item(), dst.item())

            # adding edge features for Residual Gated ConvNet
            #edge_feat_dim = g.ndata['feat'].size(1) # dim same as node feature dim
            edge_feat_dim = 1 # dim same as node feature dim
            g.edata['feat'] = torch.ones(g.number_of_edges(), edge_feat_dim)

            self.graph_lists.append(g)
            self.node_labels.append(data.node_label)


    def __len__(self):
        """Return the number of graphs in the dataset."""
        return self.n_samples

    def __getitem__(self, idx):
        """
            Get the idx^th sample.
            Parameters
            ---------
            idx : int
                The sample index.
            Returns
            -------
            (dgl.DGLGraph, int)
                DGLGraph with node feature stored in `feat` field
                And its label.
        """
        return self.graph_lists[idx], self.node_labels[idx]


class SBMsDatasetDGL(torch.utils.data.Dataset):

    def __init__(self, name):
        """
            TODO
        """
        start = time.time()
        print("[I] Loading data ...")
        self.name = name
        data_dir = 'data/SBMs'
        self.train = load_SBMsDataSetDGL(data_dir, name, split='train')
        self.test = load_SBMsDataSetDGL(data_dir, name, split='test')
        self.val = load_SBMsDataSetDGL(data_dir, name, split='val')
        print("[I] Finished loading.")
        print("[I] Data load time: {:.4f}s".format(time.time()-start))




def self_loop(g):
    """
        Utility function only, to be used only when necessary as per user self_loop flag
        : Overwriting the function dgl.transform.add_self_loop() to not miss ndata['feat'] and edata['feat']
        
        
        This function is called inside a function in SBMsDataset class.
    """
    new_g = dgl.DGLGraph()
    new_g.add_nodes(g.number_of_nodes())
    new_g.ndata['feat'] = g.ndata['feat']
    
    src, dst = g.all_edges(order="eid")
    src = dgl.backend.zerocopy_to_numpy(src)
    dst = dgl.backend.zerocopy_to_numpy(dst)
    non_self_edges_idx = src != dst
    nodes = np.arange(g.number_of_nodes())
    new_g.add_edges(src[non_self_edges_idx], dst[non_self_edges_idx])
    new_g.add_edges(nodes, nodes)
    
    # This new edata is not used since this function gets called only for GCN, GAT
    # However, we need this for the generic requirement of ndata and edata
    new_g.edata['feat'] = torch.zeros(new_g.number_of_edges())
    return new_g



class SBMsDataset(torch.utils.data.Dataset):

    def __init__(self, name):
        """
            Loading SBM datasets
        """
        start = time.time()
        print("[I] Loading dataset %s..." % (name))
        self.name = name
        data_dir = 'data/SBMs/'
        with open(data_dir+name+'.pkl',"rb") as f:
            f = pickle.load(f)
            self.train = f[0]
            self.val = f[1]
            self.test = f[2]
        print('train, test, val sizes :',len(self.train),len(self.test),len(self.val))
        print("[I] Finished loading.")
        print("[I] Data load time: {:.4f}s".format(time.time()-start))


    # form a mini batch from a given list of samples = [(graph, label) pairs]
    def collate(self, samples):
        # The input samples is a list of pairs (graph, label).
        graphs, labels = map(list, zip(*samples))
        labels = torch.cat(labels).long()
        tab_sizes_n = [ graphs[i].number_of_nodes() for i in range(len(graphs))]
        tab_snorm_n = [ torch.FloatTensor(size,1).fill_(1./float(size)) for size in tab_sizes_n ]
        snorm_n = torch.cat(tab_snorm_n).sqrt()  
        tab_sizes_e = [ graphs[i].number_of_edges() for i in range(len(graphs))]
        tab_snorm_e = [ torch.FloatTensor(size,1).fill_(1./float(size)) for size in tab_sizes_e ]
        snorm_e = torch.cat(tab_snorm_e).sqrt()
        batched_graph = dgl.batch(graphs)

        return batched_graph, labels, snorm_n, snorm_e

    def _add_self_loops(self):
        
        # function for adding self loops
        # this function will be called only if self_loop flag is True
            
        self.train.graph_lists = [self_loop(g) for g in self.train.graph_lists]
        self.val.graph_lists = [self_loop(g) for g in self.val.graph_lists]
        self.test.graph_lists = [self_loop(g) for g in self.test.graph_lists]






In [6]:
import os
os.getcwd()

'C:\\Users\\user\\Desktop\\project\\benchmarking_gnns'

In [10]:
with open('SBM_CLUSTER_a4.pkl', 'rb') as f:
    data = pickle.load(f)

In [13]:
data

[<data.SBMs.load_SBMsDataSetDGL at 0x266c0757a48>,
 <data.SBMs.load_SBMsDataSetDGL at 0x2689b7804c8>,
 <data.SBMs.load_SBMsDataSetDGL at 0x268b9598ac8>]

In [None]:
data[0].dataset


In [12]:
with open('SBM_CLUSTER_a2.pkl', 'rb') as f:
    data2 = pickle.load(f)



In [13]:
data2[0].dataset


[{'nb_nodes': 117,
  'W': tensor([[0, 0, 1,  ..., 0, 1, 0],
          [0, 0, 1,  ..., 0, 0, 0],
          [1, 1, 0,  ..., 1, 1, 0],
          ...,
          [0, 0, 1,  ..., 0, 1, 0],
          [1, 0, 1,  ..., 1, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0]], dtype=torch.int8),
  'rand_idx': tensor([ 30,  81,  78,  18, 104,  72,  22,  61,   4,   3,  31,  51,  58,  16,
           35, 107,  88,   1,  62,  97,  46,  83,  50, 102,  70,  39,  65, 111,
           43,  85,  21,  15,  82, 112,  41, 114,  68,  42,  13,  93,  29,  73,
           75,  76,  25,  91,  53,  12,  77, 116,  47, 101,  37,  33,  66,  96,
           98,  67,  84,  86,   0, 108,  40,  94,  63,  17,  24, 100,  87,  20,
           28,  38, 106,  48,  79,  56,   2,  14,  10,  99,  45,  55,  90,  80,
           64,  32,   5,  44,  59, 115,  95,  27,  60, 109, 103,   8,   6,  57,
           23,  26,   9,  74,  92,  52,  54,  36,  49,  11,  19,   7, 110,  71,
           34, 105,  89,  69, 113], dtype=torch.int16),
  'node_feat': te

In [None]:


train = data[0].dataset

In [34]:
len(train)

10000

In [61]:
label

[0,
 3,
 3,
 0,
 4,
 3,
 0,
 2,
 0,
 0,
 0,
 2,
 2,
 0,
 1,
 5,
 3,
 0,
 2,
 4,
 2,
 3,
 2,
 4,
 3,
 1,
 3,
 5,
 2,
 3,
 0,
 0,
 3,
 5,
 2,
 5,
 3,
 2,
 0,
 3,
 0,
 3,
 3,
 3,
 0,
 3,
 2,
 0,
 3,
 5,
 2,
 4,
 1,
 1,
 3,
 4,
 4,
 3,
 3,
 3,
 0,
 5,
 2,
 4,
 3,
 0,
 0,
 4,
 3,
 0,
 0,
 1,
 4,
 2,
 3,
 2,
 0,
 0,
 0,
 4,
 2,
 2,
 3,
 3,
 3,
 0,
 0,
 2,
 2,
 5,
 4,
 0,
 2,
 5,
 4,
 0,
 0,
 2,
 0,
 0,
 0,
 3,
 3,
 2,
 2,
 1,
 2,
 0,
 0,
 0,
 5,
 3,
 1,
 4,
 3,
 3,
 5]

In [36]:
train[0]['node_label']

tensor([0, 3, 3, 0, 4, 3, 0, 2, 0, 0, 0, 2, 2, 0, 1, 5, 3, 0, 2, 4, 2, 3, 2, 4,
        3, 1, 3, 5, 2, 3, 0, 0, 3, 5, 2, 5, 3, 2, 0, 3, 0, 3, 3, 3, 0, 3, 2, 0,
        3, 5, 2, 4, 1, 1, 3, 4, 4, 3, 3, 3, 0, 5, 2, 4, 3, 0, 0, 4, 3, 0, 0, 1,
        4, 2, 3, 2, 0, 0, 0, 4, 2, 2, 3, 3, 3, 0, 0, 2, 2, 5, 4, 0, 2, 5, 4, 0,
        0, 2, 0, 0, 0, 3, 3, 2, 2, 1, 2, 0, 0, 0, 5, 3, 1, 4, 3, 3, 5],
       dtype=torch.int16)

In [23]:
data[0].graph_lists

[DGLGraph(num_nodes=117, num_edges=4104,
          ndata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}
          edata_schemes={'feat': Scheme(shape=(1,), dtype=torch.float32)}),
 DGLGraph(num_nodes=92, num_edges=2524,
          ndata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}
          edata_schemes={'feat': Scheme(shape=(1,), dtype=torch.float32)}),
 DGLGraph(num_nodes=110, num_edges=3638,
          ndata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}
          edata_schemes={'feat': Scheme(shape=(1,), dtype=torch.float32)}),
 DGLGraph(num_nodes=99, num_edges=3018,
          ndata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}
          edata_schemes={'feat': Scheme(shape=(1,), dtype=torch.float32)}),
 DGLGraph(num_nodes=136, num_edges=5602,
          ndata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}
          edata_schemes={'feat': Scheme(shape=(1,), dtype=torch.float32)}),
 DGLGraph(num_nodes=87, num_edges=2288,
          ndata_schemes={'f

In [40]:
data[0].n_samples

10000

'nb_nodes': 117, 
'W': tensor([[0, 0, 1,  ..., 0, 1, 0],
             [0, 0, 1,  ..., 0, 0, 0],
             [1, 1, 0,  ..., 1, 1, 0],
             ...,
             [0, 0, 1,  ..., 0, 1, 0],
             [1, 0, 1,  ..., 1, 0, 0],
             [0, 0, 0,  ..., 0, 0, 0]], 
dtype=torch.int8), 
'rand_idx': tensor([ 30,  81,  78,  18, 104,  72,  22,  61,   4,   3,  31,  51,  58,  16,
                     35, 107,  88,   1,  62,  97,  46,  83,  50, 102,  70,  39,  65, 111,
                     43,  85,  21,  15,  82, 112,  41, 114,  68,  42,  13,  93,  29,  73,
                     75,  76,  25,  91,  53,  12,  77, 116,  47, 101,  37,  33,  66,  96,
                     98,  67,  84,  86,   0, 108,  40,  94,  63,  17,  24, 100,  87,  20,
                     28,  38, 106,  48,  79,  56,   2,  14,  10,  99,  45,  55,  90,  80,
                     64,  32,   5,  44,  59, 115,  95,  27,  60, 109, 103,   8,   6,  57,
                     23,  26,   9,  74,  92,  52,  54,  36,  49,  11,  19,   7, 110,  71,
                     34, 105,  89,  69, 113], 

dtype=torch.int16), 

'node_feat': tensor([0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0,
                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                     0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                     0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 4, 2, 0, 0, 0, 0],
        dtype=torch.int16), 
        
        
'node_label': tensor([0, 3, 3, 0, 4, 3, 0, 2, 0, 0, 0, 2, 2, 0, 1, 5, 3, 0, 2, 4, 2, 3, 2, 4,
                     3, 1, 3, 5, 2, 3, 0, 0, 3, 5, 2, 5, 3, 2, 0, 3, 0, 3, 3, 3, 0, 3, 2, 0,
                     3, 5, 2, 4, 1, 1, 3, 4, 4, 3, 3, 3, 0, 5, 2, 4, 3, 0, 0, 4, 3, 0, 0, 1,
                     4, 2, 3, 2, 0, 0, 0, 4, 2, 2, 3, 3, 3, 0, 0, 2, 2, 5, 4, 0, 2, 5, 4, 0,
                     0, 2, 0, 0, 0, 3, 3, 2, 2, 1, 2, 0, 0, 0, 5, 3, 1, 4, 3, 3, 5],
        dtype=torch.int16)}

In [10]:
import networkx as nx

In [118]:
W = train[0]['W']
labels = train[0]['node_label'].tolist()

In [119]:
type(W.numpy())

numpy.ndarray

In [120]:
W =W.numpy()

In [121]:
g_nx=nx.from_numpy_matrix(W)

In [122]:
g_nx

<networkx.classes.graph.Graph at 0x15ed10210>

In [123]:
labels

[0,
 3,
 3,
 0,
 4,
 3,
 0,
 2,
 0,
 0,
 0,
 2,
 2,
 0,
 1,
 5,
 3,
 0,
 2,
 4,
 2,
 3,
 2,
 4,
 3,
 1,
 3,
 5,
 2,
 3,
 0,
 0,
 3,
 5,
 2,
 5,
 3,
 2,
 0,
 3,
 0,
 3,
 3,
 3,
 0,
 3,
 2,
 0,
 3,
 5,
 2,
 4,
 1,
 1,
 3,
 4,
 4,
 3,
 3,
 3,
 0,
 5,
 2,
 4,
 3,
 0,
 0,
 4,
 3,
 0,
 0,
 1,
 4,
 2,
 3,
 2,
 0,
 0,
 0,
 4,
 2,
 2,
 3,
 3,
 3,
 0,
 0,
 2,
 2,
 5,
 4,
 0,
 2,
 5,
 4,
 0,
 0,
 2,
 0,
 0,
 0,
 3,
 3,
 2,
 2,
 1,
 2,
 0,
 0,
 0,
 5,
 3,
 1,
 4,
 3,
 3,
 5]

In [137]:
class ProgressSmoothing:
    def __init__(self, g_nx):
        self.g_nx = g_nx
        
    def _get_weight_list(self, a, m, neighbor_list_dict):
        denominator = 0
        weight_list = [0 for _ in range(m)]
        for h in range(0, m):
            weighting = np.power(a, (m - h))
           # print(len(neighbor_list_dict[h]))
            num_nodes = len(neighbor_list_dict[h])
            weight_list[h] = weighting * num_nodes
            
#             print(weighting, "@")
#             print(num_nodes, "#")
            denominator += weighting * num_nodes
#         print(type(denominator))
#         print(type(weight_list))
        print(weight_list/denominator)
        return weight_list / denominator
    def nei_dict(self, hop_dict):
        neighbor_list_dict = {}  # neighbor_list_dict = {which_hop: [index1, index5, ....]}
        for u, h in hop_dict.items():  # hop_dict = {neighbor_id : which_hop}
            if not h in neighbor_list_dict.keys():
                n_list = [u] # include self node
                neighbor_list_dict[h] = n_list
            else:
                neighbor_list_dict[h].append(u)
        return neighbor_list_dict
    def get_neigh_smooth_weight(self, v, a):
#         hop_dict = nx.single_source_shortest_path_length(self.g_nx, v)
        hop_dict = nx.single_source_shortest_path_length(self.g_nx, v, 2)
        neighbor_list_dict = self.nei_dict(hop_dict)
#         print(neighbor_list_dict)
        m = np.max(list(neighbor_list_dict.keys()))
        weight_list = self._get_weight_list(a, m, neighbor_list_dict)
        #print(weight_list)
        nidx_weight_list = []
        for h in range(0, m):
            for u in neighbor_list_dict[h]:
                nidx_weight_list.append((int(u), weight_list[h]))
        return nidx_weight_list
    def smooth_all(self, a, labels):
        total_nidx_weight_list = []
        for v in list(g_nx.nodes):
            #print(v)
            nidx_weight_list = self.get_neigh_smooth_weight(v, a)
            #print(nidx_weight_list)
            total_nidx_weight_list.extend(nidx_weight_list)
        smoothed_labels = labels.copy()
        for u, w in total_nidx_weight_list:
            smoothed_labels[u] *= w
        return smoothed_labels
#     def save(self, output_path):
#         self.df.to_csv(output_path, index=False)
        

ps = ProgressSmoothing(g_nx=g_nx)
smoothed_labels = ps.smooth_all(2, labels)

[0.05263158 0.94736842]
[0.05 0.95]
[0.06060606 0.93939394]
[0.05 0.95]
[0.05 0.95]
[0.04761905 0.95238095]
[0.05263158 0.94736842]
[0.05 0.95]
[0.03921569 0.96078431]
[0.05405405 0.94594595]
[0.04651163 0.95348837]
[0.06060606 0.93939394]
[0.05 0.95]
[0.04166667 0.95833333]
[0.06666667 0.93333333]
[0.0625 0.9375]
[0.05405405 0.94594595]
[0.05128205 0.94871795]
[0.05714286 0.94285714]
[0.05128205 0.94871795]
[0.05263158 0.94736842]
[0.05263158 0.94736842]
[0.05555556 0.94444444]
[0.07142857 0.92857143]
[0.04878049 0.95121951]
[0.06451613 0.93548387]
[0.05405405 0.94594595]
[0.05714286 0.94285714]
[0.04651163 0.95348837]
[0.05 0.95]
[0.05128205 0.94871795]
[0.06666667 0.93333333]
[0.04651163 0.95348837]
[0.09090909 0.90909091]
[0.05714286 0.94285714]
[0.06451613 0.93548387]
[0.05405405 0.94594595]
[0.06896552 0.93103448]
[0.0625 0.9375]
[0.04255319 0.95744681]
[0.04878049 0.95121951]
[0.05714286 0.94285714]
[0.04545455 0.95454545]
[0.05263158 0.94736842]
[0.05 0.95]
[0.04651163 0.953488

In [138]:
labels

[0,
 3,
 3,
 0,
 4,
 3,
 0,
 2,
 0,
 0,
 0,
 2,
 2,
 0,
 1,
 5,
 3,
 0,
 2,
 4,
 2,
 3,
 2,
 4,
 3,
 1,
 3,
 5,
 2,
 3,
 0,
 0,
 3,
 5,
 2,
 5,
 3,
 2,
 0,
 3,
 0,
 3,
 3,
 3,
 0,
 3,
 2,
 0,
 3,
 5,
 2,
 4,
 1,
 1,
 3,
 4,
 4,
 3,
 3,
 3,
 0,
 5,
 2,
 4,
 3,
 0,
 0,
 4,
 3,
 0,
 0,
 1,
 4,
 2,
 3,
 2,
 0,
 0,
 0,
 4,
 2,
 2,
 3,
 3,
 3,
 0,
 0,
 2,
 2,
 5,
 4,
 0,
 2,
 5,
 4,
 0,
 0,
 2,
 0,
 0,
 0,
 3,
 3,
 2,
 2,
 1,
 2,
 0,
 0,
 0,
 5,
 3,
 1,
 4,
 3,
 3,
 5]

In [141]:
smoothed_labels

[0.0,
 0.018692466808744857,
 0.03420408867916433,
 0.0,
 0.023096323768328327,
 0.016807749981718804,
 0.0,
 0.011488201538045561,
 0.0,
 0.0,
 0.0,
 0.022655360682577074,
 0.01087828586992243,
 0.0,
 0.014071373775988957,
 0.05757484068841429,
 0.02404009170628897,
 0.0,
 0.01851756953257619,
 0.026289381195662605,
 0.012873513972590199,
 0.020878326537678907,
 0.017064367192232407,
 0.07101630953985662,
 0.017781968466478632,
 0.012634228487004948,
 0.024853411285026192,
 0.0424904566444556,
 0.009087735502746675,
 0.01910541336210829,
 0.0,
 0.0,
 0.014354201904994715,
 0.14309812369072156,
 0.017857880083722553,
 0.06631442737484146,
 0.02435891558070048,
 0.030711562365604544,
 0.0,
 0.011741012903048556,
 0.0,
 0.028498721165467488,
 0.012177095339043084,
 0.020974792047257856,
 0.0,
 0.013541130582214482,
 0.020598581112405673,
 0.0,
 0.04605314211568015,
 0.049453125824303217,
 0.011934001839194271,
 0.016544816295084416,
 0.018611010544235466,
 0.015393210670826591,
 0.014697

In [140]:
torch.tensor(c)

tensor([0.0000, 0.0187, 0.0342, 0.0000, 0.0231, 0.0168, 0.0000, 0.0115, 0.0000,
        0.0000, 0.0000, 0.0227, 0.0109, 0.0000, 0.0141, 0.0576, 0.0240, 0.0000,
        0.0185, 0.0263, 0.0129, 0.0209, 0.0171, 0.0710, 0.0178, 0.0126, 0.0249,
        0.0425, 0.0091, 0.0191, 0.0000, 0.0000, 0.0144, 0.1431, 0.0179, 0.0663,
        0.0244, 0.0307, 0.0000, 0.0117, 0.0000, 0.0285, 0.0122, 0.0210, 0.0000,
        0.0135, 0.0206, 0.0000, 0.0461, 0.0495, 0.0119, 0.0165, 0.0186, 0.0154,
        0.0147, 0.0536, 0.0254, 0.0272, 0.0352, 0.0191, 0.0000, 0.1206, 0.0199,
        0.0567, 0.0270, 0.0000, 0.0000, 0.0642, 0.0263, 0.0000, 0.0000, 0.0117,
        0.0439, 0.0092, 0.0222, 0.0112, 0.0000, 0.0000, 0.0000, 0.0765, 0.0127,
        0.0213, 0.0327, 0.0108, 0.0206, 0.0000, 0.0000, 0.0152, 0.0144, 0.0509,
        0.0633, 0.0000, 0.0298, 0.0534, 0.0335, 0.0000, 0.0000, 0.0103, 0.0000,
        0.0000, 0.0000, 0.0157, 0.0193, 0.0099, 0.0168, 0.0115, 0.0176, 0.0000,
        0.0000, 0.0000, 0.0342, 0.0172, 

In [12]:
with open('./data/SBMs/SBM_PATTERN.pkl', 'rb') as f:
    data_p = pickle.load(f)

In [None]:
sbm = data_p.dateset[0]

In [None]:
# def to_dict_of_dicts(G, nodelist=None, edge_data=None):
#     """Returns adjacency representation of graph as a dictionary of dictionaries.

#     Parameters
#     ----------
#     G : graph
#        A NetworkX graph

#     nodelist : list
#        Use only nodes specified in nodelist

#     edge_data : list, optional
#        If provided,  the value of the dictionary will be
#        set to edge_data for all edges.  This is useful to make
#        an adjacency matrix type representation with 1 as the edge data.
#        If edgedata is None, the edgedata in G is used to fill the values.
#        If G is a multigraph, the edgedata is a dict for each pair (u,v).
#     """
#     dod = {}
#     if nodelist is None:
#         if edge_data is None:
#             for u, nbrdict in G.adjacency():
#                 dod[u] = nbrdict.copy()
#         else:  # edge_data is not None
#             for u, nbrdict in G.adjacency():
#                 dod[u] = dod.fromkeys(nbrdict, edge_data)
#     else:  # nodelist is not None
#         if edge_data is None:
#             for u in nodelist:
#                 dod[u] = {}
#                 for v, data in ((v, data) for v, data in G[u].items() if v in nodelist):
#                     dod[u][v] = data
#         else:  # nodelist and edge_data are not None
#             for u in nodelist:
#                 dod[u] = {}
#                 for v in (v for v in G[u] if v in nodelist):
#                     dod[u][v] = edge_data
#     return dod

In [70]:
# class ProgressSmoothing:
#     def __init__(self, g_nx, df):
#         self.g_nx = g_nx
#         self.df = df.copy()
#     def _get_weight_list(self, a, m, neighbor_list_dict):
#         denominator = 0
#         weight_list = [0 for _ in range(m)]
#         for h in range(0, m):
#             weighting = np.power(a, (m - h))
#            # print(len(neighbor_list_dict[h]))
#             num_nodes = len(neighbor_list_dict[h])
#             weight_list[h] = weighting * num_nodes
#             denominator += weighting * num_nodes
#         return weight_list / denominator
#     def nei_dict(self, hop_dict):
#         neighbor_list_dict = {}  # neighbor_list_dict = {which_hop: [index1, index5, ....]}
#         for u, h in hop_dict.items():  # hop_dict = {neighbor_id : which_hop}
#             if not h in neighbor_list_dict.keys():
#                 n_list = [u] # include self node
#                 neighbor_list_dict[h] = n_list
#             else:
#                 neighbor_list_dict[h].append(u)
#         return neighbor_list_dict
#     def get_neigh_smooth_weight(self, v, a):
#         hop_dict = nx.single_source_shortest_path_length(self.g_nx, v)
#         neighbor_list_dict = self.nei_dict(hop_dict)
#         m = np.max(list(neighbor_list_dict.keys()))
#         weight_list = self._get_weight_list(a, m, neighbor_list_dict)
#         #print(weight_list)
#         nidx_weight_list = []
#         for h in range(0, m):
#             for u in neighbor_list_dict[h]:
#                 nidx_weight_list.append((int(u), weight_list[h]))
#         return nidx_weight_list
#     def smooth_all(self, a, target_col_name):
#         total_nidx_weight_list = []
#         for v in list(g_nx.nodes):
#             #print(v)
#             nidx_weight_list = self.get_neigh_smooth_weight(v, a)
#             #print(nidx_weight_list)
#             total_nidx_weight_list.extend(nidx_weight_list)
#         for u, w in total_nidx_weight_list:
#             label = self.df[target_col_name].iloc[u]
#             self.df[target_col_name].iloc[u] = label*w
#         return self.df
#     def save(self, output_path):
#         self.df.to_csv(output_path, index=False)
        

# ps = ProgressSmoothing(g_nx=g_nx, df=node_data)
# a = 2
# target_col_name = 'count_followee'
# smoothing_df = ps.smooth_all(a, target_col_name=target_col_name)
# output_path = './out.csv'
# ps.save(output_path=output_path)

NameError: name 'node_data' is not defined