In [3]:
import pickle
import os

In [4]:
print(os.getcwd())

C:\Users\user\Desktop\project\benchmarking_gnns\data\SBMs


In [5]:
class DotDict(dict):
    def __init__(self, **kwds):
        self.update(kwds)
        self.__dict__ = self

In [6]:

import time
import os
import pickle
import numpy as np

import dgl
import torch




class load_SBMsDataSetDGL(torch.utils.data.Dataset):

#     def __init__(self,
#                  data_dir,
#                  name,
#                  split):

#         self.split = split
#         self.is_test = split.lower() in ['test', 'val'] 
#         with open(os.path.join(data_dir, name + '_%s.pkl' % self.split), 'rb') as f:
#             self.dataset = pickle.load(f)
#         self.node_labels = []
#         self.graph_lists = []
#         self.n_samples = len(self.dataset)
#         self._prepare()

    def __init__(self,
                 data_dir,
                 name,
                 split):

        self.split = split
        self.is_test = split.lower() in ['test', 'val'] 
        with open(os.path.join(data_dir, name + '.pkl'), 'rb') as f:
            self.dataset = pickle.load(f)
        self.node_labels = []
        self.graph_lists = []
        self.n_samples = len(self.dataset)
        self._prepare()

    def _prepare(self):

        print("preparing %d graphs for the %s set..." % (self.n_samples, self.split.upper()))

        for data in self.dataset:

            node_features = data.node_feat
            edge_list = (data.W != 0).nonzero()  # converting adj matrix to edge_list

            # Create the DGL Graph
            g = dgl.DGLGraph()
            g.add_nodes(node_features.size(0))
            g.ndata['feat'] = node_features.long()
            for src, dst in edge_list:
                g.add_edges(src.item(), dst.item())

            # adding edge features for Residual Gated ConvNet
            #edge_feat_dim = g.ndata['feat'].size(1) # dim same as node feature dim
            edge_feat_dim = 1 # dim same as node feature dim
            g.edata['feat'] = torch.ones(g.number_of_edges(), edge_feat_dim)

            self.graph_lists.append(g)
            self.node_labels.append(data.node_label)


    def __len__(self):
        """Return the number of graphs in the dataset."""
        return self.n_samples

    def __getitem__(self, idx):
        """
            Get the idx^th sample.
            Parameters
            ---------
            idx : int
                The sample index.
            Returns
            -------
            (dgl.DGLGraph, int)
                DGLGraph with node feature stored in `feat` field
                And its label.
        """
        return self.graph_lists[idx], self.node_labels[idx]


class SBMsDatasetDGL(torch.utils.data.Dataset):

    def __init__(self, name):
        """
            TODO
        """
        start = time.time()
        print("[I] Loading data ...")
        self.name = name
        data_dir = 'data/SBMs'
        self.train = load_SBMsDataSetDGL(data_dir, name, split='train')
        self.test = load_SBMsDataSetDGL(data_dir, name, split='test')
        self.val = load_SBMsDataSetDGL(data_dir, name, split='val')
        print("[I] Finished loading.")
        print("[I] Data load time: {:.4f}s".format(time.time()-start))




def self_loop(g):
    """
        Utility function only, to be used only when necessary as per user self_loop flag
        : Overwriting the function dgl.transform.add_self_loop() to not miss ndata['feat'] and edata['feat']
        
        
        This function is called inside a function in SBMsDataset class.
    """
    new_g = dgl.DGLGraph()
    new_g.add_nodes(g.number_of_nodes())
    new_g.ndata['feat'] = g.ndata['feat']
    
    src, dst = g.all_edges(order="eid")
    src = dgl.backend.zerocopy_to_numpy(src)
    dst = dgl.backend.zerocopy_to_numpy(dst)
    non_self_edges_idx = src != dst
    nodes = np.arange(g.number_of_nodes())
    new_g.add_edges(src[non_self_edges_idx], dst[non_self_edges_idx])
    new_g.add_edges(nodes, nodes)
    
    # This new edata is not used since this function gets called only for GCN, GAT
    # However, we need this for the generic requirement of ndata and edata
    new_g.edata['feat'] = torch.zeros(new_g.number_of_edges())
    return new_g



class SBMsDataset(torch.utils.data.Dataset):

    def __init__(self, name):
        """
            Loading SBM datasets
        """
        start = time.time()
        print("[I] Loading dataset %s..." % (name))
        self.name = name
        data_dir = 'data/SBMs/'
        with open(data_dir+name+'.pkl',"rb") as f:
            f = pickle.load(f)
            self.train = f[0]
            self.val = f[1]
            self.test = f[2]
        print('train, test, val sizes :',len(self.train),len(self.test),len(self.val))
        print("[I] Finished loading.")
        print("[I] Data load time: {:.4f}s".format(time.time()-start))


    # form a mini batch from a given list of samples = [(graph, label) pairs]
    def collate(self, samples):
        # The input samples is a list of pairs (graph, label).
        graphs, labels = map(list, zip(*samples))
        labels = torch.cat(labels).long()
        tab_sizes_n = [ graphs[i].number_of_nodes() for i in range(len(graphs))]
        tab_snorm_n = [ torch.FloatTensor(size,1).fill_(1./float(size)) for size in tab_sizes_n ]
        snorm_n = torch.cat(tab_snorm_n).sqrt()  
        tab_sizes_e = [ graphs[i].number_of_edges() for i in range(len(graphs))]
        tab_snorm_e = [ torch.FloatTensor(size,1).fill_(1./float(size)) for size in tab_sizes_e ]
        snorm_e = torch.cat(tab_snorm_e).sqrt()
        batched_graph = dgl.batch(graphs)

        return batched_graph, labels, snorm_n, snorm_e

    def _add_self_loops(self):
        
        # function for adding self loops
        # this function will be called only if self_loop flag is True
            
        self.train.graph_lists = [self_loop(g) for g in self.train.graph_lists]
        self.val.graph_lists = [self_loop(g) for g in self.val.graph_lists]
        self.test.graph_lists = [self_loop(g) for g in self.test.graph_lists]






In [7]:
with open('./data/SBMs/SBM_CLUSTER.pkl', 'rb') as f:
    data = pickle.load(f)

FileNotFoundError: [Errno 2] No such file or directory: './data/SBMs/SBM_CLUSTER.pkl'

In [None]:
def update_data(data, labels, data_type):
    if data_type == 'train':
        data[0][]

In [None]:
train = data[0].dataset

In [None]:
len(train)

In [None]:
label

In [None]:
train[0]['node_label']

In [None]:
data[0].graph_lists

In [None]:
data[0].n_samples

'nb_nodes': 117, 
'W': tensor([[0, 0, 1,  ..., 0, 1, 0],
             [0, 0, 1,  ..., 0, 0, 0],
             [1, 1, 0,  ..., 1, 1, 0],
             ...,
             [0, 0, 1,  ..., 0, 1, 0],
             [1, 0, 1,  ..., 1, 0, 0],
             [0, 0, 0,  ..., 0, 0, 0]], 
dtype=torch.int8), 
'rand_idx': tensor([ 30,  81,  78,  18, 104,  72,  22,  61,   4,   3,  31,  51,  58,  16,
                     35, 107,  88,   1,  62,  97,  46,  83,  50, 102,  70,  39,  65, 111,
                     43,  85,  21,  15,  82, 112,  41, 114,  68,  42,  13,  93,  29,  73,
                     75,  76,  25,  91,  53,  12,  77, 116,  47, 101,  37,  33,  66,  96,
                     98,  67,  84,  86,   0, 108,  40,  94,  63,  17,  24, 100,  87,  20,
                     28,  38, 106,  48,  79,  56,   2,  14,  10,  99,  45,  55,  90,  80,
                     64,  32,   5,  44,  59, 115,  95,  27,  60, 109, 103,   8,   6,  57,
                     23,  26,   9,  74,  92,  52,  54,  36,  49,  11,  19,   7, 110,  71,
                     34, 105,  89,  69, 113], 

dtype=torch.int16), 

'node_feat': tensor([0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0,
                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                     0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                     0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 4, 2, 0, 0, 0, 0],
        dtype=torch.int16), 
        
        
'node_label': tensor([0, 3, 3, 0, 4, 3, 0, 2, 0, 0, 0, 2, 2, 0, 1, 5, 3, 0, 2, 4, 2, 3, 2, 4,
                     3, 1, 3, 5, 2, 3, 0, 0, 3, 5, 2, 5, 3, 2, 0, 3, 0, 3, 3, 3, 0, 3, 2, 0,
                     3, 5, 2, 4, 1, 1, 3, 4, 4, 3, 3, 3, 0, 5, 2, 4, 3, 0, 0, 4, 3, 0, 0, 1,
                     4, 2, 3, 2, 0, 0, 0, 4, 2, 2, 3, 3, 3, 0, 0, 2, 2, 5, 4, 0, 2, 5, 4, 0,
                     0, 2, 0, 0, 0, 3, 3, 2, 2, 1, 2, 0, 0, 0, 5, 3, 1, 4, 3, 3, 5],
        dtype=torch.int16)}

In [None]:
import networkx as nx

In [None]:
W = train[0]['W']
labels = train[0]['node_label'].tolist()

In [None]:
type(W.numpy())

In [None]:
W =W.numpy()

In [None]:
g_nx=nx.from_numpy_matrix(W)

In [None]:
g_nx

In [None]:
labels

In [None]:
class ProgressSmoothing:
    def __init__(self, g_nx):
        self.g_nx = g_nx
        
    def _get_weight_list(self, a, m, neighbor_list_dict):
        denominator = 0
        weight_list = [0 for _ in range(m)]
        for h in range(0, m):
            weighting = np.power(a, (m - h))
           # print(len(neighbor_list_dict[h]))
            num_nodes = len(neighbor_list_dict[h])
            weight_list[h] = weighting * num_nodes
            
#             print(weighting, "@")
#             print(num_nodes, "#")
            denominator += weighting * num_nodes
#         print(type(denominator))
#         print(type(weight_list))
        print(weight_list/denominator)
        return weight_list / denominator
    def nei_dict(self, hop_dict):
        neighbor_list_dict = {}  # neighbor_list_dict = {which_hop: [index1, index5, ....]}
        for u, h in hop_dict.items():  # hop_dict = {neighbor_id : which_hop}
            if not h in neighbor_list_dict.keys():
                n_list = [u] # include self node
                neighbor_list_dict[h] = n_list
            else:
                neighbor_list_dict[h].append(u)
        return neighbor_list_dict
    def get_neigh_smooth_weight(self, v, a):
#         hop_dict = nx.single_source_shortest_path_length(self.g_nx, v)
        hop_dict = nx.single_source_shortest_path_length(self.g_nx, v, 2)
        neighbor_list_dict = self.nei_dict(hop_dict)
#         print(neighbor_list_dict)
        m = np.max(list(neighbor_list_dict.keys()))
        weight_list = self._get_weight_list(a, m, neighbor_list_dict)
        #print(weight_list)
        nidx_weight_list = []
        for h in range(0, m):
            for u in neighbor_list_dict[h]:
                nidx_weight_list.append((int(u), weight_list[h]))
        return nidx_weight_list
    def smooth_all(self, a, labels):
        total_nidx_weight_list = []
        for v in list(g_nx.nodes):
            #print(v)
            nidx_weight_list = self.get_neigh_smooth_weight(v, a)
            #print(nidx_weight_list)
            total_nidx_weight_list.extend(nidx_weight_list)
        smoothed_labels = labels.copy()
        for u, w in total_nidx_weight_list:
            smoothed_labels[u] *= w
        return smoothed_labels
#     def save(self, output_path):
#         self.df.to_csv(output_path, index=False)
        

ps = ProgressSmoothing(g_nx=g_nx)
smoothed_labels = ps.smooth_all(2, labels)

In [None]:
labels

In [None]:
smoothed_labels

###torch.tensor(smoothed_labels)

In [None]:
with open('./data/SBMs/SBM_PATTERN.pkl', 'rb') as f:
    data_p = pickle.load(f)

In [None]:
sbm = data_p.dateset[0]

In [None]:
import numpy as np
import torch
import pickle
import time

#%matplotlib inline
import matplotlib.pyplot as plt
import scipy.sparse

def schuffle(W, c):
    # relabel the vertices at random
    idx = np.random.permutation(W.shape[0])
    # idx2=np.argsort(idx) # for index ordering wrt classes
    W_new = W[idx, :]
    W_new = W_new[:, idx]
    c_new = c[idx]
    return W_new, c_new, idx


def block_model(c, p, q):
    n = len(c)
    W = np.zeros((n, n))
    for i in range(n):
        for j in range(i + 1, n):
            if c[i] == c[j]:
                prob = p
            else:
                prob = q
            if np.random.binomial(1, prob) == 1:
                W[i, j] = 1
                W[j, i] = 1
    return W


def unbalanced_block_model(nb_of_clust, clust_size_min, clust_size_max, p, q):
    c = []
    for r in range(nb_of_clust):
        if clust_size_max == clust_size_min:
            clust_size_r = clust_size_max
        else:
            clust_size_r = np.random.randint(clust_size_min, clust_size_max, size=1)[0]
        val_r = np.repeat(r, clust_size_r, axis=0)
        c.append(val_r)
    c = np.concatenate(c)
    W = block_model(c, p, q)
    return W, c


class generate_SBM_graph():

    def __init__(self, SBM_parameters):
        # parameters
        nb_of_clust = SBM_parameters['nb_clusters']
        clust_size_min = SBM_parameters['size_min']
        clust_size_max = SBM_parameters['size_max']
        p = SBM_parameters['p']
        q = SBM_parameters['q']

        # block model
        W, c = unbalanced_block_model(nb_of_clust, clust_size_min, clust_size_max, p, q)

        # shuffle
        W, c, idx = schuffle(W, c)

        # signal on block model
        u = np.zeros(c.shape[0])
        for r in range(nb_of_clust):
            cluster = np.where(c == r)[0]
            s = cluster[np.random.randint(cluster.shape[0])]
            u[s] = r + 1

        # target
        target = c

        # convert to pytorch
        W = torch.from_numpy(W)
        W = W.to(torch.int8)
        idx = torch.from_numpy(idx)
        idx = idx.to(torch.int16)
        u = torch.from_numpy(u)
        u = u.to(torch.int16)
        target = torch.from_numpy(target)
        target = target.to(torch.int16)

        # attributes
        self.nb_nodes = W.size(0)
        self.W = W
        self.rand_idx = idx
        self.node_feat = u
        self.node_label = target


# configuration
SBM_parameters = {}
SBM_parameters['nb_clusters'] = 6
SBM_parameters['size_min'] = 5
SBM_parameters['size_max'] = 35
SBM_parameters['p'] = 0.55
SBM_parameters['q'] = 0.25
print(SBM_parameters)

data = generate_SBM_graph(SBM_parameters)

print(data)
# print(data.nb_nodes)
# print(data.W)
# print(data.rand_idx)
# print(data.node_feat)
# print(data.node_label)

W = data.W
plt.spy(W,precision=0.01, markersize=1)
plt.show()

idx = np.argsort(data.rand_idx)
W = data.W
W2 = W[idx,:]
W2 = W2[:,idx]
plt.spy(W2,precision=0.01, markersize=1)
plt.show()


class DotDict(dict):
    def __init__(self, **kwds):
        self.update(kwds)
        self.__dict__ = self


def generate_semisuperclust_dataset(nb_graphs):
    dataset = []
    for i in range(nb_graphs):
        if not i % 250:
            print(i)
        data = generate_SBM_graph(SBM_parameters)
        graph = DotDict()
        graph.nb_nodes = data.nb_nodes
        graph.W = data.W
        graph.rand_idx = data.rand_idx
        graph.node_feat = data.node_feat
        graph.node_label = data.node_label
        dataset.append(graph)
    return dataset


def plot_histo_graphs(dataset, title):
    # histogram of graph sizes
    graph_sizes = []
    for graph in dataset:
        graph_sizes.append(graph.nb_nodes)
    plt.figure(1)
    plt.hist(graph_sizes, bins=50)
    plt.title(title)
    plt.show()


def SBMs_CLUSTER(nb_graphs, name):
    dataset = generate_semisuperclust_dataset(nb_graphs)
    print(len(dataset))
    with open(name + '.pkl', "wb") as f:
        pickle.dump(dataset, f)
    plot_histo_graphs(dataset, name)



import pickle

#%load_ext autoreload
#%autoreload 2

with open('SBM_CLUSTER_train.pkl', 'rb') as f:
    data = pickle.load(f)

import networkx as nx

train = data
W_list = []
label_list = []
for data in train:
    W_list.append(data['W'])
    label_list.append(data['node_label'])


class ProgressSmoothing:
    def __init__(self, g_nx):
        self.g_nx = g_nx

    def _get_weight_list(self, a, m, neighbor_list_dict):
        denominator = 0
        weight_list = [0 for _ in range(m)]
        for h in range(0, m):
            weighting = np.power(a, (m - h))
            # print(len(neighbor_list_dict[h]))
            num_nodes = len(neighbor_list_dict[h])
            weight_list[h] = weighting * num_nodes

            #             print(weighting, "@")
            #             print(num_nodes, "#")
            denominator += weighting * num_nodes
        #         print(type(denominator))
        #         print(type(weight_list))
        #        print(weight_list/denominator)
        return weight_list / denominator

    def nei_dict(self, hop_dict):
        neighbor_list_dict = {}  # neighbor_list_dict = {which_hop: [index1, index5, ....]}
        for u, h in hop_dict.items():  # hop_dict = {neighbor_id : which_hop}
            if not h in neighbor_list_dict.keys():
                n_list = [u]  # include self node
                neighbor_list_dict[h] = n_list
            else:
                neighbor_list_dict[h].append(u)
        return neighbor_list_dict

    def get_neigh_smooth_weight(self, v, a):
        #         hop_dict = nx.single_source_shortest_path_length(self.g_nx, v)
        hop_dict = nx.single_source_shortest_path_length(self.g_nx, v, 2)
        neighbor_list_dict = self.nei_dict(hop_dict)
        #         print(neighbor_list_dict)
        m = np.max(list(neighbor_list_dict.keys()))
        weight_list = self._get_weight_list(a, m, neighbor_list_dict)
        # print(weight_list)
        nidx_weight_list = []
        for h in range(0, m):
            for u in neighbor_list_dict[h]:
                nidx_weight_list.append((int(u), weight_list[h]))
        return nidx_weight_list

    def smooth_all(self, a, labels):
        total_nidx_weight_list = []
        for v in list(g_nx.nodes):
            # print(v)
            nidx_weight_list = self.get_neigh_smooth_weight(v, a)
            # print(nidx_weight_list)
            total_nidx_weight_list.extend(nidx_weight_list)
        smoothed_labels = labels.copy()
        for u, w in total_nidx_weight_list:
            smoothed_labels[u] *= w
        return smoothed_labels


for W, labels in zip(W_list, label_list):
    # train_W =[]
    train_label = []
    W = W.numpy()
    labels = labels.numpy()
    g_nx = nx.from_numpy_matrix(W)
    ps = ProgressSmoothing(g_nx=g_nx)
    # train_W.append(W)
    train_label.append(ps.smooth_all(2, labels))

# ps = ProgressSmoothing(g_nx=g_nx)
# smoothed_labels = ps.smooth_all(2, labels)
data['node_label'] = torch.tensor(train_label)

data

start = time.time()

with open('new_SBM_CLUSTER_train.pkl', 'wb') as f:
    pickle.dump(data, f)
   

print('Time (sec):', time.time() - start)

In [None]:
test_val = np.zeros(10)
a = torch.tensor(test_val)
print(a)


UnsupportedOperation: read

In [19]:
with open('SBM_CLUSTER_train.pkl', 'rb') as f:
    data = pickle.load(f)



In [20]:
 
data


[{'nb_nodes': 109,
  'W': tensor([[0, 1, 0,  ..., 0, 0, 1],
          [1, 0, 0,  ..., 0, 1, 0],
          [0, 0, 0,  ..., 1, 1, 0],
          ...,
          [0, 0, 1,  ..., 0, 0, 0],
          [0, 1, 1,  ..., 0, 0, 0],
          [1, 0, 0,  ..., 0, 0, 0]], dtype=torch.int8),
  'rand_idx': tensor([ 13,  59,  39,  58,  25, 105,   0,  26,  96,   7,  93,  84,  11,  36,
          106,  51,  20,  63,  90,  53,  83,  40,  15,  12, 100, 103,  27,  71,
           70,  38, 104,  68, 107, 108,  67,  75,  77,  57,  74,  64,  50,  37,
           43,  60,  41,   4,   9,  69,  42,  76,  89,  97,  47,   5,  24,  21,
           88,  65,  66,  46,  17,  48, 102,  72,  85,  45,  29,  52,  86,  23,
           79,  34,  99,  16,  94,  73,  18,  19,   6,  55,   3,  14,  28,  95,
           22,  98,  10,  33,  44,   8,  78,   2,  91,  56,  92, 101,  30,  49,
           80,  82,  87,  35,  32,  31,   1,  62,  81,  54,  61],
         dtype=torch.int16),
  'node_feat': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [28]:
W_lists = list(map(lambda d: d['W'].numpy(), data))
rand_idx_lists = list(map(lambda d: d['rand_idx'], data))
node_feat_lists = list(map(lambda d: d['node_feat'], data))
node_label_lists = list(map(lambda d: d['node_label'], data))

In [29]:
W_lists


[array([[0, 1, 0, ..., 0, 0, 1],
        [1, 0, 0, ..., 0, 1, 0],
        [0, 0, 0, ..., 1, 1, 0],
        ...,
        [0, 0, 1, ..., 0, 0, 0],
        [0, 1, 1, ..., 0, 0, 0],
        [1, 0, 0, ..., 0, 0, 0]], dtype=int8),
 array([[0, 0, 0, ..., 1, 0, 1],
        [0, 0, 0, ..., 1, 0, 0],
        [0, 0, 0, ..., 0, 1, 0],
        ...,
        [1, 1, 0, ..., 0, 0, 0],
        [0, 0, 1, ..., 0, 0, 1],
        [1, 0, 0, ..., 0, 1, 0]], dtype=int8),
 array([[0, 1, 1, ..., 0, 0, 0],
        [1, 0, 0, ..., 1, 1, 0],
        [1, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 1, 0, ..., 0, 0, 0],
        [0, 1, 0, ..., 0, 0, 1],
        [0, 0, 0, ..., 0, 1, 0]], dtype=int8),
 array([[0, 0, 1, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [1, 0, 0, ..., 1, 0, 0],
        ...,
        [0, 0, 1, ..., 0, 0, 1],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 1, 0, 0]], dtype=int8),
 array([[0, 0, 0, ..., 0, 1, 0],
        [0, 0, 0, ..., 1, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
 

In [None]:

W_lists



In [None]:
train = data
for data in train:
    W_list = []
    label_list = []
    W_list.append(data['W'])
    label_list.append(data['node_label'])

In [13]:
W_list

list(filter(lambda person: person['name'] == 'Pam', people))

list(filter(lambda person: person['name'] == 'Pam', people))

[tensor([[0, 0, 1,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 1, 1],
         [1, 0, 0,  ..., 0, 0, 1],
         ...,
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 1, 0,  ..., 0, 0, 0],
         [0, 1, 1,  ..., 0, 0, 0]], dtype=torch.int8)]

In [None]:
country= ["Japan", "Malaysia", "Philippine", "Thailand"]
capital = ["Tokyo", "Kuala Lumpur", "Manila", "Bangkok"]
currency = ["Yen", "Ringgit", "Peso", "Bath"]

data = [{"country":"Japan", "capital":"Tokyo", "currency":"Yen"}, 
        {"country":"Malaysia", "capital":"Kuala Lumpur", "currency":"Ringgit"}, 
        {"country":"Philippine", "capital":"Manila", "currency":"Peso"},
        {"country":"Thailand", "capital":"Bangkok", "currency":"Bath"}]

data = [{'country':coun, 'capital': cap, 'currency': curr} 
        for coun, cap, curr in zip(country, capital, currency)]

data = [{'W':W, 'rand_idx': rand_idx, 'node_feat': node_feat, 'node_label': node_label} 
        for W, rand_idx, node_feat, node_label in zip(W_list, rand_idx_list, node_feat_list, node_label_list)]


Capital of Japan is Tokyo, and the currency is Yen.
Capital of Malaysia is Kuala Lumpur, and the currency is Ringgit.
Capital of Philippine is Manila, and the currency is Peso.
Capital of Thailand is Bangkok, and the currency is Bath.




for dictionary in data:
    print("Capital of {0} is {1} and currency is {2}".
          format(dictionary['country'],dictionary['capital'], dictionary['currency']))
    
    
    

In [None]:
with open('SBM_CLUSTER_train.pkl', 'rb') as f:
    data1 = pickle.load(f)