In [1]:
import os
import torch
from torch.utils.data import Dataset
import numpy as np
import collections
import csv
import random
import pickle
from torch.utils.data import DataLoader
import dgl
import pandas as pd

class Subgraphs(Dataset):


    def __init__(self, root, mode, subgraph_list, subgraph2label, subgraph2center_node):   

        self.subgraph2label = subgraph2label
        self.subgraph_list = subgraph_list
        self.subgraph2center_node = subgraph2center_node
        
        self.data = pd.read_csv(os.path.join(root, mode + '.csv'))  # csv path
    
    def __getitem__(self, index):
        
        return self.subgraph_list[self.data.iloc[index]['name']], self.subgraph2label[self.data.iloc[index]['name']], self.subgraph2center_node[self.data.iloc[index]['name']] 

    def __len__(self):
        # as we have built up to batchsz of sets, you can sample some small batch size of sets.
        return len(self.data)
    
def collate(samples):
    # The input `samples` is a list of pairs
    #  (graph, label).
    graphs, labels, center_nodes = map(list, zip(*samples))
    batched_graph = dgl.batch(graphs)
    return batched_graph, torch.LongTensor(labels), torch.LongTensor(center_nodes)


In [2]:
class Subgraphs_test(Dataset):


    def __init__(self, root, mode, subgraph_list, subgraph2label, subgraph2center_node, k_shot, n_qry):   

        self.subgraph2label = subgraph2label
        self.subgraph_list = subgraph_list
        self.subgraph2center_node = subgraph2center_node
        
        self.data = pd.read_csv(os.path.join(root, mode + '.csv'))  # csv path
        self.k_shot = k_shot
        self.n_qry = n_qry
        
        self.labels = np.unique(self.data.label.values)
        self.labels_dict = dict(zip(list(self.labels), list(range(len(self.labels)))))
        
    def __getitem__(self, index):
        support = []
        query = []
        
        for i in self.labels:
            df_labels = self.data[self.data.label == i].reset_index(drop = True)
            support = support + list(df_labels.sample(n = self.k_shot)['name'].values)
            query = query + list(df_labels[~df_labels.name.isin(support)].sample(n = self.n_qry)['name'].values)
        
        return [self.subgraph_list[i] for i in support], [self.labels_dict[self.subgraph2label[i]] for i in support], [self.subgraph2center_node[i] for i in support], [self.subgraph_list[i] for i in query], [self.labels_dict[self.subgraph2label[i]] for i in query], [self.subgraph2center_node[i] for i in query]

    def __len__(self):
        # 100 "batches"
        return 100
    
def collate_test(samples):
    # The input `samples` is a list of pairs
    #  (graph, label).
    
    graphs, labels, center_nodes, g_qry, l_qry, c_qry = map(list, zip(*samples))
    batched_graph_spt = dgl.batch(graphs[0])
    batched_graph_qry = dgl.batch(g_qry[0])
    
    return batched_graph_spt, torch.LongTensor(labels), torch.LongTensor(center_nodes), batched_graph_qry, torch.LongTensor(l_qry), torch.LongTensor(c_qry)


In [3]:
import dgl.function as fn
import torch
import torch.nn as nn


# Sends a message of node feature h.
msg = fn.copy_src(src='h', out='m')

def reduce(nodes):
    """Take an average over all neighbor node features hu and use it to
    overwrite the original node feature."""
    accum = torch.mean(nodes.mailbox['m'], 1)
    return {'h': accum}

class NodeApplyModule(nn.Module):
    """Update the node feature hv with ReLU(Whv+b)."""
    def __init__(self, in_feats, out_feats, activation):
        super(NodeApplyModule, self).__init__()
        self.linear = nn.Linear(in_feats, out_feats)
        self.activation = activation

    def forward(self, node):
        h = self.linear(node.data['h'])
        h = self.activation(h)
        return {'h' : h}

class GCN(nn.Module):
    def __init__(self, in_feats, out_feats, activation):
        super(GCN, self).__init__()
        self.apply_mod = NodeApplyModule(in_feats, out_feats, activation)

    def forward(self, g, feature):
        # Initialize the node features with h.
        g.ndata['h'] = feature
        g.update_all(msg, reduce)
        g.apply_nodes(func=self.apply_mod)
        return g.ndata.pop('h')

In [4]:
import copy

In [5]:
import torch.nn.functional as F


class Classifier(nn.Module):
    def __init__(self, in_dim, hidden_dim, n_classes):
        super(Classifier, self).__init__()

        self.layers = nn.ModuleList([
            GCN(in_dim, hidden_dim, F.relu),
            GCN(hidden_dim, hidden_dim, F.relu)])
        self.classify = nn.Linear(hidden_dim, n_classes)

    def forward(self, g, to_fetch, features):
        # For undirected graphs, in_degree is the same as
        # out_degree.
        h = g.in_degrees().view(-1, 1).float().to(device)
        #h = torch.tensor([1.]*g.number_of_nodes()).view(-1, 1).float()

        #h = features.float()
        for conv in self.layers:
            h = conv(g, h)
        g.ndata['h'] = h
        #print(h.shape)
        #hg = dgl.mean_nodes(g, 'h')
        #print(to_fetch)
        num_nodes_ = g.batch_num_nodes
        num_nodes_.insert(0, 0)
        offset = torch.cumsum(torch.LongTensor(num_nodes_), dim = 0)[:-1].to(device)
        hg = h[to_fetch + offset]
        #print(hg.shape)
        #print(hg.shape)
        #print(h[0].shape)
        #hg = h[g.nodes[0].data['center_node'].detach().numpy()[0]]
        #print(hg.shape)
        return hg, self.classify(hg)

In [6]:
from torch.utils.data import DataLoader
import torch.optim as optim
path = './data/multiple_graph/cycle/META_LABEL/random_edge100/'
with open(path + 'list_subgraph.pkl', 'rb') as f:
    total_subgraph = pickle.load(f)
    
with open(path + 'label.pkl', 'rb') as f:
    info = pickle.load(f)

with open(path + 'center.pkl', 'rb') as f:
    center_node = pickle.load(f)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [7]:
class Classifier_Finetune(nn.Module):
    def __init__(self, hidden_dim, n_classes):
        super(Classifier_Finetune, self).__init__()

        self.classify = nn.Linear(hidden_dim, n_classes)

    def forward(self, model, g, to_fetch, features):
        
        h, _ = model(g, to_fetch, features)
        
        return self.classify(h)

In [11]:
def run(fold_n):
    path = './data/multiple_graph/cycle/META_LABEL/random_edge100/'

    path = path + 'fold'+str(fold_n)+'/'
    trainset = Subgraphs(path, 'train', total_subgraph, info, center_node)
    valset = Subgraphs(path, 'val', total_subgraph, info, center_node)
    testset = Subgraphs_test(path, 'test', total_subgraph, info, center_node, 1, 12)

    data_loader_test = DataLoader(testset, batch_size=1, shuffle=True, collate_fn=collate_test)

    data_loader = DataLoader(trainset, batch_size=64, shuffle=True, collate_fn=collate)
    
    model = Classifier(1, 256, max(info.values()) + 1)
    model.to(device)
    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.005)
    model.train()
    
    print('model training')
    for epoch in range(50):
        epoch_loss = 0
        for iter, (bg, label, to_fetch) in enumerate(data_loader):
            bg = bg.to(device)
            label = label.to(device)
            features = 1
            hid, prediction = model(bg, to_fetch, features)
            #print(prediction.shape)
            loss = loss_func(prediction, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.detach().item()
        epoch_loss /= (iter + 1)
        
        if epoch%25 ==0:
            print('Epoch {}, training loss {:.4f}'.format(epoch, epoch_loss))
    
    print('model finetuning')
    accs = []
    for iter, (bg, label, to_fetch, bg_qry, label_qry, to_fetch_qry) in enumerate(data_loader_test):
        model_finetune = Classifier_Finetune(256, 2)
        to_fetch = torch.LongTensor(to_fetch.reshape(-1,))
        label = label.reshape(-1,)

        to_fetch_qry = torch.LongTensor(to_fetch_qry.reshape(-1,))
        label_qry = label_qry.reshape(-1,)    

        update_step = 10
        loss_steps = []
        acc_steps = []

        for i in range(update_step):
            model_finetune.train()
            bg_ = copy.deepcopy(bg)
            prediction = model_finetune(model, bg_, to_fetch, 1)
            loss = loss_func(prediction, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            model_finetune.eval()
            bg_ = copy.deepcopy(bg_qry)
            pred_qry = model_finetune(model, bg_, to_fetch_qry, 1)
            probs_Y = torch.softmax(pred_qry, 1)
            argmax_Y = torch.max(probs_Y, 1)[1].view(-1, 1)

            label_q = torch.tensor(label_qry).float().view(-1, 1)
            acc = (label_q == argmax_Y.detach().cpu().float()).sum().item() / len(label_q) * 100

            acc_steps.append(acc)
            loss_steps.append(loss.item())
        accs.append(acc_steps)
        if iter%20 == 0:
            print('step ', iter)
    print(np.mean(np.array(accs), 0))
    
    
    print('model KNN')
    accs = []
    for iter, (bg, label, to_fetch, bg_qry, label_qry, to_fetch_qry) in enumerate(data_loader_test):
        to_fetch = torch.LongTensor(to_fetch.reshape(-1,))
        label = label.reshape(-1,)
        
        to_fetch_qry = torch.LongTensor(to_fetch_qry.reshape(-1,))
        label_qry = label_qry.reshape(-1,) 
        
        bg_ = copy.deepcopy(bg)
        h, prediction = model(bg_, to_fetch, 1)
        
        bg_ = copy.deepcopy(bg_qry)
        h_qry, pred_qry = model(bg_, to_fetch_qry, 1)
        
        from sklearn.metrics import pairwise_distances
        
        dist = pairwise_distances(h.detach().numpy(), h_qry.detach().numpy())
        
        y_pred = np.argmin(dist, 0).reshape(-1,)
        
        label_dict = dict(zip(label.numpy(), list(range(label.numpy().reshape(-1,).shape[0]))))
        y_pred = np.array([label_dict[i] for i in y_pred])
        #print(y_pred.shape)
        label_q = label_qry.float().view(-1, ).numpy()

        acc = sum(label_q == y_pred) / len(label_q) * 100

        accs.append(acc)
        
        if iter%20 == 0:
            print('step ', iter)
            
    print(np.mean(np.array(accs)))

In [12]:
run(1)

model training
Epoch 0, training loss 1.9445
Epoch 25, training loss 1.0477
model finetuning




step  0
step  20
step  40
step  60
step  80
[51.375      51.83333333 52.5        54.20833333 54.875      55.95833333
 56.54166667 58.54166667 60.20833333 61.58333333]
model KNN
step  0
step  20
step  40
step  60
step  80
85.375


In [14]:
run(2)

model training
Epoch 0, training loss 1.9366
Epoch 25, training loss 1.0458
model finetuning




step  0
step  20
step  40
step  60
step  80
[53.45833333 53.625      54.83333333 55.625      56.625      57.625
 59.83333333 61.58333333 62.625      64.79166667]
model KNN
step  0
step  20
step  40
step  60
step  80
84.5


In [15]:
run(3)

model training
Epoch 0, training loss 1.9064
Epoch 25, training loss 0.9786
model finetuning




step  0
step  20
step  40
step  60
step  80
[47.66666667 48.29166667 49.5        52.29166667 55.33333333 60.375
 63.29166667 66.66666667 68.83333333 70.58333333]
model KNN
step  0
step  20
step  40
step  60
step  80
82.58333333333334


In [16]:
run(4)

model training
Epoch 0, training loss 1.8962
Epoch 25, training loss 1.0351
model finetuning
step  0




step  20
step  40
step  60
step  80
[48.16666667 48.16666667 48.29166667 48.125      48.70833333 48.83333333
 49.         50.25       51.25       51.625     ]
model KNN
step  0
step  20
step  40
step  60
step  80
74.58333333333334


In [17]:
run(5)

model training
Epoch 0, training loss 1.9217
Epoch 25, training loss 0.9944
model finetuning




step  0
step  20
step  40
step  60
step  80
[50.         49.91666667 50.04166667 49.875      50.41666667 50.75
 51.79166667 51.5        51.29166667 51.08333333]
model KNN
step  0
step  20
step  40
step  60
step  80
49.58333333333334


In [18]:
def run_BA(fold_n):
    path = './data/multiple_graph/BA/META_LABEL/'
    
    with open(path + 'list_subgraph.pkl', 'rb') as f:
        total_subgraph = pickle.load(f)
    
    with open(path + 'label.pkl', 'rb') as f:
        info = pickle.load(f)

    with open(path + 'center.pkl', 'rb') as f:
        center_node = pickle.load(f)
    
    path = path + 'fold'+str(fold_n)+'/'
    trainset = Subgraphs(path, 'train', total_subgraph, info, center_node)
    valset = Subgraphs(path, 'val', total_subgraph, info, center_node)
    testset = Subgraphs_test(path, 'test', total_subgraph, info, center_node, 1, 12)

    data_loader_test = DataLoader(testset, batch_size=1, shuffle=True, collate_fn=collate_test)

    data_loader = DataLoader(trainset, batch_size=64, shuffle=True, collate_fn=collate)
    
    model = Classifier(1, 256, max(info.values()) + 1)
    model.to(device)
    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.005)
    model.train()
    
    print('model training')
    for epoch in range(50):
        epoch_loss = 0
        for iter, (bg, label, to_fetch) in enumerate(data_loader):
            bg = bg.to(device)
            label = label.to(device)
            features = 1
            hid, prediction = model(bg, to_fetch, features)
            #print(prediction.shape)
            loss = loss_func(prediction, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.detach().item()
        epoch_loss /= (iter + 1)
        
        if epoch%25 ==0:
            print('Epoch {}, training loss {:.4f}'.format(epoch, epoch_loss))
    
    print('model finetuning')
    accs = []
    for iter, (bg, label, to_fetch, bg_qry, label_qry, to_fetch_qry) in enumerate(data_loader_test):
        model_finetune = Classifier_Finetune(256, 2)
        to_fetch = torch.LongTensor(to_fetch.reshape(-1,))
        label = label.reshape(-1,)

        to_fetch_qry = torch.LongTensor(to_fetch_qry.reshape(-1,))
        label_qry = label_qry.reshape(-1,)    

        update_step = 10
        loss_steps = []
        acc_steps = []

        for i in range(update_step):
            model_finetune.train()
            bg_ = copy.deepcopy(bg)
            prediction = model_finetune(model, bg_, to_fetch, 1)
            loss = loss_func(prediction, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            model_finetune.eval()
            bg_ = copy.deepcopy(bg_qry)
            pred_qry = model_finetune(model, bg_, to_fetch_qry, 1)
            probs_Y = torch.softmax(pred_qry, 1)
            argmax_Y = torch.max(probs_Y, 1)[1].view(-1, 1)

            label_q = torch.tensor(label_qry).float().view(-1, 1)
            acc = (label_q == argmax_Y.detach().cpu().float()).sum().item() / len(label_q) * 100

            acc_steps.append(acc)
            loss_steps.append(loss.item())
        accs.append(acc_steps)
        if iter%20 == 0:
            print('step ', iter)
    print(np.mean(np.array(accs), 0))
    
    
    print('model KNN')
    accs = []
    for iter, (bg, label, to_fetch, bg_qry, label_qry, to_fetch_qry) in enumerate(data_loader_test):
        to_fetch = torch.LongTensor(to_fetch.reshape(-1,))
        label = label.reshape(-1,)
        
        to_fetch_qry = torch.LongTensor(to_fetch_qry.reshape(-1,))
        label_qry = label_qry.reshape(-1,) 
        
        bg_ = copy.deepcopy(bg)
        h, prediction = model(bg_, to_fetch, 1)
        
        bg_ = copy.deepcopy(bg_qry)
        h_qry, pred_qry = model(bg_, to_fetch_qry, 1)
        
        from sklearn.metrics import pairwise_distances
        
        dist = pairwise_distances(h.detach().numpy(), h_qry.detach().numpy())
        
        y_pred = np.argmin(dist, 0).reshape(-1,)
        
        label_dict = dict(zip(label.numpy(), list(range(label.numpy().reshape(-1,).shape[0]))))
        y_pred = np.array([label_dict[i] for i in y_pred])
        #print(y_pred.shape)
        label_q = label_qry.float().view(-1, ).numpy()

        acc = sum(label_q == y_pred) / len(label_q) * 100

        accs.append(acc)
        
        if iter%20 == 0:
            print('step ', iter)
            
    print(np.mean(np.array(accs)))

In [19]:
run_BA(1)

model training
Epoch 0, training loss 1.7288
Epoch 25, training loss 0.2990
model finetuning




step  0
step  20
step  40
step  60
step  80
[51.83333333 53.16666667 55.08333333 56.41666667 56.5        58.16666667
 60.41666667 63.         64.25       65.95833333]
model KNN
step  0
step  20
step  40
step  60
step  80
79.95833333333334


In [20]:
run_BA(2)

model training
Epoch 0, training loss 2.0427
Epoch 25, training loss 1.1083
model finetuning




step  0
step  20
step  40
step  60
step  80
[51.33333333 53.         54.5        56.58333333 57.25       60.83333333
 62.58333333 63.41666667 64.83333333 67.16666667]
model KNN
step  0
step  20
step  40
step  60
step  80
94.875


In [21]:
run_BA(3)

model training
Epoch 0, training loss 2.9276
Epoch 25, training loss 0.7290
model finetuning




step  0
step  20
step  40
step  60
step  80
[47.41666667 48.125      47.625      49.5        52.375      54.70833333
 56.375      58.75       60.         61.79166667]
model KNN
step  0
step  20
step  40
step  60
step  80
53.25000000000001


In [22]:
run_BA(4)

model training
Epoch 0, training loss 1.9972
Epoch 25, training loss 0.6662
model finetuning




step  0
step  20
step  40
step  60
step  80
[48.95833333 49.375      49.54166667 49.75       49.95833333 50.5
 51.45833333 52.41666667 52.95833333 53.79166667]
model KNN
step  0
step  20
step  40
step  60
step  80
59.66666666666666


In [23]:
run_BA(5)

model training
Epoch 0, training loss 3.0090
Epoch 25, training loss 1.0970
model finetuning




step  0
step  20
step  40
step  60
step  80
[50.         50.91666667 52.70833333 54.83333333 57.16666667 58.33333333
 60.08333333 62.79166667 64.375      66.29166667]
model KNN
step  0
step  20
step  40
step  60
step  80
96.875
