In [55]:
import mlflow
import pandas as pd  # kept for DataFrame-based prediction; nothing in this cell uses it yet

# MLflow run URI pointing at the logged `student_modelBZR` artifact.
logged_model = 'runs:/d6cb4eb7061147bea0b2725787bcede1/student_modelBZR'

# Load the artifact through MLflow's generic python_function (PyFuncModel) flavor.
loaded_model = mlflow.pyfunc.load_model(logged_model)

In [53]:
# Shell escape: run deploy_model.py in a subprocess with `--DS BZR`.
# NOTE(review): the script is not part of this notebook; judging only by the
# captured output, it prints the dataset name, the graph count and a list of
# (label, score) pairs — confirm against the script itself.
!python deploy_model.py --DS BZR

DS: BZR
Total graphs: 405
[(1, 0.004006451), (1, 0.0039517013), (1, 0.0037293725), (1, 0.0040329676), (1, 0.0042329244), (1, 0.004239408), (1, 0.0042610182), (0, 0.0039714174), (1, 0.0038657787), (1, 0.0037442502), (1, 0.0035977638), (1, 0.0037429663), (1, 0.0042789415), (1, 0.0042074188), (0, 0.004628119), (1, 0.004209623), (1, 0.00402508), (1, 0.0040799556), (0, 0.0038537085), (0, 0.0038617295), (1, 0.004058336), (0, 0.0052948305), (1, 0.003928732), (1, 0.003946598), (1, 0.003943426), (1, 0.0038537455), (1, 0.003878168), (1, 0.0032854667), (1, 0.0036645131), (1, 0.0042365124), (1, 0.0038207509), (1, 0.003815301), (1, 0.0038383815), (1, 0.0040673255), (1, 0.0040504565), (1, 0.0040385327), (1, 0.0040565673), (1, 0.0040578027), (1, 0.0038832796), (1, 0.0038626892), (1, 0.0039943634), (1, 0.0039542574), (1, 0.0040314132), (1, 0.0039288746), (1, 0.0040574484), (0, 0.003920524), (1, 0.0042044194), (0, 0.003925094), (0, 0.0038090833), (0, 0.0038072362), (0, 0.0039127814), (0, 0.003915913), 

In [4]:
import numpy as np
from sklearn.utils.random import sample_without_replacement
from sklearn.metrics import auc, precision_recall_curve, roc_curve
from sklearn.svm import OneClassSVM
import argparse
import load_data
import networkx as nx
from GCN_embedding import GcnEncoderGraph_teacher, GcnEncoderGraph_student
import torch
import torch.nn as nn
import time
import GCN_embedding
from torch.autograd import Variable
from graph_sampler import GraphSampler
from numpy.random import seed
import random
import matplotlib.pyplot as plt
import copy
import torch.nn.functional as F
from sklearn.manifold import TSNE
from matplotlib import cm
from tdc.utils import retrieve_label_name_list
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
import mlflow
from util import sce_loss

In [5]:
def arg_parse(argv=None):
    """Build the GLocalKD argument parser and parse an argument list.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) lets argparse read ``sys.argv[1:]``, exactly as before.
            Pass ``[]`` to obtain the defaults, e.g. from inside a notebook
            where ``sys.argv`` holds the kernel launcher's own arguments
            rather than options this parser knows.

    Returns:
        argparse.Namespace with the attributes ``datadir``, ``DS``,
        ``max_nodes``, ``clip``, ``num_epochs``, ``batch_size``,
        ``hidden_dim``, ``output_dim``, ``num_gc_layers``, ``bn``,
        ``dropout``, ``bias``, ``feature`` and ``seed``.

    Raises:
        SystemExit: raised by argparse on unknown or malformed arguments.
    """
    parser = argparse.ArgumentParser(description='GLocalKD Arguments.')
    parser.add_argument('--datadir', dest='datadir', default ='dataset', help='Directory where benchmark is located')
    parser.add_argument('--DS', dest='DS', default ='AIDS', help='dataset name')
    parser.add_argument('--max-nodes', dest='max_nodes', type=int, default=0, help='Maximum number of nodes (ignore graghs with nodes exceeding the number.')
    parser.add_argument('--clip', dest='clip', default=0.1, type=float, help='Gradient clipping.')
    parser.add_argument('--num_epochs', dest='num_epochs', default=150, type=int, help='total epoch number')
    parser.add_argument('--batch-size', dest='batch_size', default=300, type=int, help='Batch size.')
    parser.add_argument('--hidden-dim', dest='hidden_dim', default=512, type=int, help='Hidden dimension')
    parser.add_argument('--output-dim', dest='output_dim', default=256, type=int, help='Output dimension')
    parser.add_argument('--num-gc-layers', dest='num_gc_layers', default=3, type=int, help='Number of graph convolution layers before each pooling')
    # `--nobn` / `--nobias` are off-switches: the destination defaults to True
    # and is set to False only when the flag is present.
    parser.add_argument('--nobn', dest='bn', action='store_const', const=False, default=True, help='Whether batch normalization is used')
    parser.add_argument('--dropout', dest='dropout', default=0.3, type=float, help='Dropout rate.')
    parser.add_argument('--nobias', dest='bias', action='store_const', const=False, default=True, help='Whether to add bias. Default to True.')
    parser.add_argument('--feature', dest='feature', default='default', help='use what node feature')
    parser.add_argument('--seed', dest='seed', type=int, default=1, help='seed')
    # argv=None makes argparse fall back to sys.argv[1:], preserving the old behaviour.
    return parser.parse_args(argv)

def setup_seed(seed):
    """Seed every random number generator this project draws from.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (the default generator plus all CUDA devices), and asks cuDNN to use
    deterministic algorithms.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

In [31]:
def test(data_test_loader, model_teacher, model_student):
    """Score each test batch by teacher/student disagreement and report AUROC.

    For every batch the student and teacher are run on the same input; the
    score is the mean squared error between their graph-level embeddings plus
    the mean squared error between their node-level embeddings. The scores
    are then evaluated against the labels with a ROC curve.

    Relies on the module-level ``device`` to place the input tensors.

    Args:
        data_test_loader: Iterable of batches, each a mapping with the keys
            ``'adj'``, ``'feats'`` and ``'label'``. It must yield one graph
            per batch: the label is used as a single truth value and exactly
            one score is recorded per batch.
        model_teacher: Callable taking ``(h0, adj)`` and returning
            ``(node_embeddings, graph_embedding)``. Its train/eval mode is
            left untouched here.
        model_student: Same call signature as the teacher; it is switched to
            eval mode before scoring.

    Returns:
        The AUROC of the per-graph scores. (The previous version computed
        this value but always returned 0.)
    """
    model_student.eval()
    scores = []
    y = []

    # Pure inference: no gradients are needed, so skip building the autograd graph.
    with torch.no_grad():
        for data in data_test_loader:
            adj = data['adj'].float().to(device)
            h0 = data['feats'].float().to(device)

            embed_node, embed = model_student(h0, adj)
            embed_teacher_node, embed_teacher = model_teacher(h0, adj)

            # Node-level mismatch, averaged over dims 2, 1 and 0 in turn.
            loss_node = torch.mean(F.mse_loss(embed_node, embed_teacher_node, reduction='none'), dim=2).mean(dim=1).mean(dim=0)
            # Graph-level mismatch, averaged over dims 1 and 0.
            loss_graph = F.mse_loss(embed, embed_teacher, reduction='none').mean(dim=1).mean(dim=0)
            loss_ = loss_graph + loss_node

            # Kept as a NumPy float32 scalar so printed scores look as before.
            scores.append(np.array(loss_.cpu().detach()))

            # Labels are flipped for the ROC computation: original label 0
            # becomes the positive class (1), everything else becomes 0.
            y.append(1 if data['label'] == 0 else 0)

    label_test = np.array(scores)

    fpr_ab, tpr_ab, _ = roc_curve(y, label_test)
    print(list(zip(y, label_test)))
    test_roc_ab = auc(fpr_ab, tpr_ab)
    print('semi-supervised abnormal detection: auroc_ab: {}'.format(test_roc_ab))
    return test_roc_ab

In [62]:
import mlflow.pytorch

# Make "GraphAD" the active MLflow experiment and keep a handle to it.
mlflow.set_experiment("GraphAD")
experiment = mlflow.get_experiment_by_name("GraphAD")

# Prefer the GPU when one is visible to PyTorch.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Notebook stand-in for the CLI arguments; dataset name and directory are
# hard-coded below and no seed is set in this cell.
args = {'max_nodes': 0}

graphs = load_data.read_graphfile("dataset", "BZR", max_nodes=0)
datanum = len(graphs)

# A configured value of 0 means "use the size of the largest graph".
max_nodes_num = args.get('max_nodes')
if max_nodes_num == 0:
    max_nodes_num = max(G.number_of_nodes() for G in graphs)

print(f'Total graphs: {datanum}')

# Per-graph labels, in the same order as `graphs`.
graphs_label = [graph.graph['label'] for graph in graphs]

Total graphs: 405


In [61]:
# Model Registry coordinates (name + version) of the student and teacher networks.
model_name = "student_model_registeredBZR"
model_version = 1
model_name_t = "teacher_model_registeredBZR"
model_version_t = 1

# Load both networks as native PyTorch modules from the MLflow Model Registry.
# NOTE(review): neither model is moved to a device here — confirm they end up
# on the same device as the tensors they are evaluated on.
model_student = mlflow.pytorch.load_model(model_uri=f"models:/{model_name}/{model_version}")
model_teacher = mlflow.pytorch.load_model(model_uri=f"models:/{model_name_t}/{model_version_t}")

In [63]:
# 5-fold stratified splitter. `random_state` pins the shuffle so that re-running
# the notebook yields the same folds; without it every run produced a different
# split. The value 1 mirrors the `--seed` default declared in arg_parse.
# NOTE(review): confirm this matches the seed used by the training script.
kfd = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)

In [65]:
# Fallback values in case the splitter yields no fold at all.
train_index, test_index = [], []

# Only the first fold is inspected: pull a single (fold number, indices) item
# from the splitter instead of looping and breaking.
first_fold = next(enumerate(kfd.split(graphs, graphs_label)), None)
if first_fold is not None:
    k, (train_index, test_index) = first_fold
    print(k, train_index,  test_index)
    print(list(test_index))
    
# '229-1', '236-1', '237-1', '244-1', '250-0', '254-0', '266-0', '271-0', '278-1', '279-1', '282-1', '287-1', '308-1',
# '311-0', '320-0', '331-0', '337-0', '340-0', '343-1', '344-0', '353-1', '357-1', '363-0', '365-0', '368-0', '371-0',
# '376-0', '384-1', '387-0', '390-1', '397-1'

0 [  1   3   4   5   6   8   9  10  11  12  16  17  19  20  21  22  23  26
  27  28  29  30  31  32  33  35  36  37  38  39  40  41  42  43  44  45
  46  48  49  51  52  53  54  55  56  57  59  60  61  62  64  65  66  67
  68  69  70  71  72  73  74  75  76  77  81  83  84  85  86  87  88  90
  91  92  93  94  96  98 100 101 102 103 104 105 107 108 110 111 112 113
 114 115 116 117 118 119 121 122 123 124 125 126 127 128 129 130 131 132
 133 134 136 137 138 139 141 143 144 145 148 149 150 151 152 153 154 155
 158 159 160 161 162 163 164 165 167 168 170 172 173 174 175 176 177 178
 179 180 181 183 185 186 187 188 189 190 191 192 194 195 197 198 202 203
 204 205 206 207 208 209 210 211 212 213 215 216 217 218 219 220 221 222
 223 224 225 226 227 228 231 232 237 238 239 240 241 242 243 244 245 247
 248 249 251 252 253 256 257 258 259 260 261 262 263 265 266 267 268 269
 270 271 273 274 275 276 277 278 279 282 283 284 285 286 289 290 291 293
 294 295 296 297 298 299 302 303 304 306 308 310 

In [66]:
# test_index = [1989]

# Hard-coded positions into `graphs` that make up the evaluation set. This
# assignment replaces whatever `test_index` held before the cell ran.
# NOTE(review): presumably copied from a fold printed by an earlier run so the
# same held-out graphs are scored here — TODO confirm where these came from.
test_index = [0, 4, 5, 7, 19, 24, 29, 32, 33, 39, 50, 58, 62, 63, 82, 85, 91, 99, 101, 104, 108, 112, 114, 115, 127, 128, 133,
              134, 142, 145, 148, 162, 163, 168, 169, 180, 186, 190, 203, 205, 215, 222, 226, 232, 233, 234, 235, 239, 240, 245, 251, 252, 253, 255, 258, 265, 270, 276, 287, 288, 291, 295, 296, 297, 300, 310, 316, 320, 325, 327, 342, 349, 350, 362, 374, 378, 383, 384, 396, 401, 402]

# graphs_train_ = [graphs[i] for i in train_index]
# Pick the graphs at those positions, keeping the order of `test_index`.
graphs_test = [graphs[i] for i in test_index]


In [67]:
# Kept for the (currently disabled) k-fold aggregation; nothing appends to it
# in this single-split evaluation.
result_auc = []

# Wrap the held-out graphs in the project's sampler, padding to `max_nodes_num`,
# and serve them one graph per batch in a fixed order.
dataset_sampler_test = GraphSampler(graphs_test, features='default', normalize=False, max_num_nodes=max_nodes_num)
data_test_loader = torch.utils.data.DataLoader(dataset_sampler_test,
                                               shuffle=False,
                                               batch_size=1)

# Fix: the call to `test` was missing, so `result` was merely the tuple
# (loader, teacher, student) and no evaluation ran. The saved output of this
# cell (a list of (label, score) pairs) is what `test` prints.
result = test(data_test_loader, model_teacher, model_student)
print(result)


[(1, 0.004248675), (1, 0.0042414484), (1, 0.004025937), (1, 0.0042294026), (1, 0.0039534336), (1, 0.0041608256), (1, 0.0037293725), (1, 0.003983832), (1, 0.0040952903), (1, 0.0042329244), (1, 0.0042449613), (1, 0.003843474), (0, 0.0037630645), (1, 0.0036964335), (0, 0.004251176), (1, 0.0040000924), (1, 0.004467126), (0, 0.0038617295), (1, 0.003908807), (0, 0.004537329), (1, 0.004444516), (1, 0.0040260153), (1, 0.004050011), (1, 0.003946598), (1, 0.0039716996), (1, 0.003877414), (1, 0.0038537455), (1, 0.0036769977), (1, 0.0039170412), (1, 0.003916952), (1, 0.003908207), (1, 0.004064182), (1, 0.0040504565), (1, 0.0040922803), (1, 0.0040578027), (1, 0.0039943634), (1, 0.0041703833), (1, 0.004045112), (1, 0.0039070705), (0, 0.0038943528), (1, 0.0039312514), (1, 0.0036507114), (0, 0.003925094), (0, 0.0036367557), (1, 0.005145555), (1, 0.005179301), (0, 0.0039127814), (0, 0.004153766), (0, 0.0037588878), (0, 0.003695527), (1, 0.0044848793), (1, 0.0041781096), (1, 0.0043243864), (1, 0.0044128

In [54]:
# Shell escape: run main.py in a subprocess with `--DS BZR`.
# NOTE(review): the script is not part of this notebook; per the captured
# output it prints (label, score) pairs and registers version 1 of
# 'teacher_model_registeredBZR' and 'student_model_registeredBZR' in the
# MLflow Model Registry — confirm against the script itself.
!python main.py --DS BZR

DS: BZR
Total graphs: 405
69 81
[(1, 0.007970339), (1, 0.008080216), (1, 0.008575026), (1, 0.008199787), (1, 0.0080187265), (1, 0.008015599), (1, 0.008066808), (0, 0.008097487), (1, 0.008505048), (1, 0.008112711), (1, 0.008346773), (1, 0.008520313), (1, 0.008434446), (1, 0.007956653), (0, 0.008590241), (1, 0.00799224), (1, 0.007978978), (1, 0.008000738), (0, 0.008029713), (0, 0.008292277), (1, 0.008041385), (0, 0.0091774), (1, 0.0080980575), (1, 0.008134931), (1, 0.008234943), (1, 0.008308126), (1, 0.008103153), (1, 0.009250442), (1, 0.008535922), (1, 0.008028246), (1, 0.008173814), (1, 0.008169668), (1, 0.00811972), (1, 0.007866517), (1, 0.00785544), (1, 0.007837372), (1, 0.008456616), (1, 0.00845078), (1, 0.008796893), (1, 0.0087869745), (1, 0.008830386), (1, 0.008927103), (1, 0.008614138), (1, 0.008965741), (1, 0.008707566), (0, 0.008094705), (1, 0.008664601), (0, 0.008900249), (0, 0.00828347), (0, 0.008276843), (0, 0.0074739913), (0, 0.0076042074), (1, 0.008970333), (1, 0.009157831

Successfully registered model 'teacher_model_registeredBZR'.
2023/02/17 12:22:32 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: teacher_model_registeredBZR, version 1
Created version '1' of model 'teacher_model_registeredBZR'.
Successfully registered model 'student_model_registeredBZR'.
2023/02/17 12:22:38 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: student_model_registeredBZR, version 1
Created version '1' of model 'student_model_registeredBZR'.
