In [4]:
import numpy as np
from os.path import join
data_path = join('./','data')
# emb
with open(join(data_path,'t1.emb')) as f:
    num_nodes, D = f.readline().strip().split(' ')
    num_nodes = int(num_nodes)
    D = int(D)
    
    ls = f.readlines()
node_emb_dict = {}
for l in ls:
    buf = l.strip().split(' ')
    node_id, emb = int(buf[0]), buf[1:]
    x = np.asarray([float(i) for i in emb], dtype=np.float32)
    node_emb_dict[node_id] = x
    
# training data
with open(join(data_path,'t1-merge.txt')) as f:
    ls = f.readlines()
N = len(ls)
X = []
adj_mat = np.zeros([N,N], dtype=np.uint8)
for l in ls:
    buf = l.strip().split(' ')
    src, dst = int(buf[0]), int(buf[1])
    adj_mat[src, dst] = 1
    fea = np.concatenate([node_emb_dict[src], node_emb_dict[dst]], axis=-1)
    X.append(fea)
X = np.vstack(X)

# test data
with open(join(data_path,'t1-test.txt')) as f:
    ls = f.readlines()
N2 = len(ls)
test_X = []
for l in ls:
    buf = l.strip().split(' ')
    src, dst = int(buf[0]), int(buf[1])
    if src not in node_emb_dict:
        src = 27026
    if dst not in node_emb_dict:
        dst = 101
    fea = np.concatenate([node_emb_dict[src], node_emb_dict[dst]], axis=-1)
    
    test_X.append(fea)
test_X = np.vstack(test_X)
print X.shape, test_X.shape
print 'done'
    

(285789, 256) (88074, 256)
done


In [5]:
import numpy as np
batch_size = 128
def naive_bootsrap_generator(X, adj_mat, node_emb_dict, batch_size=128, neg_rate=1.):
    exist_node_list = node_emb_dict.keys()
    exist_N = len(exist_node_list)
    num_edge = X.shape[0]
        
    while True:
        idx = np.random.choice(num_edge, batch_size)
        pos_X = X[idx, :]
        
        neg_count = int(batch_size*neg_rate)
        neg_idx = np.random.randint(exist_N, size=[neg_count, 2])
        neg_X = []
        for i in range(neg_count):
            src, dst = neg_idx[i]
            src = exist_node_list[src]
            dst = exist_node_list[dst]
            if src != dst and adj_mat[src, dst] == 0:
                fea = np.concatenate([node_emb_dict[src], node_emb_dict[dst]], axis=-1)
                neg_X.append(fea)
        neg_X = np.vstack(neg_X)

        ret_X = np.vstack([pos_X, neg_X])
        ret_Y = np.zeros([ret_X.shape[0], 1])
        ret_Y[:batch_size, 0] = 1
        yield ret_X, ret_Y
N = X.shape[0]
idx = np.random.permutation(N)
train_idx = idx[N//10:]
val_idx = idx[:N//10]

train_X = X[train_idx,:]
val_X = X[val_idx,:]
G = naive_bootsrap_generator(train_X, adj_mat, node_emb_dict, batch_size=batch_size)
val_G = naive_bootsrap_generator(val_X, adj_mat, node_emb_dict, batch_size=batch_size, neg_rate=0.1)
x,y = next(G)
print x.shape,y.shape
x,y = next(val_G)
print x.shape,y.shape

(256, 256) (256, 1)
(140, 256) (140, 1)


In [3]:
import keras
from keras.models import *
from keras.layers import *

epochs = 100
def build_model():

    model = Sequential()
    model.add(Dense(256, activation='selu', input_shape=(256,)))
    model.add(Dense(256, activation='selu'))
    model.add(Dropout(0.2))
    model.add(Dense(128, activation='selu'))
    model.add(Dense(128, activation='selu'))
    model.add(Dropout(0.2))
    model.add(Dense(64, activation='selu'))
    model.add(Dense(64, activation='selu'))
    model.add(Dropout(0.2))
    model.add(Dense(32, activation='selu'))
    model.add(Dense(32, activation='selu'))
    model.add(Dropout(0.2))
    model.add(Dense(16, activation='selu'))
    model.add(Dense(16, activation='selu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss=keras.losses.binary_crossentropy,
              optimizer=keras.optimizers.Adam(lr=0.001),
              metrics=['accuracy'])
    return model


Using TensorFlow backend.


In [None]:
x,y = next(G)
print x.shape, y.shape
np.random.seed(1337)
model = build_model()
model.fit(x,y)
ck = keras.callbacks.ModelCheckpoint('./weights.hdf5', monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=False, mode='auto', period=1)
tfb = keras.callbacks.TensorBoard(log_dir='./logs')
model.fit_generator(G,
                    steps_per_epoch=train_X.shape[0]//batch_size,
                    epochs=1000, verbose=1,
                    validation_data=val_G,
                    validation_steps=val_X.shape[0]//batch_size,
                    callbacks=[ck,tfb]
                    )


(256, 256) (256, 1)
Epoch 1/1
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
  49/2009 [..............................] - ETA: 8s - loss: 0.0296 - acc: 0.9905

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
Epoch 73/1000
Epoch 74/1000
Epoch 75/1000

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 106/1000
Epoch 107/1000
Epoch 108/1000
Epoch 109/1000
Epoch 110/1000
Epoch 111/1000
Epoch 112/1000
Epoch 113/1000
Epoch 114/1000
Epoch 115/1000
Epoch 116/1000
Epoch 117/1000
Epoch 118/1000
Epoch 119/1000
Epoch 120/1000
Epoch 121/1000
Epoch 122/1000

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 154/1000
Epoch 155/1000
Epoch 156/1000
Epoch 157/1000
Epoch 158/1000
Epoch 159/1000
Epoch 160/1000
Epoch 161/1000
Epoch 162/1000
Epoch 163/1000
Epoch 164/1000
Epoch 165/1000
Epoch 166/1000
Epoch 167/1000
Epoch 168/1000
Epoch 169/1000
Epoch 170/1000

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 200/1000
Epoch 201/1000
Epoch 202/1000
Epoch 203/1000
Epoch 204/1000
Epoch 205/1000
Epoch 206/1000
Epoch 207/1000
Epoch 208/1000
Epoch 209/1000
Epoch 210/1000
Epoch 211/1000
Epoch 212/1000
Epoch 213/1000
Epoch 214/1000
Epoch 215/1000
Epoch 216/1000

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [6]:
from keras.models import *
model = load_model('./weights.hdf5')
z = model.predict(test_X)
with open('pred.txt', 'w') as f:
    for i in range(z.shape[0]):
        p = z[i,0]
        ans = 1 if p >= 0.5 else 0
        f.write('%d\n' % ans)
pred_file = 'pred.txt'
with open(pred_file, 'r') as f, open(pred_file + '.csv', 'w') as g:
    g.write('query_id,prediction\n')
    for idx, line in enumerate(f):
        g.write('%d,%d\n' % (1 + idx, int(line)))


print 'done'
print z.shape

done
(88074, 1)


In [None]:
import tensorflow
import keras
print tensorflow.__version__
print keras.__version__

In [None]:
88074-38128
97232 - 88074