In [None]:
!pip install networkx
!pip install numpy



In [None]:
import networkx as nx
import numpy as np
from sklearn import tree
from sklearn.metrics import accuracy_score,f1_score,confusion_matrix,classification_report
from keras.models import Sequential
from keras.layers import Dense

In [None]:
# Seed NumPy's global RNG so the later shuffle of the dataset is repeatable.
np.random.seed(5)

G = nx.karate_club_graph()

# Adjacency matrix of G as a plain ndarray (nx.to_numpy_matrix no longer exists
# in NetworkX 3.x). weight=None keeps every existing edge at 1.0 even when the
# edges carry a 'weight' attribute, so the label column stays strictly 0/1.
s = nx.to_numpy_array(G, weight=None)

# One row [i, j, is_edge] for every unordered node pair i < j.
n_nodes = G.number_of_nodes()
edge_list = [
    [i, j, s[i, j]]
    for i in range(n_nodes)
    for j in range(i + 1, n_nodes)
]
edge = np.asarray(edge_list) # convert list to 2-d array

In [None]:
# Display the edge view of the karate club graph.
G.edges

EdgeView([(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), (0, 8), (0, 10), (0, 11), (0, 12), (0, 13), (0, 17), (0, 19), (0, 21), (0, 31), (1, 2), (1, 3), (1, 7), (1, 13), (1, 17), (1, 19), (1, 21), (1, 30), (2, 3), (2, 7), (2, 8), (2, 9), (2, 13), (2, 27), (2, 28), (2, 32), (3, 7), (3, 12), (3, 13), (4, 6), (4, 10), (5, 6), (5, 10), (5, 16), (6, 16), (8, 30), (8, 32), (8, 33), (9, 33), (13, 33), (14, 32), (14, 33), (15, 32), (15, 33), (18, 32), (18, 33), (19, 33), (20, 32), (20, 33), (22, 32), (22, 33), (23, 25), (23, 27), (23, 29), (23, 32), (23, 33), (24, 25), (24, 27), (24, 31), (25, 31), (26, 29), (26, 33), (27, 33), (28, 31), (28, 33), (29, 32), (29, 33), (30, 32), (30, 33), (31, 32), (31, 33), (32, 33)])

In [None]:
# Dimensions of the adjacency matrix (rows, columns).
s.shape

(34, 34)

In [None]:
# Display the adjacency matrix built from G.
s

matrix([[0., 1., 1., ..., 1., 0., 0.],
        [1., 0., 1., ..., 0., 0., 0.],
        [1., 1., 0., ..., 0., 1., 0.],
        ...,
        [1., 0., 0., ..., 0., 1., 1.],
        [0., 0., 1., ..., 1., 0., 1.],
        [0., 0., 0., ..., 1., 1., 0.]])

In [None]:
# Dimensions of the pair table: one row per node pair, columns [i, j, is_edge].
edge.shape

(561, 3)

In [None]:
# np.array() copies its input by default, so `a` is a separate array holding
# the same values as `edge`; the cell displays that copy's shape.
a=np.array(edge)
a.shape

(561, 3)

In [None]:
# Display the pair table; each row is [i, j, adjacency value s[i, j]].
edge

array([[ 0.,  1.,  1.],
       [ 0.,  2.,  1.],
       [ 0.,  3.,  1.],
       ...,
       [31., 32.,  1.],
       [31., 33.,  1.],
       [32., 33.,  1.]])

In [None]:
# calculate link prediction algorithms for every two nodes in G

def _score_pairs(index_fn, graph, pairs):
    """Run one NetworkX link-prediction index over ``pairs``.

    ``index_fn`` is called as ``index_fn(graph, pairs)`` and must yield
    ``(u, v, score)`` triples; they are returned as a list of ``[u, v, score]``
    rows in the order produced.
    """
    return [[u, v, p] for u, v, p in index_fn(graph, pairs)]


# `edge` is a float array, so its first two columns hold node ids as floats.
# Convert them to native int pairs before handing them to NetworkX instead of
# relying on float/int hash equality for the node lookups. A list (not a
# generator) is used so the pairs can be iterated more than once.
node_pairs = edge[:, :2].astype(int).tolist()

pred1 = _score_pairs(nx.jaccard_coefficient, G, node_pairs)
pred2 = _score_pairs(nx.resource_allocation_index, G, node_pairs)
pred3 = _score_pairs(nx.adamic_adar_index, G, node_pairs)
pred4 = _score_pairs(nx.preferential_attachment, G, node_pairs)

# Each array has the columns [u, v, score], with rows aligned with `edge`.
link_pred1 = np.asarray(pred1)
link_pred2 = np.asarray(pred2)
link_pred3 = np.asarray(pred3)
link_pred4 = np.asarray(pred4)

In [None]:
# create features array: the four [u, v, score] blocks side by side, followed
# by the adjacency value of each pair as the last column
link_pred = np.concatenate(
    (
        link_pred1,
        link_pred2,
        link_pred3,
        link_pred4,
        # reshape(-1, 1) turns the label vector into a column whatever the
        # number of node pairs is (previously hard-coded as 561)
        edge[:, 2].reshape(-1, 1),
    ),
    axis=1,
)
link_pred[:550]

array([[0.00000000e+00, 1.00000000e+00, 3.88888889e-01, ...,
        1.00000000e+00, 1.44000000e+02, 1.00000000e+00],
       [0.00000000e+00, 2.00000000e+00, 2.38095238e-01, ...,
        2.00000000e+00, 1.60000000e+02, 1.00000000e+00],
       [0.00000000e+00, 3.00000000e+00, 2.94117647e-01, ...,
        3.00000000e+00, 9.60000000e+01, 1.00000000e+00],
       ...,
       [2.80000000e+01, 3.00000000e+01, 1.66666667e-01, ...,
        3.00000000e+01, 1.20000000e+01, 0.00000000e+00],
       [2.80000000e+01, 3.10000000e+01, 1.25000000e-01, ...,
        3.10000000e+01, 1.80000000e+01, 1.00000000e+00],
       [2.80000000e+01, 3.20000000e+01, 2.50000000e-01, ...,
        3.20000000e+01, 3.60000000e+01, 0.00000000e+00]])

In [None]:
# Columns 3-4, 6-7 and 9-10 are the (u, v) node ids contributed again by the
# second, third and fourth score arrays; columns 0-1 already hold them.
repeated_node_id_columns = [3, 4, 6, 7, 9, 10]
link_pred = np.delete(link_pred, repeated_node_id_columns, axis=1)
print(link_pred[:3])

[[  0.           1.           0.38888889   2.05         6.13071687
  144.           1.        ]
 [  0.           2.           0.23809524   0.92777778   2.97724763
  160.           1.        ]
 [  0.           3.           0.29411765   1.16111111   3.67479159
   96.           1.        ]]


In [None]:
# create dataset: roughly 50% label 0 (non-edges) and 50% label 1 (edges)
# Every edge row is kept; of the non-edge rows only every 6th one is kept
# (the 1st, 7th, 13th, ... non-edge encountered in row order).
NEGATIVE_STRIDE = 6

labels = link_pred[:, 6]
is_edge = labels == 1
is_non_edge = labels == 0

# 0-based running index of each non-edge among the non-edges seen so far
non_edge_rank = np.cumsum(is_non_edge) - 1
keep = is_edge | (is_non_edge & (non_edge_rank % NEGATIVE_STRIDE == 0))

# Columns 2..6 are the four link-prediction scores followed by the label.
# Boolean-mask indexing returns a copy, so the in-place shuffle below leaves
# link_pred untouched. Row order before the shuffle is the original row order.
data = link_pred[keep, 2:7]
np.random.shuffle(data)

In [None]:
# create train and test dataset
# The first 109 shuffled rows are used for training, the remainder for testing.
# Columns 0-3 are the input features, column 4 is the label.
n_train_rows = 109
feature_columns = data[:, 0:4]
label_column = data[:, 4]

x_train = feature_columns[:n_train_rows]
x_test = feature_columns[n_train_rows:]
y_train = label_column[:n_train_rows]
y_test = label_column[n_train_rows:]


**Neural Network**

In [None]:
# create model (ANN)
# 4 input features -> three fully connected ReLU layers of 20 units each ->
# a single sigmoid unit for the binary output.
model = Sequential([
    Dense(20, input_dim=4, activation='relu'),
    Dense(20, activation='relu'),
    Dense(20, activation='relu'),
    Dense(1, activation='sigmoid'),
])

Instructions for updating:
Colocations handled automatically by placer.


In [None]:
# Compile model
# Binary cross-entropy loss with the Adam optimizer; accuracy is tracked as
# the reported metric.
# Unused alternative optimizer, kept for reference:
# sgd = keras.optimizers.SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Fit the model
# 150 epochs over the training split.
model.fit(x=x_train, y=y_train, epochs=150)

In [None]:
# Test the model
# evaluate() returns the loss followed by the compiled metric (accuracy).
evaluation = model.evaluate(x_test, y_test)
test_loss, test_acc = evaluation
accuracy_percent = test_acc * 100
print('Test accuracy:', accuracy_percent)

Test accuracy: 85.99999928474426


In [None]:
# predict with model
# NOTE: `data` contains the rows used for training (data[0:109]) as well as the
# test rows, so anything computed from these predictions is not a pure
# hold-out evaluation.
predictions = model.predict(data[:,0:4])

# Round the first (only) output column to the nearest integer in one
# vectorised step. np.rint rounds halves to the even integer, exactly like the
# built-in round() used before, and astype(int) gives a fixed integer dtype.
rounded = np.rint(predictions[:, 0]).astype(int)

In [None]:
# evaluate model
# Confusion-matrix counts of the predictions in `rounded` against the labels
# in data[:, 4], computed over all rows rather than a hard-coded 159.
actual = data[:, 4]
predicted = np.asarray(rounded)

# "positive"/"negative" = prediction is 1/0; "true" = prediction equals label.
# Rows whose prediction is neither 0 nor 1 are not counted in any bucket.
true_p = int(np.sum((predicted == 1) & (actual == 1)))
true_n = int(np.sum((predicted == 0) & (actual == 0)))
false_p = int(np.sum((predicted == 1) & (actual != 1)))
false_n = int(np.sum((predicted == 0) & (actual != 0)))

print('\nTrue Positive: ',true_p)
print('True Negative: ',true_n)
print('False Positive: ',false_p)
print('False Negative: ',false_n)
print('All: ', true_n + true_p + false_n + false_p)


True Positive:  66
True Negative:  59
False Positive:  22
False Negative:  12
All:  159


**Decision Tree**

In [None]:
from sklearn import tree
from sklearn.metrics import accuracy_score,f1_score,confusion_matrix,classification_report

In [None]:
from sklearn import tree
from sklearn.metrics import accuracy_score,f1_score,confusion_matrix,classification_report

In [None]:
def measure_performance(x_test,y_test,clf, show_accuracy=True, show_classification_report=True, show_confusion_matrix=True):
    """Print evaluation metrics for a fitted classifier on a test set.

    Parameters
    ----------
    x_test : feature rows passed to ``clf.predict``.
    y_test : true labels, one per row of ``x_test``.
    clf : any object exposing ``predict(x_test)``.
    show_accuracy : print the accuracy score when true.
    show_classification_report : print the classification report when true.
    show_confusion_matrix : print the confusion matrix when true.

    Returns
    -------
    None. The selected metrics are written to standard output.
    """
    # Predict once and reuse the result for every requested metric.
    y_true = list(y_test)
    y_pre = list(clf.predict(x_test))

    if show_accuracy:
        print('Accuracy:', accuracy_score(y_true, y_pre))
    if show_classification_report:
        print(classification_report(y_true, y_pre))
    if show_confusion_matrix:
        print('Confusion matrix:')
        print(confusion_matrix(y_true, y_pre))

In [None]:
# Fit a decision tree on the training split and predict the held-out rows, so
# that `clf` and `y_pre` exist when the notebook is run top to bottom.
clf = tree.DecisionTreeClassifier(random_state=5)
clf.fit(x_train, y_train)
y_pre = clf.predict(x_test)

In [None]:
# Fraction of the held-out labels in y_test matched by the predictions in y_pre.
accuracy_score(list(y_test),list(y_pre))

In [None]:
# evaluate model
# NOTE(review): `rounded` is assigned from the neural network's predictions
# (model.predict) earlier in the notebook, so this cell re-scores the network;
# it does not evaluate a decision tree.
# Confusion-matrix counts of `rounded` against the labels in data[:, 4],
# computed over all rows rather than a hard-coded 159.
actual = data[:, 4]
predicted = np.asarray(rounded)

# "positive"/"negative" = prediction is 1/0; "true" = prediction equals label.
# Rows whose prediction is neither 0 nor 1 are not counted in any bucket.
true_p = int(np.sum((predicted == 1) & (actual == 1)))
true_n = int(np.sum((predicted == 0) & (actual == 0)))
false_p = int(np.sum((predicted == 1) & (actual != 1)))
false_n = int(np.sum((predicted == 0) & (actual != 0)))

print('\nTrue Positive: ',true_p)
print('True Negative: ',true_n)
print('False Positive: ',false_p)
print('False Negative: ',false_n)
print('All: ', true_n + true_p + false_n + false_p)


True Positive:  66
True Negative:  60
False Positive:  21
False Negative:  12
All:  159
