In [38]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity as cos
import scipy.sparse as sp
from scipy.linalg import fractional_matrix_power, inv


def knn(feat, num_node, k, data_name, view_name):
    adj = np.zeros((num_node, num_node), dtype=np.int64)
    dist = cos(feat)
    col = np.argpartition(dist, -(k + 1), axis=1)[:, -(k + 1):].flatten()
    adj[np.arange(num_node).repeat(k + 1), col] = 1  
    adj = sp.coo_matrix(adj)
    sp.save_npz("./"+data_name+"/"+view_name+"_knn.npz", adj)


def adj(adj, data_name, view_name):
    adj = sp.coo_matrix(adj)
    sp.save_npz("./"+data_name+"/"+view_name+"_adj.npz", adj)


def diff(adj, alpha, data_name, view_name):   
    d = np.diag(np.sum(adj, 1))                                    
    dinv = fractional_matrix_power(d, -0.5)                       
    at = np.matmul(np.matmul(dinv, adj), dinv)                      
    adj = alpha * inv((np.eye(adj.shape[0]) - (1 - alpha) * at))   
    adj = sp.coo_matrix(adj)
    sp.save_npz("./"+data_name+"/"+view_name+"_diff.npz", adj)

data_name = "citeseer"
view_name = "v2"  # v1 or v2
view_type = "knn"  # knn adj diff

adj = sp.load_npz("./"+data_name+"/ori_adj.npz")####
print(adj)
adj.toarray()
num_node = adj.shape[0]
feat = sp.load_npz("./"+data_name+"/feat.npz")
feat

a = adj.A

# if a[0, 0] == 0:
#     a += np.eye(num_node)
#     print("self-loop!")
adj = a
view_type = "knn" 
if view_type == "knn":  # set k
    knn(feat, num_node, 5, data_name, view_name)
elif view_type == "adj":
    adj(adj, data_name, view_name)
elif view_type == "diff":  # set alpha: 0~1
    diff(adj, alpha, data_name, view_name)



  (0, 628)	1
  (1, 158)	1
  (1, 486)	1
  (1, 1097)	1
  (1, 2919)	1
  (1, 2933)	1
  (2, 3285)	1
  (3, 1431)	1
  (3, 3219)	1
  (4, 467)	1
  (5, 648)	1
  (6, 1501)	1
  (7, 1833)	1
  (7, 2137)	1
  (8, 178)	1
  (8, 1033)	1
  (9, 1007)	1
  (10, 1670)	1
  (10, 2622)	1
  (11, 2034)	1
  (12, 113)	1
  (12, 557)	1
  (12, 677)	1
  (12, 794)	1
  (12, 839)	1
  :	:
  (3307, 1969)	1
  (3308, 19)	1
  (3308, 1451)	1
  (3308, 2824)	1
  (3309, 41)	1
  (3310, 116)	1
  (3311, 717)	1
  (3312, 1981)	1
  (3312, 2022)	1
  (3313, 1974)	1
  (3314, 998)	1
  (3315, 645)	1
  (3316, 1181)	1
  (3317, 2911)	1
  (3318, 83)	1
  (3319, 3320)	1
  (3320, 3319)	1
  (3321, 1750)	1
  (3322, 3323)	1
  (3323, 3322)	1
  (3324, 131)	1
  (3324, 268)	1
  (3324, 2820)	1
  (3325, 1643)	1
  (3326, 33)	1


In [18]:
import torch

def get_khop_indices(k, view):
    view = (view.A > 0).astype("int32")
    view_ = view
    for i in range(1, k):
        view_ = (np.matmul(view_, view.T)>0).astype("int32")
    view_ = torch.tensor(view_).to_sparse()
    return view_.indices()
    
def topk(k, adj):
    pos = np.zeros(adj.shape)
    adj=adj.A
    print(adj)

    for i in range(len(adj)):
      one = adj[i].nonzero()[0]
      if len(one)>k:
        oo = np.argsort(-adj[i, one])
        sele = one[oo[:k]]
        pos[i, sele] = adj[i, sele]
      else:
        pos[i, one] = adj[i, one]
    return pos

#####################
## get k-hop scope ##
## take citeseer   ##
#####################
adj = sp.load_npz("./citeseer/v1_adj.npz")
indice = get_khop_indices(2, adj)
torch.save(indice, "./citeseer/v1_2.pt")

#####################
## get top-k scope ##
## take citeseer   ##
#####################
adj = sp.load_npz("./citeseer/v2_diff.npz")
kn = topk(40, adj)
kn = sp.coo_matrix(kn)
indice = get_khop_indices(1, kn)
torch.save(indice, "./citeseer/v2_40.pt")


[[5.99999881e-01 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 2.74383004e-01 0.00000000e+00 ... 4.28966787e-05
  0.00000000e+00 1.11539050e-05]
 [0.00000000e+00 0.00000000e+00 5.99999881e-01 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 ...
 [0.00000000e+00 4.28966787e-05 0.00000000e+00 ... 3.04965500e-01
  0.00000000e+00 3.30291619e-07]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  5.99999881e-01 0.00000000e+00]
 [0.00000000e+00 1.11539050e-05 0.00000000e+00 ... 3.30291619e-07
  0.00000000e+00 3.81772145e-01]]


In [25]:
feature=sp.load_npz("./citeseer/feat.npz")

In [26]:
feature = feature.todense()
feature

matrix([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]])

In [36]:
len((np.load("./citeseer/label.npy")))

3327

In [37]:
np.load("./citeseer/train.npy"),np.load("./citeseer/test.npy")

(array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
         13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
         26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
         39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
         52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
         65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
         78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
         91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
        104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
        117, 118, 119]),
 array([2312, 2313, 2314, 2315, 2316, 2317, 2318, 2319, 2320, 2321, 2322,
        2323, 2324, 2325, 2326, 2327, 2328, 2329, 2330, 2331, 2332, 2333,
        2334, 2335, 2336, 2337, 2338, 2339, 2340, 2341, 2342, 2343, 2344,
        2345, 2346, 2347, 2348, 2349, 2350, 2351, 2352, 2353, 2354, 2355,
        2356, 2357, 23

In [32]:
sp.load_npz("./citeseer/v2_knn.npz")     

<3327x3327 sparse matrix of type '<class 'numpy.int64'>'
	with 19962 stored elements in COOrdinate format>

In [33]:
torch.load("./citeseer/v1_2.pt")

tensor([[   0,    0,    1,  ..., 3326, 3326, 3326],
        [   0,  628,    1,  ..., 3193, 3197, 3326]])