### printing threshold (when long arrays are summarized with "...")

In [3]:
import numpy as np
import sys

# With threshold=5, any array holding more than 5 elements is summarized with "...".
np.set_printoptions(threshold=5)
print(np.arange(10))
# Print arrays in full again. threshold=np.nan is rejected by current NumPy
# ("threshold must be non-NAN"); sys.maxsize is the documented way to disable
# summarization.
np.set_printoptions(threshold=sys.maxsize)
print(np.arange(10))

[0 1 2 ..., 7 8 9]
[0 1 2 3 4 5 6 7 8 9]


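### A.dot(v) = A*v (v is always the right-hand operand)
右乘优先 (right-multiplication takes precedence): a 1-D `v` is treated as a column vector, so every example below computes `A*v`; `A.dot(v)` does not fall back to `v*A`.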

In [14]:
import numpy as np
from scipy.sparse import csr_matrix
# Sparse 3x3 left operand, written out row by row.
A = csr_matrix([
    [1, 2, 0],
    [0, 0, 3],
    [4, 0, 5],
])
# Dense 3x3 right operand: only its first row is non-zero.
v = np.array([
    [1, 1, 1],
    [0, 0, 0],
    [0, 0, 0],
])
# Matrix product of A with v; the bare expression is the cell's displayed value.
A.dot(v)

array([[1, 1, 1],
       [0, 0, 0],
       [4, 4, 4]], dtype=int64)

In [18]:
import numpy as np
from scipy.sparse import csr_matrix
# Sparse 3x3 left operand, written out row by row.
A = csr_matrix([
    [1, 2, 0],
    [0, 0, 3],
    [4, 0, 5],
])
# 1-D integer vector of three ones.
v = np.ones(3, dtype=int)
# With a 1-D operand the product comes back 1-D as well (the row sums of A here).
A.dot(v)

array([3, 3, 9], dtype=int64)

In [19]:
import numpy as np
from scipy.sparse import csr_matrix
# Sparse 3x3 left operand, written out row by row.
A = csr_matrix([
    [1, 2, 0],
    [0, 0, 3],
    [4, 0, 5],
])
# Explicit 3x1 integer column of ones.
v = np.ones((3, 1), dtype=int)
# With a 3x1 operand the product keeps the column shape.
A.dot(v)

array([[3],
       [3],
       [9]], dtype=int64)

### square sparse M

In [15]:
import numpy 
from scipy.sparse import *
from scipy.sparse.linalg import inv
import sys

# Read the file as integers; the three columns are used below as
# (row index, column index, value). ndmin=2 keeps a one-line file two-dimensional
# so the column slices still work.
M=numpy.loadtxt('/Users/hanxinlei/Downloads/AdjacencyMatrix.txt',dtype=int,ndmin=2)
#M=numpy.loadtxt('/Users/hanxinlei/tmp/adj_ijk.txt',dtype=int)
row=M[:,0]
col=M[:,1]
data=M[:,2]
#convert to SQUARE sparse matrix
# Side length is one more than the largest index in either column. Passing it as
# `shape` makes the matrix square without appending an artificial zero entry.
m,n=row.max(),col.max()
size=int(max(m,n))+1
M=coo_matrix((data,(row,col)),shape=(size,size),dtype=numpy.float64)
M=M.tocsc()

#init 1/Si
# Divide every stored entry by the sum of its column. Columns whose sum is zero
# are divided by 1.0, i.e. left unchanged.
s=numpy.array(M.sum(axis=0))
divisors=numpy.where(s[0]!=0,s[0],1.0)
# In CSC layout the stored entries of column i are M.data[indptr[i]:indptr[i+1]],
# so numpy.diff(M.indptr) is the per-column entry count.
M.data/=numpy.repeat(divisors,numpy.diff(M.indptr))
# threshold=numpy.nan is rejected by current NumPy; sys.maxsize disables summarization.
numpy.set_printoptions(threshold=sys.maxsize)
print(M.shape)



(81434, 81434)


# generic PageRank using sparse matrix CSC & COO

In [20]:
import numpy
from scipy.sparse import *

#read file and convert to sparse matrix
def get_Matrix_CSC(fname):
    """Load an edge-list text file into a square, column-normalized CSC matrix.

    The file is read with ``numpy.loadtxt`` as integers; its first three columns
    are used as (row index, column index, value).

    Parameters
    ----------
    fname : path or file object accepted by ``numpy.loadtxt``.

    Returns
    -------
    scipy.sparse CSC matrix of dtype float64 and shape ``(size, size)``, where
    ``size`` is one more than the largest index found in the first two columns.
    Every stored entry is divided by the sum of its column; columns whose sum
    is zero are left unchanged.
    """
    # ndmin=2 keeps a one-line file two-dimensional so the column slices work.
    M=numpy.loadtxt(fname,dtype=int,ndmin=2)
    row=M[:,0]
    col=M[:,1]
    data=M[:,2]
    #convert to SQUARE sparse matrix
    # An explicit shape makes the matrix square without appending a dummy entry.
    size=int(max(row.max(),col.max()))+1
    M=coo_matrix((data,(row,col)),shape=(size,size),dtype=numpy.float64)
    M=M.tocsc()

    #init 1/Si
    col_sums=numpy.asarray(M.sum(axis=0)).ravel()
    # Dividing by 1.0 leaves the entries of zero-sum columns untouched.
    divisors=numpy.where(col_sums!=0,col_sums,1.0)
    # In CSC layout column i owns M.data[indptr[i]:indptr[i+1]], so
    # numpy.diff(M.indptr) is the number of stored entries per column.
    M.data/=numpy.repeat(divisors,numpy.diff(M.indptr))
    return M

#whether to exit the iteration
def check_stability(current,last,threshold):
    """Return True when no element changed by more than ``threshold``.

    Parameters
    ----------
    current, last : array-likes of matching (or broadcastable) shape.
    threshold : largest absolute element-wise difference still considered stable.

    Returns
    -------
    bool : False if any ``abs(current - last)`` element is strictly greater
    than ``threshold``, True otherwise.
    """
    diff=numpy.asarray(current)-numpy.asarray(last)
    # One vectorized comparison instead of a Python-level loop; also works for
    # arrays of any shape. Written as "not any(> threshold)" so NaN differences
    # are treated exactly as the strict ">" comparison treats them.
    return not numpy.any(numpy.abs(diff)>threshold)

#read M which is already initialized

def pagerank(M,maxn,beta=0.85,threshold=0.001,max_iter=None):
    """Iterate ``r = beta*M.dot(r) + (1-beta)/maxn`` until the vector is stable.

    Parameters
    ----------
    M : matrix-like object whose ``dot`` accepts a ``(maxn, 1)`` array, e.g. the
        column-normalized matrix returned by ``get_Matrix_CSC``.
    maxn : number of entries in the rank vector; also the divisor of the
        uniform ``(1-beta)`` term.
    beta : weight of the ``M.dot(r)`` term.
    threshold : passed to ``check_stability``; iteration stops once no element
        of ``r`` changed by more than this between two consecutive steps.
    max_iter : optional upper bound on the number of updates. ``None`` (the
        default) means unbounded. At least one update is always performed.

    Returns
    -------
    numpy array of shape ``(maxn, 1)`` holding the last computed vector. It is
    returned as computed, without renormalization.
    """
    # Start from the uniform vector.
    r_last=numpy.ones((maxn,1))/maxn
    count=0
    while True:
        count+=1
        r=beta*M.dot(r_last)+(1-beta)/maxn
        if check_stability(r,r_last,threshold):
            break
        # Optional safety net so a too-small threshold cannot loop forever.
        if max_iter is not None and count>=max_iter:
            break
        r_last=r
    return r

#main()
import sys

fname='/Users/hanxinlei/Downloads/AdjacencyMatrix.txt'
#fname='/Users/hanxinlei/tmp/adj_ijk.txt'
M=get_Matrix_CSC(fname)
# The matrix is square, so its side length is the size of the rank vector.
r=pagerank(M,M.shape[0],threshold=1.0e-04)
# Print the whole vector. threshold=numpy.nan is rejected by current NumPy;
# sys.maxsize is the documented way to disable summarization.
numpy.set_printoptions(threshold=sys.maxsize)
print(r)


[[  1.84198246e-06]
 [  1.12524445e-04]
 [  1.84198246e-06]
 [  2.26099009e-06]
 [  2.09800233e-06]
 [  2.33607919e-06]
 [  2.23956013e-06]
 [  4.69256486e-06]
 [  2.26067291e-06]
 [  2.93734780e-06]
 [  1.87409636e-06]
 [  1.90040295e-06]
 [  1.84198246e-06]
 [  2.12859505e-06]
 [  2.56643945e-06]
 [  2.04937719e-06]
 [  1.86180126e-06]
 [  1.99493025e-06]
 [  3.17961362e-06]
 [  3.74760727e-06]
 [  2.71235552e-06]
 [  3.44262625e-06]
 [  2.05591584e-06]
 [  2.07297294e-06]
 [  2.12464458e-06]
 [  2.78764135e-06]
 [  2.20517321e-06]
 [  2.23116392e-06]
 [  3.07860132e-06]
 [  2.15885691e-06]
 [  2.75056615e-06]
 [  1.84198246e-06]
 [  1.85135782e-06]
 [  4.42993926e-06]
 [  3.19432674e-06]
 [  3.33944816e-06]
 [  4.28035952e-06]
 [  3.51066079e-06]
 [  2.84224156e-06]
 [  2.30649300e-06]
 [  3.40394690e-06]
 [  1.84198246e-06]
 [  2.58713495e-06]
 [  2.30649300e-06]
 [  4.77261747e-06]
 [  3.15114908e-06]
 [  6.29108272e-06]
 [  4.99134186e-06]
 [  1.86769800e-06]
 [  1.95426079e-06]
