## PageRank 实验

### Problem 1

In [1]:
import numpy as np
def to_matrix(filename, n):
    """Return the n x n adjacency matrix described by datafile.

    Parameters:
        datafile (str): The name of a .txt file describing a directed graph.
        Lines describing edges should have the form '<from node>\t<to node>\n'.
        The file may also include comments.
        n (int): The number of nodes in the graph described by datafile

    Returns:
        An adjacency matrix (ndarray).
    """
    with open(filename, 'r') as f:
        martix = np.zeros((n,n))
        for line in f:
            line = line.strip().split()
            martix[int(line[0])][int(line[1])] = 1
    return martix

to_matrix('./matrix.txt',8)
        

array([[0., 0., 0., 0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 1., 0., 0., 0., 1., 0.],
       [1., 0., 0., 0., 0., 1., 1., 0.],
       [1., 0., 0., 0., 0., 0., 1., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0.]])

### Problem 2

In [2]:
A = np.array([[ 0, 0, 0, 0, 0, 0, 0, 1],
[ 1, 0, 0, 0, 0, 0, 0, 0],
[ 1, 1, 1, 1, 1, 1, 1, 1],
[ 1, 0, 1, 0, 0, 0, 1, 0],
[ 1, 0, 0, 0, 0, 1, 1, 0],
[ 1, 0, 0, 0, 0, 0, 1, 0],
[ 1, 0, 0, 0, 0, 0, 0, 0],
[ 1, 0, 0, 0, 0, 0, 0, 0]])
def calculateK(A,N):
    """Compute the matrix K as described in the lab.

    Parameters:
        A (ndarray): adjacency matrix of a gragh
        N (int): the number of nodes in the graph

    Returns:
        K (ndarray)
    """
    D = np.zeros((N, N))
    K = np.zeros((N, N))
    for n in range(N):
        D[n][n] = round(1/(A[n][0]+A[n][1]+A[n][2]+A[n][3]+A[n][4]+A[n][5]+A[n][6]+A[n][7]),3)
    K = np.dot(D,A)
    return K.T
calculateK(A, len(A))


array([[0.   , 1.   , 0.125, 0.333, 0.333, 0.5  , 1.   , 1.   ],
       [0.   , 0.   , 0.125, 0.   , 0.   , 0.   , 0.   , 0.   ],
       [0.   , 0.   , 0.125, 0.333, 0.   , 0.   , 0.   , 0.   ],
       [0.   , 0.   , 0.125, 0.   , 0.   , 0.   , 0.   , 0.   ],
       [0.   , 0.   , 0.125, 0.   , 0.   , 0.   , 0.   , 0.   ],
       [0.   , 0.   , 0.125, 0.   , 0.333, 0.   , 0.   , 0.   ],
       [0.   , 0.   , 0.125, 0.333, 0.333, 0.5  , 0.   , 0.   ],
       [1.   , 0.   , 0.125, 0.   , 0.   , 0.   , 0.   , 0.   ]])

### Problem 3

In [64]:
def iter_solve(adj, max_iter, d =0.85, tol = 1e-5):
    """Return the page ranks of the network described by 'adj'.
    Iterate through the PageRank algorithm until the error is less than 'tol'.

    Parameters:
        adj (ndarray): The adjacency matrix of a directed graph.
        max_iter (int): the Maximum number of iterations.
        d (float): The damping factor, a float between 0 and 1.
        tol (float): Stop iterating when the change in approximations to
        the solution is less than 'tol'.

    Returns:
        The approximation to the steady state.
    """
    R0 = np.ones((len(adj), 1))*(1 / len(adj))
    Rt = np.add(d * np.dot(adj, R0), ((1 - d) / len(adj)) * np.ones((len(adj), 1)))
    for i in range(max_iter):
        if abs(R0[0] - Rt[0]) < tol:
            break;
        Rt = np.add(d * np.dot(adj, R0) ,((1 - d) / len(adj)) * np.ones((len(adj), 1)))
        R0 = Rt 

    return Rt

print(iter_solve(A, 100))
print(iter_solve(to_matrix('./matrix.txt',8), 100))

[[0.125  ]
 [0.125  ]
 [0.86875]
 [0.3375 ]
 [0.3375 ]
 [0.23125]
 [0.125  ]
 [0.125  ]]
[[0.125  ]
 [0.125  ]
 [0.01875]
 [0.3375 ]
 [0.3375 ]
 [0.23125]
 [0.125  ]
 [0.125  ]]


### Problem 4

In [65]:
def team_rank(filename='ncaa2013.csv'):
    """Use iter_solve() to predict the rankings of the teams in the given
    dataset of games. The dataset should have two columns, representing
    winning and losing teams. Each row represents a game, with the winner on
    the left, loser on the right. Parse this data to create the adjacency
    matrix, and feed this into the solver to predict the team ranks.

    Parameters:
        filename (str): The name of the data file.
    Returns:
        ranks (list): The ranks of the teams from best to worst.
        teams (list): The names of the teams, also from best to worst.
    """
    with open(filename, 'r') as f:
        f.readline()
        team = []
        for line in f:
            teams = line.strip().split(',') 
            if teams[1] not in team:
                team.append(teams[1])    #将所有队名都提取出来
    score = np.zeros((len(team),len(team)))  #新建一个相邻矩阵
    
    with open(filename, 'r') as f:
        f.readline()
        for line in f:
                teams = line.strip().split(',') 
                score[team.index(teams[0])][team.index(teams[1])] = 1 #获胜的队伍和失败队伍的下标的值为1
    y = -iter_solve(score, 100, d = 0.7)
    print("--------------------best to worst-----------------")
    for i in range(len(y.argsort(0))):
        print(team[int(y.argsort(0)[i])])
team_rank()

--------------------best to worst-----------------
Louisville
VA Commonwealth
Syracuse
Butler
Duke
St Louis
Indiana
Mississippi
Pittsburgh
Missouri
Florida
Memphis
Notre Dame
Temple
Charlotte
Ohio St
Georgetown
Michigan
Kansas
Cincinnati
Michigan St
Arizona
Minnesota
Creighton
Akron
New Mexico
Massachusetts
Marquette
Oregon
UCLA
La Salle
Wichita St
Gonzaga
NC State
Arizona St
Virginia
Robert Morris
Bucknell
UNLV
Oklahoma St
Ohio
Miami FL
Kentucky
Illinois
Southern Miss
Wisconsin
Maryland
Middle Tenn St
Connecticut
Richmond
Albany NY
North Carolina
Col Charleston
Iowa
Iowa St
St Mary's CA
Kansas St
Murray St
Valparaiso
Belmont
Arkansas
Tennessee
E Kentucky
Colorado St
Colorado
Louisiana Tech
Xavier
Villanova
Bryant
Texas A&M
LSU
San Diego St
Illinois St
UCF
Loyola MD
Dayton
Alabama
Boise St
Stony Brook
California
Oklahoma
NC A&T
Tulane
Canisius
Norfolk St
Kent
BYU
St Joseph's PA
Houston
Davidson
Florida St
Iona
Indiana St
Evansville
St John's
New Mexico St
Vermont
Mt St Mary's
Bradley
W