In [2]:
import numpy as np
from scipy import linalg as la
import pandas as pd

In [3]:
def problem_1(A,k,Ad):
    """Check the three defining conditions of a Drazin inverse.

    Parameters:
        A ((n,n) ndarray): A square matrix.
        k (int): Exponent used in the power condition.
        Ad ((n,n) ndarray): Candidate Drazin inverse of A.

    Returns:
        (bool): True if, up to np.allclose tolerance,
            A Ad = Ad A,  A^(k+1) Ad = A^k,  and  Ad A Ad = Ad;
            False otherwise.
    """
    power = np.linalg.matrix_power

    # All three conditions are evaluated before combining them, so a
    # malformed input fails the same way regardless of the other results.
    commutes = np.allclose(np.dot(A, Ad), np.dot(Ad, A))
    absorbs_power = np.allclose(np.dot(power(A, k + 1), Ad), power(A, k))
    reproduces = np.allclose(np.dot(np.dot(Ad, A), Ad), Ad)

    return bool(commutes and absorbs_power and reproduces)

In [4]:
def problem_2(A,tol=1e-7):
    """Compute the Drazin inverse of A from two sorted Schur decompositions.

    Parameters:
        A ((n,n) ndarray): A real square matrix.
        tol (float): Eigenvalues whose modulus is <= tol are treated as zero.

    Returns:
        ((n,n) ndarray): The Drazin inverse of A.
    """
    n, _ = A.shape

    # For a real Schur form scipy passes the real and imaginary parts of each
    # eigenvalue as two separate arguments. A one-argument callable only ever
    # sees the real part, so a purely imaginary eigenvalue would be classified
    # as zero. Taking both parts (the second defaults to 0 when a single
    # value is supplied) lets the test use the true modulus.
    def is_nonzero(re, im=0.0):
        return abs(re + 1j * im) > tol

    def is_zero(re, im=0.0):
        return abs(re + 1j * im) <= tol

    # la.schur returns (T, Z, sdim); only the basis Z and the count are needed.
    _, Z_nonzero, k1 = la.schur(A, sort=is_nonzero)
    _, Z_zero, _ = la.schur(A, sort=is_zero)

    # Leading k1 columns come from the decomposition with nonzero eigenvalues
    # sorted first; the remaining n-k1 from the one with zero eigenvalues first.
    U = np.hstack((Z_nonzero[:, :k1], Z_zero[:, :n - k1]))
    U_inv = np.linalg.inv(U)
    V = np.dot(np.dot(U_inv, A), U)

    Z = np.zeros((n, n), dtype=float)
    if k1 != 0:
        # Invert only the leading k1 x k1 block; the rest stays zero.
        Z[:k1, :k1] = np.linalg.inv(V[:k1, :k1])
    return np.dot(np.dot(U, Z), U_inv)

In [5]:
# Following function helps with testing, finds index of matrix
def index(A, tol=1e-5):
    """Compute the index of the matrix A. Used for testing

    Returns 0 when A has full rank; otherwise the first k >= 1 for which
    rank(A^k) == rank(A^(k+1)).

    Parameters:
        A ((n,n) ndarray): An nxn matrix.
        tol (float): Currently unused; kept so existing calls stay valid.
            Rank decisions use np.linalg.matrix_rank's default tolerance.

    Returns:
        k (int): The index of A.
    """
    n = len(A)

    # Test for non-singularity with the rank rather than the determinant:
    # det(c*A) = c**n * det(A), so comparing det to 0 misclassifies a
    # well-conditioned matrix with small entries as singular.
    rank_k = np.linalg.matrix_rank(A)
    if rank_k == n:
        return 0

    k = 1
    Ak = A
    while k <= n:
        # Ak becomes A^(k+1); its rank is reused as rank(A^k) next iteration.
        Ak = np.dot(A, Ak)
        rank_next = np.linalg.matrix_rank(Ak)
        if rank_k == rank_next:
            return k
        rank_k = rank_next
        k += 1

    return k

In [6]:
# Testing the Drazin Inverse
# A fixed seed keeps this check reproducible across kernel restarts.
A = np.random.RandomState(0).random_sample((6,6))
k = index(A)
Ad = problem_2(A)
print(problem_1(A,k,Ad))

True


In [7]:
def problem_3(A):
    """Build the resistance matrix R for the graph with adjacency matrix A.

    For every column j, row j of the graph Laplacian is replaced by row j of
    the identity, the Drazin inverse of that matrix is taken with problem_2,
    and its diagonal becomes column j of R. The diagonal of R is set to 0.

    Parameters:
        A ((n,n) ndarray): Adjacency matrix of the graph.

    Returns:
        ((n,n) ndarray): Float matrix R with R[i,j] taken from the (i,i)
            entry of the j-th modified Laplacian's Drazin inverse (i != j).
    """
    n, _ = A.shape
    laplacian = np.diag(np.sum(A, axis=0)) - A
    identity = np.eye(n, dtype=float)
    R = np.empty_like(A, dtype=float)

    for j in range(n):
        modified = np.copy(laplacian)
        modified[j, :] = identity[j, :]
        # Whole column at once: entry i is the (i,i) element of the inverse.
        R[:, j] = np.diag(problem_2(modified))
        R[j, j] = 0

    return R

In [8]:
A=np.array([[0, 4],[4,0]])
B=np.array([[0,1,1],[1,0,1],[1,1,0]])
C=np.array([[0,1,0,0],[1,0,1,0],[0,1,0,1],[0,0,1,0]])
# Compare floats with a two-sided tolerance: exact equality is fragile, and
# a one-sided `x - 3.0 < eps` check would accept any value below 3.
print(np.isclose(problem_3(A)[0,1], 0.25))
print(np.isclose(problem_3(B)[0,1], 2/3))
print(np.isclose(problem_3(C)[0,3], 3.0))

True
True
True


In [133]:
# Problem 4 and 5
class LinkPredictor(object):
    """Suggest new links in an undirected network using a resistance matrix.

    Attributes set by __init__:
        names: array of the unique node names found in the edge list.
        adjacency: square float matrix; entry (i,j) is nonzero when nodes
            i and j are linked (add_link increments it).
        resistance: result of problem_3(adjacency).
    """

    def __init__(self,filename):
        """Read an edge list and build the adjacency and resistance matrices.

        Parameters:
            filename (str): Path to a header-less CSV; the first two columns
                of each row name the endpoints of one link.
        """
        data = pd.read_csv(filename,header=None,index_col=False).astype(str)
        # `.values` replaces DataFrame.as_matrix(), removed in pandas 1.0.
        edges = data.values
        m,n = edges.shape
        names, inv = np.unique(edges, return_inverse=True)
        # inv maps every cell of the edge list to its position in `names`.
        inv = inv.reshape((m,n))

        A = np.zeros([len(names),len(names)],dtype=float)
        A[inv[:,0],inv[:,1]] = 1
        A[inv[:,1],inv[:,0]] = 1

        self.names = names
        self.adjacency = A
        self.resistance = problem_3(A)

    def _index_of(self,name):
        """Return the position of `name` in self.names or raise ValueError."""
        hits = np.flatnonzero(self.names==name)
        if hits.size == 0:
            raise ValueError("Given name doesn't exist in network.")
        return hits[0]

    def _candidate_scores(self):
        """Return a copy of the resistances with non-candidates set to inf."""
        scores = np.array(self.resistance,dtype=float)
        # Zero entries (the diagonal) are not usable resistances.
        scores[scores==0] = np.inf
        # Any nonzero adjacency, including weights above 1, is an existing link.
        scores[self.adjacency!=0] = np.inf
        return scores

    def predict_link(self,node=None):
        """Predict the next link as the unlinked pair with least resistance.

        The stored resistance matrix is not modified.

        Parameters:
            node (str): If given, restrict the search to links involving
                this node.

        Returns:
            If node is None: a tuple (name1, name2) for the best unlinked
                pair (first one in row-major order on ties).
            Otherwise: an array with the name(s) of the unlinked node(s)
                having the least resistance to `node`.

        Raises:
            ValueError: if `node` is not in the network, or if there is no
                unlinked candidate to suggest.
        """
        scores = self._candidate_scores()

        if node is None:
            best = np.argmin(scores)
            if np.isinf(scores.flat[best]):
                raise ValueError("No unlinked pair of nodes is available.")
            loc1,loc2 = np.unravel_index(best,scores.shape)
            return (self.names[loc1],self.names[loc2])

        row = scores[self._index_of(node),:]
        min_val = row.min()
        if np.isinf(min_val):
            raise ValueError("No unlinked node is available for the given name.")
        return self.names[np.flatnonzero(row==min_val)]

    def add_link(self,name1,name2):
        """Add a link between two existing nodes and refresh the resistances.

        Parameters:
            name1 (str): Name of one endpoint.
            name2 (str): Name of the other endpoint.

        Raises:
            ValueError: if either name is not in the network.
        """
        loc1 = self._index_of(name1)
        loc2 = self._index_of(name2)
        self.adjacency[loc1,loc2] = self.adjacency[loc1,loc2]+1
        self.adjacency[loc2,loc1] = self.adjacency[loc2,loc1]+1
        self.resistance = problem_3(self.adjacency)

In [134]:
# NOTE(review): machine-specific absolute path; this cell only runs where
# that file exists.
network_csv = "/Users/rubyzhang/Desktop/UChicago/OSML/BootCamp2017/Computation/Wk3_Decomp/social_network.csv"
social = LinkPredictor(network_csv)

print(social.predict_link()==('Oliver','Emily'))
print(social.predict_link('Melanie')==['Carol'])
print(social.predict_link('Alan')==['Sonia'])
# Link Alan to each predicted friend in turn and check the next prediction.
for added, expected in (('Sonia','Piers'),('Piers','Abigail')):
    social.add_link('Alan',added)
    print(social.predict_link('Alan')==[expected])

True
[ True]
[ True]
[ True]
[ True]
