In [1]:
import copy
import pandas as pd
import numpy as np

In [2]:
data = pd.read_csv("scores.csv")

In [3]:
data.columns.values.tolist()

['ES', 'LS', 'RX', 'A8', 'A6', '3series', '5series', '7series', 'XJ', 'Sclass']

In [41]:
class PageRank(object):
    '''A PageRank object to generate PR'''
    
    def __init__(self):
        self.nodes = []
        self.unweightedPref = {}
        self.weightedPref = {}
        self.unweightedPR = {}
        self.weightedPR = {}
        
    def  PrefGenerator(self, data):
        '''Convert dataframe into arrows between nodes'''
        self.nodes = data.columns.values.tolist()
        for node in self.nodes:
            self.unweightedPref[node] = {}
            self.weightedPref[node] = {}
            for other in list(set(self.nodes) - set([node])):
                pref = data[node] - data[other]
                self.unweightedPref[node][other] = max([1 if x>0 else 0 for x in pref])
                self.weightedPref[node][other] = sum([x for x in pref if x>0])
        return
    
    def traverseNodes(self):
        '''Traverse all nodes and calculate total outlinks for each node'''
        self.uwol = {} # Unweighted outlinks
        self.wol = {} # Weighted outlinks
        for node in self.nodes:
            for key in self.unweightedPref[node]:
                try:
                    self.uwol[key] += self.unweightedPref[node][key]
                except KeyError:
                    self.uwol[key] = self.unweightedPref[node][key]
            for key in self.weightedPref[node]:
                try:
                    self.wol[key] += self.weightedPref[node][key]
                except KeyError:
                    self.wol[key] = self.weightedPref[node][key]
        return
    
    def updatePR(self, PR, outlinks, Pref, d):
        '''update PageRank'''
        tmp = {}
        for node in self.nodes:
            tmp[node] = 1.0*(1-d)/len(self.nodes) + d*1.0*\
                        sum([PR[ref]*1.0/outlinks[ref]*Pref[node][ref] for ref in Pref[node]])
        return tmp
    
    def PRGenerator(self, d=0.85, maxiter=100, threshold=0.001):
        '''d: damping factor
        maxiter: max iteration #
        threshold: stable iterative fluctuation of PR
        Check out this guy's implementation:
        https://github.com/ashkonf/PageRank'''
        self.uwtmp = {} # Temporary variable to store iteration results
        self.wtmp = {}
        self.traverseNodes() # Calculate all outlinks
        for node in self.nodes: # Initialize all PR to 1/N
            self.unweightedPR[node] = 1.0/len(self.nodes)
            self.weightedPR[node] = 1.0/len(self.nodes)
            
        for i in range(maxiter):
            self.flag = 1 # A flag to end iteration
            
            self.uwtmp = self.updatePR(self.unweightedPR, self.uwol, self.unweightedPref, d)
            self.wtmp = self.updatePR(self.weightedPR, self.wol, self.weightedPref, d)
            for node in self.nodes:
                if abs(self.unweightedPR[node] - self.uwtmp[node]) > threshold or\
                abs(self.weightedPR[node] - self.wtmp[node]) > threshold:
                    self.flag = 0
            
            self.unweightedPR, self.weightedPR = copy.deepcopy(self.uwtmp), copy.deepcopy(self.wtmp)
            
            if self.flag == 1:
                print "Converge after {} iterations".format(i)
                return
        return

In [51]:
pr = PageRank()
pr.PrefGenerator(data)
for d in [0.5, 0.6, 0.7, 0.8, 0.85, 0.9]:
    pr.PRGenerator(d=d)
    print sorted(list(pr.unweightedPR), key=lambda tup: tup[1])

Converge after 4 iterations
['A6', 'A8', 'XJ', 'LS', 'ES', 'RX', 'Sclass', '7series', '3series', '5series']
Converge after 4 iterations
['A6', 'A8', 'XJ', 'LS', 'ES', 'RX', 'Sclass', '7series', '3series', '5series']
Converge after 5 iterations
['A6', 'A8', 'XJ', 'LS', 'ES', 'RX', 'Sclass', '7series', '3series', '5series']
Converge after 6 iterations
['A6', 'A8', 'XJ', 'LS', 'ES', 'RX', 'Sclass', '7series', '3series', '5series']
Converge after 6 iterations
['A6', 'A8', 'XJ', 'LS', 'ES', 'RX', 'Sclass', '7series', '3series', '5series']
Converge after 7 iterations
['A6', 'A8', 'XJ', 'LS', 'ES', 'RX', 'Sclass', '7series', '3series', '5series']


In [29]:
pr.weightedPR

{'3series': 0.044386607161187919,
 '5series': 0.045623193619579977,
 '7series': 0.087399252350042783,
 'A6': 0.03622986235614778,
 'A8': 0.12573224784687434,
 'ES': 0.089964034364456866,
 'LS': 0.25945691347773309,
 'RX': 0.073276107509468988,
 'Sclass': 0.18825079403210365,
 'XJ': 0.049680987282404693}

In [30]:
pr.unweightedPR(d=0.5)

{'3series': 0.0326093817031912,
 '5series': 0.03874052558542043,
 '7series': 0.08454635707782107,
 'A6': 0.04297395260677694,
 'A8': 0.13157923248266917,
 'ES': 0.08800100023483184,
 'LS': 0.26196827740415674,
 'RX': 0.07117502707701291,
 'Sclass': 0.19473014897981467,
 'XJ': 0.05367609684830506}