In [1]:
import numpy as np
import pregel
import pandas as pd

In [2]:
# Load the payment records and the list of known bad senders from Excel.
payments_df = pd.read_excel('Payments.xlsx')
bad_nodes_df = pd.read_excel('bad_sender.xlsx')

# Pull each payment column out as a plain Python list of ints
# (one entry per payment row, aligned by position across the three lists).
sender_list, receiver_list, amount_list = (
    payments_df[column].astype(int).tolist()
    for column in ('Sender', 'Receiver', 'Amount')
)

# Ids flagged as bad senders, also as plain ints.
bad_sender_list = bad_nodes_df['Bad Sender'].astype(int).tolist()

In [3]:
# Every distinct node id that appears as a sender, a receiver, or a bad sender.
nodes_list = list(set(sender_list + receiver_list + bad_sender_list))

# Collapse multiple directed edges from one node to another into a single
# directed edge whose weight is the sum of the individual amounts.
# Totals are accumulated in a nested dict (sender -> receiver -> total) so each
# payment row is handled in constant time instead of re-scanning the sender's
# edge list on every row.
edge_totals = {}
for sender, receiver, amount in zip(sender_list, receiver_list, amount_list):
    totals_for_sender = edge_totals.setdefault(sender, {})
    totals_for_sender[receiver] = totals_for_sender.get(receiver, 0) + amount

# Final shape: {sender: [(receiver, total_amount), ...]}. Dicts keep insertion
# order, so senders and receivers appear in first-seen order.
outgoing_edges = {
    sender: list(totals.items()) for sender, totals in edge_totals.items()
}

len(bad_sender_list)

20

In [4]:
# Number of nodes that are NOT flagged as bad senders.
# nodes_list is already de-duplicated and contains every bad sender id, so the
# bad ids must be de-duplicated too before subtracting; otherwise a repeated id
# in the bad-sender sheet would undercount the good nodes and inflate each
# good node's initial score (1 / good_senders_num).
good_senders_num = len(nodes_list) - len(set(bad_sender_list))
good_senders_num

779

In [5]:
class TrustRankVertex(pregel.Vertex):
    """Pregel vertex that propagates a trust score along amount-weighted edges.

    Vertices listed in ``bad_sender_list`` start (and are biased) at 0; every
    other vertex starts at ``1 / good_senders_num``. On each superstep a vertex
    mixes its fixed starting score with the scores received from its
    in-neighbours and forwards its new score to its out-neighbours in
    proportion to the amount on each outgoing edge.

    NOTE(review): ``superstep``, ``incoming_messages``, ``outgoing_messages``,
    ``out_vertices`` and ``active`` are assumed to be provided/managed by
    ``pregel.Vertex`` and the Pregel runner -- confirm against the pregel module.
    """

    def __init__(self, vertex_id, bad_sender_list, outgoing_edges, good_senders_num, damping_factor=0.85, max_iterations=50):
        """Set up the vertex's edges, starting score and iteration parameters.

        Args:
            vertex_id: Identifier of this vertex.
            bad_sender_list: Collection of vertex ids flagged as bad senders.
            outgoing_edges: Mapping ``vertex_id -> [(receiver_id, amount), ...]``.
            good_senders_num: Count of non-bad vertices; only used (as a
                divisor) when this vertex is not a bad sender.
            damping_factor: Weight given to propagated score versus the fixed
                starting score in ``update``.
            max_iterations: Superstep count at which the vertex deactivates.
        """
        # Weighted edges leaving this vertex; empty when it never sends.
        weighted_edges = outgoing_edges.get(vertex_id, [])

        # The base class receives its own copy of the edge list.
        super().__init__(vertex_id, 0, list(weighted_edges))

        self.damping_factor = damping_factor
        self.max_iterations = max_iterations
        self.outgoing_edges = weighted_edges

        # Bad senders carry no initial trust; everyone else shares it equally.
        self.bad_node = vertex_id in bad_sender_list
        self.value = 0 if self.bad_node else 1/good_senders_num

        # Total amount sent by this vertex; used to normalise outgoing shares.
        self.out_deg = sum(amount for _, amount in weighted_edges)

        # Starting score is kept as the constant bias term for every update.
        self.const_value = self.value

    def update(self):
        """Run one superstep: recompute the score and queue outgoing messages.

        Once ``superstep`` reaches ``max_iterations`` the vertex only marks
        itself inactive and leaves its score untouched.
        """
        if self.superstep >= self.max_iterations:
            # Iteration budget exhausted: stop participating.
            self.active = False
            return

        # Each incoming message is a (sender, score) pair; only the score is used.
        received_trust = sum(trust for _, trust in self.incoming_messages)
        self.value = (1-self.damping_factor) * self.const_value + self.damping_factor * received_trust

        if self.out_deg == 0:
            # Nothing was ever sent from this vertex, so there is nobody to notify.
            self.outgoing_messages = []
        else:
            # Split the new score across receivers proportionally to edge amount.
            self.outgoing_messages = [
                (target, self.value * amount / self.out_deg)
                for target, amount in self.out_vertices
            ]

In [6]:
# One TrustRankVertex per node, all sharing the same bad-sender list, edge map,
# good-node count, damping factor (0.85) and superstep limit (50).
vertices = [
    TrustRankVertex(node_id, bad_sender_list, outgoing_edges, good_senders_num, 0.85, 50)
    for node_id in nodes_list
]




In [7]:
# Run the Pregel computation over every TrustRank vertex, then print the scores.
# NOTE(review): the second argument (4) is presumably the number of workers the
# pregel module partitions the vertices across -- confirm against pregel.Pregel.
Pregel = pregel.Pregel(vertices, 4)
Pregel.run()
# Emit one "<vertex id> <value>" line per vertex held by the Pregel instance.
for vertex in Pregel.vertices:
    print(vertex.id, vertex.value)

2050 0.00019255455712451864
2052 0.00019255455712451864
2053 0.00019255455712451864
2054 0.00019255455712451864
2057 0.00019255455712451864
2060 0.00019255455712451864
2061 0.00019255455712451864
2062 0.00019255455712451864
2066 0.00019255455712451864
2071 0.00019255455712451864
2072 0.00019255455712451864
2073 0.00019255455712451864
2075 0.00019255455712451864
2076 0.00019255455712451864
2078 0.00019255455712451864
2080 0.00019255455712451864
2081 0.00019255455712451864
2084 0.00019255455712451864
2085 0.00019255455712451864
2088 0.00019255455712451864
2089 0.00019255455712451864
2090 0.00019255455712451864
2093 0.00019255455712451864
2097 0.00019255455712451864
2100 0.00019255455712451864
2101 0.00019255455712451864
2104 0.00019255455712451864
2107 0.00019255455712451864
2108 0.00019255455712451864
2109 0.00019255455712451864
2111 0.00019255455712451864
2124 0.00019255455712451864
2126 0.00019255455712451864
2127 0.00019255455712451864
2136 0.00019255455712451864
2137 0.0001925545571