In [1]:
from google.colab import drive

# Mount Google Drive inside the Colab runtime; read_graph() below loads the
# dataset CSVs from /content/gdrive/MyDrive.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
import networkx as nx
import numpy as np
import pandas as pd
import math
import random
from collections import Counter
import plotly.graph_objects as go

In [3]:
def initialize_scores(G):
  """Build the starting fairness and goodness scores for every node of ``G``.

  Fairness starts at 1 for every node. Goodness starts at the mean weight of
  the node's incoming edges, or at 0 when the node has no incoming edges.

  Args:
    G: directed graph exposing ``nodes()`` and ``in_degree(node, weight=...)``
      (a ``networkx.DiGraph`` in this notebook).

  Returns:
    ``(fairness, goodness)``: two dicts keyed by node.
  """
  fairness = {}
  goodness = {}

  for node in G.nodes():
    fairness[node] = 1
    in_count = G.in_degree(node)
    if in_count:
      goodness[node] = G.in_degree(node, weight='weight') * 1.0 / in_count
    else:
      # No incoming edges: there is nothing to average, so start at 0.
      # (Checked explicitly instead of swallowing every exception.)
      goodness[node] = 0
  return fairness, goodness
  
def compute_fairness_goodness(G, max_iter=100, tol=1e-6, verbose=True):
  """Iteratively compute a fairness score and a goodness score for every node.

  Starting from ``initialize_scores(G)``, each round first sets a node's
  goodness to the mean of ``fairness[source] * weight`` over its incoming
  edges, then sets a node's fairness to the mean of
  ``1 - |weight - goodness[target]| / 2`` over its outgoing edges. Nodes
  without incoming (resp. outgoing) edges keep their current goodness
  (resp. fairness).

  Args:
    G: directed graph exposing ``nodes()``, ``in_edges(node, data='weight')``
      and ``out_edges(node, data='weight')`` (a ``networkx.DiGraph`` here).
      NOTE(review): the fairness formula presumably expects weights in
      [-1, 1]; confirm against the data loader.
    max_iter: maximum number of rounds to run.
    tol: stop early once the summed absolute change of fairness and the
      summed absolute change of goodness in a round are both below this.
    verbose: when true, print per-round progress (the original behaviour).

  Returns:
    ``(fairness, goodness)``: two dicts keyed by node.
  """
  fairness, goodness = initialize_scores(G)

  nodes = G.nodes()
  iteration = 0
  while iteration < max_iter:
    df = 0
    dg = 0

    if verbose:
      print('--------------------')
      print('Iteration number', iteration)
      print('Updating goodness')
    for node in nodes:
      inedges = list(G.in_edges(node, data='weight'))
      if not inedges:
        # No incoming edges: nothing to average, keep the current goodness.
        continue
      g = 0
      for edge in inedges:
        # edge is (source, target, weight).
        g += fairness[edge[0]] * edge[2]
      new_goodness = g / len(inedges)
      dg += abs(new_goodness - goodness[node])
      goodness[node] = new_goodness

    if verbose:
      print('Updating fairness')
    for node in nodes:
      outedges = list(G.out_edges(node, data='weight'))
      if not outedges:
        # No outgoing edges: nothing to average, keep the current fairness.
        continue
      f = 0
      for edge in outedges:
        f += 1.0 - abs(edge[2] - goodness[edge[1]]) / 2.0
      new_fairness = f / len(outedges)
      df += abs(new_fairness - fairness[node])
      fairness[node] = new_fairness

    if verbose:
      print(f'Differences in fairness score = {df} and goodness score = {dg}')
    if df < tol and dg < tol:
      break
    iteration += 1

  return fairness, goodness

In [4]:
def read_graph(filename, base_dir='/content/gdrive/MyDrive'):
    """Load a comma-separated edge list into a weighted directed graph.

    Each non-blank line is split on commas; the first two fields become the
    edge's source and target (kept as strings) and the third field, divided
    by 10, becomes the edge's ``weight``. Any further fields are ignored.

    Args:
        filename: name of the CSV file inside ``base_dir``.
        base_dir: directory containing the file; defaults to the mounted
            Google Drive folder used by this notebook.

    Returns:
        A ``networkx.DiGraph`` with one weighted edge per input line.

    Raises:
        OSError: if the file cannot be opened.
        IndexError, ValueError: if a non-blank line has fewer than three
            fields or a non-numeric third field.
    """
    G = nx.DiGraph()
    # The context manager closes the file even if a line fails to parse.
    with open(f'{base_dir}/{filename}', "r") as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            fields = line.split(",")
            # Scale the rating by 1/10 so edge weights land in [-1, 1]
            # (assumes the CSV ratings are on a -10..10 scale; TODO confirm).
            G.add_edge(fields[0], fields[1], weight=float(fields[2]) / 10.0)
    return G

In [7]:
def plot_fairness_distribution(fairness):
    """Plot the fraction of nodes falling in each fairness bin.

    Scores are grouped into 20 equal-width, non-overlapping bins covering
    [0, 1]. Every bin is half-open ``[low, high)`` except the last, which
    also includes 1.0. Fractions are drawn against each bin's lower edge.

    Args:
        fairness: mapping of node -> fairness score.
    """
    n_bins = 20
    # Edges are computed as i / n_bins (not lower + width) so that adjacent
    # bins share exactly the same boundary and no score is counted twice.
    edges = [i / n_bins for i in range(n_bins + 1)]
    x = edges[:-1]

    counts = [0] * n_bins
    for value in fairness.values():
        for i in range(n_bins):
            in_bin = edges[i] <= value < edges[i + 1]
            # The top bin is closed on the right so a score of exactly 1.0 is kept.
            if in_bin or (i == n_bins - 1 and value == edges[-1]):
                counts[i] += 1
                break

    total = sum(counts)
    # Empty (or entirely out-of-range) input: plot zeros instead of dividing by zero.
    frac = [count / total for count in counts] if total else [0.0] * n_bins

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=x, y=frac, line=dict(color='rosybrown', width=4)))
    fig.update_layout(title='Fairness distribution',
                      xaxis_title='Fairness score',
                      yaxis_title='Fraction of nodes with Fairness f')
    fig.show()

In [15]:
def plot_goodness_distribution(goodness):
    """Plot the fraction of nodes falling in each goodness bin.

    Scores are grouped into 40 equal-width, non-overlapping bins covering
    [-1, 1]. Every bin is half-open ``[low, high)`` except the last, which
    also includes 1.0. Fractions are drawn against each bin's lower edge.

    Args:
        goodness: mapping of node -> goodness score.
    """
    bins_per_unit = 20
    # Edges are computed as i / bins_per_unit (not lower + width): with
    # floating point, -0.85 + 0.05 is slightly above -0.8, which would let a
    # score of exactly -0.8 be counted in two neighbouring bins.
    edges = [i / bins_per_unit for i in range(-bins_per_unit, bins_per_unit + 1)]
    x = edges[:-1]
    n_bins = len(x)

    counts = [0] * n_bins
    for value in goodness.values():
        for i in range(n_bins):
            in_bin = edges[i] <= value < edges[i + 1]
            # The top bin is closed on the right so a score of exactly 1.0 is kept.
            if in_bin or (i == n_bins - 1 and value == edges[-1]):
                counts[i] += 1
                break

    total = sum(counts)
    # Empty (or entirely out-of-range) input: plot zeros instead of dividing by zero.
    frac = [count / total for count in counts] if total else [0.0] * n_bins

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=x, y=frac, line=dict(color='rosybrown', width=4)))
    fig.update_layout(title='Goodness distribution',
                      xaxis_title='Goodness score',
                      yaxis_title='Fraction of nodes with Goodness g')
    fig.show()

**Bitcoin OTC**

In [5]:
# Load the Bitcoin OTC edge list from Drive into a weighted directed graph.
G_otc = read_graph('soc-sign-bitcoinotc.csv')
# Run the iterative scoring. Both results are dicts keyed by node:
# fairness is derived from a node's outgoing edges, goodness from its incoming edges.
fairness_otc, goodness_otc = compute_fairness_goodness(G_otc)

--------------------
Iteration number 0
Updating goodness
Updating fairness
Differences in fairness score = 382.2132311869623 and goodness score = 0.0
--------------------
Iteration number 1
Updating goodness
Updating fairness
Differences in fairness score = 34.465839234315546 and goodness score = 102.29167364039796
--------------------
Iteration number 2
Updating goodness
Updating fairness
Differences in fairness score = 2.390951436243564 and goodness score = 6.965249258409005
--------------------
Iteration number 3
Updating goodness
Updating fairness
Differences in fairness score = 0.3894800149026927 and goodness score = 1.0599943633431452
--------------------
Iteration number 4
Updating goodness
Updating fairness
Differences in fairness score = 0.09640950547087862 and goodness score = 0.23918064834894587
--------------------
Iteration number 5
Updating goodness
Updating fairness
Differences in fairness score = 0.027072632207669256 and goodness score = 0.06264694547009621
-----------

In [6]:
# Label every OTC user from its goodness score: 0 = honest (goodness > 0),
# 1 = fraudulent (goodness <= 0). Note that a goodness of exactly 0 falls in
# the fraudulent bucket.
user_labels_otc = {
  user: 0 if score > 0 else 1
  for user, score in goodness_otc.items()
}
# Labels are 0/1, so their sum is the number of fraudulent users.
num_fraudulent_users = sum(user_labels_otc.values())
num_honest_users = len(user_labels_otc) - num_fraudulent_users
print(f'Honest users: {num_honest_users}')
print(f'Fraudulent users: {num_fraudulent_users}')

Honest users: 5030
Fraudulent users: 851


In [16]:
# Show how the Bitcoin OTC fairness and goodness scores are distributed.
plot_fairness_distribution(fairness_otc)
plot_goodness_distribution(goodness_otc)

**Bitcoin Alpha**

In [10]:
# Load the Bitcoin Alpha edge list from Drive into a weighted directed graph.
G_alpha = read_graph('soc-sign-bitcoinalpha.csv')
# Run the iterative scoring. Both results are dicts keyed by node:
# fairness is derived from a node's outgoing edges, goodness from its incoming edges.
fairness_alpha, goodness_alpha = compute_fairness_goodness(G_alpha)

--------------------
Iteration number 0
Updating goodness
Updating fairness
Differences in fairness score = 222.7880495905861 and goodness score = 0.0
--------------------
Iteration number 1
Updating goodness
Updating fairness
Differences in fairness score = 19.262035480152207 and goodness score = 52.77075812205254
--------------------
Iteration number 2
Updating goodness
Updating fairness
Differences in fairness score = 0.899897124291851 and goodness score = 2.7312110400250127
--------------------
Iteration number 3
Updating goodness
Updating fairness
Differences in fairness score = 0.12091092477087845 and goodness score = 0.3060354601267039
--------------------
Iteration number 4
Updating goodness
Updating fairness
Differences in fairness score = 0.023750526935523875 and goodness score = 0.05200211343477482
--------------------
Iteration number 5
Updating goodness
Updating fairness
Differences in fairness score = 0.005540735795811713 and goodness score = 0.010606993766081516
--------

In [11]:
# Label every Alpha user from its goodness score: 0 = honest (goodness > 0),
# 1 = fraudulent (goodness <= 0). Note that a goodness of exactly 0 falls in
# the fraudulent bucket.
user_labels_alpha = {
  user: 0 if score > 0 else 1
  for user, score in goodness_alpha.items()
}
# Labels are 0/1, so their sum is the number of fraudulent users.
num_fraudulent_users = sum(user_labels_alpha.values())
num_honest_users = len(user_labels_alpha) - num_fraudulent_users
print(f'Honest users: {num_honest_users}')
print(f'Fraudulent users: {num_fraudulent_users}')

Honest users: 3469
Fraudulent users: 314


In [17]:
# Show how the Bitcoin Alpha fairness and goodness scores are distributed.
plot_fairness_distribution(fairness_alpha)
plot_goodness_distribution(goodness_alpha)