In [29]:
import pandas as pd
import networkx as nx
import operator
import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
import math
%matplotlib inline



In [2]:
# Weighted Signed Directed Bitcoin Alpha web of trust network
data = pd.read_csv("/Users/MicrostrRes/Downloads/soc-sign-bitcoinalpha 2.csv", header = 0) 

In [3]:
# SOURCE: node id of source, i.e., rater
# TARGET: node id of target, i.e., ratee
# RATING: the source's rating for the target, ranging from -10 to +10 in steps of 1
# TIME: the time of the rating, measured as seconds since Epoch. 
data.columns = ["RATER", "RATEE", "RATING", "TIME"]

In [4]:
data2 = data.drop(["TIME"], axis = 1)
data2.columns = ["RATER", "RATEE", "TRUSTWORTHINESS"]

In [5]:
data2.head()


Unnamed: 0,RATER,RATEE,TRUSTWORTHINESS
0,430,1,10
1,3134,1,10
2,3026,1,10
3,3010,1,10
4,804,1,10


In [6]:
# Generate graph using networkx
g = nx.from_pandas_dataframe(data2, 'RATER', 'RATEE', 'TRUSTWORTHINESS')



In [7]:
# Calculate number of nodes of the graph
nx.number_of_nodes(g)



3782

In [8]:
# Calculate number of edges of the graph
nx.number_of_edges(g)

14123

In [9]:
# Compute degree of nodes in graph
deg = nx.degree(g) 
deg_sorted = sorted(deg.items(),key = operator.itemgetter(1), reverse = True)
deg_sorted

[(1, 510),
 (8, 279),
 (3, 263),
 (2, 239),
 (11, 238),
 (7, 234),
 (177, 229),
 (4, 221),
 (15, 213),
 (5, 190),
 (10, 181),
 (26, 166),
 (9, 163),
 (6, 158),
 (22, 153),
 (13, 142),
 (33, 139),
 (58, 133),
 (95, 133),
 (12, 132),
 (30, 130),
 (16, 121),
 (17, 117),
 (24, 113),
 (25, 111),
 (43, 109),
 (19, 108),
 (7603, 108),
 (14, 104),
 (21, 103),
 (85, 102),
 (145, 100),
 (7564, 96),
 (129, 94),
 (2336, 93),
 (35, 92),
 (5342, 91),
 (27, 88),
 (29, 87),
 (40, 87),
 (42, 86),
 (51, 85),
 (7604, 85),
 (798, 83),
 (45, 82),
 (18, 81),
 (36, 81),
 (38, 81),
 (32, 78),
 (23, 74),
 (31, 74),
 (34, 74),
 (69, 74),
 (125, 72),
 (41, 71),
 (79, 69),
 (491, 69),
 (49, 68),
 (87, 68),
 (20, 66),
 (65, 65),
 (50, 64),
 (52, 64),
 (104, 64),
 (48, 63),
 (67, 63),
 (39, 62),
 (46, 62),
 (73, 62),
 (188, 62),
 (57, 61),
 (124, 61),
 (114, 60),
 (166, 60),
 (64, 59),
 (70, 59),
 (88, 59),
 (7595, 59),
 (37, 58),
 (117, 58),
 (90, 56),
 (28, 55),
 (44, 55),
 (78, 55),
 (92, 55),
 (272, 55),
 (75, 

In [10]:
# Summarize trust variable 
data2["TRUSTWORTHINESS"].describe()



count    24185.000000
mean         1.463593
std          2.903198
min        -10.000000
25%          1.000000
50%          1.000000
75%          2.000000
max         10.000000
Name: TRUSTWORTHINESS, dtype: float64

In [11]:
# Summarize rater variable
data2["RATER"].describe()

count    24185.000000
mean       863.767831
std       1591.599114
min          1.000000
25%         58.000000
50%        238.000000
75%        898.000000
max       7604.000000
Name: RATER, dtype: float64

In [12]:
# Summarize ratee variable
data2["RATEE"].describe()

count    24185.000000
mean      1051.137234
std       1871.792021
min          1.000000
25%         66.000000
50%        279.000000
75%       1068.000000
max       7604.000000
Name: RATEE, dtype: float64

In [13]:
# Eigen centrality
eigen = nx.eigenvector_centrality(g)
eigen_sorted = sorted(eigen.items(),key = operator.itemgetter(1), reverse = True)

In [37]:
eigen_sorted

[(11, 0.1932449711562966),
 (2, 0.1739879157652673),
 (177, 0.166275082140266),
 (3, 0.1586413153253278),
 (7, 0.15478457471530055),
 (8, 0.15118553565284182),
 (26, 0.14740334728408766),
 (1, 0.13638925819159495),
 (95, 0.13096085828013265),
 (24, 0.12949034471014811),
 (22, 0.1294694321733148),
 (10, 0.127322677454197),
 (9, 0.12464189701552676),
 (5, 0.1200633900580442),
 (30, 0.11705721602247998),
 (6, 0.11627186031210951),
 (15, 0.11617660918079435),
 (58, 0.11363442576886644),
 (19, 0.10967869590590124),
 (85, 0.10774843862299868),
 (33, 0.10669259040737779),
 (43, 0.10367719016483445),
 (29, 0.10179928410531014),
 (4, 0.10091434373828315),
 (25, 0.09768085340188346),
 (17, 0.09722023118608747),
 (21, 0.09472962655570749),
 (31, 0.09423272773937179),
 (42, 0.09418219363507971),
 (145, 0.0913457256120562),
 (7603, 0.08926611855157168),
 (32, 0.08906482983092065),
 (51, 0.08610688938422509),
 (7604, 0.08479966122819589),
 (12, 0.08468914334646473),
 (47, 0.0815921472194721),
 (36, 

In [18]:
# Parse raw counterparty data into trustworthy and untrustworthy samples
trustworthy = data2.loc[data2['TRUSTWORTHINESS'] > 0]
untrustworthy = data2.loc[data2['TRUSTWORTHINESS'] < 0]

In [65]:
# examine trustworthy counterparty distributions
trustworthy.describe()

Unnamed: 0,RATER,RATEE,TRUSTWORTHINESS
count,22649.0,22649.0,22649.0
mean,843.57018,807.946973,1.99532
std,1533.300113,1416.463534,1.850009
min,1.0,1.0,1.0
25%,60.0,59.0,1.0
50%,248.0,252.0,1.0
75%,909.0,933.0,2.0
max,7604.0,7604.0,10.0


In [66]:
trustworthy.head()


Unnamed: 0,RATER,RATEE,TRUSTWORTHINESS
0,430,1,10
1,3134,1,10
2,3026,1,10
3,3010,1,10
4,804,1,10


In [67]:
# examine untrustworthy counterparty distributions
untrustworthy.describe()


Unnamed: 0,RATER,RATEE,TRUSTWORTHINESS
count,1536.0,1536.0,1536.0
mean,1161.591146,4637.085286,-6.376953
std,2264.507963,3443.351957,4.075408
min,1.0,3.0,-10.0
25%,24.0,405.25,-10.0
50%,145.0,7408.5,-10.0
75%,708.0,7567.25,-1.0
max,7604.0,7604.0,-1.0


In [22]:
untrustworthy.tail()

Unnamed: 0,RATER,RATEE,TRUSTWORTHINESS
24157,7595,7556,-10
24160,7564,7596,-10
24163,7595,7604,-10
24171,7598,7603,-10
24184,7604,7603,-10


In [69]:
# Load each group of counterparties into a graph

# Generate graph of trustworthy nodes using networkx
g_trustworthy = nx.from_pandas_dataframe(trustworthy, 'RATER', 'RATEE', 'TRUSTWORTHINESS')

# Generate graph of untrustworthy nodes using networkx
g_untrustworthy = nx.from_pandas_dataframe(untrustworthy, 'RATER', 'RATEE', 'TRUSTWORTHINESS')



In [70]:
# Compute Eigenvector Centrality for trustworthy and untrustworthy counterparties
eigen_trustworthy = nx.eigenvector_centrality(g_trustworthy)
eigen_untrustworthy = nx.eigenvector_centrality(g_untrustworthy)

In [72]:
eigen_trustworthy

{1: 0.15274094411532108,
 2: 0.18521554567671897,
 3: 0.17001782501552076,
 4: 0.1094328403221647,
 5: 0.12277676155590522,
 6: 0.12245305888156732,
 7: 0.1508122759898467,
 8: 0.1280433697713301,
 9: 0.12613019931497632,
 10: 0.13144912452047688,
 11: 0.1960256360034461,
 12: 0.09446255096092597,
 13: 0.07350451234565626,
 14: 0.07642346264926536,
 15: 0.09433824116462997,
 16: 0.06310465666956527,
 17: 0.10591586552674447,
 18: 0.03671023671374007,
 19: 0.11715094359829731,
 20: 0.0744813962847965,
 21: 0.1020262236364369,
 22: 0.13564116335865864,
 23: 0.02426780352576224,
 24: 0.13689342059981952,
 25: 0.10477388336820204,
 26: 0.1527564468268986,
 27: 0.0749296702620166,
 28: 0.011040687885704961,
 29: 0.10928194635153053,
 30: 0.11395496829435645,
 31: 0.09722152103013607,
 32: 0.09687301507415524,
 33: 0.10918438648676278,
 34: 0.07913697601925807,
 35: 0.05051676652129348,
 36: 0.08476667106893368,
 37: 0.05476079420031414,
 38: 0.050372886566826515,
 39: 0.05608644547775984,
 

In [27]:
eigen_untrustworthy

{1: 0.0019908521824746647,
 2: 0.04368754690823099,
 3: 0.027817441738742496,
 4: 0.0014655876980823337,
 5: 0.05822296562954932,
 6: 0.05603051422795859,
 7: 0.20917972838501941,
 8: 0.14320448788277654,
 9: 0.07304532675835418,
 10: 0.06436640205516712,
 11: 0.17398193890104086,
 12: 2.2675861762012357e-72,
 13: 0.03922411778131094,
 14: 0.013122873550625476,
 15: 0.1492237928828233,
 16: 0.004844257457618111,
 17: 5.6766085781065436e-05,
 19: 0.03594319666302315,
 20: 0.03207248652647012,
 22: 0.05095540048593394,
 24: 0.0377789910119697,
 25: 0.026781905888305545,
 26: 0.11849669005061235,
 28: 1.400627654820746e-57,
 29: 0.023708878790257716,
 30: 0.11337541415060551,
 31: 0.058870869113093915,
 32: 0.04614776610660324,
 33: 0.03191626848075027,
 34: 0.0020408613821060353,
 35: 0.0307303474134864,
 36: 0.012538753077597121,
 37: 0.003140056914909098,
 38: 0.0034765586522646943,
 39: 0.023746652044182207,
 41: 0.005588357956389677,
 42: 3.769202129015761e-05,
 43: 0.159328480310411

In [73]:
# We perform a t-test at a 95% confidence level to determine whether or not we can 
# reject the null hypothesis that each sample comes from the same distribution as the population dist.
# NULL: There is no difference in infuence of nodes based on trustworthiness 

# First, we convert dicts to dataframes
eigen_trustworthy_df  = pd.DataFrame(eigen_trustworthy.items())
eigen_untrustworthy_df = pd.DataFrame(eigen_untrustworthy.items())





In [74]:
# Now we conduct our hypothesis test 

# H_0: There is no difference in eigenvector centrality or influence between the two samples representing 
# trustworthy and untrustworthy nodes.

#H_1: There is a difference in eigenvector centrality or influence between the two samples representing 
# trustworthy and untrustworthy nodes 
    
stats.ttest_ind(eigen_trustworthy_df[1],eigen_untrustworthy_df[1])

Ttest_indResult(statistic=-13.486172378807034, pvalue=1.139543038012769e-40)

In [75]:
# Since we have a very small p-value, we can reject the null hypothesis and conclude that there is a difference
# in node influence based on the trustworthiness of the participant in the bitcoin alpha network.