In [17]:
import networkx as nx
import matplotlib.pyplot as plt
from itertools import combinations as comb
from pprint import pprint
import pandas as pd


def graph_and_stats(fname):
    G = nx.Graph()
    self_loop_count = 0
    pos_count = 0
    neg_count = 0
    with open(fname, "r") as f:
        for line in f:
            reviewer, reviewee, weight = tuple(map(int, line.split(",")))
            self_loop_count += 1 if reviewer == reviewee else 0
            pos_count += 1 if weight == 1 and reviewer != reviewee else 0
            neg_count += 1 if weight == -1 and reviewer != reviewee else 0
            G.add_edge(reviewer, reviewee, weight=weight)
    return self_loop_count, pos_count, neg_count, G

self_loop_count, pos_count, neg_count, G = graph_and_stats("epinions_small.csv")

In [18]:
weights = nx.get_edge_attributes(G, 'weight')

In [19]:
triads = [x for x in nx.enumerate_all_cliques(G) if len(x) == 3]

In [20]:
triads_and_weights = list(map(lambda x: list(map(lambda x: (x, weights[x]), comb(x, 2))), triads))

In [21]:
from pprint import pprint
pprint(triads_and_weights[:20]) #ABBREVIATED

[[((264472, 291725), 1), ((264472, 205645), 1), ((291725, 205645), 1)],
 [((264472, 291725), 1), ((264472, 282870), 1), ((291725, 282870), 1)],
 [((264472, 291725), 1), ((264472, 205068), 1), ((291725, 205068), 1)],
 [((264472, 291725), 1), ((264472, 244660), 1), ((291725, 244660), 1)],
 [((264472, 233969), 1), ((264472, 212893), 1), ((233969, 212893), -1)],
 [((264472, 233969), 1), ((264472, 277629), 1), ((233969, 277629), -1)],
 [((264472, 233969), 1), ((264472, 204622), 1), ((233969, 204622), 1)],
 [((264472, 233969), 1), ((264472, 212234), 1), ((233969, 212234), 1)],
 [((264472, 233969), 1), ((264472, 213839), 1), ((233969, 213839), -1)],
 [((264472, 233969), 1), ((264472, 261565), 1), ((233969, 261565), 1)],
 [((264472, 233969), 1), ((264472, 210929), 1), ((233969, 210929), 1)],
 [((264472, 233969), 1), ((264472, 203599), 1), ((233969, 203599), 1)],
 [((264472, 233969), 1), ((264472, 282870), 1), ((233969, 282870), 1)],
 [((264472, 233969), 1), ((264472, 224409), 1), ((233969, 224

In [22]:
print("NUMBER OF SELF LOOPS:", self_loop_count)

NUMBER OF SELF LOOPS: 73


In [59]:
print("NUMBER OF TOTNODES:", pos_count+neg_count-self_loop_count)

NUMBER OF TOTNODES: 65916


In [23]:
print("NUMBER OF TRUST EDGES:", pos_count)

NUMBER OF TRUST EDGES: 57010


In [24]:
print("NUMBER OF DISTRUST EDGES:", neg_count)

NUMBER OF DISTRUST EDGES: 8979


In [25]:
print("NUMBER OF NODES IN TRIADS:",len(set([val for sublist in triads for val in sublist])))

NUMBER OF NODES IN TRIADS: 3587


In [26]:
print("NUMBER OF NODES TOTAL:",len(G.nodes()))

NUMBER OF NODES TOTAL: 10386


In [27]:
def get_trust_category(entry):
    categories = {
        (1,1,1) : "TTT",
        (-1,1,1) : "TTD",
        (-1,-1,1) : "TDD",
        (-1,-1,-1) : "DDD"
    }
    return categories[tuple(sorted([x[1] for x in entry]))]

for i in range(len(triads_and_weights)):
    triads_and_weights[i].append(get_trust_category(triads_and_weights[i]))

In [28]:
col_format = tuple(zip(*triads_and_weights))
table = pd.DataFrame({
    "trust_category": col_format[3],
    "edge_1": tuple(zip(*col_format[0]))[0],
    "trust_1": tuple(zip(*col_format[0]))[1],
    "edge_2": tuple(zip(*col_format[1]))[0],
    "trust_2": tuple(zip(*col_format[1]))[1],
    "edge_3": tuple(zip(*col_format[2]))[0],
    "trust_3": tuple(zip(*col_format[2]))[1]
})

In [44]:
triad_table = table.sort_values(['trust_category'],ascending=False).reset_index(drop=True)

In [50]:
triad_table.trust_category.unique()

array(['TTT', 'TTD', 'TDD', 'DDD'], dtype=object)

In [31]:
num_edges = pos_count + neg_count
p_pos = pos_count / num_edges
p_neg = 1 - p_pos
p_type_1 = p_pos * p_pos * p_pos
p_type_2 = 3 * (p_pos * p_pos * p_neg)
p_type_3 = 3 * (p_pos * p_neg * p_neg)
p_type_4 = p_neg * p_neg * p_neg

In [60]:
print("PROBABILITY THAT AN EDGE WILL BE POSITIVE:", p_pos)
print("PROBABILITY THAT AN EDGE WILL BE NEGATIVE:", p_neg)

PROBABILITY THAT AN EDGE WILL BE POSITIVE: 0.8639318674324509
PROBABILITY THAT AN EDGE WILL BE NEGATIVE: 0.13606813256754913


In [57]:
print("Expected distribution of TTT, TTD, TDD, and DDD triads are respectively {}".format((p_type_1,p_type_2,p_type_3,p_type_4)))

Expected distribution of TTT, TTD, TDD, and DDD triads are respectively (0.6448199739644335, 0.3046748928026652, 0.04798589479872181, 0.0025192384341795188)


In [33]:
n_triads = len(triad_table)
type_1 = len(triad_table[triad_table['trust_category'] == 'TTT'])
type_2 = len(triad_table[triad_table['trust_category'] == 'TTD'])
type_3 = len(triad_table[triad_table['trust_category'] == 'TDD'])
type_4 = len(triad_table[triad_table['trust_category'] == 'DDD'])

In [58]:
print("Actual distribution of TTT, TTD, TDD, and DDD triads are respectively {}".format((type_1 / n_triads, type_2 / n_triads, type_3 / n_triads, type_4 / n_triads)))

Actual distribution of TTT, TTD, TDD, and DDD triads are respectively (0.7333896034789629, 0.15341107176278465, 0.09767151690856712, 0.015527807849685315)
