In [1]:
using CSV
using DataFrames


In [2]:
path = "../../../data/epinions"

df = CSV.read(joinpath(path, "user_rating.txt"), DataFrame, delim="\t", header=false)

rename!(df, [:source, :target, :sign, :date])

first(df)

Row,source,target,sign,date
Unnamed: 0_level_1,Int64,Int64,Int64,String15
1,3287060356,232085,-1,2001/01/10


In [5]:
# get all edges
edges = [(df.source[i], df.target[i]) for i in 1:size(df, 1)]

edges[1:5]
print([length(unique(edges)), length(edges)])

# remove self loops
edges = [(u, v) for (u, v) in edges if u != v]
length(edges)

[841372, 841372]

840799

In [6]:
# check if for repeated edges the sign is the same

# get all edges
edges_sign = [(df.source[i], df.target[i], df.sign[i]) for i in 1:size(df, 1)]

# edges_sign = unique(edges_sign)

# remove self loops
edges_sign = [(u, v, s) for (u, v, s) in edges_sign if u != v]

# get the number of unique edges
length(edges_sign)

840799

We have some edges that change sign. Let's see if these edges form any triads

In [7]:
nrow(filter(x -> x.source != x.target, df)), nrow(df)

(840799, 841372)

In [30]:
# get list of all nodes
nodes = unique([[u for (u, v, s) in edges_sign]..., [v for (u, v, s) in edges_sign]...])

print(length(nodes))

# filter out self loops
df_filtered = filter(x -> x.source != x.target, df)

# get list of outgoing edges for each node by grouping by source
node_outs_gbf = groupby(df_filtered, :source)

# # filter out self loops
# node_outs_gbf = filter(x -> x[1, :source] != x[1, :target], node_outs_gbf)

# get list of outgoing edges for each node by collecting targets
node_outs = combine(node_outs_gbf, x -> [x[!, :target]])
rename!(node_outs, :x1 => :targets)

# create a dictionary with the outgoing edges for each node
node_outs_dict = Dict(zip(node_outs.source, unique.(node_outs.targets)))

131580

Dict{Int64, Vector{Int64}} with 95039 entries:
  244336      => [244337]
  484878      => [562040]
  53990887300 => [330549]
  718997      => [450647]
  427681      => [426422, 429119]
  35968356228 => [394444]
  552449      => [532475]
  308174      => [308175]
  14502432644 => [12873338756]
  19984650116 => [517636, 657993]
  40101711748 => [469829]
  442240      => [359990]
  375504      => [254134, 221863, 229318]
  390703      => [390704, 407541]
  380458      => [421074, 308968]
  519172      => [599981]
  622587      => [635726, 210284]
  17013510020 => [355176]
  674175      => [422545]
  ⋮           => ⋮

In [9]:
# get transitive triads

triads = []
for (source, targets) in node_outs_dict
    for target in targets
        if target in keys(node_outs_dict)
            for target_target in node_outs_dict[target]
                if target_target in targets
                    append!(triads, [(source, target, target_target)])
                end
            end
        end
    end
end
length(triads), length(unique(triads))

(10961993, 10961993)

In [8]:
# get cyclic triads
# thanks to sorting, each such triad will be counted only once

source_nodes = sort(collect(keys(node_outs_dict)))

cyclic_triads = []
for source in source_nodes
    targets = sort(node_outs_dict[source])
    # filter out targets that have smaller id than source
    targets = [target for target in targets if target > source]
    for target in targets
        if target in keys(node_outs_dict)
            target_targets = sort(node_outs_dict[target])
            # filter out target_targets that have smaller id than source
            target_targets = [target_target for target_target in target_targets if target_target > source]
            # I hope above is correct
            # target_targets = node_outs_dict[target]
            for target_target in target_targets
                if target_target in keys(node_outs_dict)
                    if source in node_outs_dict[target_target]
                        append!(cyclic_triads, [(source, target, target_target)])
                    end
                end
            end
        end
    end
end
length(cyclic_triads), length(unique(cyclic_triads))

(2355679, 2355679)

In [20]:
cyclic_triads[end]  

(7666, 7691, 6946)

In [9]:
node_outs_dict_repeated = Dict(zip(node_outs.source, node_outs.targets))

# find edges that are repeated in node_outs_dict_repeated
repeated_edges = unique([(source, target) for (source, targets) in node_outs_dict_repeated for target in targets if sum(target .== targets) > 1])

Tuple{Int64, Any}[]

In [10]:
df_triads = DataFrame(triads)
rename!(df_triads, [:P, :O, :X]);

po_edges = [(df_triads.P[i], df_triads.O[i]) for i in 1:size(df_triads, 1)]
ox_edges = [(df_triads.O[i], df_triads.X[i]) for i in 1:size(df_triads, 1)]
px_edges = [(df_triads.P[i], df_triads.X[i]) for i in 1:size(df_triads, 1)];

In [11]:
df_cyclic_triads = DataFrame(cyclic_triads)
rename!(df_cyclic_triads, [:P, :O, :X]);

po_edges_cyclic = [(df_cyclic_triads.P[i], df_cyclic_triads.O[i]) for i in 1:size(df_cyclic_triads, 1)]
ox_edges_cyclic = [(df_cyclic_triads.O[i], df_cyclic_triads.X[i]) for i in 1:size(df_cyclic_triads, 1)]
xp_edges_cyclic = [(df_cyclic_triads.X[i], df_cyclic_triads.P[i]) for i in 1:size(df_cyclic_triads, 1)];


In [12]:
df_all_triads = DataFrame([triads..., cyclic_triads...])
rename!(df_all_triads, [:P, :O, :X]);
nrow(df_all_triads)
# po_edges_all = [(df_all_triads.P[i], df_all_triads.O[i]) for i in 1:size(df_all_triads, 1)]

13317672

In [13]:
# find number of triads containing repeated edges

affected_triads = []
for edge in repeated_edges
    append!(affected_triads, findall([edge] .== po_edges))
    # break
end
length(affected_triads), length(unique(affected_triads))

(0, 0)

In [14]:
using BenchmarkTools

In [15]:
edge = po_edges[1]
@benchmark findlast([edge] .== edges)

BenchmarkTools.Trial: 5263 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m726.676 μs[22m[39m … [35m 2.289 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m912.546 μs              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m936.398 μs[22m[39m ± [32m84.168 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.00% ± 0.00%

  [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▆[39m█[39m▇[39m▅[39m▂[34m▂[39m[39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▂[39m▁[39m▂[39m▂[39m▁

In [16]:
@benchmark findall([edge] .== po_edges)

BenchmarkTools.Trial: 446 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m 9.812 ms[22m[39m … [35m 13.132 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m11.136 ms               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m11.200 ms[22m[39m ± [32m678.708 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.00% ± 0.00%

  [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▃[39m [39m [39m [39m [39m [39m [39m [39m [39m▃[39m [39m▄[39m▇[39m [39m▂[39m [39m█[34m▄[39m[39m▆[32m▇[39m[39m▇[39m [39m [39m▂[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▅[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▅[39m▂[39m▂[39m▂[39m▂[

In [22]:
edges_sign[findlast([edge] .== edges)][3]

1

In [23]:
# get sign of edges in triads. Take the last value of edge

df_triads[!, "po_sign"] = [edges_sign[findlast([edge] .== edges)][3] for edge in po_edges]
df_triads[!, "ox_sign"] = [edges_sign[findlast([edge] .== edges)][3] for edge in ox_edges]
df_triads[!, "px_sign"] = [edges_sign[findlast([edge] .== edges)][3] for edge in px_edges]
first(df_triads, 5)

Row,P,O,X,po_sign,ox_sign,px_sign
Unnamed: 0_level_1,Int64,Int64,Int64,Int64,Int64,Int64
1,427681,426422,429119,1,1,1
2,427681,429119,426422,1,1,1
3,390703,390704,407541,1,1,1
4,390703,407541,390704,1,1,1
5,564810,565868,319000,1,1,1


In [15]:
# get sign of edges in cyclic triads. Take the last value of edge

df_cyclic_triads[!, "po_sign"] = [edges_sign[findall([edge] .== edges)][end][3] for edge in po_edges_cyclic]
df_cyclic_triads[!, "ox_sign"] = [edges_sign[findall([edge] .== edges)][end][3] for edge in ox_edges_cyclic]
df_cyclic_triads[!, "xp_sign"] = [edges_sign[findall([edge] .== edges)][end][3] for edge in xp_edges_cyclic]
first(df_cyclic_triads, 5)

Row,P,O,X,po_sign,ox_sign,xp_sign
Unnamed: 0_level_1,Int64,Int64,Int64,Int64,Int64,Int64
1,199781,205491,222004,1,1,-1
2,199781,205491,238184,1,1,1
3,199781,205491,301950,1,1,1
4,199781,206437,301950,1,1,1
5,199781,210284,342046,-1,-1,1


In [16]:
# get sign of edges in all triads. Take the last value of edge

df_all_triads[!, "po_sign"] = [df_triads[!, "po_sign"]..., df_cyclic_triads[!, "po_sign"]...]
df_all_triads[!, "ox_sign"] = [df_triads[!, "ox_sign"]..., df_cyclic_triads[!, "ox_sign"]...]
df_all_triads[!, "xpx_sign"] = [df_triads[!, "px_sign"]..., df_cyclic_triads[!, "xp_sign"]...]

first(df_all_triads, 5)

Row,P,O,X,po_sign,ox_sign,xpx_sign
Unnamed: 0_level_1,Int64,Int64,Int64,Int64,Int64,Int64
1,427681,426422,429119,1,1,1
2,427681,429119,426422,1,1,1
3,390703,390704,407541,1,1,1
4,390703,407541,390704,1,1,1
5,564810,565868,319000,1,1,1


In [24]:
# save df_triads
CSV.write(joinpath(path, "epinions-triads.csv"), df_triads)

"../../../data/epinions/epinions-triads.csv"

In [17]:
# save df_cyclic_triads
CSV.write(joinpath(path, "epinions-cyclic-triads.csv"), df_cyclic_triads)

"../../../data/epinions/epinions-cyclic-triads.csv"

In [18]:
# save df_all_triads
CSV.write(joinpath(path, "epinions-all-triads.csv"), df_all_triads)

"../../../data/epinions/epinions-all-triads.csv"

In [3]:
df_triads = CSV.read(joinpath(path, "epinions-triads.csv"), DataFrame)

triads = [(df_triads.P[i], df_triads.O[i], df_triads.X[i]) for i in 1:size(df_triads, 1)]

ArgumentError: ArgumentError: "../../../data/epinions/epinions-triads.csv" is not a valid file or doesn't exist

In [4]:
df_cyclic_triads = CSV.read(joinpath(path, "epinions-cyclic-triads.csv"), DataFrame)

cyclic_triads = [(df_cyclic_triads.P[i], df_cyclic_triads.O[i], df_cyclic_triads.X[i]) for i in 1:size(df_cyclic_triads, 1)]

2355679-element Vector{Tuple{Int64, Int64, Int64}}:
 (199781, 205491, 222004)
 (199781, 205491, 238184)
 (199781, 205491, 301950)
 (199781, 206437, 301950)
 (199781, 210284, 342046)
 (199781, 215415, 238184)
 (199781, 216430, 235645)
 (199781, 216430, 239694)
 (199781, 216430, 241578)
 (199781, 216430, 342046)
 ⋮
 (71446925188, 79903166340, 81057648516)
 (71543787396, 71547195268, 71546933124)
 (76839882628, 77010800516, 76850368388)
 (77582012292, 79489109892, 80065564548)
 (77582012292, 80065564548, 79489109892)
 (77582012292, 80488009604, 77939838852)
 (78012845956, 80488009604, 81057648516)
 (82153148292, 82222485380, 82519232388)
 (82153148292, 82519232388, 82222485380)

In [25]:
df_all_triads = CSV.read(joinpath(path, "epinions-all-triads.csv"), DataFrame)

Row,P,O,X,po_sign,ox_sign,xpx_sign
Unnamed: 0_level_1,Int64,Int64,Int64,Int64,Int64,Int64
1,427681,426422,429119,1,1,1
2,427681,429119,426422,1,1,1
3,390703,390704,407541,1,1,1
4,390703,407541,390704,1,1,1
5,564810,565868,319000,1,1,1
6,564810,319000,565868,1,1,1
7,355643,462288,502726,1,1,1
8,355643,568813,502726,1,1,1
9,355643,568813,535161,1,1,1
10,355643,568813,535160,1,1,1


In [26]:
# identify triads that consist of the same edges

all_triads = [triads..., cyclic_triads...]

all_triads_ordered = sort.([[p, o, x] for (p, o, x) in all_triads])
all_triads_unique = unique(all_triads_ordered)

4910076-element Vector{Vector{Int64}}:
 [426422, 427681, 429119]
 [390703, 390704, 407541]
 [319000, 564810, 565868]
 [355643, 462288, 502726]
 [355643, 502726, 568813]
 [355643, 535161, 568813]
 [355643, 535160, 568813]
 [355643, 490981, 568813]
 [355643, 568813, 636621]
 [355643, 542386, 568813]
 ⋮
 [55554969476, 58242994052, 64653987716]
 [55554969476, 60367671172, 64653987716]
 [55554969476, 64300879748, 64653987716]
 [55554969476, 64653987716, 65500057476]
 [55554969476, 64653987716, 66143620996]
 [55554969476, 64653987716, 70562844548]
 [57838243716, 64653987716, 72217235332]
 [59745079172, 59807862660, 64353832836]
 [64653987716, 70183391108, 72217235332]

In [37]:
y = rand(1:10, 10)

map(x->findall(Ref(x) .== y) , unique(y)), y

([[1, 6, 8], [2], [3, 7], [4], [5], [9, 10]], [6, 5, 9, 2, 3, 6, 9, 6, 1, 1])

In [None]:
using BenchmarkTools

maxiter = 10000
y = all_triads_unique[1:maxiter]

@benchmark Dict(x->findall(Ref(x) .== y) , unique(y))

BenchmarkTools.Trial: 8 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m665.846 ms[22m[39m … [35m723.258 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.58% … 0.67%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m687.223 ms               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.63%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m692.352 ms[22m[39m ± [32m 18.622 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.72% ± 0.26%

  [39m█[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m█[39m [39m [39m█[39m [34m█[39m[39m [39m [39m [39m [39m [39m [39m█[39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m█[39m [39m [39m [39m [39m [39m█[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m█[39m [39m 
  [39m█[39m▁[39m▁[39m▁

In [63]:
Ref(y[1])

Base.RefValue{Tuple{Int64, Int64, Int64}}((426422, 427681, 429119))

In [55]:
@benchmark Dict(y .=> [findall(Ref(x) .== all_triads_ordered) for x in y])

BenchmarkTools.Trial: 3 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m1.840 s[22m[39m … [35m  1.860 s[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.32% … 0.32%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m1.851 s              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.32%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m1.850 s[22m[39m ± [32m10.147 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.38% ± 0.10%

  [34m█[39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [32m [39m[39m█[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m█[39m [39m 
  [34m█[39m[39m▁[39m▁[39m▁[39m▁[39m▁[39m▁[39m▁[39m▁[

In [54]:
@benchmark Dict(y .=> map(x->findall(Ref(x) .== all_triads_ordered) , y))

BenchmarkTools.Trial: 3 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m1.842 s[22m[39m … [35m   2.102 s[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.32% … 0.48%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m1.846 s               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.34%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m1.930 s[22m[39m ± [32m148.755 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.38% ± 0.09%

  [34m█[39m[39m█[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m█[39m [39m 
  [34m█[39m[39m█[39m▁[39m▁[39m▁[39m▁[39m▁[39m

In [62]:
@benchmark Dict(y .=> [findall(all_triads_ordered .== [triad]) for triad in y])

BenchmarkTools.Trial: 3 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m1.957 s[22m[39m … [35m  2.008 s[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.33% … 0.42%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m1.982 s              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.32%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m1.982 s[22m[39m ± [32m25.389 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.34% ± 0.07%

  [34m█[39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [32m█[39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m█[39m [39m 
  [34m█[39m[39m▁[39m▁[39m▁[39m▁[39m▁[39m▁[39m▁[39m▁[

In [31]:
all_triads_ordered = [tuple(sort([p, o, x])...) for (p, o, x) in all_triads]
all_triads_unique = unique(all_triads_ordered)

# y = all_triads_unique[1:maxiter]

4910076-element Vector{Tuple{Int64, Int64, Int64}}:
 (426422, 427681, 429119)
 (390703, 390704, 407541)
 (319000, 564810, 565868)
 (355643, 462288, 502726)
 (355643, 502726, 568813)
 (355643, 535161, 568813)
 (355643, 535160, 568813)
 (355643, 490981, 568813)
 (355643, 568813, 636621)
 (355643, 542386, 568813)
 ⋮
 (55554969476, 58242994052, 64653987716)
 (55554969476, 60367671172, 64653987716)
 (55554969476, 64300879748, 64653987716)
 (55554969476, 64653987716, 65500057476)
 (55554969476, 64653987716, 66143620996)
 (55554969476, 64653987716, 70562844548)
 (57838243716, 64653987716, 72217235332)
 (59745079172, 59807862660, 64353832836)
 (64653987716, 70183391108, 72217235332)

In [None]:
sort!(all_triads_unique)

4910076-element Vector{Tuple{Int64, Int64, Int64}}:
 (199781, 200233, 223783)
 (199781, 200233, 226726)
 (199781, 200233, 239694)
 (199781, 200233, 241223)
 (199781, 200233, 262822)
 (199781, 205491, 222004)
 (199781, 205491, 225480)
 (199781, 205491, 238184)
 (199781, 205491, 239694)
 (199781, 205491, 241374)
 ⋮
 (78715129732, 78920388484, 79250952068)
 (79810891652, 80065564548, 81057648516)
 (79880884100, 79946289028, 79977090948)
 (80918843268, 80919891844, 80920022916)
 (80975073156, 81057648516, 81386901380)
 (82078437252, 82641653636, 83384438660)
 (82153148292, 82222485380, 82389602180)
 (82153148292, 82222485380, 82519232388)
 (82425515908, 83329126276, 83592843140)

In [35]:
maxiter = 100
y = all_triads_unique[1:maxiter]

map(x->findall(Ref(x) .== all_triads_ordered) , y)

100-element Vector{Vector{Int64}}:
 [1, 2]
 [3, 4, 1353099, 12135199]
 [5, 6]
 [7, 61, 4044931, 12098341]
 [8, 63, 422577, 422586, 4044926, 4044933, 12098348, 12098384]
 [9, 422582]
 [10, 45, 422584, 422590, 7207188, 7207207, 12098361, 12098385]
 [11, 422585]
 [12, 27, 422583, 422598, 4710397, 4710451, 12098387, 12098394]
 [13, 78, 422587, 422601, 5874875, 5874916, 12098368, 12098386]
 ⋮
 [127, 10911256]
 [128, 10911258, 10922261, 11316784]
 [129, 294, 10911254, 11318266]
 [130, 457, 10911266, 11318265]
 [131, 10911255]
 [132, 10911252]
 [133, 637, 10911276, 11318258]
 [134, 442, 10911265, 11318256]
 [135, 212, 10911251, 11200044]

In [38]:
all_triads_ordered2 = sort(all_triads_ordered)

13317672-element Vector{Tuple{Int64, Int64, Int64}}:
 (199781, 200233, 223783)
 (199781, 200233, 223783)
 (199781, 200233, 226726)
 (199781, 200233, 226726)
 (199781, 200233, 239694)
 (199781, 200233, 239694)
 (199781, 200233, 241223)
 (199781, 200233, 241223)
 (199781, 200233, 241223)
 (199781, 200233, 241223)
 ⋮
 (82153148292, 82222485380, 82519232388)
 (82153148292, 82222485380, 82519232388)
 (82153148292, 82222485380, 82519232388)
 (82153148292, 82222485380, 82519232388)
 (82153148292, 82222485380, 82519232388)
 (82153148292, 82222485380, 82519232388)
 (82153148292, 82222485380, 82519232388)
 (82425515908, 83329126276, 83592843140)
 (82425515908, 83329126276, 83592843140)

In [None]:
# all_triads_dict = Dict(all_triads_unique .=> [findall(Ref(triad) .== all_triads_ordered) for triad in all_triads_unique])
# a = [searchsorted(all_triads_ordered2, z) for z in y]

d = Dict(y .=> [searchsorted(all_triads_ordered2, z) for z in y])


Dict{Tuple{Int64, Int64, Int64}, UnitRange{Int64}} with 100 entries:
  (326177, 341801, 342642)           => 6456905:6456906
  (319000, 564810, 565868)           => 6299099:6299100
  (355643, 462288, 502726)           => 6917503:6917506
  (355643, 535160, 535161)           => 6917631:6917632
  (355643, 390490, 502726)           => 6917495:6917496
  (216022, 13322784644, 26148310916) => 2210350:2210351
  (355643, 490981, 568813)           => 6917530:6917531
  (355643, 542386, 561665)           => 6917699:6917702
  (355643, 390490, 548537)           => 6917499:6917500
  (355643, 561665, 636621)           => 6917755:6917758
  (355643, 490981, 636621)           => 6917532:6917533
  (329152, 594600, 3279064964)       => 6482974:6482974
  (355643, 521370, 535161)           => 6917602:6917602
  (355643, 521370, 527795)           => 6917598:6917601
  (355643, 502726, 548135)           => 6917562:6917569
  (528352, 594600, 687460)           => 9587332:9587332
  (355643, 548135, 548537)         

In [53]:
y[1], all_triads_ordered2[a[1]]
ain = collect(a[1])
triads[inverse_inds[ain]]
print(triads[inds[ain]])

Any[(427681, 426422, 429119), (427681, 429119, 426422)]

In [41]:
inds = sortperm(all_triads_ordered)
inverse_inds = sortperm(inds)

13317672-element Vector{Int64}:
  7401731
  7401732
  7162545
  7162546
  6299099
  6299100
  6917503
  6917582
  6917683
  6917661
        ⋮
 13317612
 13317616
 13317628
 13317645
 13317646
 13317638
 13317651
 13317669
 13317670

In [44]:
# identify triads that consist of the same edges

all_triads = [triads..., cyclic_triads...]

# all_triads_ordered = sort.([[p, o, x] for (p, o, x) in all_triads])
# all_triads_unique = unique(all_triads_ordered)
all_triads_ordered = [tuple(sort([p, o, x])...) for (p, o, x) in all_triads]
all_triads_unique = unique(all_triads_ordered)

sort!(all_triads_unique)
inds = sortperm(all_triads_ordered)
inverse_inds = sortperm(inds);

sort!(all_triads_ordered)

13317672-element Vector{Tuple{Int64, Int64, Int64}}:
 (199781, 200233, 223783)
 (199781, 200233, 223783)
 (199781, 200233, 226726)
 (199781, 200233, 226726)
 (199781, 200233, 239694)
 (199781, 200233, 239694)
 (199781, 200233, 241223)
 (199781, 200233, 241223)
 (199781, 200233, 241223)
 (199781, 200233, 241223)
 ⋮
 (82153148292, 82222485380, 82519232388)
 (82153148292, 82222485380, 82519232388)
 (82153148292, 82222485380, 82519232388)
 (82153148292, 82222485380, 82519232388)
 (82153148292, 82222485380, 82519232388)
 (82153148292, 82222485380, 82519232388)
 (82153148292, 82222485380, 82519232388)
 (82425515908, 83329126276, 83592843140)
 (82425515908, 83329126276, 83592843140)

In [45]:


# create a dictionary with unique triads as keys and indices of their appearance as values
# all_triads_dict = Dict(all_triads_unique .=> [findall(all_triads_ordered .== [triad]) for triad in all_triads_unique])
# all_triads_dict = Dict(all_triads_unique .=> [findall(Ref(triad) .== all_triads_ordered) for triad in all_triads_unique])
all_triads_dict = Dict(all_triads_unique .=> [searchsorted(all_triads_ordered2, z) for z in all_triads_unique])



Dict{Tuple{Int64, Int64, Int64}, UnitRange{Int64}} with 4910076 entries:
  (269583, 292702, 315310)             => 5100059:5100062
  (337504, 457810, 3171913604)         => 6583715:6583718
  (554414, 554415, 565802)             => 9956220:9956221
  (205068, 367823, 51853889412)        => 1176587:1176587
  (256079, 350170, 6811848580)         => 4551518:4551519
  (247372, 284943, 2951843716)         => 4310259:4310259
  (246739, 305660, 319864)             => 4286555:4286555
  (280754, 293646, 538598)             => 5356277:5356280
  (505092, 3197669252, 3206844292)     => 9046582:9046583
  (232924, 259809, 301232)             => 2999784:2999784
  (3173552004, 3180629892, 3214053252) => 11635836:11635839
  (205645, 240192, 252420)             => 1378038:1378039
  (349865, 562296, 677552)             => 6775639:6775640
  (210284, 240308, 407778)             => 1757154:1757154
  (205076, 277533, 437476)             => 1237217:1237218
  (431279, 500974, 12317069188)        => 7414546:74145

In [None]:
# using JLD2, FileIO

# save(joinpath(path, "epinions-all-triads-dict.jld2"), "all_triads_dict", all_triads_dict)

In [46]:
# create dataframe from dict
df_all_triads_dict = DataFrame(triad = collect(keys(all_triads_dict)), inds = collect(values(all_triads_dict)),
                           count = length.(collect(values(all_triads_dict))))

# collect(keys(triads_dict))

Row,triad,inds,count
Unnamed: 0_level_1,Tuple…,UnitRang…,Int64
1,"(269583, 292702, 315310)",5100059:5100062,4
2,"(337504, 457810, 3171913604)",6583715:6583718,4
3,"(554414, 554415, 565802)",9956220:9956221,2
4,"(205068, 367823, 51853889412)",1176587:1176587,1
5,"(256079, 350170, 6811848580)",4551518:4551519,2
6,"(247372, 284943, 2951843716)",4310259:4310259,1
7,"(246739, 305660, 319864)",4286555:4286555,1
8,"(280754, 293646, 538598)",5356277:5356280,4
9,"(505092, 3197669252, 3206844292)",9046582:9046583,2
10,"(232924, 259809, 301232)",2999784:2999784,1


In [None]:
# add columns with the type of triad (transitive or cyclic) and inds in the source dataframe

# length(triads), length(cyclic_triads), length(all_triads)

df_all_triads_dict[!, "proper_inds"]

df_all_triads_dict[!, "triad_type"] = [(inds .> length(triads)) .+ 1 for inds in df_all_triads_dict.inds]

df_all_triads_dict[!, "exact_inds"] = [inds .- length(triads) .* (type .- 1) for (inds, type) in zip(df_all_triads_dict.inds, df_all_triads_dict.triad_type)]

first(df_all_triads_dict, 5)

In [51]:
# test
cyclic_triads[18406]

(1769, 2504, 3456)

In [50]:
println(df_triads_dict.triad[2])

triads[triads_dict[df_triads_dict.triad[2]]]

[1542, 1628, 2193]


2-element Vector{Tuple{Int64, Int64, Int64}}:
 (1628, 1542, 2193)
 (1542, 1628, 2193)

In [74]:
using PyCall

i = 1
triad = df_all_triads_dict.triad[i]
types = df_all_triads_dict.triad_type[i]
inds = df_all_triads_dict.inds[i]
exact_inds = df_all_triads_dict.exact_inds[i]

# get triad list
sources = [triads, cyclic_triads]
type_sources = sources[types]
# triad_list = sources[types][exact_inds]
triad_list = [triad_source[exact_ind] for (triad_source, exact_ind) in zip(type_sources, exact_inds)]
# triad_list = triads[inds]

# get edges from triad list
get_transitive_edges(triad) = [(triad[1], triad[2]), (triad[2], triad[3]), (triad[1], triad[3])]
get_cyclic_edges(triad) = [(triad[1], triad[2]), (triad[2], triad[3]), (triad[3], triad[1])]
function get_edges(triad, type)
    if type == 1
        return get_transitive_edges(triad)
    elseif type == 2
        return get_cyclic_edges(triad)
    end
end
# function get_edges.(triad_list, types)
#     return [get_edges(triad, type) for (triad, type) in zip(triad_list, types)]
# end

# type_funs = [get_transitive_edges, get_cyclic_edges]
edges_list = get_edges.(triad_list, types)
edges_list = reduce(vcat, edges_list)
unique_edges = unique(edges_list)

py"""
import networkx as nx
"""

G = py"nx.DiGraph"([edges_list...])

# print(G.edges)

triad_census = py"nx.triadic_census"(G)

triad_census_triads = collect(keys(triad_census))

tc_ind = collect(values(triad_census)) .== 1

triad_census_triads[tc_ind][1]

# get signed triad type
sign_vals_s = values(df_all_triads[inds, 4:6])
sign_vals_s = Matrix(sign_vals_s)
sign_vals_s = mapslices(x->[x], sign_vals_s, dims=2)[:]

n_arr = zeros(Int, 8)

for sign_vals in sign_vals_s
    connections_values = sign_vals .== 1
    n_arr[connections_values[1]*4 + connections_values[3]*2 + connections_values[2]+1] += 1
end
n_arr
# n_arr

8-element Vector{Int64}:
 0
 0
 0
 0
 0
 0
 0
 2

In [104]:
using PyCall

i = 2
triad = df_triads_dict.triad[i]
inds = df_triads_dict.inds[i]

# get triad list
triad_list = triads[inds]

# get edges from triad list
edges_list = [[(triad[1], triad[2]), (triad[2], triad[3]), (triad[1], triad[3])] for triad in triad_list]
edges_list = reduce(vcat, edges_list)
unique_edges = unique(edges_list)

py"""
import networkx as nx
"""

G = py"nx.DiGraph"([unique_edges...])

# print(G.edges)

triad_census = py"nx.triadic_census"(G)

triad_census_triads = collect(keys(triad_census))

tc_ind = collect(values(triad_census)) .== 1

triad_census_triads[tc_ind]

# get signed triad type
sign_vals_s = values(df_triads[inds, 4:6])
sign_vals_s = Matrix(sign_vals_s)
sign_vals_s = mapslices(x->[x], sign_vals_s, dims=2)[:]

n_arr = zeros(Int, 8)

for sign_vals in sign_vals_s
    connections_values = sign_vals .== 1
    n_arr[connections_values[1]*4 + connections_values[3]*2 + connections_values[2]+1] += 1
end
n_arr

8-element Vector{Int64}:
 0
 0
 0
 0
 0
 0
 0
 2

In [107]:
values(sign_vals)
mm = Matrix(sign_vals)
sign_vals_s = mapslices(x->[x], mm, dims=2)[:]

all(prod.(sign_vals_s) .== 1)

true

In [None]:
using PyCall
py"""
import networkx as nx
"""

G = py"nx.DiGraph"()

triad_census = py"nx.triadic_census"(G)
triad_census_triads = collect(keys(triad_census))

# signed triad types:
n_arr_dict = Dict( triad_census_triads .=> [zeros(Int, 8) for _ in 1:length(triad_census_triads)])
n_bal_dict = Dict( triad_census_triads .=> [0 for _ in 1:length(triad_census_triads)])
n_hier_dict = Dict( triad_census_triads .=> [0 for _ in 1:length(triad_census_triads)])
n_tot_dict = Dict( triad_census_triads .=> [0 for _ in 1:length(triad_census_triads)])

counter = 0
inform_after = 100000
for (triad, inds, count) in eachrow(df_triads_dict)
    triad_list = triads[inds]

    # get edges list from triad list
    edges_list = [[(triad[1], triad[2]), (triad[2], triad[3]), (triad[1], triad[3])] for triad in triad_list]
    edges_list = reduce(vcat, edges_list)
    unique_edges = unique(edges_list)

    # call Python from julia, use networkx to get triad census on subgraphs
    G = py"nx.DiGraph"([unique_edges...])
    triad_census = py"nx.triadic_census"(G)
    triad_census_triads = collect(keys(triad_census))
    tc_ind = collect(values(triad_census)) .== 1
    triad_type = triad_census_triads[tc_ind][1]

    # get signed triad type
    sign_vals_s = values(df_triads[inds, 4:6])
    sign_vals_s = Matrix(sign_vals_s)
    sign_vals_s = mapslices(x->[x], sign_vals_s, dims=2)[:]

    # n_arr = zeros(Int, 8)

    hier = true
    for sign_vals in sign_vals_s
        connections_values = sign_vals .== 1
        signed_triad_type = connections_values[1]*4 + connections_values[3]*2 + connections_values[2]+1
        n_arr_dict[triad_type][signed_triad_type] += 1

        if signed_triad_type == 3 || signed_triad_type == 6
            hier = false
        end
    end
    n_tot_dict[triad_type] += 1
    n_bal_dict[triad_type] += all(prod.(sign_vals_s) .== 1)
    n_hier_dict[triad_type] += hier

    # n_arr

    # println(triad_list, inds)
    counter += 1
    # if counter > 5000
    #     break
    # end
    if counter % inform_after == 0
        # print time
        # print()
        println("Processed $counter triads")
    end
end

Processed 100000 triads
Processed 200000 triads
Processed 300000 triads
Processed 400000 triads
Processed 500000 triads
Processed 600000 triads


In [126]:
# create dataframe from dicts

df_n_arr = DataFrame(triad = collect(keys(n_arr_dict)), n_arr = collect(values(n_arr_dict)),
                     n_bal = collect(values(n_bal_dict)), n_hier = collect(values(n_hier_dict)),
                     n_tot = collect(values(n_tot_dict)))

Row,triad,n_arr,n_bal,n_hier,n_tot
Unnamed: 0_level_1,String,Array…,Int64,Int64,Int64
1,030C,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0
2,111U,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0
3,120D,"[656, 2718, 804, 5957, 1529, 5232, 3536, 70680]",35682,39837,45556
4,120C,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0
5,201,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0
6,111D,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0
7,210,"[148, 598, 344, 2666, 782, 2092, 2452, 36743]",11150,13025,15275
8,003,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0
9,012,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0
10,021D,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0


In [128]:
df_n_arr.per_bal = df_n_arr.n_bal ./ df_n_arr.n_tot
df_n_arr.per_hier = df_n_arr.n_hier ./ df_n_arr.n_tot
df_n_arr

Row,triad,n_arr,n_bal,n_hier,n_tot,per_bal,per_hier
Unnamed: 0_level_1,String,Array…,Int64,Int64,Int64,Float64,Float64
1,030C,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,,
2,111U,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,,
3,120D,"[656, 2718, 804, 5957, 1529, 5232, 3536, 70680]",35682,39837,45556,0.783256,0.874462
4,120C,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,,
5,201,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,,
6,111D,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,,
7,210,"[148, 598, 344, 2666, 782, 2092, 2452, 36743]",11150,13025,15275,0.729951,0.8527
8,003,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,,
9,012,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,,
10,021D,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,,


In [None]:
# save above dataframe
CSV.write(joinpath(path, "soc-wiki-elec-bal_hier_triads-types.csv"), df_n_arr)
CSV.write(joinpath("soc-wiki-elec-bal_hier_triads-types.csv"), df_n_arr)

# save JLD2
save(joinpath(path, "soc-wiki-elec-bal_hier_triads-types.jld2"), "df_n_arr", df_n_arr)
save("soc-wiki-elec-bal_hier_triads-types.jld2", "df_n_arr", df_n_arr)

### Balance and hierarchy analysis including cyclic triads

In [None]:
get_transitive_edges(triad) = [(triad[1], triad[2]), (triad[2], triad[3]), (triad[1], triad[3])]
get_cyclic_edges(triad) = [(triad[1], triad[2]), (triad[2], triad[3]), (triad[3], triad[1])]
function get_edges(triad, type)
    if type == 1
        return get_transitive_edges(triad)
    elseif type == 2
        return get_cyclic_edges(triad)
    end
end

In [96]:
using PyCall

i = 1
triad = df_all_triads_dict.triad[i]
types = df_all_triads_dict.triad_type[i]
inds = df_all_triads_dict.inds[i]
exact_inds = df_all_triads_dict.exact_inds[i]

# get triad list
sources = [triads, cyclic_triads]
type_sources = sources[types]
# triad_list = sources[types][exact_inds]
triad_list = [triad_source[exact_ind] for (triad_source, exact_ind) in zip(type_sources, exact_inds)]
# triad_list = triads[inds]

# get edges from triad list
get_transitive_edges(triad) = [(triad[1], triad[2]), (triad[2], triad[3]), (triad[1], triad[3])]
get_cyclic_edges(triad) = [(triad[1], triad[2]), (triad[2], triad[3]), (triad[3], triad[1])]
function get_edges(triad, type)
    if type == 1
        return get_transitive_edges(triad)
    elseif type == 2
        return get_cyclic_edges(triad)
    end
end
# function get_edges.(triad_list, types)
#     return [get_edges(triad, type) for (triad, type) in zip(triad_list, types)]
# end

# type_funs = [get_transitive_edges, get_cyclic_edges]
edges_list = get_edges.(triad_list, types)
edges_list = reduce(vcat, edges_list)
unique_edges = unique(edges_list)

py"""
import networkx as nx
"""

G = py"nx.DiGraph"([edges_list...])

# print(G.edges)

triad_census = py"nx.triadic_census"(G)

triad_census_triads = collect(keys(triad_census))

tc_ind = collect(values(triad_census)) .== 1

println(triad_census_triads[tc_ind][1])

# get signed triad type
sign_vals_s = values(df_all_triads[inds, 4:6])
sign_vals_s = Matrix(sign_vals_s)
sign_vals_s = mapslices(x->[x], sign_vals_s, dims=2)[:]

n_arr = zeros(Int, 8)

for sign_vals in sign_vals_s
    connections_values = sign_vals .== 1
    n_arr[connections_values[1]*4 + connections_values[3]*2 + connections_values[2]+1] += 1
end
n_arr
# n_arr

8-element Vector{Int64}:
 0
 0
 0
 0
 0
 0
 0
 2

In [97]:
triad_census_triads[tc_ind][1]

"120C"

In [None]:
using PyCall
py"""
import networkx as nx
"""

G = py"nx.DiGraph"()

triad_census = py"nx.triadic_census"(G)
triad_census_triads = collect(keys(triad_census))

# signed triad types:
n_arr_dict = Dict( triad_census_triads .=> [zeros(Int, 12) for _ in 1:length(triad_census_triads)])
# first 8 as standard n_arr_dict, the next 4 from cycles: 9 - 0 neg links, 10 - 1 neg link etc.
n_bal_dict = Dict( triad_census_triads .=> [0 for _ in 1:length(triad_census_triads)])
n_hier_dict = Dict( triad_census_triads .=> [0 for _ in 1:length(triad_census_triads)])
n_tot_dict = Dict( triad_census_triads .=> [0 for _ in 1:length(triad_census_triads)])

n_trans_bal_dict = Dict( triad_census_triads .=> [0 for _ in 1:length(triad_census_triads)])
n_trans_hier_dict = Dict( triad_census_triads .=> [0 for _ in 1:length(triad_census_triads)])
n_trans_tot_dict = Dict( triad_census_triads .=> [0 for _ in 1:length(triad_census_triads)])

n_cyclic_bal_dict = Dict( triad_census_triads .=> [0 for _ in 1:length(triad_census_triads)])
n_cyclic_hier_dict = Dict( triad_census_triads .=> [0 for _ in 1:length(triad_census_triads)])
n_cyclic_tot_dict = Dict( triad_census_triads .=> [0 for _ in 1:length(triad_census_triads)])

sources = [triads, cyclic_triads]

counter = 0
inform_after = 10000
for (triad, inds, count, trans_or_cyclics, exact_inds) in eachrow(df_all_triads_dict)
    type_sources = sources[trans_or_cyclics]
    triad_list = [triad_source[exact_ind] for (triad_source, exact_ind) in zip(type_sources, exact_inds)]

    # get edges list from triad list
    edges_list = get_edges.(triad_list, trans_or_cyclics)
    edges_list = reduce(vcat, edges_list)
    unique_edges = unique(edges_list)

    # call Python from julia, use networkx to get triad census on subgraphs
    G = py"nx.DiGraph"([unique_edges...])
    triad_census = py"nx.triadic_census"(G)
    triad_census_triads = collect(keys(triad_census))
    tc_ind = collect(values(triad_census)) .== 1
    triad_type = triad_census_triads[tc_ind][1]

    # get signed triad type
    sign_vals_s = values(df_all_triads[inds, 4:6])
    sign_vals_s = Matrix(sign_vals_s)
    sign_vals_s = mapslices(x->[x], sign_vals_s, dims=2)[:]

    hier = true
    hier_trans = true
    hier_cyclic = true
    bal_trans = true
    bal_cyclic = true
    # indicators that trans/cyclic triads were considered at all
    some_trans = false
    some_cyclic = false
    for (sign_vals, trans_or_cyclic) in zip(sign_vals_s, trans_or_cyclics)
        if trans_or_cyclic == 1
            some_trans = true
            n_trans_tot_dict[triad_type] += 1
            if prod(sign_vals) != 1
                bal_trans = false
            end

            connections_values = sign_vals .== 1
            signed_triad_type = connections_values[1]*4 + connections_values[3]*2 + connections_values[2]+1
            
        elseif trans_or_cyclic == 2
            some_cyclic = true

            n_cyclic_tot_dict[triad_type] += 1
            if prod(sign_vals) != 1
                bal_cyclic = false
            end

            num_pos_links = sum(sign_vals .== 1)
            signed_triad_type = 9 + num_pos_links
        else
            println(trans_or_cyclics)
            error("Wrong type")
        end
        n_arr_dict[triad_type][signed_triad_type] += 1

        if signed_triad_type in [3, 6, 9, 12]
            hier = false
            if signed_triad_type in [3, 6]
                hier_trans = false
            else
                hier_cyclic = false
            end
        end
    end
    n_tot_dict[triad_type] += 1
    n_bal_dict[triad_type] += all(prod.(sign_vals_s) .== 1)
    n_hier_dict[triad_type] += hier

    n_trans_bal_dict[triad_type] += bal_trans * some_trans
    n_cyclic_bal_dict[triad_type] += bal_cyclic * some_cyclic

    n_trans_hier_dict[triad_type] += hier_trans * some_trans
    n_cyclic_hier_dict[triad_type] += hier_cyclic * some_cyclic

    # n_trans_tot_dict[triad_type] += some_trans
    # n_cyclic_tot_dict[triad_type] += some_cyclic

    # n_arr

    # println(triad_list, inds)
    counter += 1
    # if counter > 5000
    #     break
    # end
    if counter % inform_after == 0
        # print time
        # print()
        # break
        println("Processed $counter triads")
        flush(stdout)
    # elseif counter > 23000
    #     println("Processed $counter triads")
    # else
        # break
    end
end

In [None]:
# create dataframe from dicts

df_cyclic_n_arr = DataFrame(triad = collect(keys(n_arr_dict)), n_arr = collect(values(n_arr_dict)),
                     n_bal = collect(values(n_bal_dict)), n_hier = collect(values(n_hier_dict)),
                     n_tot = collect(values(n_tot_dict)), n_trans_bal = collect(values(n_trans_bal_dict)), 
                     n_trans_hier = collect(values(n_trans_hier_dict)), n_trans_tot = collect(values(n_trans_tot_dict)), 
                     n_cyclic_bal = collect(values(n_cyclic_bal_dict)), n_cyclic_hier = collect(values(n_cyclic_hier_dict)),
                        n_cyclic_tot = collect(values(n_cyclic_tot_dict)))

In [None]:
df_cyclic_n_arr.per_bal = df_cyclic_n_arr.n_bal ./ df_cyclic_n_arr.n_tot
df_cyclic_n_arr.per_hier = df_cyclic_n_arr.n_hier ./ df_cyclic_n_arr.n_tot
df_cyclic_n_arr.per_trans_bal = df_cyclic_n_arr.n_trans_bal ./ df_cyclic_n_arr.n_tot
df_cyclic_n_arr.per_trans_hier = df_cyclic_n_arr.n_trans_hier ./ df_cyclic_n_arr.n_tot
df_cyclic_n_arr.per_cyclic_bal = df_cyclic_n_arr.n_cyclic_bal ./ df_cyclic_n_arr.n_tot
df_cyclic_n_arr.per_cyclic_hier = df_cyclic_n_arr.n_cyclic_hier ./ df_cyclic_n_arr.n_tot
df_cyclic_n_arr.per_trans_bal_sep = [sum(n_arr[[2, 3, 5, 8]]) / n_tot for (n_arr, n_tot) in zip(df_cyclic_n_arr.n_arr, df_cyclic_n_arr.n_trans_tot)]
df_cyclic_n_arr.per_cyclic_bal_sep = [sum(n_arr[[10, 12]]) / n_tot for (n_arr, n_tot) in zip(df_cyclic_n_arr.n_arr, df_cyclic_n_arr.n_cyclic_tot)]
df_cyclic_n_arr.per_trans_hier_sep = [1 - sum(n_arr[[3,6]]) / n_tot for (n_arr, n_tot) in zip(df_cyclic_n_arr.n_arr, df_cyclic_n_arr.n_trans_tot)]
df_cyclic_n_arr.per_cyclic_hier_sep = [sum(n_arr[[10, 11]]) / n_tot for (n_arr, n_tot) in zip(df_cyclic_n_arr.n_arr, df_cyclic_n_arr.n_cyclic_tot)]
df_cyclic_n_arr.per_bal_sep = [sum(n_arr[[2, 3, 5, 8, 10, 12]]) / n_tot for (n_arr, n_tot) in zip(df_cyclic_n_arr.n_arr, df_cyclic_n_arr.n_trans_tot .+ df_cyclic_n_arr.n_cyclic_tot)]
df_cyclic_n_arr.per_hier_sep = [1 - sum(n_arr[[3, 6, 9, 12]]) / n_tot for (n_arr, n_tot) in zip(df_cyclic_n_arr.n_arr, df_cyclic_n_arr.n_trans_tot .+ df_cyclic_n_arr.n_cyclic_tot)]
df_cyclic_n_arr

In [None]:
# access dataframe columns

cols = names(df_cyclic_n_arr)[startswith.(names(df_cyclic_n_arr), "per")]

df_cyclic_n_arr[!, ["triad", cols...]]

In [None]:
# save above dataframe
CSV.write(joinpath(path, "epinions-bal_hier_all-triads-types.csv"), df_cyclic_n_arr)
CSV.write(joinpath("epinions-bal_hier_all-triads-types.csv"), df_cyclic_n_arr)

# save JLD2
save(joinpath(path, "epinions-bal_hier_all-triads-types.jld2"), "df_cyclic_n_arr", df_cyclic_n_arr)
save("epinions-bal_hier_all-triads-types.jld2", "df_cyclic_n_arr", df_cyclic_n_arr)