In [1]:
using CSV
using DataFrames


In [2]:
# Directory holding the soc-wiki-elec dataset (relative to this notebook).
path = "../../../data/soc-wiki-elec"

# The edge file is space-delimited and has no header row; parsing starts at the
# second line of the file (`skipto = 2`).
edges_file = joinpath(path, "soc-wiki-elec.edges")
df = CSV.read(edges_file, DataFrame; delim = " ", header = false, skipto = 2)

# Name the four positional columns.
rename!(df, [:source, :target, :sign, :date])

# Show the first row as a quick sanity check.
first(df)

Row,source,target,sign,date
Unnamed: 0_level_1,Int64,Int64,Int64,Int64
1,3,30,1,1095171960


In [3]:
# One (source, target) tuple per row of df, duplicates included.
edges = collect(zip(df.source, df.target))

edges[1:5]
# Number of distinct edges versus the total number of rows.
print([length(unique(edges)), length(edges)])

# Drop self loops, i.e. edges whose two endpoints coincide.
edges = filter(e -> e[1] != e[2], edges)
length(edges)

[103738, 107071]

107013

In [4]:
# Signed edge list, used later to look up the sign of an edge.
# Duplicates are kept on purpose: the entries have to stay index-aligned with
# `edges`, so no `unique` is applied here.
edges_sign = collect(zip(df.source, df.target, df.sign))

# Drop self loops.
edges_sign = filter(e -> e[1] != e[2], edges_sign)

# Number of signed edges left (repeated edges are still counted separately).
length(edges_sign)

107013

We have some edges that change sign. Let's see if these edges form any triads.

In [5]:
# (rows that are not self loops, all rows)
(count(df.source .!= df.target), nrow(df))

(107013, 107071)

In [6]:
# Every node id that is an endpoint of a non-self-loop edge, in order of first
# appearance: all sources first, then the targets not already seen.
nodes = union(first.(edges_sign), getindex.(edges_sign, 2))

print(length(nodes))

# Rows of df that are not self loops.
df_filtered = df[df.source .!= df.target, :]

# Group the outgoing edges by their source node.
node_outs_gbf = groupby(df_filtered, :source)

# One row per source node; wrapping the target column in a vector keeps all
# targets of a group in a single cell.
node_outs = combine(node_outs_gbf, group -> [group[!, :target]])
rename!(node_outs, :x1 => :targets)

# Map every source node to its distinct targets.
node_outs_dict = Dict(zip(node_outs.source, map(unique, node_outs.targets)))

7115

Dict{Int64, Vector{Int64}} with 6110 entries:
  4986 => [3898]
  7329 => [7295, 6832]
  4700 => [4261]
  4576 => [4578, 3447, 4448, 3897, 5463, 4977, 4530, 4536, 15, 3919]
  6073 => [5103]
  2288 => [2290]
  1703 => [230]
  7685 => [7683, 7757, 7908, 5179, 8037, 8124, 8174, 7391, 8198, 8219, 4401, 66…
  3406 => [3408, 3352, 3541, 4587]
  2841 => [72]
  2876 => [2877]
  7353 => [6832]
  185  => [214, 321, 2625, 1679]
  1090 => [1186, 943]
  2015 => [665]
  3293 => [6715, 6665, 6955, 840, 5289, 7908, 5614, 5969, 5605, 7620]
  1704 => [230]
  3220 => [2237]
  8042 => [6634]
  ⋮    => ⋮

In [None]:
# get transitive triads
#
# A transitive triad (P, O, X) consists of the edges P -> O, O -> X and P -> X.
# For every source P and each of its targets O, every target X of O that is also
# a target of P closes such a triad.

# Typed container (instead of `[]`) so the result is not a Vector{Any}.
triads = NTuple{3,keytype(node_outs_dict)}[]
for (source, targets) in node_outs_dict
    # Hash-based membership test instead of a linear scan of `targets` in the
    # innermost loop.
    target_set = Set(targets)
    for target in targets
        # Nodes without outgoing edges have no entry in the dictionary.
        haskey(node_outs_dict, target) || continue
        for target_target in node_outs_dict[target]
            if target_target in target_set
                push!(triads, (source, target, target_target))
            end
        end
    end
end
# (number of triads found, number of distinct triads)
length(triads), length(unique(triads))

(746515, 746515)

In [21]:
# get cyclic triads
#
# A cyclic triad (P, O, X) consists of the edges P -> O, O -> X and X -> P.
# Every cycle has three rotations. Requiring P to be the smallest id of the three
# (both O and X are filtered to be larger than P) keeps exactly one of them, so
# each cycle is recorded once. Sorting only fixes the order of the output.

source_nodes = sort(collect(keys(node_outs_dict)))

# Typed container (instead of `[]`) so the result is not a Vector{Any}.
cyclic_triads = NTuple{3,keytype(node_outs_dict)}[]
for source in source_nodes
    # keep only targets with a larger id than source
    targets = filter(>(source), sort(node_outs_dict[source]))
    for target in targets
        # nodes without outgoing edges have no entry in the dictionary
        haskey(node_outs_dict, target) || continue
        # the third node must have a larger id than source as well
        target_targets = filter(>(source), sort(node_outs_dict[target]))
        for target_target in target_targets
            # the cycle is closed when the third node points back to source
            if source in get(node_outs_dict, target_target, ())
                push!(cyclic_triads, (source, target, target_target))
            end
        end
    end
end
# (number of cyclic triads found, number of distinct cyclic triads)
length(cyclic_triads), length(unique(cyclic_triads))

(43975, 43975)

In [20]:
# Inspect the last cyclic triad found.
# NOTE(review): the output recorded below, (7666, 7691, 6946), has X < P, which the
# `target_target > source` filter of the search loop rules out; it appears to come
# from an earlier version of that loop — re-run this cell to refresh it.
cyclic_triads[end]  

(7666, 7691, 6946)

In [9]:
# Same mapping as node_outs_dict, but with duplicate targets kept.
node_outs_dict_repeated = Dict(zip(node_outs.source, node_outs.targets))

# An edge is repeated when its target occurs more than once among the targets of
# its source; every such edge is reported once.
repeated_edges = unique([
    (source, target) for (source, targets) in node_outs_dict_repeated
    for target in targets if count(==(target), targets) > 1
])

3106-element Vector{Tuple{Int64, Int64}}:
 (3293, 6665)
 (1823, 2565)
 (1823, 6979)
 (637, 4037)
 (730, 989)
 (1050, 8290)
 (1050, 2651)
 (103, 271)
 (844, 857)
 (844, 1717)
 ⋮
 (1869, 1717)
 (1368, 3755)
 (1368, 2654)
 (3028, 2653)
 (3028, 3352)
 (3028, 4385)
 (3028, 4875)
 (3028, 4964)
 (3028, 737)

In [10]:
# One row per transitive triad; the columns are named after the roles P, O, X.
df_triads = DataFrame(triads)
rename!(df_triads, [:P, :O, :X]);

# The three edges of every triad (P -> O, O -> X, P -> X), row-aligned with df_triads.
po_edges = collect(zip(df_triads.P, df_triads.O))
ox_edges = collect(zip(df_triads.O, df_triads.X))
px_edges = collect(zip(df_triads.P, df_triads.X));

In [31]:
# One row per cyclic triad; the columns are named after the roles P, O, X.
df_cyclic_triads = DataFrame(cyclic_triads)
rename!(df_cyclic_triads, [:P, :O, :X]);

# The three edges of every cycle (P -> O, O -> X, X -> P), row-aligned with df_cyclic_triads.
po_edges_cyclic = collect(zip(df_cyclic_triads.P, df_cyclic_triads.O))
ox_edges_cyclic = collect(zip(df_cyclic_triads.O, df_cyclic_triads.X))
xp_edges_cyclic = collect(zip(df_cyclic_triads.X, df_cyclic_triads.P));


In [71]:
# Combined triad table: all transitive triads first, followed by all cyclic triads,
# so a row index above length(triads) refers to a cyclic triad.
# NOTE(review): splatting both lists into one array literal passes every triad as a
# separate argument; vcat(triads, cyclic_triads) would avoid that, but only gives the
# same element type when both lists are already concretely typed.
df_all_triads = DataFrame([triads..., cyclic_triads...])
rename!(df_all_triads, [:P, :O, :X]);
# total number of triads of both kinds
nrow(df_all_triads)
# po_edges_all = [(df_all_triads.P[i], df_all_triads.O[i]) for i in 1:size(df_all_triads, 1)]

790490

In [11]:
# find number of triads containing repeated edges
# NOTE(review): only the P -> O edge of each triad is checked; the O -> X and
# P -> X edges are not — confirm that this is intended.

# Index the triads by their P -> O edge in a single pass, instead of scanning all
# of po_edges once per repeated edge.
po_edge_rows = Dict{eltype(po_edges),Vector{Int}}()
for (row, edge) in pairs(po_edges)
    push!(get!(() -> Int[], po_edge_rows, edge), row)
end

# Row indices of the affected triads, grouped by repeated edge and ascending within
# each group (the same order an edge-by-edge search produces).
affected_triads = Int[]
for edge in repeated_edges
    append!(affected_triads, get(po_edge_rows, edge, Int[]))
end
# (number of affected rows, number of distinct affected rows)
length(affected_triads), length(unique(affected_triads))

(55054, 55054)

In [12]:
# get sign of edges in triads. Take the last value of edge

# Sign of every edge, keyed by (source, target). Entries are inserted in the order
# of edges_sign and later ones overwrite earlier ones, so a repeated edge ends up
# with the sign of its last occurrence. One dictionary lookup per triad edge
# replaces a scan over the whole edge list.
last_sign = Dict((u, v) => s for (u, v, s) in edges_sign)

df_triads[!, "po_sign"] = [last_sign[edge] for edge in po_edges]
df_triads[!, "ox_sign"] = [last_sign[edge] for edge in ox_edges]
df_triads[!, "px_sign"] = [last_sign[edge] for edge in px_edges]
first(df_triads, 5)

Row,P,O,X,po_sign,ox_sign,px_sign
Unnamed: 0_level_1,Int64,Int64,Int64,Int64,Int64,Int64
1,4576,3447,4448,1,1,1
2,4576,3447,5463,1,1,-1
3,4576,3447,4977,1,1,1
4,4576,3447,4530,1,1,1
5,4576,3447,3897,1,1,1


In [32]:
# get sign of edges in cyclic triads. Take the last value of edge

# Sign of every edge, keyed by (source, target). Entries are inserted in the order
# of edges_sign and later ones overwrite earlier ones, so a repeated edge ends up
# with the sign of its last occurrence. One dictionary lookup per triad edge
# replaces a scan over the whole edge list.
last_sign = Dict((u, v) => s for (u, v, s) in edges_sign)

df_cyclic_triads[!, "po_sign"] = [last_sign[edge] for edge in po_edges_cyclic]
df_cyclic_triads[!, "ox_sign"] = [last_sign[edge] for edge in ox_edges_cyclic]
df_cyclic_triads[!, "xp_sign"] = [last_sign[edge] for edge in xp_edges_cyclic]
first(df_cyclic_triads, 5)

Row,P,O,X,po_sign,ox_sign,xp_sign
Unnamed: 0_level_1,Int64,Int64,Int64,Int64,Int64,Int64
1,3,28,6,1,1,1
2,3,28,19,1,1,1
3,3,28,29,1,1,1
4,3,28,34,1,1,1
5,3,28,35,1,1,1


In [72]:
# Sign columns of the combined table: the values of the transitive triads come
# first, those of the cyclic triads after them.
df_all_triads[!, "po_sign"] = vcat(df_triads[!, "po_sign"], df_cyclic_triads[!, "po_sign"])
df_all_triads[!, "ox_sign"] = vcat(df_triads[!, "ox_sign"], df_cyclic_triads[!, "ox_sign"])
# The third column holds the P -> X sign for transitive rows and the X -> P sign
# for cyclic rows.
df_all_triads[!, "xpx_sign"] = vcat(df_triads[!, "px_sign"], df_cyclic_triads[!, "xp_sign"])

first(df_all_triads, 5)

Row,P,O,X,po_sign,ox_sign,xpx_sign
Unnamed: 0_level_1,Int64,Int64,Int64,Int64,Int64,Int64
1,4576,3447,4448,1,1,1
2,4576,3447,5463,1,1,-1
3,4576,3447,4977,1,1,1
4,4576,3447,4530,1,1,1
5,4576,3447,3897,1,1,1


In [13]:
# Write df_triads to a CSV file in the dataset directory.
df_triads |> CSV.write(joinpath(path, "soc-wiki-elec-triads.csv"))

"../../../data/soc-wiki-elec/soc-wiki-elec-triads.csv"

In [33]:
# Write df_cyclic_triads to a CSV file in the dataset directory.
df_cyclic_triads |> CSV.write(joinpath(path, "soc-wiki-elec-cyclic-triads.csv"))

"../../../data/soc-wiki-elec/soc-wiki-elec-cyclic-triads.csv"

In [73]:
# Write df_all_triads to a CSV file in the dataset directory.
df_all_triads |> CSV.write(joinpath(path, "soc-wiki-elec-all-triads.csv"))

"../../../data/soc-wiki-elec/soc-wiki-elec-all-triads.csv"

In [7]:
# Reload the transitive triad table from CSV.
df_triads = CSV.read(joinpath(path, "soc-wiki-elec-triads.csv"), DataFrame)

# Rebuild the (P, O, X) tuples from the table columns.
triads = collect(zip(df_triads.P, df_triads.O, df_triads.X))

746515-element Vector{Tuple{Int64, Int64, Int64}}:
 (4576, 3447, 4448)
 (4576, 3447, 5463)
 (4576, 3447, 4977)
 (4576, 3447, 4530)
 (4576, 3447, 3897)
 (4576, 4448, 4578)
 (4576, 4448, 4536)
 (4576, 4530, 3897)
 (4576, 4530, 15)
 (4576, 4536, 4578)
 ⋮
 (1711, 1697, 1855)
 (1711, 1697, 3136)
 (1711, 1697, 2328)
 (1711, 1211, 1956)
 (1711, 1211, 1151)
 (1711, 1211, 2328)
 (1711, 2328, 1956)
 (1711, 2328, 1211)
 (1711, 2328, 2210)

In [34]:
# Reload the cyclic triad table from CSV.
df_cyclic_triads = CSV.read(joinpath(path, "soc-wiki-elec-cyclic-triads.csv"), DataFrame)

# Rebuild the (P, O, X) tuples from the table columns.
cyclic_triads = collect(zip(df_cyclic_triads.P, df_cyclic_triads.O, df_cyclic_triads.X))

43975-element Vector{Tuple{Int64, Int64, Int64}}:
 (3, 28, 6)
 (3, 28, 19)
 (3, 28, 29)
 (3, 28, 34)
 (3, 28, 35)
 (3, 28, 54)
 (3, 28, 94)
 (3, 28, 127)
 (3, 28, 178)
 (3, 39, 6)
 ⋮
 (7092, 7378, 7225)
 (7092, 7400, 7225)
 (7092, 7443, 7280)
 (7092, 7649, 7443)
 (7225, 7386, 7280)
 (7225, 7449, 7279)
 (7225, 7449, 7393)
 (7233, 7386, 7280)
 (7280, 7649, 7443)

In [None]:
# Reload the combined triad table from CSV.
all_triads_file = joinpath(path, "soc-wiki-elec-all-triads.csv")
df_all_triads = CSV.read(all_triads_file, DataFrame)

In [37]:
# identify triads that are built on the same three nodes

# Transitive triads first, cyclic triads after them: an index above length(triads)
# therefore refers to a cyclic triad.
all_triads = vcat(triads, cyclic_triads)

# Node ids of every triad in ascending order; triads on the same three nodes share
# this key regardless of the direction of their edges.
all_triads_ordered = [sort([p, o, x]) for (p, o, x) in all_triads]
all_triads_unique = unique(all_triads_ordered)

# create a dictionary with unique triads as keys and indices of their appearance as values
# The groups are collected in one pass over all triads instead of one full scan per
# unique key; indices within a group are ascending.
all_triads_dict = Dict{eltype(all_triads_ordered),Vector{Int}}()
sizehint!(all_triads_dict, length(all_triads_unique))
for (ind, key) in pairs(all_triads_ordered)
    push!(get!(() -> Int[], all_triads_dict, key), ind)
end
all_triads_dict



Dict{Vector{Int64}, Vector{Int64}} with 608350 entries:
  [1769, 2504, 3456] => [401752, 764921]
  [1542, 1628, 2193] => [370040, 625885]
  [1133, 2968, 3089] => [289925]
  [4632, 5189, 5760] => [310648]
  [457, 5776, 6913]  => [395368]
  [996, 1792, 8290]  => [624347]
  [2565, 2909, 5222] => [110895]
  [11, 1991, 2410]   => [687550]
  [2667, 5176, 5891] => [96199]
  [2145, 2760, 3615] => [549959]
  [3873, 5179, 5531] => [226922]
  [1210, 1744, 2654] => [367672]
  [1166, 1470, 2877] => [76235]
  [1608, 3352, 5288] => [208464]
  [1729, 2747, 3026] => [126602, 445214]
  [1029, 1319, 2625] => [671848]
  [2787, 2972, 3452] => [540996]
  [5472, 6458, 7073] => [741238]
  [1769, 2517, 4219] => [546185]
  ⋮                  => ⋮

In [38]:
using JLD2, FileIO

# Store the triad-group dictionary under the name "all_triads_dict" in a JLD2 file.
dict_file = joinpath(path, "soc-wiki-elec-all-triads-dict.jld2")
save(dict_file, "all_triads_dict", all_triads_dict)

In [39]:
# One row per group of triads sharing the same three nodes: the sorted node triple,
# the indices of the group's triads in all_triads, and the group size.
group_keys = collect(keys(all_triads_dict))
group_inds = collect(values(all_triads_dict))
df_all_triads_dict = DataFrame(triad = group_keys, inds = group_inds,
                               count = length.(group_inds))

Row,triad,inds,count
Unnamed: 0_level_1,Array…,Array…,Int64
1,"[1769, 2504, 3456]","[401752, 764921]",2
2,"[1542, 1628, 2193]","[370040, 625885]",2
3,"[1133, 2968, 3089]",[289925],1
4,"[4632, 5189, 5760]",[310648],1
5,"[457, 5776, 6913]",[395368],1
6,"[996, 1792, 8290]",[624347],1
7,"[2565, 2909, 5222]",[110895],1
8,"[11, 1991, 2410]",[687550],1
9,"[2667, 5176, 5891]",[96199],1
10,"[2145, 2760, 3615]",[549959],1


In [50]:
# add columns with the type of triad (transitive or cyclic) and inds in the source dataframe

length(triads), length(cyclic_triads), length(all_triads)

# Indices up to n_transitive point into triads (type 1), larger ones into
# cyclic_triads (type 2).
n_transitive = length(triads)
df_all_triads_dict[!, "triad_type"] = map(df_all_triads_dict.inds) do inds
    ifelse.(inds .> n_transitive, 2, 1)
end

# Position of every triad within its own list: cyclic indices are shifted down by
# the number of transitive triads, transitive indices are left unchanged.
df_all_triads_dict[!, "exact_inds"] =
    map(df_all_triads_dict.inds, df_all_triads_dict.triad_type) do inds, types
        inds .- n_transitive .* (types .- 1)
    end

first(df_all_triads_dict, 5)

Row,triad,inds,count,triad_type,exact_inds
Unnamed: 0_level_1,Array…,Array…,Int64,Array…,Array…
1,"[1769, 2504, 3456]","[401752, 764921]",2,"[1, 2]","[401752, 18406]"
2,"[1542, 1628, 2193]","[370040, 625885]",2,"[1, 1]","[370040, 625885]"
3,"[1133, 2968, 3089]",[289925],1,[1],[289925]
4,"[4632, 5189, 5760]",[310648],1,[1],[310648]
5,"[457, 5776, 6913]",[395368],1,[1],[395368]


In [51]:
# test: the first group shown above has exact_inds [401752, 18406] with types [1, 2],
# so cyclic_triads[18406] should consist of the nodes of that group's key
cyclic_triads[18406]

(1769, 2504, 3456)

In [50]:
# Print the key of the second triad group and list the triads stored under it.
# NOTE(review): df_triads_dict and triads_dict are not defined in the visible part of
# this notebook — presumably the transitive-only predecessors of df_all_triads_dict
# and all_triads_dict; confirm, or rerun with the current names.
println(df_triads_dict.triad[2])

triads[triads_dict[df_triads_dict.triad[2]]]

[1542, 1628, 2193]


2-element Vector{Tuple{Int64, Int64, Int64}}:
 (1628, 1542, 2193)
 (1542, 1628, 2193)

In [74]:
using PyCall

# Pick one triad group (row i of df_all_triads_dict) to work through by hand.
i = 1
row = df_all_triads_dict[i, :]
triad = row.triad
types = row.triad_type
inds = row.inds
exact_inds = row.exact_inds

# get triad list
# Type 1 entries are looked up in triads, type 2 entries in cyclic_triads, each at
# its position within that list.
sources = [triads, cyclic_triads]
type_sources = sources[types]
triad_list = map((triad_source, exact_ind) -> triad_source[exact_ind], type_sources, exact_inds)
# get edges from triad list
# Transitive triad (P, O, X): edges P -> O, O -> X and P -> X.
get_transitive_edges(triad) = [(triad[1], triad[2]), (triad[2], triad[3]), (triad[1], triad[3])]
# Cyclic triad (P, O, X): edges P -> O, O -> X and X -> P.
get_cyclic_edges(triad) = [(triad[1], triad[2]), (triad[2], triad[3]), (triad[3], triad[1])]

"""
    get_edges(triad, type)

Return the three directed edges of `triad` as a vector of `(from, to)` tuples.

`triad` is an indexable triple of node ids. `type == 1` selects the transitive
layout and `type == 2` the cyclic one; any other value throws an `ArgumentError`
instead of silently returning `nothing`.
"""
function get_edges(triad, type)
    if type == 1
        return get_transitive_edges(triad)
    elseif type == 2
        return get_cyclic_edges(triad)
    end
    throw(ArgumentError("unknown triad type $type (expected 1 = transitive or 2 = cyclic)"))
end
# function get_edges.(triad_list, types)
#     return [get_edges(triad, type) for (triad, type) in zip(triad_list, types)]
# end

# type_funs = [get_transitive_edges, get_cyclic_edges]
# Edges of every triad in the group (each triad paired with its own type),
# flattened into one list.
edges_list = get_edges.(triad_list, types)
edges_list = reduce(vcat, edges_list)
# NOTE(review): unique_edges is computed but not used below; the graph is built from
# edges_list, duplicates included.
unique_edges = unique(edges_list)

py"""
import networkx as nx
"""

# Directed graph on the edges of this group.
G = py"nx.DiGraph"([edges_list...])

# print(G.edges)

# Triad census of that graph; the labels come from the keys of the returned mapping.
triad_census = py"nx.triadic_census"(G)

triad_census_triads = collect(keys(triad_census))

# Mask of the census labels whose count is exactly 1.
tc_ind = collect(values(triad_census)) .== 1

triad_census_triads[tc_ind][1]

# get signed triad type
# Columns 4:6 of df_all_triads are po_sign, ox_sign and xpx_sign; the rows are turned
# into a vector with one 3-element sign vector per triad of the group.
sign_vals_s = values(df_all_triads[inds, 4:6])
sign_vals_s = Matrix(sign_vals_s)
sign_vals_s = mapslices(x->[x], sign_vals_s, dims=2)[:]

# Histogram over the 8 sign patterns of a triad.
n_arr = zeros(Int, 8)

for sign_vals in sign_vals_s
    connections_values = sign_vals .== 1
    # Slot = 4 * (1st sign positive) + 2 * (3rd sign positive) + (2nd sign positive) + 1.
    # NOTE(review): this encoding is applied to every row of the group, cyclic ones
    # included — confirm that this is acceptable for this exploratory cell.
    n_arr[connections_values[1]*4 + connections_values[3]*2 + connections_values[2]+1] += 1
end
n_arr
# n_arr

8-element Vector{Int64}:
 0
 0
 0
 0
 0
 0
 0
 2

In [104]:
using PyCall

# Work through one group of transitive triads (row i of df_triads_dict) by hand.
# NOTE(review): df_triads_dict is not defined in the visible part of this notebook —
# presumably the transitive-only counterpart of df_all_triads_dict; confirm.
i = 2
triad = df_triads_dict.triad[i]
inds = df_triads_dict.inds[i]

# get triad list
triad_list = triads[inds]

# get edges from triad list
# Each triad contributes its edges P -> O, O -> X and P -> X.
edges_list = [[(triad[1], triad[2]), (triad[2], triad[3]), (triad[1], triad[3])] for triad in triad_list]
edges_list = reduce(vcat, edges_list)
unique_edges = unique(edges_list)

py"""
import networkx as nx
"""

# Directed graph on the distinct edges of this group.
G = py"nx.DiGraph"([unique_edges...])

# print(G.edges)

# Triad census of that graph; the labels come from the keys of the returned mapping.
triad_census = py"nx.triadic_census"(G)

triad_census_triads = collect(keys(triad_census))

# Mask of the census labels whose count is exactly 1.
tc_ind = collect(values(triad_census)) .== 1

triad_census_triads[tc_ind]

# get signed triad type
# Columns 4:6 of df_triads are po_sign, ox_sign and px_sign; the rows are turned into
# a vector with one 3-element sign vector per triad of the group.
sign_vals_s = values(df_triads[inds, 4:6])
sign_vals_s = Matrix(sign_vals_s)
sign_vals_s = mapslices(x->[x], sign_vals_s, dims=2)[:]

# Histogram over the 8 sign patterns of a transitive triad.
n_arr = zeros(Int, 8)

for sign_vals in sign_vals_s
    connections_values = sign_vals .== 1
    # Slot = 4 * (po positive) + 2 * (px positive) + (ox positive) + 1.
    n_arr[connections_values[1]*4 + connections_values[3]*2 + connections_values[2]+1] += 1
end
n_arr

8-element Vector{Int64}:
 0
 0
 0
 0
 0
 0
 0
 2

In [107]:
# Check whether every triad of the example group is balanced, i.e. whether the
# product of its three signs equals 1.
# NOTE(review): sign_vals is not assigned at top level anywhere in the visible part of
# this notebook (it only appears as a loop variable) — presumably it was a table of
# sign columns defined in a cell that no longer exists; confirm before rerunning.
values(sign_vals)
mm = Matrix(sign_vals)
# one 3-element sign vector per row of mm
sign_vals_s = mapslices(x->[x], mm, dims=2)[:]

all(prod.(sign_vals_s) .== 1)

true

In [None]:
using PyCall
# Balance / hierarchy statistics per triad census label, transitive triads only.
# NOTE(review): the loop iterates df_triads_dict, which is not defined in the visible
# part of this notebook — presumably the transitive-only counterpart of
# df_all_triads_dict; confirm.
py"""
import networkx as nx
"""

# Census of an empty graph; only the keys (the census labels) are used from it.
G = py"nx.DiGraph"()

triad_census = py"nx.triadic_census"(G)
triad_census_triads = collect(keys(triad_census))

# signed triad types:
# per census label: histogram over the 8 sign patterns, number of groups whose triads
# are all balanced, number of groups counted as hierarchical, number of groups
n_arr_dict = Dict( triad_census_triads .=> [zeros(Int, 8) for _ in 1:length(triad_census_triads)])
n_bal_dict = Dict( triad_census_triads .=> [0 for _ in 1:length(triad_census_triads)])
n_hier_dict = Dict( triad_census_triads .=> [0 for _ in 1:length(triad_census_triads)])
n_tot_dict = Dict( triad_census_triads .=> [0 for _ in 1:length(triad_census_triads)])

counter = 0
# print a progress message after this many groups
inform_after = 100000
for (triad, inds, count) in eachrow(df_triads_dict)
    # all triads of this group
    triad_list = triads[inds]

    # get edges list from triad list
    edges_list = [[(triad[1], triad[2]), (triad[2], triad[3]), (triad[1], triad[3])] for triad in triad_list]
    edges_list = reduce(vcat, edges_list)
    unique_edges = unique(edges_list)

    # call Python from julia, use networkx to get triad census on subgraphs
    G = py"nx.DiGraph"([unique_edges...])
    triad_census = py"nx.triadic_census"(G)
    triad_census_triads = collect(keys(triad_census))
    # the label with count 1 is taken as the census type of this group
    tc_ind = collect(values(triad_census)) .== 1
    triad_type = triad_census_triads[tc_ind][1]

    # get signed triad type
    # columns 4:6 of df_triads are po_sign, ox_sign, px_sign; one sign vector per triad
    sign_vals_s = values(df_triads[inds, 4:6])
    sign_vals_s = Matrix(sign_vals_s)
    sign_vals_s = mapslices(x->[x], sign_vals_s, dims=2)[:]

    # n_arr = zeros(Int, 8)

    hier = true
    for sign_vals in sign_vals_s
        connections_values = sign_vals .== 1
        # slot = 4 * (po positive) + 2 * (px positive) + (ox positive) + 1
        signed_triad_type = connections_values[1]*4 + connections_values[3]*2 + connections_values[2]+1
        n_arr_dict[triad_type][signed_triad_type] += 1

        # a single triad with sign pattern 3 or 6 makes the whole group non-hierarchical
        if signed_triad_type == 3 || signed_triad_type == 6
            hier = false
        end
    end
    n_tot_dict[triad_type] += 1
    # the group counts as balanced only if every one of its triads has sign product 1
    n_bal_dict[triad_type] += all(prod.(sign_vals_s) .== 1)
    n_hier_dict[triad_type] += hier

    # n_arr

    # println(triad_list, inds)
    counter += 1
    # if counter > 5000
    #     break
    # end
    if counter % inform_after == 0
        # print time
        # print()
        println("Processed $counter triads")
    end
end

Processed 100000 triads
Processed 200000 triads
Processed 300000 triads
Processed 400000 triads
Processed 500000 triads
Processed 600000 triads


In [126]:
# create dataframe from dicts

# Every counter is looked up by triad label, so all columns are aligned with the
# `triad` column by construction instead of relying on the separate dictionaries
# happening to iterate in the same order.
triad_labels = collect(keys(n_arr_dict))
df_n_arr = DataFrame(triad = triad_labels,
                     n_arr = [n_arr_dict[label] for label in triad_labels],
                     n_bal = [n_bal_dict[label] for label in triad_labels],
                     n_hier = [n_hier_dict[label] for label in triad_labels],
                     n_tot = [n_tot_dict[label] for label in triad_labels])

Row,triad,n_arr,n_bal,n_hier,n_tot
Unnamed: 0_level_1,String,Array…,Int64,Int64,Int64
1,030C,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0
2,111U,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0
3,120D,"[656, 2718, 804, 5957, 1529, 5232, 3536, 70680]",35682,39837,45556
4,120C,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0
5,201,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0
6,111D,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0
7,210,"[148, 598, 344, 2666, 782, 2092, 2452, 36743]",11150,13025,15275
8,003,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0
9,012,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0
10,021D,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0


In [128]:
# Share of balanced and of hierarchical groups per census label. Labels with
# n_tot == 0 give 0/0, i.e. NaN.
df_n_arr[!, :per_bal] = df_n_arr.n_bal ./ df_n_arr.n_tot
df_n_arr[!, :per_hier] = df_n_arr.n_hier ./ df_n_arr.n_tot
df_n_arr

Row,triad,n_arr,n_bal,n_hier,n_tot,per_bal,per_hier
Unnamed: 0_level_1,String,Array…,Int64,Int64,Int64,Float64,Float64
1,030C,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,,
2,111U,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,,
3,120D,"[656, 2718, 804, 5957, 1529, 5232, 3536, 70680]",35682,39837,45556,0.783256,0.874462
4,120C,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,,
5,201,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,,
6,111D,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,,
7,210,"[148, 598, 344, 2666, 782, 2092, 2452, 36743]",11150,13025,15275,0.729951,0.8527
8,003,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,,
9,012,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,,
10,021D,"[0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,,


In [None]:
# read csv file to dataframe
# NOTE(review): this cell is Python (pandas / pathlib syntax), not Julia, and `pd`,
# `Path`, `dataset` and `filename` are not defined in the visible part of this
# notebook — presumably pasted from a companion Python notebook. The same cell
# appears three times in a row; confirm whether it should be kept here at all.

df = pd.read_csv(Path('../' + dataset + '/' + filename + '-bal_hier_all-triads-types.csv'))
df

Unnamed: 0,triad,n_arr,n_bal,n_hier,n_tot,n_trans_bal,n_trans_hier,n_trans_tot,n_cyclic_bal,n_cyclic_hier,...,per_trans_bal,per_trans_hier,per_cyclic_bal,per_cyclic_hier,per_trans_bal_sep,per_cyclic_bal_sep,per_trans_hier_sep,per_cyclic_hier_sep,per_bal_sep,per_hier_sep
0,030C,"[0, 0, 0, 0, 0, 0, 0, 0, 88, 735, 1384, 1176]",1911,2119,3383,0,0,0,1911,2119,...,0.0,0.0,0.564883,0.626367,,0.564883,,0.626367,0.564883,0.626367
1,111U,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,...,,,,,,,,,,
2,120D,"[1186, 19320, 1399, 3783, 1633, 3549, 2051, 93...",57391,58483,63307,57391,58483,126614,0,0,...,0.906551,0.9238,0.0,0.0,0.916526,,0.960921,,0.916526,0.960921
3,120C,"[310, 1344, 1156, 1561, 500, 413, 3381, 14313,...",16548,7198,22978,17313,21409,22978,16852,8649,...,0.75346,0.931717,0.733397,0.376404,0.75346,0.733397,0.931717,0.376404,0.743428,0.65406
4,201,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,...,,,,,,,,,,
5,111D,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,...,,,,,,,,,,
6,210,"[1190, 5809, 2967, 4945, 2142, 4847, 6584, 259...",88390,2374,95961,88390,88353,287883,89025,9667,...,0.921103,0.920718,0.927721,0.100739,0.938982,0.927721,0.972857,0.100739,0.936167,0.754827
7,003,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,...,,,,,,,,,,
8,012,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,...,,,,,,,,,,
9,021D,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,...,,,,,,,,,,


In [None]:
# read csv file to dataframe
# NOTE(review): this cell is Python (pandas / pathlib syntax), not Julia, and `pd`,
# `Path`, `dataset` and `filename` are not defined in the visible part of this
# notebook — presumably pasted from a companion Python notebook. The same cell
# appears three times in a row; confirm whether it should be kept here at all.

df = pd.read_csv(Path('../' + dataset + '/' + filename + '-bal_hier_all-triads-types.csv'))
df

Unnamed: 0,triad,n_arr,n_bal,n_hier,n_tot,n_trans_bal,n_trans_hier,n_trans_tot,n_cyclic_bal,n_cyclic_hier,...,per_trans_bal,per_trans_hier,per_cyclic_bal,per_cyclic_hier,per_trans_bal_sep,per_cyclic_bal_sep,per_trans_hier_sep,per_cyclic_hier_sep,per_bal_sep,per_hier_sep
0,030C,"[0, 0, 0, 0, 0, 0, 0, 0, 88, 735, 1384, 1176]",1911,2119,3383,0,0,0,1911,2119,...,0.0,0.0,0.564883,0.626367,,0.564883,,0.626367,0.564883,0.626367
1,111U,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,...,,,,,,,,,,
2,120D,"[1186, 19320, 1399, 3783, 1633, 3549, 2051, 93...",57391,58483,63307,57391,58483,126614,0,0,...,0.906551,0.9238,0.0,0.0,0.916526,,0.960921,,0.916526,0.960921
3,120C,"[310, 1344, 1156, 1561, 500, 413, 3381, 14313,...",16548,7198,22978,17313,21409,22978,16852,8649,...,0.75346,0.931717,0.733397,0.376404,0.75346,0.733397,0.931717,0.376404,0.743428,0.65406
4,201,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,...,,,,,,,,,,
5,111D,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,...,,,,,,,,,,
6,210,"[1190, 5809, 2967, 4945, 2142, 4847, 6584, 259...",88390,2374,95961,88390,88353,287883,89025,9667,...,0.921103,0.920718,0.927721,0.100739,0.938982,0.927721,0.972857,0.100739,0.936167,0.754827
7,003,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,...,,,,,,,,,,
8,012,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,...,,,,,,,,,,
9,021D,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,...,,,,,,,,,,


In [None]:
# read csv file to dataframe
# NOTE(review): this cell is Python (pandas / pathlib syntax), not Julia, and `pd`,
# `Path`, `dataset` and `filename` are not defined in the visible part of this
# notebook — presumably pasted from a companion Python notebook. The same cell
# appears three times in a row; confirm whether it should be kept here at all.

df = pd.read_csv(Path('../' + dataset + '/' + filename + '-bal_hier_all-triads-types.csv'))
df

Unnamed: 0,triad,n_arr,n_bal,n_hier,n_tot,n_trans_bal,n_trans_hier,n_trans_tot,n_cyclic_bal,n_cyclic_hier,...,per_trans_bal,per_trans_hier,per_cyclic_bal,per_cyclic_hier,per_trans_bal_sep,per_cyclic_bal_sep,per_trans_hier_sep,per_cyclic_hier_sep,per_bal_sep,per_hier_sep
0,030C,"[0, 0, 0, 0, 0, 0, 0, 0, 88, 735, 1384, 1176]",1911,2119,3383,0,0,0,1911,2119,...,0.0,0.0,0.564883,0.626367,,0.564883,,0.626367,0.564883,0.626367
1,111U,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,...,,,,,,,,,,
2,120D,"[1186, 19320, 1399, 3783, 1633, 3549, 2051, 93...",57391,58483,63307,57391,58483,126614,0,0,...,0.906551,0.9238,0.0,0.0,0.916526,,0.960921,,0.916526,0.960921
3,120C,"[310, 1344, 1156, 1561, 500, 413, 3381, 14313,...",16548,7198,22978,17313,21409,22978,16852,8649,...,0.75346,0.931717,0.733397,0.376404,0.75346,0.733397,0.931717,0.376404,0.743428,0.65406
4,201,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,...,,,,,,,,,,
5,111D,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,...,,,,,,,,,,
6,210,"[1190, 5809, 2967, 4945, 2142, 4847, 6584, 259...",88390,2374,95961,88390,88353,287883,89025,9667,...,0.921103,0.920718,0.927721,0.100739,0.938982,0.927721,0.972857,0.100739,0.936167,0.754827
7,003,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,...,,,,,,,,,,
8,012,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,...,,,,,,,,,,
9,021D,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,...,,,,,,,,,,


In [11]:
# Save df_n_arr twice in each format: once in the dataset directory and once in the
# current working directory.
csv_name = "soc-wiki-elec-bal_hier_triads-types.csv"
jld_name = "soc-wiki-elec-bal_hier_triads-types.jld2"

# CSV copies
CSV.write(joinpath(path, csv_name), df_n_arr)
CSV.write(csv_name, df_n_arr)

# JLD2 copies, stored under the name "df_n_arr"
save(joinpath(path, jld_name), "df_n_arr", df_n_arr)
save(jld_name, "df_n_arr", df_n_arr)

UndefVarError: UndefVarError: `df_n_arr` not defined

### Balance and hierarchy analysis including cyclic triads

In [76]:
# Transitive triad (P, O, X): edges P -> O, O -> X and P -> X.
get_transitive_edges(triad) = [(triad[1], triad[2]), (triad[2], triad[3]), (triad[1], triad[3])]
# Cyclic triad (P, O, X): edges P -> O, O -> X and X -> P.
get_cyclic_edges(triad) = [(triad[1], triad[2]), (triad[2], triad[3]), (triad[3], triad[1])]

"""
    get_edges(triad, type)

Return the three directed edges of `triad` as a vector of `(from, to)` tuples.

`triad` is an indexable triple of node ids. `type == 1` selects the transitive
layout and `type == 2` the cyclic one; any other value throws an `ArgumentError`
instead of silently returning `nothing`.
"""
function get_edges(triad, type)
    if type == 1
        return get_transitive_edges(triad)
    elseif type == 2
        return get_cyclic_edges(triad)
    end
    throw(ArgumentError("unknown triad type $type (expected 1 = transitive or 2 = cyclic)"))
end

get_edges (generic function with 1 method)

In [96]:
using PyCall

# Pick one triad group (row i of df_all_triads_dict) to work through by hand.
i = 1
row = df_all_triads_dict[i, :]
triad = row.triad
types = row.triad_type
inds = row.inds
exact_inds = row.exact_inds

# get triad list
# Type 1 entries are looked up in triads, type 2 entries in cyclic_triads, each at
# its position within that list.
sources = [triads, cyclic_triads]
type_sources = sources[types]
triad_list = map((triad_source, exact_ind) -> triad_source[exact_ind], type_sources, exact_inds)
# get edges from triad list
# Transitive triad (P, O, X): edges P -> O, O -> X and P -> X.
get_transitive_edges(triad) = [(triad[1], triad[2]), (triad[2], triad[3]), (triad[1], triad[3])]
# Cyclic triad (P, O, X): edges P -> O, O -> X and X -> P.
get_cyclic_edges(triad) = [(triad[1], triad[2]), (triad[2], triad[3]), (triad[3], triad[1])]

"""
    get_edges(triad, type)

Return the three directed edges of `triad` as a vector of `(from, to)` tuples.

`triad` is an indexable triple of node ids. `type == 1` selects the transitive
layout and `type == 2` the cyclic one; any other value throws an `ArgumentError`
instead of silently returning `nothing`.
"""
function get_edges(triad, type)
    if type == 1
        return get_transitive_edges(triad)
    elseif type == 2
        return get_cyclic_edges(triad)
    end
    throw(ArgumentError("unknown triad type $type (expected 1 = transitive or 2 = cyclic)"))
end
# function get_edges.(triad_list, types)
#     return [get_edges(triad, type) for (triad, type) in zip(triad_list, types)]
# end

# type_funs = [get_transitive_edges, get_cyclic_edges]
# Edges of every triad in the group (each triad paired with its own type),
# flattened into one list.
edges_list = get_edges.(triad_list, types)
edges_list = reduce(vcat, edges_list)
# NOTE(review): unique_edges is computed but not used below; the graph is built from
# edges_list, duplicates included.
unique_edges = unique(edges_list)

py"""
import networkx as nx
"""

# Directed graph on the edges of this group.
G = py"nx.DiGraph"([edges_list...])

# print(G.edges)

# Triad census of that graph; the labels come from the keys of the returned mapping.
triad_census = py"nx.triadic_census"(G)

triad_census_triads = collect(keys(triad_census))

# Mask of the census labels whose count is exactly 1.
tc_ind = collect(values(triad_census)) .== 1

println(triad_census_triads[tc_ind][1])

# get signed triad type
# Columns 4:6 of df_all_triads are po_sign, ox_sign and xpx_sign; the rows are turned
# into a vector with one 3-element sign vector per triad of the group.
sign_vals_s = values(df_all_triads[inds, 4:6])
sign_vals_s = Matrix(sign_vals_s)
sign_vals_s = mapslices(x->[x], sign_vals_s, dims=2)[:]

# Histogram over the 8 sign patterns of a triad.
n_arr = zeros(Int, 8)

for sign_vals in sign_vals_s
    connections_values = sign_vals .== 1
    # Slot = 4 * (1st sign positive) + 2 * (3rd sign positive) + (2nd sign positive) + 1.
    # NOTE(review): this encoding is applied to every row of the group, cyclic ones
    # included — confirm that this is acceptable for this exploratory cell.
    n_arr[connections_values[1]*4 + connections_values[3]*2 + connections_values[2]+1] += 1
end
n_arr
# n_arr

8-element Vector{Int64}:
 0
 0
 0
 0
 0
 0
 0
 2

In [97]:
# First census label whose count is 1 for the example group built in the previous cell.
triad_census_triads[tc_ind][1]

"120C"

In [111]:
using PyCall
py"""
import networkx as nx
"""

# Census of an empty graph; only its keys (the census labels) are used to set up
# the counters.
G = py"nx.DiGraph"()

triad_census = py"nx.triadic_census"(G)
triad_census_triads = collect(keys(triad_census))
n_census_labels = length(triad_census_triads)

# A fresh dictionary mapping every census label to a zero counter.
zero_counts() = Dict(triad_census_triads .=> fill(0, n_census_labels))

# signed triad types:
# Slots 1-8 hold the transitive sign patterns, indexed by
# 4 * (po positive) + 2 * (px positive) + (ox positive) + 1.
# Slots 9-12 hold the cyclic triads, indexed by 9 + number of positive links:
# 9 - no positive link, 10 - one, 11 - two, 12 - all three positive.
n_arr_dict = Dict(triad_census_triads .=> [zeros(Int, 12) for _ in 1:n_census_labels])

# Per census label: groups with all triads balanced, hierarchical groups, all groups.
n_bal_dict = zero_counts()
n_hier_dict = zero_counts()
n_tot_dict = zero_counts()

# The same, restricted to the transitive triads of each group
# (the total counts individual triads).
n_trans_bal_dict = zero_counts()
n_trans_hier_dict = zero_counts()
n_trans_tot_dict = zero_counts()

# The same, restricted to the cyclic triads of each group
# (the total counts individual triads).
n_cyclic_bal_dict = zero_counts()
n_cyclic_hier_dict = zero_counts()
n_cyclic_tot_dict = zero_counts()

# Triad type 1 indexes into triads, type 2 into cyclic_triads.
sources = [triads, cyclic_triads]

counter = 0
# print a progress message after this many groups
inform_after = 10000
# Main pass: one iteration per group of triads built on the same three nodes.
# For each group the census label of its subgraph is determined and the balance /
# hierarchy counters for that label are updated.
for (triad, inds, count, trans_or_cyclics, exact_inds) in eachrow(df_all_triads_dict)
    # look every triad of the group up in triads (type 1) or cyclic_triads (type 2)
    type_sources = sources[trans_or_cyclics]
    triad_list = [triad_source[exact_ind] for (triad_source, exact_ind) in zip(type_sources, exact_inds)]

    # get edges list from triad list
    edges_list = get_edges.(triad_list, trans_or_cyclics)
    edges_list = reduce(vcat, edges_list)
    unique_edges = unique(edges_list)

    # call Python from julia, use networkx to get triad census on subgraphs
    G = py"nx.DiGraph"([unique_edges...])
    triad_census = py"nx.triadic_census"(G)
    triad_census_triads = collect(keys(triad_census))
    # the label with count 1 is taken as the census type of this group
    tc_ind = collect(values(triad_census)) .== 1
    triad_type = triad_census_triads[tc_ind][1]

    # get signed triad type
    # columns 4:6 of df_all_triads are po_sign, ox_sign, xpx_sign; one sign vector per triad
    sign_vals_s = values(df_all_triads[inds, 4:6])
    sign_vals_s = Matrix(sign_vals_s)
    sign_vals_s = mapslices(x->[x], sign_vals_s, dims=2)[:]

    # group-level flags; a single offending triad switches a flag off
    hier = true
    hier_trans = true
    hier_cyclic = true
    bal_trans = true
    bal_cyclic = true
    # indicators that trans/cyclic triads were considered at all
    some_trans = false
    some_cyclic = false
    for (sign_vals, trans_or_cyclic) in zip(sign_vals_s, trans_or_cyclics)
        if trans_or_cyclic == 1
            some_trans = true
            n_trans_tot_dict[triad_type] += 1
            # unbalanced when the product of the three signs is not 1
            if prod(sign_vals) != 1
                bal_trans = false
            end

            connections_values = sign_vals .== 1
            # slots 1-8: 4 * (po positive) + 2 * (px positive) + (ox positive) + 1
            signed_triad_type = connections_values[1]*4 + connections_values[3]*2 + connections_values[2]+1
            
        elseif trans_or_cyclic == 2
            some_cyclic = true

            n_cyclic_tot_dict[triad_type] += 1
            # unbalanced when the product of the three signs is not 1
            if prod(sign_vals) != 1
                bal_cyclic = false
            end

            # slots 9-12: 9 + number of positive links in the cycle
            num_pos_links = sum(sign_vals .== 1)
            signed_triad_type = 9 + num_pos_links
        else
            println(trans_or_cyclics)
            error("Wrong type")
        end
        n_arr_dict[triad_type][signed_triad_type] += 1

        # patterns 3 and 6 (transitive) and 9 and 12 (cyclic) make the group non-hierarchical
        if signed_triad_type in [3, 6, 9, 12]
            hier = false
            if signed_triad_type in [3, 6]
                hier_trans = false
            else
                hier_cyclic = false
            end
        end
    end
    # group-level counters for this census label
    n_tot_dict[triad_type] += 1
    n_bal_dict[triad_type] += all(prod.(sign_vals_s) .== 1)
    n_hier_dict[triad_type] += hier

    # multiplying by some_* only counts groups that contain a triad of that kind
    n_trans_bal_dict[triad_type] += bal_trans * some_trans
    n_cyclic_bal_dict[triad_type] += bal_cyclic * some_cyclic

    n_trans_hier_dict[triad_type] += hier_trans * some_trans
    n_cyclic_hier_dict[triad_type] += hier_cyclic * some_cyclic

    # n_trans_tot_dict[triad_type] += some_trans
    # n_cyclic_tot_dict[triad_type] += some_cyclic

    # n_arr

    # println(triad_list, inds)
    counter += 1
    # if counter > 5000
    #     break
    # end
    if counter % inform_after == 0
        # print time
        # print()
        # break
        println("Processed $counter triads")
        # push the progress message out immediately
        flush(stdout)
    # elseif counter > 23000
    #     println("Processed $counter triads")
    # else
        # break
    end
end

In [112]:
# create dataframe from dicts

# Every counter is looked up by triad label, so all columns are aligned with the
# `triad` column by construction instead of relying on the separate dictionaries
# happening to iterate in the same order.
triad_labels = collect(keys(n_arr_dict))
# values of one counter dictionary, in the order of triad_labels
by_label(counts) = [counts[label] for label in triad_labels]

df_cyclic_n_arr = DataFrame(triad = triad_labels,
                            n_arr = by_label(n_arr_dict),
                            n_bal = by_label(n_bal_dict),
                            n_hier = by_label(n_hier_dict),
                            n_tot = by_label(n_tot_dict),
                            n_trans_bal = by_label(n_trans_bal_dict),
                            n_trans_hier = by_label(n_trans_hier_dict),
                            n_trans_tot = by_label(n_trans_tot_dict),
                            n_cyclic_bal = by_label(n_cyclic_bal_dict),
                            n_cyclic_hier = by_label(n_cyclic_hier_dict),
                            n_cyclic_tot = by_label(n_cyclic_tot_dict))

Row,triad,n_arr,n_bal,n_hier,n_tot,n_trans_bal,n_trans_hier,n_trans_tot,n_cyclic_bal,n_cyclic_hier,n_cyclic_tot
Unnamed: 0_level_1,String,Array…,Int64,Int64,Int64,Int64,Int64,Int64,Int64,Int64,Int64
1,030C,"[0, 0, 0, 0, 0, 0, 0, 0, 20, 426, 2309, 4040]",4466,2735,6795,0,0,0,4466,2735,6795
2,111U,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,0
3,120D,"[656, 2718, 804, 5957, 1529, 5232, 3536, 70680, 0, 0, 0, 0]",35682,39837,45556,35682,39837,91112,0,0,0
4,120C,"[120, 318, 252, 1857, 346, 629, 1601, 12544, 62, 812, 4918, 11875]",11919,5310,17667,13460,16786,17667,12687,5730,17667
5,201,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,0
6,111D,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,0
7,210,"[148, 598, 344, 2666, 782, 2092, 2452, 36743, 31, 413, 2943, 11888]",11150,2018,15275,11150,13025,45825,12301,3356,15275
8,003,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,0
9,012,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,0
10,021D,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,0


In [9]:
# Derived fraction columns for each triad type.
# Rows whose counters are all zero produce 0/0, i.e. NaN.

# Whole-triad counters divided by the number of triads of that type (`n_tot`).
for (frac_col, count_col) in (
    :per_bal => :n_bal,
    :per_hier => :n_hier,
    :per_trans_bal => :n_trans_bal,
    :per_trans_hier => :n_trans_hier,
    :per_cyclic_bal => :n_cyclic_bal,
    :per_cyclic_hier => :n_cyclic_hier,
)
    df_cyclic_n_arr[!, frac_col] =
        df_cyclic_n_arr[!, count_col] ./ df_cyclic_n_arr[!, :n_tot]
end

# "_sep" columns: sums of selected entries of the per-signed-type counter array
# `n_arr`, divided by the transitive / cyclic totals (or their sum).
df_cyclic_n_arr[!, :per_trans_bal_sep] =
    map(df_cyclic_n_arr.n_arr, df_cyclic_n_arr.n_trans_tot) do counts, total
        sum(counts[[2, 3, 5, 8]]) / total
    end

df_cyclic_n_arr[!, :per_cyclic_bal_sep] =
    map(df_cyclic_n_arr.n_arr, df_cyclic_n_arr.n_cyclic_tot) do counts, total
        sum(counts[[10, 12]]) / total
    end

# Complement of the share of signed types 3 and 6.
df_cyclic_n_arr[!, :per_trans_hier_sep] =
    map(df_cyclic_n_arr.n_arr, df_cyclic_n_arr.n_trans_tot) do counts, total
        1 - sum(counts[[3, 6]]) / total
    end

df_cyclic_n_arr[!, :per_cyclic_hier_sep] =
    map(df_cyclic_n_arr.n_arr, df_cyclic_n_arr.n_cyclic_tot) do counts, total
        sum(counts[[10, 11]]) / total
    end

# Combined variants use the sum of the transitive and cyclic totals as denominator.
df_cyclic_n_arr[!, :per_bal_sep] = map(
    df_cyclic_n_arr.n_arr,
    df_cyclic_n_arr.n_trans_tot .+ df_cyclic_n_arr.n_cyclic_tot,
) do counts, total
    sum(counts[[2, 3, 5, 8, 10, 12]]) / total
end

# Complement of the share of signed types 3, 6, 9 and 12.
df_cyclic_n_arr[!, :per_hier_sep] = map(
    df_cyclic_n_arr.n_arr,
    df_cyclic_n_arr.n_trans_tot .+ df_cyclic_n_arr.n_cyclic_tot,
) do counts, total
    1 - sum(counts[[3, 6, 9, 12]]) / total
end

df_cyclic_n_arr

Row,triad,n_arr,n_bal,n_hier,n_tot,n_trans_bal,n_trans_hier,n_trans_tot,n_cyclic_bal,n_cyclic_hier,n_cyclic_tot,per_bal,per_hier,per_trans_bal,per_trans_hier,per_cyclic_bal,per_cyclic_hier,per_trans_bal_sep,per_cyclic_bal_sep,per_trans_hier_sep,per_cyclic_hier_sep,per_bal_sep,per_hier_sep
Unnamed: 0_level_1,String,Array…,Int64,Int64,Int64,Int64,Int64,Int64,Int64,Int64,Int64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64
1,030C,"[0, 0, 0, 0, 0, 0, 0, 0, 20, 426, 2309, 4040]",4466,2735,6795,0,0,0,4466,2735,6795,0.657248,0.402502,0.0,0.0,0.657248,0.402502,,0.657248,,0.402502,0.657248,0.402502
2,111U,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,0,,,,,,,,,,,,
3,120D,"[656, 2718, 804, 5957, 1529, 5232, 3536, 70680, 0, 0, 0, 0]",35682,39837,45556,35682,39837,91112,0,0,0,0.783256,0.874462,0.783256,0.874462,0.0,0.0,0.831186,,0.933752,,0.831186,0.933752
4,120C,"[120, 318, 252, 1857, 346, 629, 1601, 12544, 62, 812, 4918, 11875]",11919,5310,17667,13460,16786,17667,12687,5730,17667,0.674648,0.30056,0.761872,0.950133,0.718119,0.324334,0.761872,0.718119,0.950133,0.324334,0.739995,0.637233
5,201,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,0,,,,,,,,,,,,
6,111D,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,0,,,,,,,,,,,,
7,210,"[148, 598, 344, 2666, 782, 2092, 2452, 36743, 31, 413, 2943, 11888]",11150,2018,15275,11150,13025,45825,12301,3356,15275,0.729951,0.132111,0.729951,0.8527,0.805303,0.219705,0.839433,0.805303,0.946841,0.219705,0.8309,0.765057
8,003,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,0,,,,,,,,,,,,
9,012,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,0,,,,,,,,,,,,
10,021D,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,0,,,,,,,,,,,,


In [10]:
# Show only the triad label together with the derived fraction columns,
# i.e. every column whose name starts with "per".

cols = filter(startswith("per"), names(df_cyclic_n_arr))

df_cyclic_n_arr[!, vcat("triad", cols)]

Row,triad,per_bal,per_hier,per_trans_bal,per_trans_hier,per_cyclic_bal,per_cyclic_hier,per_trans_bal_sep,per_cyclic_bal_sep,per_trans_hier_sep,per_cyclic_hier_sep,per_bal_sep,per_hier_sep
Unnamed: 0_level_1,String,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64
1,030C,0.657248,0.402502,0.0,0.0,0.657248,0.402502,,0.657248,,0.402502,0.657248,0.402502
2,111U,,,,,,,,,,,,
3,120D,0.783256,0.874462,0.783256,0.874462,0.0,0.0,0.831186,,0.933752,,0.831186,0.933752
4,120C,0.674648,0.30056,0.761872,0.950133,0.718119,0.324334,0.761872,0.718119,0.950133,0.324334,0.739995,0.637233
5,201,,,,,,,,,,,,
6,111D,,,,,,,,,,,,
7,210,0.729951,0.132111,0.729951,0.8527,0.805303,0.219705,0.839433,0.805303,0.946841,0.219705,0.8309,0.765057
8,003,,,,,,,,,,,,
9,012,,,,,,,,,,,,
10,021D,,,,,,,,,,,,


In [None]:
# read csv file to dataframe
#
# NOTE(review): this cell is Python/pandas syntax (`pd.read_csv`, `Path`,
# single-quoted strings), unlike the Julia cells around it, and its execution
# count is empty (`In [None]`). `pd`, `Path`, `dataset` and `filename` are not
# defined in the visible part of the file — TODO confirm where they come from,
# or whether this cell should be ported to `CSV.read` / removed.
# It also rebinds the name `df`.

df = pd.read_csv(Path('../' + dataset + '/' + filename + '-bal_hier_all-triads-types.csv'))
df

Unnamed: 0,triad,n_arr,n_bal,n_hier,n_tot,n_trans_bal,n_trans_hier,n_trans_tot,n_cyclic_bal,n_cyclic_hier,...,per_trans_bal,per_trans_hier,per_cyclic_bal,per_cyclic_hier,per_trans_bal_sep,per_cyclic_bal_sep,per_trans_hier_sep,per_cyclic_hier_sep,per_bal_sep,per_hier_sep
0,030C,"[0, 0, 0, 0, 0, 0, 0, 0, 88, 735, 1384, 1176]",1911,2119,3383,0,0,0,1911,2119,...,0.0,0.0,0.564883,0.626367,,0.564883,,0.626367,0.564883,0.626367
1,111U,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,...,,,,,,,,,,
2,120D,"[1186, 19320, 1399, 3783, 1633, 3549, 2051, 93...",57391,58483,63307,57391,58483,126614,0,0,...,0.906551,0.9238,0.0,0.0,0.916526,,0.960921,,0.916526,0.960921
3,120C,"[310, 1344, 1156, 1561, 500, 413, 3381, 14313,...",16548,7198,22978,17313,21409,22978,16852,8649,...,0.75346,0.931717,0.733397,0.376404,0.75346,0.733397,0.931717,0.376404,0.743428,0.65406
4,201,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,...,,,,,,,,,,
5,111D,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,...,,,,,,,,,,
6,210,"[1190, 5809, 2967, 4945, 2142, 4847, 6584, 259...",88390,2374,95961,88390,88353,287883,89025,9667,...,0.921103,0.920718,0.927721,0.100739,0.938982,0.927721,0.972857,0.100739,0.936167,0.754827
7,003,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,...,,,,,,,,,,
8,012,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,...,,,,,,,,,,
9,021D,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,...,,,,,,,,,,


In [12]:
# Persist the summary table as CSV and as JLD2, once inside the dataset directory
# (`path`) and once in the current working directory.
#
# `save` is made available by JLD2; load it here so the cell does not depend on a
# later cell having been executed first.
using JLD2

for target in (
    joinpath(path, "soc-wiki-elec-bal_hier_all-triads-types"),
    "soc-wiki-elec-bal_hier_all-triads-types",
)
    CSV.write(target * ".csv", df_cyclic_n_arr)
    # stored under the key "df_cyclic_n_arr"
    save(target * ".jld2", "df_cyclic_n_arr", df_cyclic_n_arr)
end

In [8]:
using JLD2

# Restore the saved summary table: read only the "df_cyclic_n_arr" entry from the
# JLD2 file in the dataset directory.
df_cyclic_n_arr = JLD2.load(
    joinpath(path, "soc-wiki-elec-bal_hier_all-triads-types.jld2"),
    "df_cyclic_n_arr",
)

Row,triad,n_arr,n_bal,n_hier,n_tot,n_trans_bal,n_trans_hier,n_trans_tot,n_cyclic_bal,n_cyclic_hier,n_cyclic_tot
Unnamed: 0_level_1,String,Array…,Int64,Int64,Int64,Int64,Int64,Int64,Int64,Int64,Int64
1,030C,"[0, 0, 0, 0, 0, 0, 0, 0, 20, 426, 2309, 4040]",4466,2735,6795,0,0,0,4466,2735,6795
2,111U,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,0
3,120D,"[656, 2718, 804, 5957, 1529, 5232, 3536, 70680, 0, 0, 0, 0]",35682,39837,45556,35682,39837,91112,0,0,0
4,120C,"[120, 318, 252, 1857, 346, 629, 1601, 12544, 62, 812, 4918, 11875]",11919,5310,17667,13460,16786,17667,12687,5730,17667
5,201,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,0
6,111D,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,0
7,210,"[148, 598, 344, 2666, 782, 2092, 2452, 36743, 31, 413, 2943, 11888]",11150,2018,15275,11150,13025,45825,12301,3356,15275
8,003,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,0
9,012,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,0
10,021D,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,0,0,0,0,0
