In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import networkx as nx

from pathlib import Path

In [6]:
# Directory holding the Slashdot data set, relative to the working directory.
path = Path('../../../data/slashdot')

# Tab-separated edge list. The first three lines are skipped and the fourth is
# consumed as the header row (header=0); its labels are replaced by `names`.
df = pd.read_csv(
    path / 'soc-sign-Slashdot090221.txt',
    sep='\t',
    skiprows=3,
    header=0,
    names=['source', 'target', 'sign'],
)

df.head()

Unnamed: 0,source,target,sign
0,0,1,1
1,0,2,1
2,0,3,1
3,0,4,1
4,0,5,1


In [8]:
# Share of edges whose sign equals 1. The mean of the boolean mask is the same
# ratio as count / len, but is computed vectorised instead of iterating over
# every row with the Python builtin sum().
df['sign'].eq(1).mean()

0.7739811581166857

In [9]:
# Build a directed graph from the edge list; every edge keeps its 'sign'
# value as an edge attribute.
G = nx.from_pandas_edgelist(
    df,
    source='source',
    target='target',
    edge_attr='sign',
    create_using=nx.DiGraph(),
)

In [10]:
# Sanity check: the graph built above must be directed.
nx.is_directed(G)

True

In [11]:
# Triadic census of the directed graph: a dict keyed by triad type code
# ('003', '012', ..., '300') whose values are the number of triads of that type.
# NOTE(review): presumably slow on a graph of this size; the following cell
# falls back to a saved copy when this one has not been run — confirm.
tcen = nx.triadic_census(G)
tcen

{'003': 92321741792416,
 '012': 36974092431,
 '102': 3988917084,
 '021D': 16840174,
 '021U': 35458001,
 '021C': 8774062,
 '111D': 4157858,
 '111U': 5094285,
 '030T': 241138,
 '030C': 3383,
 '201': 1555704,
 '120D': 63307,
 '120U': 85869,
 '120C': 22978,
 '210': 95961,
 '300': 66929}

In [12]:
# HDF5 file in the data directory used to persist the triad census.
df_tcen_path = path / 'triad_census.h5'

try:
    # One-row frame: index label 'slashdot', one column per triad type.
    df_tcen = pd.DataFrame.from_dict(
        tcen, orient='index', columns=['slashdot']
    ).transpose()
except NameError:
    # `tcen` is undefined only when the census cell was not run in this
    # kernel; in that case load the copy stored under the 'slashdot' key.
    # Any other error is left to propagate instead of being silently swallowed
    # (a bare `except:` would also trap KeyboardInterrupt).
    df_tcen = pd.read_hdf(df_tcen_path, key='slashdot')

df_tcen

Unnamed: 0,003,012,102,021D,021U,021C,111D,111U,030T,030C,201,120D,120U,120C,210,300
slashdot,92321741792416,36974092431,3988917084,16840174,35458001,8774062,4157858,5094285,241138,3383,1555704,63307,85869,22978,95961,66929


In [13]:
# Persist the census under the 'slashdot' key in two places: the file inside
# the data directory, and a file with the same bare name (`.name`), which is
# resolved against the current working directory.
# NOTE(review): the second copy looks like a leftover — confirm it is wanted.
for hdf_target in (df_tcen_path, df_tcen_path.name):
    df_tcen.to_hdf(hdf_target, key='slashdot')

In [14]:
# Read the census back from the data-directory file to confirm the round trip.
# The key written by the save step is given explicitly so the read stays
# unambiguous even if the HDF5 file holds more than one object.
df_tcen2 = pd.read_hdf(df_tcen_path, key='slashdot')
df_tcen2

Unnamed: 0,003,012,102,021D,021U,021C,111D,111U,030T,030C,201,120D,120U,120C,210,300
slashdot,92321741792416,36974092431,3988917084,16840174,35458001,8774062,4157858,5094285,241138,3383,1555704,63307,85869,22978,95961,66929
