In [1]:
import pandas as pd
import numpy as np
import math
import collections
import networkx as nx
import matplotlib.pyplot as plt
import helper.graphgeneration as gg

from tqdm import tqdm
from pathlib import Path

from IPython.core.interactiveshell import InteractiveShell
# Make every top-level expression statement in a cell produce output,
# not only the last one (IPython's default is "last_expr").
InteractiveShell.ast_node_interactivity = "all"

# Graph statistics

### Parameters

In [2]:
# Locations of the TSV input files.
path_comments = "00_data/attack_annotated_comments.tsv"
path_annotations = "00_data/attack_annotations.tsv"
path_workers = "00_data/attack_worker_demographics.tsv"

# Common prefix of the exported graph files; the weight-function key and
# the ".gexf" extension are appended to it when the graphs are loaded.
path_export = "./01_processed_data/200903_run_004/200903_00_graph_"

# Weight-function keys (used as file-name suffixes) ...
types = [
    "agreement",
    "cohen",
    "krippendorff",
    "heuristic",
]
# ... and their human-readable labels, listed in the same order as `types`.
types_print_names = [
    "Agreement Rate",
    "Cohen's Kappa",
    "Krippendorff's Alpha",
    "Heuristic Function",
]

### Load data

In [3]:
# Read one GEXF graph per weight function, keeping the order of `types`.
graphs = [
    nx.read_gexf(path_export + weight_type + ".gexf")
    for weight_type in types
]

### Transform data

In [4]:
# Build a summary table of basic structural statistics with one row per
# statistic and one column per weight function (same order as `graphs`).
stat_labels = [
    'Number of nodes',
    'Number of edges',
    'Average degree',
    'Density',
    'Connected components',
]
# Each row starts with its label; the formatted per-graph values follow.
list_of_lists = [[label] for label in stat_labels]

for weighted_graph in graphs:
    nodes = weighted_graph.number_of_nodes()
    edges = weighted_graph.number_of_edges()
    # Mean degree over all nodes. A graph without nodes has nothing to
    # average, so report 0 instead of raising ZeroDivisionError.
    degree_total = sum(deg for _node, deg in weighted_graph.degree())
    degree = (degree_total / nodes) if nodes else 0.0
    density = nx.density(weighted_graph)
    comp = nx.number_connected_components(weighted_graph)

    # Values are stored as pre-formatted strings so the exported table
    # shows thousands separators and fixed decimals.
    formatted_values = [
        '{:,.0f}'.format(nodes),
        '{:,.0f}'.format(edges),
        '{:.1f}'.format(degree),
        '{:.3f}'.format(density),
        '{:.0f}'.format(comp),
    ]
    for row, value in zip(list_of_lists, formatted_values):
        row.append(value)

# Reuse the display names from the parameters cell so the column headers
# cannot drift from the configured weight functions.
df_results = pd.DataFrame(
    list_of_lists,
    columns=['Weight function'] + list(types_print_names),
)
df_results

Unnamed: 0,Weight function,Agreement Rate,Cohen's Kappa,Krippendorff's Alpha,Heuristic Function
0,Number of nodes,4053.0,4053.0,4053.0,4053.0
1,Number of edges,444344.0,91308.0,91308.0,444344.0
2,Average degree,219.3,45.1,45.1,219.3
3,Density,0.054,0.011,0.011,0.054
4,Connected componets,1.0,1.0,1.0,1.0


In [9]:
# Render the summary table as LaTeX source and print it as plain text
# so it can be copied verbatim.
latex_table = df_results.to_latex(index=False, bold_rows=True)
print(latex_table)

\begin{tabular}{lllll}
\toprule
     Weight function & Agreement Rate & Cohen's Kappa & Krippendorff's Alpha & Heuristic Function \\
\midrule
     Number of nodes &          4,053 &         4,053 &                4,053 &              4,053 \\
     Number of edges &        444,344 &        91,308 &               91,308 &            444,344 \\
      Average degree &          219.3 &          45.1 &                 45.1 &              219.3 \\
             Density &          0.054 &         0.011 &                0.011 &              0.054 \\
 Connected componets &              1 &             1 &                    1 &                  1 \\
\bottomrule
\end{tabular}

