In [1]:
# Switch the working directory to the sibling `src` folder. Later cells rely on it:
# the data files are addressed relative to it ("../notebooks/data/...") and the
# project-local `assortativity` module is imported afterwards
# (presumably it lives in `src` — confirm).
import os; os.chdir("../src")

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import networkx as nx
import pandas as pd

In [3]:
import assortativity

In [4]:
def minority_fraction(nx_graph, attribute="type"):
    """Return the relative size of the smallest node group.

    Nodes are grouped by the value stored under ``attribute`` in their node
    data. The result is the node count of the smallest group divided by the
    total number of nodes.

    Parameters
    ----------
    nx_graph
        Graph whose ``nodes`` view maps every node to its attribute dict.
    attribute
        Node-data key holding the group label (default ``"type"``).

    Returns
    -------
    numpy.float64
        Share of the smallest group (``1.0`` when only one group exists).

    Raises
    ------
    KeyError
        If a node has no ``attribute`` entry.
    IndexError
        If the graph has no nodes.
    """
    node_data = nx_graph.nodes
    labels = [node_data[node][attribute] for node in node_data]
    group_sizes = np.unique(labels, return_counts=True)[1]
    shares = group_sizes / group_sizes.sum()
    # Ascending sort puts the minority share first.
    return np.sort(shares)[0]

In [5]:
def group_edge_count(nx_graph, attribute="type"):
    """Count edges inside and between the two node groups.

    Group labels are read from the ``attribute`` entry of every node and
    ordered with ``np.unique`` (ascending); the first label becomes group 0,
    the second group 1.

    Parameters
    ----------
    nx_graph
        Graph passed on to ``nx.attribute_mixing_matrix``.
    attribute
        Node-data key holding the group label (default ``"type"``).

    Returns
    -------
    tuple
        ``(E_00, E_01, E_11)``: halved mixing-matrix entries for edges inside
        group 0, between the groups, and inside group 1.

    Notes
    -----
    Only the first two groups are reported; with fewer than two groups the
    matrix indexing raises ``IndexError``.
    """
    node_data = nx_graph.nodes
    labels = np.unique([node_data[node][attribute] for node in node_data])
    label_to_index = {label: position for position, label in enumerate(labels)}

    mixing = nx.attribute_mixing_matrix(nx_graph, attribute, normalized=False, mapping=label_to_index)
    # Halve the raw tallies: for an undirected graph networkx reports every
    # edge once per direction (see the node_attribute_xy documentation).
    mixing /= 2

    within_first = mixing[0, 0]
    between = mixing[0, 1] + mixing[1, 0]
    within_second = mixing[1, 1]
    return within_first, between, within_second

# Load data

In [6]:
# Both files are read with header=None, so their columns are labelled 0, 1, 2, ...
# a_ij: edge list — column 0 is the snapshot year, columns 1 and 2 are the two
#       endpoints of an edge (see create_network).
# meta_data: node table — column 0 is the node key, column 1 the gender code
#       (only "m" and "f" are kept by create_network).
a_ij = pd.read_csv("../notebooks/data/DBLP/co_authorship_1980_1990_2000_2010.txt", header=None)
meta_data = pd.read_csv("../notebooks/data/DBLP/name_gender_1970_2016.csv", header=None)

In [7]:
# List the distinct values of column 0 — the snapshot years that create_network filters on.
a_ij[0].unique()

array([2010, 2000, 1990, 1980])

## Create network method

In [8]:
def create_network(a_ij, meta_data, year):
    """Build the gender-labelled graph for one snapshot year.

    Parameters
    ----------
    a_ij : pandas.DataFrame
        Edge table with integer column labels: column 0 holds the snapshot
        year, columns 1 and 2 the two endpoints of an edge.
    meta_data : pandas.DataFrame
        Node table with integer column labels: column 0 holds the node key
        (the same values used as edge endpoints), column 1 the gender code.
    year
        Value of column 0 selecting the rows of ``a_ij`` to use.

    Returns
    -------
    networkx.Graph
        Undirected graph whose nodes carry a ``"type"`` attribute: 0 for
        gender ``"f"`` and 1 for gender ``"m"``. Nodes whose gender is missing
        or has any other value are removed together with their edges.

    Raises
    ------
    ValueError
        If ``meta_data`` still lists a node key more than once after exact
        duplicate rows have been dropped (``verify_integrity=True``).

    Notes
    -----
    Neither input frame is modified.
    """
    type_by_gender = {"f": 0, "m": 1}

    edges = a_ij.loc[a_ij[0] == year, [1, 2]]
    g = nx.Graph()
    # Bulk insert instead of a per-row iterrows() loop; nx.Graph keeps a single
    # edge per node pair, so repeated rows need no explicit de-duplication.
    g.add_edges_from(zip(edges[1], edges[2]))

    # Non-mutating chain: no defensive copy of the whole metadata table needed.
    gender_by_node = meta_data.drop_duplicates().set_index(0, verify_integrity=True)[1]

    nodes = list(g.nodes)
    unlabelled = []
    # reindex() yields NaN for nodes absent from the metadata; NaN is not a key
    # of type_by_gender, so those nodes are dropped as well.
    for node, gender in zip(nodes, gender_by_node.reindex(nodes).values):
        if gender in type_by_gender:
            g.nodes[node]["type"] = type_by_gender[gender]
        else:
            unlabelled.append(node)
    g.remove_nodes_from(unlabelled)
    return g

# Statistics 

In [9]:
# Summary row for the 2010 snapshot, "&"-separated:
# N & minority share & E & E_00 & E_01 & E_11 & r & r_adj
g = create_network(a_ij, meta_data, 2010)
r = assortativity.nx_assortativity(g, "type")
r_adj = assortativity.nx_adjusted_assortativity(g, "type")
N = g.number_of_nodes()
f_0 = minority_fraction(g)
E = g.number_of_edges()
E_00, E_01, E_11 = group_edge_count(g)

print(
    "%10d & %1.2f & %10d & %10d & %10d & %10d & %1.2f & %1.2f "
    % (N, f_0, E, E_00, E_01, E_11, r, r_adj)
)

    170984 & 0.21 &     322052 &      17468 &      91738 &     212846 & 0.10 & 0.14 


In [10]:
# Summary row for the 2000 snapshot, "&"-separated:
# N & minority share & E & E_00 & E_01 & E_11 & r & r_adj
g = create_network(a_ij, meta_data, 2000)
r = assortativity.nx_assortativity(g, "type")
r_adj = assortativity.nx_adjusted_assortativity(g, "type")
N = g.number_of_nodes()
f_0 = minority_fraction(g)
E = g.number_of_edges()
E_00, E_01, E_11 = group_edge_count(g)

print(
    "%10d & %1.2f & %10d & %10d & %10d & %10d & %1.2f & %1.2f "
    % (N, f_0, E, E_00, E_01, E_11, r, r_adj)
)

     54966 & 0.18 &      72369 &       3123 &      18101 &      51145 & 0.11 & 0.16 


In [11]:
# Summary row for the 1990 snapshot, "&"-separated:
# N & minority share & E & E_00 & E_01 & E_11 & r & r_adj
g = create_network(a_ij, meta_data, 1990)
r = assortativity.nx_assortativity(g, "type")
r_adj = assortativity.nx_adjusted_assortativity(g, "type")
N = g.number_of_nodes()
f_0 = minority_fraction(g)
E = g.number_of_edges()
E_00, E_01, E_11 = group_edge_count(g)

print(
    "%10d & %1.2f & %10d & %10d & %10d & %10d & %1.2f & %1.2f "
    % (N, f_0, E, E_00, E_01, E_11, r, r_adj)
)

     13764 & 0.15 &      12178 &        384 &       2701 &       9093 & 0.09 & 0.16 


In [12]:
# Summary row for the 1980 snapshot, "&"-separated:
# N & minority share & E & E_00 & E_01 & E_11 & r & r_adj
g = create_network(a_ij, meta_data, 1980)
r = assortativity.nx_assortativity(g, "type")
r_adj = assortativity.nx_adjusted_assortativity(g, "type")
N = g.number_of_nodes()
f_0 = minority_fraction(g)
E = g.number_of_edges()
E_00, E_01, E_11 = group_edge_count(g)

print(
    "%10d & %1.2f & %10d & %10d & %10d & %10d & %1.2f & %1.2f "
    % (N, f_0, E, E_00, E_01, E_11, r, r_adj)
)

      2664 & 0.11 &       1765 &         24 &        274 &       1467 & 0.06 & 0.16 
