In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
import os
import glob
import itertools

In [2]:
# Relative root directory that the dataset file paths in the cells below are built from.
path = "Datasets/"

In [3]:
def getProtectedAttributesTwitch(G, df):
    """Attach the Twitch ``mature`` flag to the nodes of ``G``.

    Parameters
    ----------
    G : networkx graph
        Graph to annotate; it is modified in place.
    df : pandas.DataFrame
        Table providing a ``numeric_id`` column (used as node key) and a
        ``mature`` column (used as attribute value).

    Returns
    -------
    The same graph object ``G``, with the ``"mature"`` node attribute set
    for every node that appears in ``df["numeric_id"]``.
    """
    protected_attributes = dict(zip(df["numeric_id"], df["mature"]))
    # set_node_attributes works in place and returns None, so return G itself
    # instead of the result of the call (which would always be None).
    nx.set_node_attributes(G, protected_attributes, "mature")
    return G

def getProtectedAttributesDeezer(G, df):
    """Attach the Deezer ``target`` column to the nodes of ``G`` as ``"gender"``.

    Parameters
    ----------
    G : networkx graph
        Graph to annotate; it is modified in place.
    df : pandas.DataFrame
        Table providing an ``id`` column (used as node key) and a ``target``
        column (stored under the node attribute ``"gender"``).

    Returns
    -------
    The same graph object ``G`` after annotation.
    """
    protected_attributes = dict(zip(df["id"], df["target"]))
    # set_node_attributes works in place and returns None, so return G itself
    # instead of the result of the call (which would always be None).
    nx.set_node_attributes(G, protected_attributes, "gender")
    return G

def getProtectedAttributesPokec(G,df):
    """Attach column ``3`` of the Pokec profile table to ``G`` as ``"gender"``.

    Parameters
    ----------
    G : networkx graph
        Graph to annotate; it is modified in place.
    df : pandas.DataFrame
        Header-less table; column ``0`` is used as node key and column ``3``
        is stored under the node attribute ``"gender"``.

    Returns
    -------
    The same graph object ``G`` after annotation.
    """
    protected_attributes = dict(zip(df[0], df[3]))
    # set_node_attributes works in place and returns None, so return G itself
    # instead of the result of the call (which would always be None).
    nx.set_node_attributes(G, protected_attributes, "gender")
    return G

def getprotectedAttributesDict_Facebook(featuresDF,featureNameDF,egoFeatDF):
    """Build a ``{node id: gender value}`` mapping for one ego network.

    The row of ``featureNameDF`` whose column ``1`` equals
    ``"gender;anonymized"`` (the first one, if several match) determines
    which feature column is read. Entries coming from ``egoFeatDF``
    override entries from ``featuresDF`` when the keys collide.

    Raises ``IndexError`` when no row is labelled ``"gender;anonymized"``.
    """
    is_gender = featureNameDF[1] == "gender;anonymized"
    gender_index = featureNameDF.index[is_gender].to_list()[0]
    # Column 0 is used as the node id, so feature k is read from column k + 1.
    # NOTE(review): the same layout is assumed for egoFeatDF; confirm that the
    # .egofeat files really carry an id in column 0.
    gender_col = gender_index + 1

    def node_to_gender(frame):
        pair = frame[[0, gender_col]]
        return dict(zip(pair[0], pair[gender_col]))

    merged = node_to_gender(featuresDF)
    merged.update(node_to_gender(egoFeatDF))
    return merged

def getEgoFeats(path):
    """List the feature files found directly inside ``path``.

    Parameters
    ----------
    path : str
        Directory to scan (not recursive).

    Returns
    -------
    tuple of three lists of file names (not full paths), each sorted
    alphabetically: files ending in ``.feat``, in ``.featnames`` and in
    ``.egofeat``.
    """
    featFiles = []
    featNameFiles = []
    egoFeatFiles = []
    # os.listdir returns entries in arbitrary order. Callers pair the three
    # lists by position, so iterate in sorted order to make the i-th entry of
    # each list line up (assuming files are named "<ego id>.<extension>").
    for file in sorted(os.listdir(path)):
        if file.endswith(".feat"):
            featFiles.append(file)
        elif file.endswith(".featnames"):
            featNameFiles.append(file)
        elif file.endswith(".egofeat"):
            egoFeatFiles.append(file)
    return featFiles, featNameFiles, egoFeatFiles

def getProtectedAttributesFacebook(G, localpath="Datasets/facebook/facebook/"):
    """Attach a ``"gender"`` node attribute to ``G`` from the ego-network files.

    Every ``.feat`` / ``.featnames`` / ``.egofeat`` triple found in
    ``localpath`` is read as a space-separated, header-less table and
    reduced to a ``{node id: gender}`` mapping; the mappings are merged
    (later files override earlier ones) and written onto ``G``.

    Parameters
    ----------
    G : networkx graph
        Graph to annotate; it is modified in place.
    localpath : str, optional
        Directory holding the ego-network files. Defaults to the location
        that was previously hard-coded.

    Returns
    -------
    The same graph object ``G`` after annotation.
    """
    node_gender_dict = {}
    fbFeatFiles, fbFeatNameFiles, fbEgoFeatFiles = getEgoFeats(localpath)
    # The three lists are paired by position, so sort each one to guarantee
    # that index i refers to the same ego network in all of them
    # (assuming files are named "<ego id>.<extension>").
    fbFeatFiles = sorted(fbFeatFiles)
    fbFeatNameFiles = sorted(fbFeatNameFiles)
    fbEgoFeatFiles = sorted(fbEgoFeatFiles)
    for index in range(len(fbFeatFiles)):
        localFeaturesDF = pd.read_csv(os.path.join(localpath, fbFeatFiles[index]), sep=" ", header=None)
        localFeatureNamesDf = pd.read_csv(os.path.join(localpath, fbFeatNameFiles[index]), sep=" ", header=None)
        localEgoFeatDf = pd.read_csv(os.path.join(localpath, fbEgoFeatFiles[index]), sep=" ", header=None)
        protectedAttrDict = getprotectedAttributesDict_Facebook(localFeaturesDF, localFeatureNamesDf, localEgoFeatDf)
        node_gender_dict.update(protectedAttrDict)

    # set_node_attributes works in place and returns None, so return G itself
    # instead of the result of the call (which would always be None).
    nx.set_node_attributes(G, node_gender_dict, "gender")
    return G


In [84]:
def initialize_community_attribute_Counter(communitiesList):
    """Create one zeroed ``{0: 0, 1: 0}`` counter per community.

    The counters are keyed ``"Community_0"``, ``"Community_1"``, ... in the
    order of ``communitiesList``; only the length of the list is used.
    """
    return {
        "Community_{}".format(position): {0: 0, 1: 0}
        for position in range(len(communitiesList))
    }

def count_protected_attributes_frequency(communitiesList, protectedAttributeCountDict, graph=None, attribute="mature"):
    """Count, per community, how many nodes have attribute value 0 versus anything else.

    Parameters
    ----------
    communitiesList : sequence of iterables of nodes
        Communities, in the same order used to create the counter keys.
    protectedAttributeCountDict : dict
        Mapping ``"Community_<i>" -> {0: int, 1: int}``; updated in place.
    graph : networkx graph, optional
        Graph whose node attributes are inspected. When omitted, the
        module-level ``twitchGamers_graph`` is used (the previous behaviour).
    attribute : str, optional
        Name of the node attribute to inspect. Defaults to ``"mature"``.

    Returns
    -------
    ``protectedAttributeCountDict``, with bucket ``0`` incremented for each
    node whose attribute equals 0 and bucket ``1`` for every other value.

    Raises
    ------
    KeyError
        If a node lacks ``attribute`` or a community key is missing.
    """
    if graph is None:
        # Backward-compatible default: the function used to be hard-wired
        # to the Twitch graph defined at notebook level.
        graph = twitchGamers_graph
    for communityCount, community in enumerate(communitiesList):
        communityKey = "Community_{}".format(communityCount)
        for node in community:
            bucket = 0 if graph.nodes[node][attribute] == 0 else 1
            protectedAttributeCountDict[communityKey][bucket] += 1
    return protectedAttributeCountDict

def calculate_community_balance(protectedAttributeCountDict):
    """Add a ``"balance"`` entry to every community counter.

    Balance is the size of the smaller group divided by the size of the
    larger group, so it ranges from 0.0 (only one group present) to 1.0
    (both groups equally large).

    Parameters
    ----------
    protectedAttributeCountDict : dict
        Mapping ``community key -> {0: int, 1: int}``; updated in place.

    Returns
    -------
    ``protectedAttributeCountDict`` with ``"balance"`` set on each entry.
    """
    for counts in protectedAttributeCountDict.values():
        smaller, larger = sorted((counts[0], counts[1]))
        # An empty community (0 and 0) used to raise ZeroDivisionError;
        # report it as 0.0, like any community lacking one of the groups.
        counts["balance"] = smaller / larger if larger else 0.0
    return protectedAttributeCountDict

def calculate_Fairness(communitiesList):
    """Run the three fairness steps on ``communitiesList`` and return the result.

    Steps, in order: create zeroed per-community counters, fill them via
    ``count_protected_attributes_frequency``, then add the balance via
    ``calculate_community_balance``.
    """
    counts = initialize_community_attribute_Counter(communitiesList)
    counts = count_protected_attributes_frequency(communitiesList, counts)
    return calculate_community_balance(counts)

In [4]:
# Twitch gamers graph: comma-separated edge list, node labels parsed as int
twitchGamers_graph = nx.read_edgelist(
    path + "twitch_gamers/large_twitch_edges.csv",
    delimiter=",",
    nodetype=int,
)

In [5]:
# Twitch gamers features table (one CSV with a header row)
twitchGamers_features = pd.read_csv(path + "twitch_gamers/large_twitch_features.csv")

In [6]:
# Write the "mature" column of the features table onto the Twitch graph's nodes.
getProtectedAttributesTwitch(twitchGamers_graph, twitchGamers_features)

In [13]:
# Restrict the Twitch graph to its largest connected component.
twitchGamersCC = max(nx.connected_components(twitchGamers_graph), key=len)
twitchGamersCCGraph = nx.subgraph(twitchGamers_graph, twitchGamersCC)

In [14]:
# Community detection by label propagation on the largest connected component.
# Recorded run time for this cell: 11m 35.7s.
twitchGamersCCGraphComms = nx.community.label_propagation.label_propagation_communities(twitchGamersCCGraph)

In [30]:
# Materialise the detected communities so they can be indexed and re-used.
# TODO: maybe keep only the top 5 communities?
twitchLabelPropagationList = [*twitchGamersCCGraphComms]

In [85]:
# Per-community counts of the "mature" attribute (0 vs. other) plus balance.
# The former `genderCountTwitch = {}` initialisation was a dead store: the
# name is bound directly to the result of calculate_Fairness.
genderCountTwitch = calculate_Fairness(twitchLabelPropagationList)

In [86]:
# Display the per-community counts and balance values (one entry per community).
genderCountTwitch

{'Community_0': {0: 83131, 1: 76960, 'balance': 0.9257677641313108},
 'Community_1': {0: 1563, 1: 1017, 'balance': 0.6506717850287908},
 'Community_2': {0: 2366, 1: 777, 'balance': 0.32840236686390534},
 'Community_3': {0: 473, 1: 135, 'balance': 0.2854122621564482},
 'Community_4': {0: 2, 1: 0, 'balance': 0.0},
 'Community_5': {0: 580, 1: 53, 'balance': 0.09137931034482759},
 'Community_6': {0: 854, 1: 44, 'balance': 0.05152224824355972},
 'Community_7': {0: 1, 1: 1, 'balance': 1.0},
 'Community_8': {0: 0, 1: 2, 'balance': 0.0},
 'Community_9': {0: 2, 1: 0, 'balance': 0.0},
 'Community_10': {0: 1, 1: 1, 'balance': 1.0},
 'Community_11': {0: 0, 1: 2, 'balance': 0.0},
 'Community_12': {0: 2, 1: 0, 'balance': 0.0},
 'Community_13': {0: 6, 1: 0, 'balance': 0.0},
 'Community_14': {0: 2, 1: 0, 'balance': 0.0},
 'Community_15': {0: 4, 1: 0, 'balance': 0.0},
 'Community_16': {0: 2, 1: 0, 'balance': 0.0},
 'Community_17': {0: 2, 1: 1, 'balance': 0.5},
 'Community_18': {0: 1, 1: 1, 'balance': 1

In [8]:
# Deezer Europe graph: comma-separated edge list, node labels parsed as int
deezer_graph = nx.read_edgelist(
    path + "deezer_europe/deezer_europe/deezer_europe_edges.csv",
    delimiter=",",
    nodetype=int,
)

In [9]:
# Read Deezer genders.
# `path` already ends with "/", so no extra separator is inserted here
# (the previous "{}/..." template produced "Datasets//deezer_europe/...").
deezer_gendersDf = pd.read_csv("{}deezer_europe/deezer_europe/deezer_europe_target.csv".format(path))

In [10]:
# Write the "target" column of the Deezer table onto the graph's nodes as "gender".
getProtectedAttributesDeezer(deezer_graph, deezer_gendersDf)

In [11]:
# Pokec graph: tab-separated edge list loaded as a directed graph, node labels parsed as int.
# `path` already ends with "/", so no extra separator is inserted here
# (the previous "{}/..." template produced "Datasets//pokec/...").
pokec_graph = nx.read_edgelist("{}pokec/soc-pokec-relationships.txt".format(path), nodetype=int, delimiter="\t", create_using=nx.DiGraph())

In [12]:
# Pokec profiles: header-less, tab-separated; only columns 0 and 3 are loaded.
# `path` already ends with "/", so no extra separator is inserted here
# (the previous "{}/..." template produced "Datasets//pokec/...").
pokec_features = pd.read_csv("{}pokec/soc-pokec-profiles.txt".format(path), delimiter="\t", header=None, usecols=[0,3])

In [13]:
# Write column 3 of the Pokec profile table onto the graph's nodes as "gender".
getProtectedAttributesPokec(pokec_graph, pokec_features)

In [37]:
# Combined Facebook graph: space-separated edge list, node labels parsed as int
facebook_graph_all = nx.read_edgelist(
    path + "facebook/facebook/facebook_combined.txt",
    delimiter=" ",
    nodetype=int,
)

In [5]:
# Write the "gender" values collected from the ego-network feature files onto the combined Facebook graph.
getProtectedAttributesFacebook(facebook_graph_all)