In [1]:
import sys
sys.path.append('..')

import numpy as np
import socnet as sn

In [2]:
# Drawing defaults for socnet: a 500 x 500 canvas ...
sn.graph_width = sn.graph_height = 500
# ... node size and edge width ...
sn.node_size, sn.edge_width = 10, 1
# ... edge colour triple and where node labels are placed.
sn.edge_color = (192, 192, 192)
sn.node_label_position = 'top center'

In [3]:
# Create the initial graph without edges
total_nodes = 200
percentage_women = 0.4
percentage_man = 1 - percentage_women

# Build the empty graph and immediately take its directed version.
g = sn.generate_empty_graph(total_nodes).to_directed()

In [4]:
# Define the node attributes: the first `male_count` nodes are male, the
# remaining ones female.
from random import randint

# The tiny epsilon keeps the original truncating behaviour while protecting
# against float products such as 4.999999999999999 being cut down to 4.
male_count = int(total_nodes * percentage_man + 1e-9)

for n in range(total_nodes):
    is_male = n < male_count
    # g.nodes[...] is the accessor already used by calculate_pagerank;
    # the older g.node alias is gone from NetworkX 2.4 onwards.
    g.nodes[n]['gender'] = 'male' if is_male else 'female'
    # Both genders draw ability from the same 0..10 range, as gen() does, so
    # any PageRank gap comes from the biased edge rule and not from ability.
    g.nodes[n]['ability'] = randint(0, 10)
    g.nodes[n]['color'] = (255, 255, 0) if is_male else (255, 0, 255)
    g.nodes[n]['label'] = n


In [14]:
# Create the edges: n points to i when i's ability, as seen by n, is at
# least n's own ability and a 1-in-51 random draw succeeds.
for n in g.nodes():
    # These only depend on n, so look them up once per outer iteration.
    ability_n = g.nodes[n]['ability']
    n_is_male = g.nodes[n]['gender'] == 'male'
    for i in g.nodes():
        if n == i:
            continue
        ability_i = g.nodes[i]['ability']
        # A male observer discounts a female's ability to 70% (truncated).
        if n_is_male and g.nodes[i]['gender'] == 'female':
            ability_i = int(0.7 * ability_i)
        if ability_i >= ability_n and randint(0, 50) == 50:
            g.add_edge(n, i)
    

In [15]:
# First drawing of the graph, with random positions.
# reset_positions is called with None here and with 'weight' further down;
# presumably None means "no edge attribute drives the layout" — TODO confirm.
sn.reset_edge_colors(g)
sn.reset_positions(g, None)
sn.show_graph(g, nlab=True)

In [16]:
# Helper functions for computing PageRank.
# pearsonr comes from the public scipy.stats namespace; the private
# scipy.stats.stats module path is deprecated.
from scipy.stats import pearsonr
from scipy.optimize import minimize

def equals(a, b):
    """Return True when a and b differ by less than 1e-9 in absolute value."""
    return abs(a - b) < 0.000000001


def calculate_pagerank(g, k=10, scale=0.8):
    """Store a PageRank estimate in the 'pagerank' attribute of every node.

    The matrix returned by sn.build_matrix(g) is normalised row by row, mixed
    with a uniform term and applied k times to a uniform start vector.

    Parameters:
        g: graph to rank. Node labels are used directly as row/column indices
           of the matrix, so they are assumed to be 0..N-1 in matrix order
           (NOTE(review): confirm against sn.build_matrix).
        k: number of iterations (default 10, the previously hard-coded value).
        scale: weight given to the link structure; the remaining (1 - scale)
           is spread evenly over all nodes (default 0.8, as before).

    Returns:
        None. The graph is updated in place.
    """
    length = g.number_of_nodes()

    # Nothing to rank, and (1 - scale) / length below would divide by zero.
    if length == 0:
        return

    residue = (1 - scale) / length

    R = sn.build_matrix(g)

    for n in g.nodes:
        total = np.sum(R[n,])

        if equals(total, 0):
            # Row with no outgoing weight: give it a self-loop so the row
            # still sums to 1.
            R[n, n] = 1
        else:
            R[n,] /= total

    R = scale * R + residue

    Rt = R.transpose()

    # Every node starts with the same share of rank.
    r = np.full((length, 1), 1 / length)

    for _ in range(k):
        r = Rt.dot(r)

    for n in g.nodes:
        g.nodes[n]['pagerank'] = r[n, 0]

In [17]:
# Compute the PageRank of graph g (stored on each node as 'pagerank')
calculate_pagerank(g)

In [22]:
# Edge weights used when plotting the graph: every edge gets the smaller
# PageRank of its two endpoints.
for n, m in g.edges():
    # g.nodes[...] matches the accessor used by calculate_pagerank; the older
    # g.node alias is gone from NetworkX 2.4 onwards.
    g.edges[n, m]['weight'] = min(g.nodes[m]['pagerank'], g.nodes[n]['pagerank'])

In [23]:
# Redraw the graph. 'weight' is passed to reset_positions, presumably so the
# layout takes the 'weight' edge attribute into account — TODO confirm
# against socnet.
sn.reset_edge_colors(g)
sn.reset_positions(g, 'weight')
sn.show_graph(g, nlab=True)

In [11]:
# Average PageRank per gender in g.
result = []
sum_pagerank_male = 0
sum_pagerank_female = 0
count_male = 0
count_female = 0
for n in g.nodes():
    gender = g.nodes[n]['gender']
    pagerank = g.nodes[n]['pagerank']
    result.append({'index': n, 'gender': gender, 'pagerank': pagerank})
    if gender == 'male':
        sum_pagerank_male += pagerank
        count_male += 1
    else:
        sum_pagerank_female += pagerank
        count_female += 1

# Divide by the group sizes actually found in g instead of
# total_nodes * percentage: those globals are reassigned by later cells, so
# re-running this cell out of order would silently use the wrong divisor.
avg_pagerank_male = sum_pagerank_male / count_male if count_male else 0
avg_pagerank_female = sum_pagerank_female / count_female if count_female else 0

print('avg_male', avg_pagerank_male)
print('avg_female', avg_pagerank_female)
# result = sorted(result, key=itemgetter('pagerank'), reverse=True)

# for n in result:
#     print(n)

avg_male 0.00601290743704
avg_female 0.00348063884444


In [12]:
def snapshot(g, frames):
    """Append the frame that sn.generate_frame builds from g to frames."""
    frames.append(sn.generate_frame(g))

In [13]:
def gen(percentage_women, total_nodes):
    """Generate one directed graph with the given share of women.

    The first nodes are male and the remaining ones female; every node gets a
    random ability in 0..10. Node n points to node i when i's ability, as seen
    by n, is at least n's own ability and a 1-in-3 random draw succeeds. A
    male observer sees a female's ability scaled down to 10%.

    Parameters:
        percentage_women: fraction of female nodes, between 0 and 1.
        total_nodes: number of nodes in the graph.

    Returns:
        The graph, with edge colours reset and 'pagerank' set on every node.
    """
    percentage_man = 1 - percentage_women
    # The tiny epsilon keeps the truncating behaviour while protecting against
    # float products such as 50 * (1 - 0.9) == 4.999999999999999, which a bare
    # int() would cut down to 4 males instead of 5.
    male_count = int(total_nodes * percentage_man + 1e-9)

    g_ = sn.generate_empty_graph(total_nodes)
    g_ = g_.to_directed()

    for n in range(total_nodes):
        is_male = n < male_count
        # g_.nodes[...] is the accessor calculate_pagerank already relies on;
        # the older .node alias is gone from NetworkX 2.4 onwards.
        g_.nodes[n]['gender'] = 'male' if is_male else 'female'
        g_.nodes[n]['ability'] = randint(0, 10)
        g_.nodes[n]['color'] = (255, 255, 0) if is_male else (255, 0, 255)
        g_.nodes[n]['label'] = n

    for n in g_.nodes():
        # These only depend on n, so look them up once per outer iteration.
        ability_n = g_.nodes[n]['ability']
        n_is_male = g_.nodes[n]['gender'] == 'male'
        for i in g_.nodes():
            if n == i:
                continue
            ability_i = g_.nodes[i]['ability']
            if n_is_male and g_.nodes[i]['gender'] == 'female':
                ability_i = 0.1 * ability_i
            if ability_i >= ability_n and randint(0, 2) == 2:
                g_.add_edge(n, i)

    sn.reset_edge_colors(g_)
    calculate_pagerank(g_)
    return g_


def gen_graph(total_nodes):
    """Generate one graph per share of women, from 10% to 90% in 10% steps.

    Parameters:
        total_nodes: number of nodes in every generated graph.

    Returns:
        A list of nine graphs built by gen(), ordered by increasing share of
        women.
    """
    # Integer steps instead of repeatedly adding 0.1: the accumulated float
    # reaches 0.9999999999999999 after nine additions, which is still < 1 and
    # used to produce a spurious tenth, all-female graph.
    return [gen(step / 10, total_nodes) for step in range(1, 10)]

# Graphs for the sweep over the share of women, 50 nodes each.
gg = gen_graph(50)

In [None]:
# Take one snapshot per generated graph; i ends up holding how many were taken.
frames = []
i = 0

print(len(gg))

for i, graph in enumerate(gg, start=1):
    snapshot(graph, frames)

In [None]:

# Play the collected frames as an animation.
sn.show_animation(frames)

In [None]:
def gen_pr(gg):
    """Compute the average PageRank per gender for each graph in gg.

    Parameters:
        gg: iterable of graphs whose nodes carry 'gender' and 'pagerank'.

    Returns:
        A list with one {'male': avg, 'female': avg} dict per graph, in the
        same order as gg. A gender with no nodes in a graph gets 0.
    """
    pr = []

    for g in gg:
        totals = {'male': 0, 'female': 0}
        counts = {'male': 0, 'female': 0}

        for n in g.nodes():
            attrs = g.nodes[n]
            # Anything that is not 'male' is counted as female, as before.
            gender = 'male' if attrs['gender'] == 'male' else 'female'
            totals[gender] += attrs['pagerank']
            counts[gender] += 1

        # Average over the nodes actually present in this graph. The previous
        # divisor was built from the global total_nodes and a float-accumulated
        # percentage, which did not always match the real group sizes.
        pr.append({
            gender: totals[gender] / counts[gender] if counts[gender] else 0
            for gender in ('male', 'female')
        })

    return pr

In [None]:
ng, prs = [], []

total_nodes = 50

# One run of the sweep: keep both the graphs and their per-gender averages.
for _ in range(1):
    _gg = gen_graph(total_nodes)
    pr = gen_pr(_gg)
    ng.append(_gg)
    prs.append(pr)

# Average every sweep position over all runs.
LEN = len(prs[0])
avgs = [
    {
        gender: sum(run[i][gender] for run in prs) / len(prs)
        for gender in ('male', 'female')
    }
    for i in range(LEN)
]

In [None]:
# Text report: one entry per sweep position, labelled with the share of women.
report_template = '''
        female percentage: {:.2f}
        male pr avg:       {:.5f}
        female pr avg:     {:.5f}
        difference:        {:.5f}
    '''

i = 0.1

for v in avgs:
    male_avg, female_avg = v['male'], v['female']
    print(report_template.format(i, male_avg, female_avg, male_avg - female_avg))
    i += .1

## SIMULATIONS

In [None]:
# Frame list for the snapshot/animation calls, which are commented out in this cell.
frames = []

from math import inf

def smallest_successor(g, n):
    """Return the successor of n with the lowest 'pagerank'.

    Parameters:
        g: directed graph whose nodes carry a 'pagerank' attribute.
        n: node whose successors are inspected.

    Returns:
        The successor node with the smallest PageRank (the first one on ties),
        or None when n has no successors.
    """
    return min(
        g.successors(n),
        key=lambda k: g.nodes[k]['pagerank'],
        default=None,
    )

def find_smallest_pagerank(g):
    """Return the node of g with the lowest 'pagerank'.

    Parameters:
        g: graph whose nodes carry a 'pagerank' attribute.

    Returns:
        The node with the smallest PageRank (the first one on ties), or None
        when the graph has no nodes. Previously an empty graph yielded node 0
        even though no such node existed.
    """
    return min(
        g.nodes(),
        key=lambda n: g.nodes[n]['pagerank'],
        default=None,
    )



# for j in range(0,10):

def create_chart(percentage, total_nodes=200):
    """Remove the lowest-PageRank node step by step and track gender counts.

    A graph is generated with gen(percentage, total_nodes). At every step the
    node with the smallest PageRank is removed and the remaining males and
    females are counted; the loop stops once no female is left. PageRank is
    not recomputed after a removal: the values assigned by gen() are used
    throughout. The graph is never drawn here, so no layout is computed.

    Parameters:
        percentage: fraction of women passed to gen().
        total_nodes: number of nodes in the generated graph (default 200, the
            previously hard-coded size).

    Returns:
        [y1, y2, x]: males left, females left and the 1-based step number,
        one entry per removal.
    """
    y1 = []
    y2 = []
    x = []
    g = gen(percentage, total_nodes)
    step = 0

    # The guard only matters for an empty graph; otherwise the loop always
    # ends through the `females == 0` check below.
    while g.number_of_nodes() > 0:
        g.remove_node(find_smallest_pagerank(g))

        males = sum(1 for n in g.nodes() if g.nodes[n]['gender'] == 'male')
        # Every node that is not male is counted as female, as before.
        females = g.number_of_nodes() - males

        step += 1
        y1.append(males)
        y2.append(females)
        x.append(step)

        if females == 0:
            break

    return [y1, y2, x]

#     snapshot(g, frames)

            
# sn.show_animation(frames)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Removal experiment starting from 50% women.
y1, y2, x = create_chart(0.5)

fig, ax = plt.subplots()
for counts, legend_label in ((y1, "Homens"), (y2, "Mulheres")):
    ax.plot(x, counts, label=legend_label)
ax.legend()
ax.set_title("Remoção de menores pageranks para 50% de mulheres")
ax.set_xlabel('Iterações')
ax.set_ylabel('Número de pessoas')

plt.show()

In [None]:
# Removal experiment starting from 20% women.
y1, y2, x = create_chart(0.2)

fig, ax = plt.subplots()
for counts, legend_label in ((y1, "Homens"), (y2, "Mulheres")):
    ax.plot(x, counts, label=legend_label)
ax.legend()
ax.set_title("Remoção de menores pageranks para 20% de mulheres")
ax.set_xlabel('Iterações')
ax.set_ylabel('Número de pessoas')

plt.show()

In [None]:
# Removal experiment starting from 80% women.
y1, y2, x = create_chart(0.8)

fig, ax = plt.subplots()
for counts, legend_label in ((y1, "Homens"), (y2, "Mulheres")):
    ax.plot(x, counts, label=legend_label)
ax.legend()
ax.set_title("Remoção de menores pageranks para 80% de mulheres")
ax.set_xlabel('Iterações')
ax.set_ylabel('Número de pessoas')

plt.show()