In [1]:
# Trust metrics
from itertools import combinations
import networkx as nx
import numpy as np
import pandas as pd

def cluster(pubid):
    """Return every row of ``network_data`` whose 'pubid' equals ``pubid``.

    Reads the module-level ``network_data`` frame; the result is empty when
    no row matches.
    """
    belongs_to_pub = network_data['pubid'] == pubid
    return network_data.loc[belongs_to_pub]

def authname(authid):
    """Return the 'author' value of the first ``network_data`` row for ``authid``.

    Raises IndexError (from ``.iloc[0]``) when no row carries that 'authid'.
    """
    is_author = network_data['authid'] == authid
    matching_names = network_data.loc[is_author, 'author']
    return matching_names.iloc[0]

def edges(pubid):
    """Build the co-authorship edges contributed by one publication.

    Every unordered pair of distinct author ids found in the publication's
    rows becomes one ``(author_a, author_b, attributes)`` tuple. The
    attributes hold the publication's weight and its id, in the form
    accepted by ``add_edges_from``. A publication with fewer than two
    authors yields an empty list.
    """
    pub_rows = cluster(pubid)
    coauthors = pub_rows['authid'].unique()
    # The weight is read from the first row only. The column name is
    # misspelled ('type_weitght') where it is created, so it is misspelled here.
    pub_weight = pub_rows['type_weitght'].iloc[0]

    pub_edges = []
    for author_a, author_b in combinations(coauthors, 2):
        # A fresh attribute dict per edge, so edges never share mutable state.
        pub_edges.append((author_a, author_b, {'weight': pub_weight, 'pubid': pubid}))
    return pub_edges

def path_length(path_weights, alpha):
    """Return the length of a path given the weight of each of its hops.

    Each hop contributes ``1 / weight ** alpha``, so heavier hops count as
    shorter. An empty sequence of weights gives 0.
    """
    hop_costs = (1 / hop_weight ** alpha for hop_weight in path_weights)
    return sum(hop_costs)

def jumps(l, n=2):
    """Return every window of ``n`` consecutive items of ``l``, in order.

    With the default ``n=2`` this turns a node path ``[a, b, c]`` into its
    hops ``[[a, b], [b, c]]``. Only full windows are returned: a sequence
    shorter than ``n`` gives an empty list.

    Parameters
    ----------
    l : sequence
        Any sliceable sequence (list, tuple, str, ...).
    n : int, default 2
        Window size; expected to be at least 1.
    """
    # Stop at the last index where a full window still fits. The previous
    # version built one slice per index and dropped only the final one,
    # which was correct for n == 2 but leaked truncated windows otherwise.
    return [l[start:start + n] for start in range(len(l) - n + 1)]

def sum_weights(n, j):
    """Return the total 'weight' over all parallel edges joining one node pair.

    ``n`` is indexed as ``n[u][v]`` and must give a mapping from edge key to
    an attribute dict containing 'weight'. ``j`` supplies the two endpoints
    as ``j[0]`` and ``j[1]``.
    """
    parallel_edges = n[j[0]][j[1]]
    return sum(attributes['weight'] for attributes in parallel_edges.values())

def sum_of_weights(jumps, network):
    """Return the summed edge weight of each hop in ``jumps``, in order.

    Each hop is handed to ``sum_weights`` together with ``network``. Note
    that the ``jumps`` parameter shadows the module-level ``jumps`` function
    inside this body.
    """
    hop_totals = []
    for hop in jumps:
        hop_totals.append(sum_weights(network, hop))
    return hop_totals

def deg_arr(deg):
    """Collect the second item of every ``(node, value)`` pair into a NumPy array.

    ``deg`` is any iterable of 2-item pairs; the order of the pairs is kept.
    """
    values = []
    for _node, value in deg:
        values.append(value)
    return np.array(values)

def degree_centrality(network, alpha):
    """Score every node as ``degree ** (1 - alpha) * weighted_degree ** alpha``.

    ``degree`` comes from ``network.degree()`` and ``weighted_degree`` from
    ``network.degree(weight='weight')``. With ``alpha = 0`` only the plain
    degree counts; with ``alpha = 1`` only the weighted degree counts.

    Returns a list with one single-entry ``{node: score}`` dict per node, in
    the order of ``network.nodes``. Scores are paired with nodes by position,
    which relies on ``network.degree()`` and ``network.nodes`` iterating in
    the same order.
    """
    plain_degree = deg_arr(network.degree())
    weighted_degree = deg_arr(network.degree(weight='weight'))
    scores = plain_degree ** (1 - alpha) * weighted_degree ** alpha

    per_node = []
    for position, node in enumerate(network.nodes):
        per_node.append({node: scores[position]})
    return per_node

# Weight of each publication type; it becomes the 'weight' of the edges that
# publication contributes. Keys are looked up verbatim against the 'pub_type'
# column, so they must match the CSV strings exactly.
PUB_WEIGHTS = {
    # NOTE(review): this key ends with a space - presumably mirroring the raw
    # data; verify before "cleaning" it.
    'Livro Publicado ou Organizado ': 9,
    'Artigo Publicado': 8,
    'Capítulo de Livro Publicado': 8,
    'Artigo Aceito para Publicação': 7,
    'Texto em Jornal/Revista': 6,
    'Trabalho em Evento': 3,
    'Outra Produção Bibliográfica': 2,
    'Prefácio/Posfácio': 2,
    # 'pub_type' is cast to str below, which is how missing values end up
    # under the literal key 'nan'.
    'nan': 1,
}

# Load the raw records, force 'pub_type' to str, then derive the weight column.
# A publication type missing from PUB_WEIGHTS raises KeyError here.
network_data = (
    pd.read_csv('trust_network.csv')
    .assign(pub_type=lambda frame: frame['pub_type'].astype(str))
    .assign(type_weitght=lambda frame: frame['pub_type'].apply(lambda kind: PUB_WEIGHTS[kind]))
)

In [2]:
# Co-authorship multigraph: parallel edges are allowed, so each shared
# publication adds its own weighted edge between two authors.
trust_network = nx.MultiGraph()

# Nodes first, so every author id carries a 'name' attribute.
author_ids = network_data['authid'].unique()
for authid in author_ids:
    trust_network.add_node(authid, name=authname(authid))

# Then the edges, one publication at a time.
publication_ids = network_data['pubid'].unique()
for pubid in publication_ids:
    publication_edges = edges(pubid)
    trust_network.add_edges_from(publication_edges)

In [13]:
import re
import string
from gensim.utils import deaccent
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

def preprocessing(line):
    """Normalise free text before it is vectorised.

    The value is converted to ``str`` and lowercased, every character in
    ``string.punctuation`` is replaced by a space, and the result is passed
    through gensim's ``deaccent``.

    Parameters
    ----------
    line : any
        Text to normalise; non-string values are converted with ``str()``.

    Returns
    -------
    str
        The normalised text.
    """
    line = str(line).lower()
    # re.escape is required here. string.punctuation contains `\`, `]`, `^`
    # and `-`, which are special inside a character class. Unescaped, the
    # `\]` pair is parsed as an escaped bracket, so the backslash never
    # becomes part of the set and is never replaced.
    punctuation_class = r"[{}]".format(re.escape(string.punctuation))
    line = re.sub(punctuation_class, " ", line)
    return deaccent(line)

# One text profile per author: all of that author's titles, joined with spaces
# and normalised by preprocessing(). Rows are collected in a plain list and the
# DataFrame is built once. DataFrame.append was removed in pandas 2.0, and in
# older versions it copies the whole frame on every call.
profile_rows = []

for pid in network_data['authid'].unique():
    titles = network_data.loc[network_data['authid'] == pid, 'title']
    profile = preprocessing(titles.str.cat(sep=' '))
    profile_rows.append({'id': pid, 'profile': profile})

# columns= keeps the expected schema even when there are no authors at all.
peer_profiles = pd.DataFrame(profile_rows, columns=['id', 'profile'])

# Row i of the TF-IDF matrix describes the author in row i of peer_profiles.
tfidf = TfidfVectorizer().fit_transform(peer_profiles['profile'])

def content_based_recommendation(pid, top_n=24):
    """Return the ids of the peers whose title profiles are most similar to ``pid``'s.

    Similarity is the linear kernel between TF-IDF rows. Because
    ``TfidfVectorizer`` L2-normalises rows by default, this equals cosine
    similarity.

    Parameters
    ----------
    pid : author id
        Id of the target peer, as stored in ``peer_profiles['id']``.
    top_n : int, default 24
        Number of peers to return. The default matches the previous
        hard-coded slice.

    Returns
    -------
    list
        Up to ``top_n`` author ids, most similar first. The target peer is
        not filtered out, so it normally appears in the result.

    Raises
    ------
    KeyError
        If ``pid`` has no profile.
    """
    # Rows of `tfidf` follow the row order of `peer_profiles`, so that frame
    # is the only reliable mapping between author ids and matrix rows.
    # Using `pid` as a row position and sorted graph nodes for the way back
    # is only correct when ids are 0..N-1 and first appear in sorted order.
    profile_ids = peer_profiles['id'].tolist()
    try:
        row_index = profile_ids.index(pid)
    except ValueError:
        raise KeyError("no profile found for peer id {!r}".format(pid)) from None

    target_profile = tfidf.getrow(row_index)
    cosine_similarities = linear_kernel(target_profile, tfidf).flatten()

    # argsort is ascending; reverse it and keep the best `top_n` positions.
    best_rows = cosine_similarities.argsort()[::-1][:top_n]
    return [profile_ids[index] for index in best_rows]

In [14]:
from itertools import groupby

alpha = 0.5  # exponent handed to degree_centrality() and path_length() below
SOURCE_PEER = 1  # peer the recommendations are computed for
MAX_PATH_HOPS = 5  # cutoff passed to nx.all_simple_paths

recommendations = content_based_recommendation(SOURCE_PEER)

# degree_centrality() returns a list of single-entry {node: score} dicts.
# Flatten it into one mapping, keeping only the recommended peers.
centralities = {
    peer: score
    for peer_centrality in degree_centrality(trust_network, alpha)
    for peer, score in peer_centrality.items()
    if peer in recommendations
}

# One (target, length) entry per simple path from the source peer to each
# recommended peer. A target with no path within the cutoff contributes no
# entries.
all_distances = [
    (target, path_length(sum_of_weights(jumps(path), trust_network), alpha))
    for target in recommendations
    for path in nx.all_simple_paths(trust_network, SOURCE_PEER, target, cutoff=MAX_PATH_HOPS)
]

# Keep the shortest length per target. This is a plain loop rather than
# itertools.groupby, so the result does not depend on entries for the same
# target being adjacent in all_distances.
distances = {}
for target, length in all_distances:
    if target not in distances or length < distances[target]:
        distances[target] = length

In [15]:
# Highest centrality first.
centrality_recommendations = sorted(recommendations, key=centralities.__getitem__, reverse=True)

# Shortest trust-network distance first. Peers with no entry in `distances`
# get the fallback 999 and therefore sort to the end.
distance_recommendations = sorted(recommendations, key=lambda peer: distances.get(peer, 999))

In [16]:
recommendations

[1,
 0,
 43,
 50,
 64,
 80,
 101,
 66,
 38,
 3,
 54,
 78,
 2,
 53,
 37,
 65,
 41,
 51,
 56,
 108,
 107,
 40,
 120,
 106]

In [17]:
centralities

{1: 236.92825918408298,
 50: 118.94536560959406,
 0: 183.25665062965658,
 66: 50.91168824543142,
 120: 12.449899597988733,
 108: 210.35683967962632,
 106: 143.10835055998655,
 2: 40.19950248448356,
 78: 38.23610858861032,
 53: 179.55500549970753,
 38: 48.28043081829324,
 107: 50.07993610219566,
 41: 42.988370520409354,
 37: 25.25866188063018,
 43: 51.16639522186412,
 101: 54.772255750516614,
 56: 6.4807406984078595,
 54: 12.449899597988733,
 3: 14.142135623730953,
 64: 9.38083151964686,
 40: 5.196152422706632,
 80: 4.69041575982343,
 51: 4.69041575982343,
 65: 2.8284271247461903}

In [18]:
centrality_recommendations

[1,
 108,
 0,
 53,
 106,
 50,
 101,
 43,
 66,
 107,
 38,
 41,
 2,
 78,
 37,
 3,
 54,
 120,
 64,
 56,
 40,
 80,
 51,
 65]

In [19]:
distances

{43: 0.4214985851425088,
 64: 1.2908044844199997,
 80: 0.989293139842236,
 38: 0.19245008972987526,
 54: 0.9916159632170918,
 78: 0.5354591616021468,
 2: 0.7973182500014535,
 53: 0.7654962612458839,
 37: 1.208428962060957,
 65: 1.4842049739280605,
 41: 1.1538305145963372,
 56: 0.817538307261394,
 40: 0.696923425058676,
 120: 0.3333333333333333,
 106: 0.11043152607484653}

In [20]:
distance_recommendations

[106,
 38,
 120,
 43,
 78,
 40,
 53,
 2,
 56,
 80,
 54,
 41,
 37,
 64,
 65,
 1,
 0,
 50,
 101,
 66,
 3,
 51,
 108,
 107]