In [31]:
# Trust metrics
from itertools import combinations
import networkx as nx
import numpy as np
import pandas as pd

def cluster(pubid):
    """Return the rows of ``network_data`` that belong to one publication.

    Args:
        pubid: Value compared (with ``==``) against the ``pubid`` column.

    Returns:
        The subset of ``network_data`` whose ``pubid`` equals ``pubid``;
        empty when nothing matches.
    """
    belongs_to_publication = network_data['pubid'] == pubid
    return network_data.loc[belongs_to_publication]

def authname(authid):
    """Look up the author name recorded for an author id.

    Args:
        authid: Value compared (with ``==``) against the ``authid`` column
            of ``network_data``.

    Returns:
        The ``author`` value of the first matching row.

    Raises:
        IndexError: If no row of ``network_data`` has this ``authid``.
    """
    author_rows = network_data[network_data['authid'] == authid]
    names = author_rows['author']
    # Only the first occurrence is used; later rows for the same id are ignored.
    return names.iloc[0]

def edges(pubid):
    """Build the co-authorship edges contributed by one publication.

    Every unordered pair of distinct author ids found on the publication
    becomes one edge. All edges of a publication share the same weight, read
    from the first row of the publication's ``type_weitght`` column.

    Args:
        pubid: Publication id, forwarded to ``cluster``.

    Returns:
        A list of ``(author_a, author_b, attributes)`` tuples where
        ``attributes`` is ``{'weight': ..., 'pubid': pubid}``. The list is
        empty when the publication has fewer than two distinct authors.

    Raises:
        IndexError: If no row matches ``pubid``.
    """
    publication_rows = cluster(pubid)
    coauthors = publication_rows['authid'].unique()
    publication_weight = publication_rows['type_weitght'].iloc[0]

    links = []
    for first, second in combinations(coauthors, 2):
        # A fresh attribute dict per edge, so edges never share mutable state.
        links.append((first, second, {'weight': publication_weight, 'pubid': pubid}))
    return links

def path_length(path_weights, alpha):
    """Return the length of a path given the weight of each of its hops.

    Each hop contributes ``1 / weight ** alpha``, so heavier hops make the
    path shorter.

    Args:
        path_weights: Iterable with one numeric weight per hop.
        alpha: Exponent applied to every weight before inversion.

    Returns:
        The sum of the per-hop contributions; ``0`` for an empty path.
    """
    return sum(1 / hop_weight ** alpha for hop_weight in path_weights)

def jumps(l, n=2):
    """Split a sequence into consecutive, overlapping windows of length ``n``.

    With the default ``n=2`` a node path ``[a, b, c]`` becomes its hops
    ``[[a, b], [b, c]]``.

    Args:
        l: Sliceable sequence (for example a list of nodes along a path).
        n: Window length; must be at least 1.

    Returns:
        A list with every full-length window, in order. The list is empty
        when ``l`` holds fewer than ``n`` items.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be at least 1, got {!r}".format(n))
    # Stop at the last start index that still yields a full window. Building a
    # window at every index and dropping only the final one is correct for
    # n == 2 only; other sizes would leave truncated windows in the result.
    return [l[start:start + n] for start in range(len(l) - n + 1)]

def sum_weights(n, j):
    """Add up the ``'weight'`` of every parallel edge between two nodes.

    Args:
        n: Nested mapping indexed as ``n[u][v][key]['weight']``, such as a
            networkx multigraph.
        j: Indexable whose first two items are the edge endpoints.

    Returns:
        The sum of the ``'weight'`` attribute over all entries of
        ``n[j[0]][j[1]]``.
    """
    start, end = j[0], j[1]
    parallel_edges = n[start][end]
    return sum(attributes['weight'] for attributes in parallel_edges.values())

def sum_of_weights(jumps, network):
    """Compute the combined edge weight of every hop along a path.

    Args:
        jumps: Iterable of hops, each one a pair of endpoints as accepted by
            ``sum_weights``. Note that this parameter shadows the
            module-level ``jumps`` function inside this body.
        network: Graph passed through to ``sum_weights``.

    Returns:
        A list with one summed weight per hop, in hop order.
    """
    hop_totals = []
    for hop in jumps:
        hop_totals.append(sum_weights(network, hop))
    return hop_totals

def deg_arr(deg):
    """Extract the values of ``(node, value)`` pairs into a NumPy array.

    Args:
        deg: Iterable of two-item pairs, such as a networkx degree view.

    Returns:
        ``numpy.ndarray`` holding the second item of every pair, in
        iteration order.
    """
    values = []
    for _node, value in deg:
        values.append(value)
    return np.array(values)

def degree_centrality(network, alpha):
    """Score every node by blending its degree with its weighted degree.

    The score of a node is ``degree ** (1 - alpha) * weighted_degree ** alpha``
    where ``degree`` comes from ``network.degree()`` and ``weighted_degree``
    from ``network.degree(weight='weight')``.

    Args:
        network: Graph exposing ``degree`` and ``nodes`` (networkx API).
        alpha: Blend exponent; ``0`` uses the plain degree only, ``1`` the
            weighted degree only.

    Returns:
        A list with one single-entry dict ``{node: score}`` per node, in the
        order of ``network.nodes``.
    """
    degree_term = deg_arr(network.degree()) ** (1 - alpha)
    strength_term = deg_arr(network.degree(weight='weight')) ** alpha
    scores = degree_term * strength_term
    return [{node: score} for node, score in zip(network.nodes, scores)]

# Weight assigned to each publication type; a higher number means a heavier
# co-authorship edge. Keys are looked up verbatim against the `pub_type`
# column after it has been cast to str, so they must match the data exactly.
PUB_WEIGHTS = {
  # NOTE(review): this key ends with a trailing space - confirm it matches the CSV value.
  'Livro Publicado ou Organizado ': 9,
  'Artigo Publicado': 8,
  'Capítulo de Livro Publicado': 8,
  'Artigo Aceito para Publicação': 7,
  'Texto em Jornal/Revista': 6,
  'Trabalho em Evento': 3,
  'Outra Produção Bibliográfica': 2,
  'Prefácio/Posfácio': 2,
  # Presumably what a missing pub_type becomes once cast to str - verify against the data.
  'nan': 1
}

# Load the co-authorship table and attach the numeric weight of each row's
# publication type. `pub_type` is cast to str first so every value is a plain
# string before it is looked up in PUB_WEIGHTS.
network_data = pd.read_csv('trust_network.csv')
network_data = network_data.assign(pub_type=network_data['pub_type'].astype(str))
# Direct indexing (rather than .get) so an unknown publication type raises
# KeyError instead of silently receiving no weight.
network_data['type_weitght'] = network_data['pub_type'].map(PUB_WEIGHTS.__getitem__)

In [32]:
# Co-authorship multigraph: one node per author id, one edge per author pair
# per publication (parallel edges are kept, hence MultiGraph).
trust_network = nx.MultiGraph()

# Each node carries the author's display name in its `name` attribute.
trust_network.add_nodes_from(
    (author_id, {'name': authname(author_id)})
    for author_id in network_data['authid'].unique()
)

# Edges are added publication by publication, in order of first appearance.
trust_network.add_edges_from(
    link
    for publication_id in network_data['pubid'].unique()
    for link in edges(publication_id)
)

In [33]:
import re
import string
from gensim.utils import deaccent
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

def preprocessing(line):
    """Normalise a piece of text before it is vectorised.

    The text is lower-cased, every ASCII punctuation character is replaced by
    a single space, and the result is passed through ``deaccent``.

    Args:
        line: Any object; it is converted with ``str`` first.

    Returns:
        The value returned by ``deaccent`` for the cleaned text.
    """
    line = str(line).lower()
    # string.punctuation contains regex metacharacters. Unescaped, its "\]"
    # is read as an escaped bracket, so the backslash itself is never matched
    # and survives in the output. Escaping makes every character literal.
    punctuation_class = "[{}]".format(re.escape(string.punctuation))
    line = re.sub(punctuation_class, " ", line)
    return deaccent(line)

# One text profile per author: all of the author's titles joined with spaces
# and normalised by `preprocessing`. Rows follow the order in which author ids
# first appear in `network_data`, and `tfidf` rows follow the same order.
#
# The records are collected first and the frame is built once:
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies it on every iteration.
profile_records = [
    {
        'id': pid,
        'profile': preprocessing(
            network_data[network_data['authid'] == pid]['title'].str.cat(sep=' ')
        ),
    }
    for pid in network_data['authid'].unique()
]
peer_profiles = pd.DataFrame(profile_records, columns=['id', 'profile'])

# Sparse matrix with one TF-IDF row per entry of `peer_profiles`.
tfidf = TfidfVectorizer().fit_transform(peer_profiles['profile'])

def content_based_recommendation(pid, top_n=19):
    """Recommend the peers whose text profile is most similar to ``pid``'s.

    Similarity is the linear kernel between TF-IDF rows. Row ``i`` of
    ``tfidf`` belongs to ``peer_profiles['id'][i]``, so ``pid`` is translated
    to its row through that column and the ranked rows are translated back
    the same way. Using ``pid`` directly as a row number, or mapping rows
    through the sorted graph nodes, is only correct when the ids happen to be
    ``0..N-1`` and first appear in sorted order.

    Args:
        pid: Peer (author) id as stored in ``peer_profiles['id']``.
        top_n: Maximum number of peers to return. The default of 19 keeps
            the historical result size.

    Returns:
        Up to ``top_n`` peer ids ordered from most to least similar. The
        target peer is not filtered out, so it can appear in the result.

    Raises:
        ValueError: If ``pid`` has no row in ``peer_profiles``.
    """
    peer_ids = list(peer_profiles['id'])
    try:
        target_row = peer_ids.index(pid)
    except ValueError:
        raise ValueError("peer id {!r} has no profile in peer_profiles".format(pid)) from None

    target_profile = tfidf.getrow(target_row)
    cosine_similarities = linear_kernel(target_profile, tfidf).flatten()

    # argsort is ascending; reverse it to rank the most similar rows first.
    ranked_rows = cosine_similarities.argsort()[::-1][:top_n]
    return [peer_ids[row] for row in ranked_rows]

In [None]:
from itertools import groupby

# Exponent shared by the centrality score and the path-length measure.
alpha = 0.5

# Peer the recommendations are computed for. Kept in one place because the
# content-based query and the path search below must use the same peer.
source_peer = 1

recommendations = content_based_recommendation(source_peer)

# degree_centrality returns single-entry dicts; flatten them into one mapping
# restricted to the recommended peers.
centralities = {
    peer: score
    for entry in degree_centrality(trust_network, alpha)
    for peer, score in entry.items()
    if peer in recommendations
}

# One (target, length) pair per simple path of at most 6 hops from the source
# peer to each recommended peer.
all_distances = [
    (target, path_length(sum_of_weights(jumps(path), trust_network), alpha))
    for target in recommendations
    for path in nx.all_simple_paths(trust_network, source_peer, target, cutoff=6)
]

# Shortest length per target. itertools.groupby only merges adjacent items;
# that is enough here because all_distances is produced target by target.
# Targets without any path are simply absent from the mapping.
distances = {
    pid: min(length for _, length in group)
    for pid, group in groupby(all_distances, key=lambda entry: entry[0])
}

In [None]:
# Re-rank the content-based recommendations by trust signals.
# Highest centrality first.
centrality_recommendations = sorted(
    recommendations,
    key=centralities.__getitem__,
    reverse=True,
)
# Shortest trust distance first; peers with no known path get 999 as a
# stand-in distance, which places them at the end.
distance_recommendations = sorted(
    recommendations,
    key=lambda peer: distances.get(peer, 999),
)

In [None]:
# Display the peer ids returned by content_based_recommendation, in returned order.
recommendations

In [None]:
# Display the centrality score computed for each recommended peer.
centralities

In [None]:
# Display the recommendations sorted by centrality, highest first.
centrality_recommendations

In [None]:
# Display the shortest path length found for each recommended peer that has a path.
distances

In [None]:
# Display the recommendations sorted by path length, shortest first.
distance_recommendations