In [3]:
import numpy as np
import igraph as ig
import pandas as pd
from numpy.linalg import norm

def l1_row(row):
    """Scale ``row`` so that the absolute values of its entries sum to 1.

    A row whose L1 norm is exactly 0 is handed back untouched, because there
    is nothing to scale and dividing would only produce NaNs.
    """
    total = norm(row, ord=1)
    return row if total == 0 else row / total

def norm_rows(df):
    """Return ``df`` with ``l1_row`` applied to each of its rows."""
    normalized = df.apply(l1_row, axis="columns")
    return normalized

# ---- parameters that can be tweaked ----------------------------------
# Weights of the edges of G.
## edges leaving a project: in adj_p_and_acc, d scales D, m scales M and c scales C
d, m, c = 4.0 / 7.0, 2.0 / 7.0, 1.0 / 7.0
## edges leaving an account (account -> project)
m_, c_ = 3.0 / 5.0, 2.0 / 5.0

# Damping factors. e_proj is used as the per-project reset weight in
# page_rank(); e_account is not referenced by any of the visible cells.
e_proj, e_account = 0.9, 0.1

# number of iterations passed to the PageRank call
niter = 1000

# ----------------------------------------------------------------------

def adj_p_and_acc(D, C, M):
    """Assemble the row-normalised adjacency matrix over projects and accounts.

    Four blocks are concatenated and the result is passed through
    ``norm_rows``::

        [ d * norm_rows(D)                       | m * norm_rows(M) + c * norm_rows(C) ]
        [ (m_ * M.T) * norm_rows(C.T)            | all zeros                           ]
        [   + c_ * norm_rows(C.T)                |                                     ]

    where ``*`` between the two frames in the lower-left block is an
    element-wise product.

    Parameters
    ----------
    D, C, M : pandas.DataFrame
        Presumably ``D`` is project x project while ``C`` and ``M`` are
        project x account (they are transposed for the account rows) --
        TODO confirm against the caller.

    Returns
    -------
    pandas.DataFrame
        The combined matrix after a final ``norm_rows`` pass.
    """
    p_p = d * norm_rows(D)
    p_a = m * norm_rows(M) + c * norm_rows(C)

    c_by_account = norm_rows(C.transpose())
    a_p = (m_ * M.transpose()).multiply(c_by_account) + (c_ * c_by_account)

    # Account labels are taken from the project -> account block, so the
    # function does not depend on a module-level account list being defined.
    accounts = p_a.columns
    acc0 = pd.DataFrame(
        np.zeros((len(accounts), len(accounts))), columns=accounts, index=accounts
    )

    # combine the matrices: left column block is p_p over a_p, right is p_a over acc0
    left = pd.concat([p_p, a_p], axis=0)
    right = pd.concat([p_a, acc0], axis=0)
    return norm_rows(pd.concat([left, right], axis=1))

# Every known project; the row order of this table fixes the ordering used below.
proj_meta = (
    pd.read_csv('data/cargo-dependencies-meta.csv', delimiter=',', dtype={'ID': 'Int64'})
    .dropna()
)
proj_ids = proj_meta['ID'].tolist()
# Accounts are not loaded in this notebook; the account list and the combined
# project+account labels remain disabled:
# acc = ['a1', 'a2', 'a3']
# labels = np.concatenate((proj,acc), axis=0)

proj_meta.head()

Unnamed: 0,ID,Name,Platform
0,30742,acacia,Cargo
1,30745,aio,Cargo
2,30746,advapi32-sys,Cargo
3,30747,alfred,Cargo
4,30748,algebloat,Cargo


In [4]:
# Dependency edge list with FROM_ID / TO_ID columns
# (presumably FROM_ID depends on TO_ID -- confirm against the data source).
deps = pd.read_csv(
    'data/cargo-dependencies.csv',
    delimiter=',',
)
deps.head()

Unnamed: 0,FROM_ID,TO_ID
0,30742,31187
1,30742,31296
2,30742,31295
3,30742,31085
4,30742,428702


In [7]:
def adj_via_from_to(from_to, all_ids):
    """Build a square 0/1 adjacency matrix from an edge list.

    Parameters
    ----------
    from_to : pandas.DataFrame
        Edge list with the columns ``FROM_ID`` and ``TO_ID``.
    all_ids : sequence
        Ids used, in this order, as both the row and the column labels.

    Returns
    -------
    pandas.DataFrame
        ``len(all_ids)`` x ``len(all_ids)`` frame in which entry
        ``[FROM_ID, TO_ID]`` is 1 for every edge whose two endpoints are both
        in ``all_ids`` and 0 everywhere else.
    """
    adj = pd.DataFrame(0, columns=all_ids, index=all_ids)
    # Hash lookup instead of rebuilding the index list for every edge.
    known_ids = set(adj.index)
    for from_id, to_id in zip(from_to['FROM_ID'], from_to['TO_ID']):
        # Both endpoints must be known: assigning through .at with an unknown
        # TO_ID would add a new column and leave the matrix non-square.
        if from_id in known_ids and to_id in known_ids:
            adj.at[from_id, to_id] = 1
    return adj

# Project x project adjacency matrix, labelled by project id on both axes.
adjdf = adj_via_from_to(from_to=deps, all_ids=proj_ids)
adjdf.head()

Unnamed: 0,30742,30745,30746,30747,30748,30750,30751,30753,30754,30755,...,3469060,3394154,3405396,3480649,3483337,3483781,3483812,2683436,31398,2682138
30742,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
30745,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
30746,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
30747,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
30748,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# Optional: write the adjacency matrix to CSV (disabled; uncomment the next line to export)
#adjdf.to_csv('data/cargo-adjdf.csv')

In [6]:
# Project names, in the same row order as proj_meta; used as vertex labels.
labels = proj_meta.loc[:, 'Name'].tolist()

def g_from_adj(adjdf):
    """Turn an adjacency DataFrame into an igraph graph.

    Every non-zero cell of ``adjdf`` becomes an edge.  The non-zero values are
    stored in the edge attribute ``weight`` and the module-level ``labels``
    list is stored in the vertex attribute ``label``.
    """
    weights = adjdf.values
    # the graph is built from the boolean "is there an edge" view of the matrix
    has_edge = weights.astype(bool).tolist()
    graph = ig.Graph.Adjacency(has_edge)
    # NOTE(review): this relies on igraph creating edges in the same order in
    # which nonzero() lists the cells -- harmless while all weights are equal,
    # but confirm before using non-uniform weights.
    graph.es['weight'] = weights[weights.nonzero()]
    graph.vs['label'] = labels
    return graph

def page_rank(g):
    """Run personalised PageRank on ``g`` and tabulate the score of each project.

    Every project receives the same reset weight ``e_proj`` and ``niter`` is
    forwarded to igraph unchanged.  The result is a DataFrame with the columns
    ``ID``, ``Name`` and ``page_rank``, filled from ``proj_ids``, ``labels``
    and the computed scores respectively.
    """
    # with accounts this would be: [e_proj] * len(proj) + [e_account] * len(acc)
    project_count = len(proj_ids)
    reset_weights = [e_proj for _ in range(project_count)]
    scores = g.personalized_pagerank(reset=reset_weights, niter=niter)
    return pd.DataFrame({'ID': proj_ids, 'Name': labels, 'page_rank': scores})

# Build the dependency graph from the adjacency matrix.
g = g_from_adj(adjdf)

# NOTE: from here on the name `page_rank` refers to the result DataFrame, not
# to the function defined above; re-run the whole cell to call it again.
page_rank = page_rank(g)
page_rank_top100 = page_rank.sort_values(by='page_rank', ascending=False).iloc[:100]

In [21]:
# Write the page_rank data to CSV, highest score first and without the index column
(
    page_rank
    .sort_values(by='page_rank', ascending=False)
    .to_csv('data/cargo-pagerank.csv', index=False)
)

In [20]:
# Show the 100 highest-ranked projects (sorted by page_rank, descending)
page_rank_top100

Unnamed: 0,ID,Name,page_rank
3563,1369785,num-traits,0.059415
1005,428702,rand,0.035209
518,31533,winapi,0.033656
425,31384,serde,0.026141
242,31115,libc,0.022697
4550,1629953,serde_derive,0.020821
16285,3414109,rustc-std-workspace-core,0.019508
396,31342,rustc-serialize,0.014597
7998,2353652,proc-macro2,0.013991
1542,783030,clippy,0.013728


In [None]:
# Inputs for the plot call below, which is currently commented out.
layout = g.layout("large")
# vertex sizes: PageRank score scaled by 100
vertex_weights = (page_rank['page_rank'] * 100).tolist()
# edge widths: every non-zero adjacency entry scaled by 10
adjmat = adjdf.values
nonzero_entries = adjmat[adjmat.nonzero()]
edge_weights = [entry * 10 for entry in nonzero_entries]
#ig.plot(g, vertex_size=vertex_weights, vertex_color=(["blue"] * len(proj_ids)), vertex_label_dist=1, edge_width=edge_weights, layout=layout)