In [1]:
import pathlib
import numpy as np
import networkx as nx

### Computing overall similarity with different weights & thresholds

In [2]:
%%time

def get_overall_sim(alpha, linker_sim_arr, mof_sim_arr):
    """Blend the two similarity inputs into one weighted sum.

    Parameters
    ----------
    alpha
        Weight applied to ``linker_sim_arr``; ``mof_sim_arr`` receives the
        complementary weight ``1 - alpha``.
    linker_sim_arr, mof_sim_arr
        Operands combined with ``*`` and ``+`` (the caller in this notebook
        passes arrays loaded with ``np.load``).

    Returns
    -------
    ``alpha * linker_sim_arr + (1 - alpha) * mof_sim_arr``.
    """
    linker_part = alpha * linker_sim_arr
    mof_part = (1 - alpha) * mof_sim_arr
    return linker_part + mof_part

def apply_threshold(omega, adj_arr):
    """Zero every entry below ``omega`` and the main diagonal, in place.

    Parameters
    ----------
    omega
        Cutoff value; entries strictly less than ``omega`` are set to 0.0.
        Entries equal to ``omega`` are kept.
    adj_arr : numpy.ndarray
        Array to threshold. It is modified in place, so pass a copy if the
        unthresholded values are still needed. ``np.fill_diagonal`` requires
        at least two dimensions.

    Returns
    -------
    numpy.ndarray
        The same ``adj_arr`` object, after modification.
    """
    # One vectorised mask assignment instead of a Python loop over
    # np.argwhere; this also avoids the `adj_arr[*index]` star-subscript
    # syntax, which only parses on Python 3.11+.
    adj_arr[adj_arr < omega] = 0.0
    # Clear the diagonal regardless of its values.
    np.fill_diagonal(adj_arr, 0.0)
    return adj_arr

def get_edge_dataframe(adj_arr):
    """Turn an adjacency array into an edge-list table.

    The array is read as the adjacency matrix of a directed graph
    (``nx.DiGraph``), and that graph's edges are exported with
    ``nx.to_pandas_edgelist``.

    Parameters
    ----------
    adj_arr
        Adjacency array handed to ``nx.from_numpy_array``.

    Returns
    -------
    The edge list produced by ``nx.to_pandas_edgelist``.
    """
    directed_graph = nx.from_numpy_array(adj_arr, create_using=nx.DiGraph)
    return nx.to_pandas_edgelist(directed_graph)

def main(alpha, omega, data_dir=None):
    """Load both similarity arrays and build the thresholded edge list.

    Parameters
    ----------
    alpha
        Weight given to the linker similarity when blending (see
        ``get_overall_sim``).
    omega
        Cutoff passed to ``apply_threshold``.
    data_dir : str or pathlib.Path, optional
        Directory containing the ``similarity/`` folder. Defaults to the
        current working directory, which is what the previous
        ``pathlib.Path("__file__").parent.resolve()`` evaluated to:
        ``"__file__"`` was a quoted literal, not the ``__file__`` variable.

    Returns
    -------
    The edge list returned by ``get_edge_dataframe``.
    """
    if data_dir is None:
        parent_data_path = pathlib.Path.cwd()
    else:
        parent_data_path = pathlib.Path(data_dir)

    linker_sim_path = parent_data_path.joinpath("similarity/linkers_similarity.npy")
    mof_sim_path = parent_data_path.joinpath("similarity/mof_features_similarity.npy")

    linker_sim_arr = np.load(linker_sim_path)
    mof_sim_arr = np.load(mof_sim_path)

    # Blend, then threshold; apply_threshold modifies and returns the
    # freshly built blended array.
    adj_arr = get_overall_sim(alpha, linker_sim_arr, mof_sim_arr)
    adj_arr = apply_threshold(omega, adj_arr)
    return get_edge_dataframe(adj_arr)

# Blend weight and similarity cutoff for this run. They are named once so the
# call to main() and the output file name cannot drift apart.
ALPHA = 0.9
OMEGA = 0.9

edge_pd = main(alpha=ALPHA, omega=OMEGA)

# Output goes to the current working directory. The previous
# pathlib.Path("__file__").parent.resolve() evaluated to the same place,
# because "__file__" was a quoted literal rather than the variable.
parent_data_path = pathlib.Path.cwd()

# "1988" is carried over verbatim from the original file name; presumably the
# number of nodes in the dataset. TODO confirm and derive it from the data.
edge_pd.to_csv(parent_data_path.joinpath(f"EdgesList_1988_{ALPHA}_alpha_{OMEGA}_omega.csv"), index=False)

CPU times: user 3.26 s, sys: 67.8 ms, total: 3.32 s
Wall time: 3.43 s
