In [None]:
import numpy as np
import pandas as pd
import networkx as nx
from network_control.metrics import modal_control,ave_control #install network_control library https://github.com/BassettLab/nctpy
from network_control.utils import matrix_normalization
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from tqdm import tqdm
from scipy.cluster.hierarchy import linkage
import seaborn as sns
from scipy.stats import boxcox
import math

In [None]:
def get_network(fileroot):
    """Build a weighted directed graph from an edge-list CSV.

    Reads ``Edge_lists/adj_list_{fileroot}.csv``, divides the ``Weight``
    column by its maximum value, and builds an ``nx.DiGraph`` from the
    ``Source`` and ``Target`` columns.

    Args:
        fileroot: Value interpolated into the CSV file name.

    Returns:
        nx.DiGraph: Graph whose edges carry the rescaled weight under the
        attribute key ``'Weight'`` (capital W).

    NOTE(review): networkx functions called with ``weight='weight'`` look up
    a lowercase key and will not see ``'Weight'`` unless it is copied
    elsewhere -- confirm against the rest of the notebook.
    """
    edges = pd.read_csv(f'Edge_lists/adj_list_{fileroot}.csv')
    # Rescale by the largest weight in the file.
    edges['Weight'] = edges['Weight'].div(edges['Weight'].max())
    graph = nx.from_pandas_edgelist(
        edges,
        source='Source',
        target='Target',
        edge_attr='Weight',
        create_using=nx.DiGraph(),
    )
    return graph

In [None]:
def controllability(G):
    """Compute modal and average controllability for every node of ``G``.

    The graph is converted to a dense adjacency matrix with
    ``nx.to_numpy_array`` (default arguments), passed through
    ``matrix_normalization(..., 'discrete')``, and then handed to
    ``modal_control`` and ``ave_control``.

    Args:
        G: networkx graph.

    Returns:
        tuple[dict, dict]: ``(modal, average)`` dictionaries keyed by node,
        in the iteration order of ``G.nodes()``.
    """
    node_ids = list(G.nodes())
    system = matrix_normalization(nx.to_numpy_array(G), 'discrete')

    modal_values = modal_control(system)
    average_values = ave_control(system)

    # The matrix rows follow G.nodes() order, so the values pair up by position.
    return dict(zip(node_ids, modal_values)), dict(zip(node_ids, average_values))


In [None]:
def get_node_measures(G):
    """Collect node-level network measures into one DataFrame.

    Computes betweenness, eigenvector and degree centrality, clustering
    coefficient, average and modal controllability, average neighbour degree
    and strength (degree with ``weight='weight'``) for every node of ``G``.

    Args:
        G: networkx graph.

    Returns:
        tuple[pd.DataFrame, list[str]]: A frame with a ``Node`` column
        followed by one column per measure, and the list of measure column
        names (without ``'Node'``).

    NOTE(review): betweenness reads the edge attribute ``'distance'`` and the
    other weighted measures read ``'weight'``; networkx falls back to a
    weight of 1 for edges lacking the attribute -- confirm the graph carries
    both keys.
    """
    betweenness = nx.betweenness_centrality(G, weight='distance', normalized=False)
    eigenvector = nx.eigenvector_centrality(G, max_iter=2000, weight='weight')
    degree = nx.degree_centrality(G)
    clustering = nx.clustering(G, weight='weight')
    modal, average = controllability(G)
    neighbour_degree = nx.average_neighbor_degree(G)
    strength = dict(G.degree(weight='weight'))

    # Insertion order fixes the column order of the resulting table.
    per_measure = {
        'betweenness_centrality': betweenness,
        'eigenvector_centrality': eigenvector,
        'degree_centrality': degree,
        'clustering_coefficient': clustering,
        'average_controllability': average,
        'modal_controllability': modal,
        'neighbour_degree': neighbour_degree,
        'strength': strength,
    }
    measures = list(per_measure)

    def _single_column(name, values):
        # One-column frame indexed by node, named after the measure.
        frame = pd.DataFrame.from_dict(values, 'index')
        frame.columns = [name]
        return frame

    columns = [_single_column(name, values) for name, values in per_measure.items()]
    table = pd.concat(columns, axis=1).reset_index()
    table.columns = ['Node', *measures]
    return table, measures

In [None]:
def adjust_dist(df_all, measure_list):
    """Transform measure distributions and drop non-finite rows and outliers.

    Every column named in ``measure_list`` (except ``'Node'``) is transformed:
    columns whose name contains ``'controllability'`` get a Box-Cox transform,
    all others a natural log. Rows containing NaN or +/-inf in any column are
    then dropped, and finally hard-coded lower cut-offs are applied to the
    transformed eigenvector / hubs / authorities / in- and out-closeness
    columns when a measure with a matching name is present.

    Neither ``df_all`` nor ``measure_list`` is modified; the function works on
    copies, so re-running the calling cell does not transform the data twice.

    Args:
        df_all (pd.DataFrame): Frame holding the measure columns.
        measure_list (list[str]): Column names to transform. A ``'Node'``
            entry, if present, is ignored.

    Returns:
        pd.DataFrame: New frame with transformed, filtered rows.

    Raises:
        KeyError: If a measure name contains ``'eigenvector'`` but the frame
            lacks ``'eigenvector_centrality'`` or
            ``'null_eigenvector_centrality'`` (likewise for the other
            filtered columns).
        ValueError: From ``boxcox`` if a controllability column is not
            strictly positive.
    """
    # Copy both inputs: the original removed 'Node' from the caller's list and
    # overwrote the caller's columns in place.
    measures = [m for m in measure_list if m != 'Node']
    adjusted = df_all.copy()

    for measure in measures:
        if 'controllability' in measure:
            transformed, _ = boxcox(adjusted[measure].values)
            adjusted[measure] = transformed
        else:
            # log(0) yields -inf, which is removed below.
            adjusted[measure] = np.log(adjusted[measure])

    adjusted = adjusted.replace([-np.inf, np.inf], np.nan).dropna()

    # Cut-offs are expressed on the transformed (log) scale.
    for measure in measures:
        if 'eigenvector' in measure:
            keep = np.logical_and(adjusted['eigenvector_centrality'] > -20,
                                  adjusted['null_eigenvector_centrality'] > -20)
            adjusted = adjusted.loc[keep]
        elif 'hubs' in measure:
            adjusted = adjusted.loc[adjusted['hubs'] > -12]
        elif 'authorities' in measure:
            adjusted = adjusted.loc[adjusted['authorities'] > -15]
        elif 'incloseness' in measure:
            adjusted = adjusted.loc[adjusted['incloseness'] > -12.5]
        elif 'outcloseness' in measure:
            adjusted = adjusted.loc[adjusted['outcloseness'] > -12.5]

    return adjusted

In [None]:
def get_class_labels():
    """Load MP and journalist class labels into one ``Node``/``Label`` table.

    Reads ``labels/labels_mps.csv`` and ``labels/labels_journalists.csv``.
    MP rows whose ``label`` is 0 and whose ``c2`` is not ``'Conservative'``
    are relabelled 3; every journalist ``label`` is incremented by 1. The
    ``username`` and ``label`` columns of both files are then stacked (MPs
    first) and renamed.

    Returns:
        pd.DataFrame: Columns ``['Node', 'Label']`` with a fresh integer index.
    """
    class_labels_mp = pd.read_csv('labels/labels_mps.csv')
    class_labels_jou = pd.read_csv('labels/labels_journalists.csv')

    # Single .loc call with row mask and column: the chained form
    # df['label'].loc[mask] = 3 writes to a temporary under pandas
    # Copy-on-Write and silently leaves the frame unchanged.
    relabel = (class_labels_mp['label'] == 0) & (class_labels_mp['c2'] != 'Conservative')
    class_labels_mp.loc[relabel, 'label'] = 3
    class_labels_jou['label'] += 1

    class_labels = pd.concat(
        [class_labels_mp[['username', 'label']],
         class_labels_jou[['username', 'label']]],
        axis=0,
        ignore_index=True,
    )
    class_labels.columns = ['Node', 'Label']
    return class_labels

In [None]:
def get_cluster_labels(X, num_clusters, num_sims, random_state=None):
    """Run k-means ``num_sims`` times and collect the label vectors.

    Each run fits a fresh ``KMeans(init='random', n_init=300)`` on ``X``.

    Args:
        X: Data passed to ``KMeans.fit_predict``.
        num_clusters (int): Number of clusters for every run.
        num_sims (int): Number of independent k-means runs.
        random_state (int | np.random.RandomState | None): Seed or generator
            for reproducible runs. One generator is shared by all runs, so
            they draw different initialisations while the whole sequence is
            repeatable. ``None`` (default) leaves seeding to KMeans, exactly
            as before this parameter existed.

    Returns:
        np.ndarray: Array built from the per-run label vectors, one row per
        run.
    """
    if random_state is None or isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    runs = []
    for _ in range(num_sims):
        # Passing the shared generator (not a fixed int) keeps runs distinct.
        model = KMeans(n_clusters=num_clusters, init='random', n_init=300,
                       random_state=rng)
        runs.append(model.fit_predict(X))
    return np.array(runs)

def get_consensus_matrix(cluster_labels):
    """Compute the co-clustering frequency for every pair of nodes.

    Entry ``[i, j]`` is the fraction of runs in which nodes ``i`` and ``j``
    received the same cluster label.

    Args:
        cluster_labels: 2-D array-like with one row per clustering run and one
            column per node.

    Returns:
        np.ndarray: Symmetric float matrix of shape ``(num_nodes, num_nodes)``
        with ones on the diagonal.

    Raises:
        ValueError: If ``cluster_labels`` is not 2-D or contains no runs.
    """
    labels = np.asarray(cluster_labels)
    if labels.ndim != 2 or labels.shape[0] == 0:
        raise ValueError(
            "cluster_labels must be a non-empty 2-D array of shape (num_sims, num_nodes)"
        )

    num_sims, num_nodes = labels.shape
    agreement = np.zeros((num_nodes, num_nodes))
    # One broadcast comparison per run replaces the per-pair Python loop,
    # which also evaluated every (i, j) pair twice.
    for run in labels:
        agreement += run[:, None] == run[None, :]
    return agreement / num_sims

def plot_clustered_cmat(cmat):
    """Show the dissimilarity matrix ``1 - cmat`` reordered by clustering.

    Single-linkage hierarchical clustering with optimal leaf ordering is run
    on ``1 - cmat``; rows and columns are permuted into the dendrogram's
    left-to-right leaf order and drawn with ``plt.imshow`` plus a colour bar.

    Args:
        cmat (np.ndarray): Square symmetric matrix (e.g. a consensus matrix)
            with values in ``[0, 1]``.

    Returns:
        None. The figure is displayed via ``plt.show()``.
    """
    from scipy.cluster.hierarchy import leaves_list

    dmat = 1 - cmat
    # linkage expects the condensed form: the upper triangle in row-major order.
    condensed = dmat[np.triu_indices(len(dmat), k=1)]
    Z = linkage(condensed, method='single', optimal_ordering=True)

    # leaves_list walks the tree left to right, which is the order that
    # optimal_ordering optimises. Reading leaf ids off Z[:, :2] row by row
    # gives merge order instead and can separate adjacent leaves.
    order = leaves_list(Z)
    ordered = dmat[order][:, order]

    plt.imshow(ordered, cmap='coolwarm', vmin=0, vmax=1)
    plt.colorbar()
    plt.show()

In [None]:
def coeffs(n_array, minimum=0):
    """Return 0/1 arrays of length ``n_array`` grouped by their number of ones.

    For each count of ones from ``minimum`` to ``n_array`` (inclusive):

    * if there are at most ``ITER_MAX`` (100) distinct arrangements, all of
      them are generated deterministically, in lexicographic order of the
      positions holding a one;
    * otherwise ``ITER_MAX`` random permutations are drawn with
      ``np.random.permutation`` and the distinct ones are kept, so at most
      ``ITER_MAX`` arrays are returned for that count.

    Args:
        n_array (int/float): Length of each array (converted with ``int``).
        minimum (int/float): Minimum number of ones considered.

    Returns:
        list: List of float ``np.ndarray`` objects containing 0s and 1s.

    Raises:
        ValueError: If ``minimum`` is negative (raised by ``math.comb``).
    """
    from itertools import combinations

    ITER_MAX = 100  # cap on arrays produced per count of ones
    n_array = int(n_array)
    coeff_list = []

    for ones in range(int(minimum), n_array + 1):
        if math.comb(n_array, ones) <= ITER_MAX:
            # Small enough to enumerate exhaustively. Random sampling under a
            # draw cap routinely missed combinations here.
            for positions in combinations(range(n_array), ones):
                arr = np.zeros(n_array)
                arr[np.array(positions, dtype=int)] = 1
                coeff_list.append(arr)
            continue

        # Too many arrangements to list: keep a capped random sample.
        template = np.zeros(n_array)
        template[:ones] = 1
        seen = set()
        for _ in range(ITER_MAX):
            candidate = np.random.permutation(template)
            key = candidate.tobytes()  # hashable fingerprint for de-duplication
            if key not in seen:
                seen.add(key)
                coeff_list.append(candidate)

    return coeff_list