In [None]:
import numpy as np
import pandas as pd
import networkx as nx
from network_control.metrics import modal_control,ave_control #install network_control library https://github.com/BassettLab/nctpy
from network_control.utils import matrix_normalization
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from tqdm import tqdm
from scipy.cluster.hierarchy import linkage
import seaborn as sns
from scipy.stats import boxcox
import itertools
import warnings

from ipynb.fs.full.func_lib import adjust_dist
from ipynb.fs.full.func_lib import get_class_labels
from ipynb.fs.full.func_lib import get_cluster_labels
from ipynb.fs.full.func_lib import get_consensus_matrix
from ipynb.fs.full.func_lib import plot_clustered_cmat
from ipynb.fs.full.func_lib import coeffs

In [None]:
# Names of the node-level measures, in the column order used throughout the
# notebook. Positions in this list are what the selection arrays index into.
feature_list = [
    'k_in',
    'k_out',
    'hubs',
    'authorities',
    'incloseness',
    'outcloseness',
    't1',
    't2',
    'betweenness_centrality',
    'eigenvector_centrality',
    'degree_centrality',
    'clustering_coefficient',
    'modal_controllability',
    'average_controllability',
    'neighbour_degree',
    'strength',
]

# Candidate feature selections produced by the project helper `coeffs`, which
# is given the number of available features. Later cells iterate over the
# result and treat each item as a per-feature 0/1 selection array.
features_combination = coeffs(len(feature_list))

In [None]:
def get_data(topic, features=None):
    """Load the node-measure table for ``topic`` and return the selected measures.

    Reads ``Node_measures_<topic>.csv`` (first CSV column used as the index),
    passes the table through ``adjust_dist`` together with the module-level
    ``feature_list``, keeps only the requested columns, and returns every
    column except the first one as a float matrix.

    Parameters
    ----------
    topic : str
        Topic name inserted into the CSV file name.
    features : list of str, optional
        Columns to keep, with the identifier column first (the notebook builds
        this as ``['Node', <measures...>, <'null_' measures...>]``). When
        omitted, the module-level ``selected_features`` is used, which keeps
        existing ``get_data(topic)`` calls working.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_rows, len(features) - 1)``.

    Raises
    ------
    NameError
        If ``features`` is omitted and no module-level ``selected_features``
        has been defined yet.
    """
    if features is None:
        # Backward-compatible fallback to the notebook-level selection.
        features = selected_features

    df_all = pd.read_csv('Node_measures_' + topic + '.csv', index_col=0)
    df_all = adjust_dist(df_all, feature_list)
    df_all = df_all[features]

    # Skip the leading identifier column; everything after it is a measure.
    # (A previous reshape of this array was immediately overwritten and only
    # added a dependency on the global `selected_ids`, so it was removed.)
    measure_list = df_all.columns.values[1:]

    return df_all[measure_list].values.astype(float)

In [None]:
def check_pca_transform(X, n_real=None):
    """Check the sign pattern of the first two principal components of ``X``.

    ``X`` is standardised and decomposed with a full PCA. The check passes
    only when both conditions hold:

    1. every loading of the first component has the same strict sign, and
    2. on the second component, the loadings of the first ``n_real`` columns
       all share one strict sign and the loadings of the next ``n_real``
       columns all have the opposite strict sign.

    In this notebook ``X`` is built with the selected measures first and their
    ``null_`` counterparts second, so the two groups are the two halves of the
    columns.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_columns)
        Data matrix to analyse.
    n_real : int, optional
        Size of each column group. Defaults to ``n_columns // 2`` so the split
        follows the number of selected features instead of assuming exactly
        three. Pass ``n_real=3`` to reproduce the former fixed ``0:3`` /
        ``3:6`` split.

    Returns
    -------
    bool
        ``True`` when both conditions hold, ``False`` otherwise (including
        when fewer than two components are available).
    """
    X_sc = StandardScaler().fit_transform(X)

    # Only the loadings are needed, so fit without computing the scores.
    components = PCA().fit(X_sc).components_

    if n_real is None:
        n_real = components.shape[1] // 2

    # Without a second component (or with empty groups) the pattern cannot be
    # evaluated; report failure instead of raising an IndexError.
    if components.shape[0] < 2 or n_real < 1:
        return False

    first_pc = components[0]
    if not (np.all(first_pc < 0) or np.all(first_pc > 0)):
        return False

    group_a = components[1, :n_real]
    group_b = components[1, n_real:2 * n_real]

    opposite_signs = (
        (np.all(group_a < 0) and np.all(group_b > 0))
        or (np.all(group_a > 0) and np.all(group_b < 0))
    )
    return bool(opposite_signs)

In [None]:
list_of_topics = ['ukraine','covid','costofliving','brexit']

# Index arrays (positions in feature_list) of every feature combination whose
# PCA passes check_pca_transform for all topics.
selected_features_list = []

# Warnings raised while loading / transforming the data are silenced for the
# whole scan.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')

    for arr in features_combination:

        # Only combinations with more than two selected features are considered.
        if not (arr.sum() > 2):
            continue

        # NOTE: these two names are deliberately module-level, because
        # get_data(topic) reads the current selection from the notebook globals.
        selected_ids = np.where(arr == 1)[0]
        selected_features = (
            ['Node']
            + [feature_list[index] for index in selected_ids]
            + ['null_'+feature_list[index] for index in selected_ids]
        )

        # Keep the combination only if the check passes for every topic.
        # all() stops at the first failing topic, so the remaining CSVs are
        # not read, and the test follows list_of_topics instead of a literal 4.
        if all(check_pca_transform(get_data(topic)) for topic in list_of_topics):
            selected_features_list.append(selected_ids)

In [None]:
# Clustering settings forwarded to get_cluster_labels.
NUM_CLUSTERS = 3
NUM_SIMS = 50

# One record per retained feature combination; converted to df_clusters once
# at the end instead of growing a DataFrame cell by cell.
cluster_records = []

with warnings.catch_warnings():
    warnings.simplefilter('ignore')

    # Reading and adjusting a topic's table does not depend on the feature
    # combination, so it is done once per topic rather than once per
    # (combination, topic) pair.
    # NOTE(review): assumes adjust_dist(df, feature_list) gives the same result
    # on every call for the same inputs - confirm in func_lib.
    adjusted_by_topic = {
        topic: adjust_dist(
            pd.read_csv('Node_measures_'+topic+'.csv', index_col=0),
            feature_list,
        )
        for topic in list_of_topics
    }

    for selected_ids in selected_features_list:

        # Column layout: 'Node', the selected measures, then their 'null_'
        # counterparts. Identical for every topic, so built once per combination.
        selected_features = (
            ['Node']
            + [feature_list[index] for index in selected_ids]
            + ['null_'+feature_list[index] for index in selected_ids]
        )

        # Rows of the highest-'Inherent' cluster, one frame per topic.
        nodal_frames = []

        for topic in list_of_topics:

            # .copy() gives an independent frame, so adding columns below
            # neither touches the cached table nor triggers
            # SettingWithCopyWarning.
            df_all = adjusted_by_topic[topic][selected_features].copy()

            # Everything after the leading 'Node' column is a measure.
            measure_list = df_all.columns.values[1:]
            X = df_all[measure_list].values.astype(float)

            # NOTE(review): no random seed is set here; if get_cluster_labels
            # is stochastic the result varies between runs - confirm.
            cluster_labels = get_cluster_labels(
                X, num_clusters=NUM_CLUSTERS, num_sims=NUM_SIMS
            )
            df_all['Cluster'] = cluster_labels[0]

            X_sc = StandardScaler().fit_transform(X)
            pca = PCA()
            X_pc = pca.fit_transform(X_sc)

            # Flip the first component when all of its loadings are negative,
            # so larger 'Inherent' scores go with larger measure values.
            inherent = X_pc[:, 0]
            if np.all(pca.components_[0] < 0):
                inherent = -inherent

            # Rows of X_pc are in the same order as rows of df_all, so the
            # scores are attached positionally. Assigning df_all['Node'] to a
            # fresh frame would align on the CSV index and mismatch nodes
            # whenever that index is not 0..n-1.
            df = df_all.assign(Inherent=inherent, Active=X_pc[:, 1], topic=topic)

            # Cluster with the highest mean 'Inherent' score. idxmax only looks
            # at clusters that have members; ranking the means with np.argsort
            # would sort an empty cluster's NaN mean last and select it.
            nodal_cluster = df.groupby('Cluster')['Inherent'].mean().idxmax()

            nodal_frames.append(df.loc[df['Cluster'] == nodal_cluster])

        # Kept for inspection: nodal-cluster rows of all topics for the
        # current combination.
        df_final = pd.concat(nodal_frames)

        # Nodes present in the nodal cluster of every topic.
        shared_nodes = set.intersection(
            *(set(frame['Node']) for frame in nodal_frames)
        )

        cluster_records.append({
            'features': str(measure_list),
            'shared_nodes': len(shared_nodes),
        })

df_clusters = pd.DataFrame(cluster_records, columns=['features', 'shared_nodes'])
        