In [None]:
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from tqdm import tqdm
from scipy.cluster.hierarchy import linkage
import seaborn as sns
from scipy.stats import boxcox
import warnings

from ipynb.fs.full.func_lib import adjust_dist
from ipynb.fs.full.func_lib import get_class_labels

In [None]:
# Node-level measure column names. Order matters: later cells pick measures
# from this list by position (see selected_ids).
feature_list = [
    'betweenness_centrality',
    'eigenvector_centrality',
    'degree_centrality',
    'clustering_coefficient',
    'neighbour_degree',
    'strength',
    't1',
    't1_normalised',
    't2',
    't2_normalised',
]

In [None]:
# Positions within feature_list of the measures to analyse.
selected_ids = [2,5,7]

# Column selection: 'Node' first, then the chosen measures, then the same
# measures with a 'null_' prefix.
selected_features = ['Node']
selected_features += [feature_list[pos] for pos in selected_ids]
selected_features += ['null_' + feature_list[pos] for pos in selected_ids]

# Display the resulting column list as the cell output.
selected_features

In [None]:
# Topic keys; each one is spliced into the input and output CSV file names.
topics = [
    'ukraine',
    'covid',
    'costofliving',
    'brexit',
]

In [None]:
# For every topic: read 'Node_measures_<topic>_2week.csv', run one PCA per week
# on the standardised selected measures (and their 'null_' counterparts), keep
# the first two principal components as 'Inherent' and 'Active', attach the
# class label of each node, and write all weeks to 'PCA/<topic>_individual.csv'.
with warnings.catch_warnings():
    # Every warning raised inside this block is silenced.
    warnings.simplefilter('ignore')

    # Loop-invariant column selection: 'Node', the chosen measures, then the
    # same measures with a 'null_' prefix.
    selected_features = ['Node']
    selected_features += [feature_list[index] for index in selected_ids]
    selected_features += ['null_'+feature_list[index] for index in selected_ids]
    measure_list = selected_features[1:]

    # Integer class label -> human-readable group name.
    label_dict = {0:'Backbench MP',
                  1:'Cabinet MP',
                  2:'Shadow cabinet MP',
                  3:'Opposition backbench MP',
                  4:'Prominent journalists',
                  5:'Rest of the journalists'}

    # NOTE(review): fetched once and reused for every merge; this assumes
    # get_class_labels() returns the same table on each call — confirm in func_lib.
    class_labels = get_class_labels()

    for topic in topics:

        print(topic)

        df = pd.read_csv('Node_measures_'+topic+'_2week.csv',index_col=0)

        # Per-week results are collected here and concatenated once at the end,
        # instead of re-copying a growing frame on every iteration.
        weekly_frames = []

        # Weeks are numbered 1..53 in the 'week' column.
        for t in range(1, 54):

            df_all = df.loc[df['week']==t, selected_features]

            # Weeks with at most one row are skipped.
            if len(df_all.index) <= 1:
                continue

            X = df_all[measure_list].values.astype(float)
            X_sc = StandardScaler().fit_transform(X)
            pca = PCA()
            X_pc = pca.fit_transform(X_sc)

            df_pc = pd.DataFrame(data=X_pc[:,:2],columns=['Inherent', 'Active'])

            # If every loading of the first component is negative, flip the
            # sign of its scores.
            if np.all(pca.components_[0] < 0):
                df_pc['Inherent'] = -1*df_pc['Inherent']

            # Assign positionally: df_pc has a fresh 0..n-1 index while df_all
            # keeps the index labels of the source CSV, so assigning the Series
            # directly would align on labels and could misplace or blank nodes.
            df_pc['Node'] = df_all['Node'].values
            df_pc = df_pc.merge(class_labels,on='Node')
            df_pc['Label'] = df_pc['Label'].astype(int)
            df_pc['Label_str'] = df_pc['Label'].replace(to_replace=label_dict)

            # Move 'Node' to the first column, keeping the others in order.
            df_pc = df_pc[['Node'] + [col for col in df_pc.columns if col != 'Node']]

            df_pc['week'] = t

            weekly_frames.append(df_pc)

        # pd.concat raises on an empty list, so fall back to an empty frame
        # when no week had enough rows.
        df_final = pd.concat(weekly_frames) if weekly_frames else pd.DataFrame()

        df_final.to_csv(f'PCA/{topic}_individual.csv')