## Examine the latent factor graphs and identify the highest degree features

In [None]:
!date

#### import libraries

In [None]:
from pandas import read_csv, DataFrame
from json import load as json_load
from igraph import Graph

#### set notebook variables

In [None]:
# switch for the extra diagnostic output printed/displayed by later cells
DEBUG = True

# project name; prefixes every input and output file name below
project = 'aging_phase2'

# working directory and the sub-directories used under it
wrk_dir = '/labshare/raph/datasets/adrd_neuro/brain_aging/phase2'
figures_dir = f'{wrk_dir}/figures'
results_dir = f'{wrk_dir}/results'

# input: the latent factor age association table
assoc_file = f'{results_dir}/{project}.latent.age_glm.csv'

# output: the table of high degree features found in the graphs
out_file = f'{figures_dir}/{project}.latents.graph_high_degree_features.csv'

### load the latent factor age associations

In [None]:
# read the association table, taking the first column as the row index
age_glm_df = read_csv(assoc_file, index_col=0)
print(f'shape of age_glm_df is {age_glm_df.shape}')

# combine cell type and feature into a single "cell_type:feature" key per row
age_glm_df['key_name'] = age_glm_df['cell_type'] + ':' + age_glm_df['feature']

if DEBUG:
    # show a few random rows and how many distinct keys were produced
    display(age_glm_df.sample(4))
    print(f'age_glm_df has {age_glm_df.key_name.nunique()} keys')

### for each graph find the vertices with the highest degrees
One graph combines all model types ('all'), followed by a separate graph for each individual model type.

In [None]:
# Graphs to inspect: the combined 'all' graph first, then one per model type in
# order of first appearance in age_glm_df. dict.fromkeys drops repeats while
# keeping insertion order, so the graphs are always processed in the same order
# and the rows collected from them come out in a reproducible order; a set of
# strings gives no such guarantee between interpreter sessions.
graph_types = list(dict.fromkeys(['all', *age_glm_df.model_type.unique()]))
print(graph_types)

### which of the actual (non-latent) features are the most connected in each graph

In [None]:
%%time
# For every graph, collect the 'feature' vertices whose degree is among the
# highest ranked degree entries. Each record appended to high_degree_features
# is [graph_type, vertex name, vertex membership, degree].

# how many of the top ranked degree entries to consider per graph; every
# vertex tied with one of those entries is reported as well
top_rank_count = 11

high_degree_features = []
for graph_type in graph_types:
    print(f'######## {graph_type} ########')
    graphml_file = f'{figures_dir}/{project}.latents.{graph_type}.graphml'
    this_graph = Graph.Read_GraphML(graphml_file)
    if DEBUG:
        print(this_graph.vcount())
        print(this_graph.ecount())
    # restrict to vertices whose 'type' attribute is 'feature'
    feature_nodes = this_graph.vs.select(type='feature')
    degrees = feature_nodes.degree()
    if not degrees:
        # nothing to rank; avoid indexing into an empty degree list
        print(f'no feature nodes found in the {graph_type} graph, skipping')
        continue
    # slicing is safe when the graph has fewer feature nodes than
    # top_rank_count; the set collapses tied degrees so each distinct degree
    # value is handled once, from highest to lowest
    top_entries = sorted(degrees, reverse=True)[:top_rank_count]
    top_degrees = sorted(set(top_entries), reverse=True)
    for rank, this_degree in enumerate(top_degrees):
        # positions (within feature_nodes) of all vertices having this degree
        indices = [i for i, x in enumerate(degrees) if x == this_degree]
        if DEBUG:
            print(rank, this_degree, indices)
        for found_index in indices:
            this_node = feature_nodes[found_index]
            high_degree_features.append([graph_type, this_node['name'], this_node['membership'], this_degree])
            if DEBUG:
                print(graph_type, this_node['name'], this_node['membership'], this_degree)

### convert list of found high degree features into a dataframe

In [None]:
# tabulate the collected records and keep only the first occurrence of any
# fully identical row
hd_features_df = (
    DataFrame(data=high_degree_features,
              columns=['graph_type', 'feature', 'partition', 'degree'])
    .drop_duplicates(keep='first')
)
print(f'shape of hd_features_df {hd_features_df.shape}')

if DEBUG:
    # a few random rows, then the number of rows retained per graph type
    display(hd_features_df.sample(4))
    display(hd_features_df.graph_type.value_counts())

### save the high degree feature dataframe

In [None]:
# write the de-duplicated high degree feature table to out_file; no index
# argument is passed, so pandas' default of also writing the row index applies
hd_features_df.to_csv(out_file)

#### show the highest degree features for each graph type

In [None]:
# for each graph type, display its rows with the largest degree values
for graph_type in graph_types:
    is_this_type = hd_features_df.graph_type == graph_type
    ranked = hd_features_df.loc[is_this_type].sort_values('degree', ascending=False)
    # head() with no argument shows the first five rows
    display(ranked.head())

In [None]:
!date