In [None]:
%matplotlib inline

import pandas as pd
from scipy.stats import spearmanr
from statsmodels.sandbox.stats.multicomp import multipletests
import numpy as np
import os,re, leidenalg, pickle
import igraph as ig
import statsmodels

In [None]:
class Network_Analysis:
    """Build a Spearman correlation network and cluster it with Leiden.

    Constructing an instance runs the whole pipeline:

    1. pairwise Spearman correlation between the rows of ``raw_data``,
       Benjamini-Hochberg adjustment and filtering at ``padj < 0.05``;
       the surviving edges are written to ``<respath>/<name>_edges.txt``;
    2. the edges are loaded into an igraph graph, positive and negative
       edges are split into two layers and optimised jointly with
       ``leidenalg`` (layer weights ``1`` and ``-1``);
    3. cluster membership and node degree are attached to a copy of
       ``nodes`` and written to ``<respath>/<name>_nodes.txt``.

    Parameters
    ----------
    raw_data : pandas.DataFrame
        Matrix whose rows are the variables to correlate (row labels become
        the node names) and whose columns are the observations.
    nodes : pandas.DataFrame
        Node table indexed by the same labels as ``raw_data``. It is copied,
        so the caller's frame is left untouched.
    name : str
        Prefix used for every output file.
    respath : str
        Output directory; created if it does not exist.
    seed : int, optional
        Seed for the Leiden optimiser. ``None`` (default) keeps the
        library's own, non-reproducible seeding.

    Attributes
    ----------
    network_ori : pandas.DataFrame
        Significant edges (``source``, ``target``, ``correlation``,
        ``pvalue``, ``padj``).
    network : igraph.Graph
        Weighted graph built from ``network_ori``.
    clustering_combi : pandas.Series
        Cluster id per node name.
    modularity_combi
        Value returned by ``Optimiser.optimise_partition_multiplex``.
    nodes : pandas.DataFrame
        Copy of the input node table plus ``cluster`` and ``Degree``.
    """

    def __init__(self, raw_data, nodes, name, respath, seed=None):
        self.res_path = respath
        self.name = name
        self.seed = seed

        # Every result file is written below res_path, so make sure it exists
        # before the (expensive) correlation step tries to save its output.
        if self.res_path:
            os.makedirs(self.res_path, exist_ok=True)

        self.network_ori = self.__calc(raw_data)
        # Private copy: the clustering step adds columns to this table and the
        # same node table is typically reused for several networks.
        self.nodes = nodes.copy()
        print('Network Analysis')
        self.__net_analysis_combi()

    def __calc(self, df):
        """Return the table of significant pairwise Spearman correlations.

        Rows of ``df`` are correlated against each other. Each unordered pair
        is reported once, p-values are adjusted with Benjamini-Hochberg
        and only pairs with ``padj < 0.05`` are kept. The result is also
        written as a tab-separated file ``<res_path>/<name>_edges.txt``.
        """
        print('Calculating Correlation..')
        rho, pvals = spearmanr(df.T, nan_policy='omit')
        labels = list(df.index)
        corr = pd.DataFrame(rho, columns=labels, index=labels)
        pval = pd.DataFrame(pvals, columns=labels, index=labels)

        print('Filtering the matrix Correlation..')
        # Keep the upper triangle only so every pair appears once.
        # The builtin bool is used because the np.bool alias was removed in NumPy 1.24.
        upper = np.triu(np.ones(corr.shape)).astype(bool)
        corr = corr.where(upper)
        pval = pval.where(upper)

        print('Making long table of Correlation..')
        corr_long = corr.unstack().reset_index(name='weight')
        pval_long = pval.unstack().reset_index(name='pval')
        res = corr_long.merge(pval_long, on=['level_0', 'level_1'])
        # Drop the diagonal (self-correlation) and the masked lower triangle.
        res = res[res['level_0'] != res['level_1']]
        res = res.dropna()
        res = res[['level_0', 'level_1', 'weight', 'pval']]
        # assign() returns a new frame instead of writing into a filtered slice.
        res = res.assign(padj=multipletests(res['pval'], method='fdr_bh')[1])
        res.columns = ['source', 'target', 'correlation', 'pvalue', 'padj']
        res = res[res.padj < 0.05].reset_index(drop=True)
        res.to_csv('%s/%s_edges.txt' % (self.res_path, self.name), sep='\t')
        print('Done!!')
        return res

    def __net_analysis_combi(self):
        """Cluster the signed network and annotate the node table.

        Positive and negative edges form two layers over the same vertex set;
        the negative layer gets its weights flipped to positive and enters the
        multiplex optimisation with layer weight ``-1``. Sets ``network``,
        ``clustering_combi``, ``modularity_combi`` and ``nodes``, and writes
        ``<res_path>/<name>_nodes.txt``.
        """
        print('Loading The Network...')
        edges = self.network_ori
        g = ig.Graph.TupleList(
            zip(edges['source'], edges['target'], edges['correlation']),
            weights=True,
        )
        self.network = g

        # delete_vertices=False keeps the full vertex set in both layers.
        G_pos = g.subgraph_edges(g.es.select(weight_gt=0), delete_vertices=False)
        G_neg = g.subgraph_edges(g.es.select(weight_lt=0), delete_vertices=False)
        G_neg.es['weight'] = [-w for w in G_neg.es['weight']]

        part_pos = leidenalg.ModularityVertexPartition(G_pos, weights='weight')
        part_neg = leidenalg.ModularityVertexPartition(G_neg, weights='weight')
        optimiser = leidenalg.Optimiser()
        if self.seed is not None:
            # Optional reproducibility; without it the optimiser seeds itself.
            optimiser.set_rng_seed(self.seed)
        diff = optimiser.optimise_partition_multiplex(
            [part_pos, part_neg], layer_weights=[1, -1], n_iterations=-1
        )

        # Stack the memberships of both layers and collapse identical
        # (node, cluster) rows into a single entry per node.
        membership = pd.Series(
            part_pos.membership + part_neg.membership,
            index=G_pos.vs['name'] + G_neg.vs['name'],
        )
        self.clustering_combi = (
            pd.DataFrame(membership)
            .reset_index()
            .drop_duplicates()
            .set_index('index')[0]
        )

        print('Cluster Analysis...')
        self.modularity_combi = diff
        self.nodes['cluster'] = self.clustering_combi.reindex(self.nodes.index).tolist()
        degree = pd.Series(
            self.network.degree(), index=self.network.vs['name'], name='Degree'
        ).reindex(self.nodes.index)
        # axis must be passed by keyword: pandas 2.0 rejects the positional form.
        self.nodes = pd.concat([self.nodes, degree], axis=1)
        self.nodes.to_csv('%s/%s_nodes.txt' % (self.res_path, self.name), sep='\t')

    def save_network(self):
        """Pickle this object to ``<res_path>/<name>_network_object.pkl``."""
        print('Saving The Network..')
        # NOTE(review): nothing in this class sets `writer`; presumably this
        # clears a handle that could not be pickled - confirm before removing.
        self.writer = None
        # The context manager closes the file even if pickling fails.
        with open('%s/%s_network_object.pkl' % (self.res_path, self.name), "wb") as pickle_out:
            pickle.dump(self, pickle_out)

In [None]:
# Minimum number of samples with a non-zero value for a row to be kept
thr = 5 # Minimum samples

# Sample metadata, indexed by sample id
conds = pd.read_csv('https://www.microbiomeatlas.org/data/HGMA.web.metadata.csv',index_col = 'sample.ID')
conds['combi_all'] = conds['Gender'].astype(str)+conds['Geography']#+conds['type']
# Attach the per-dataset annotation from the supplementary table
cohorts = pd.read_excel('SupplementaryTable1.xlsx', index_col = 'Dataset ID')
conds = conds.merge(cohorts, left_on = 'dataset.ID', right_index = True)
# Abundance matrix restricted to the samples that have metadata
data = pd.read_csv('https://www.microbiomeatlas.org/data/HGMA.web.MSP.abundance.matrix.csv', index_col = 0)[conds.index]
# Zeros are treated as missing so that dropna(thresh=thr) keeps only rows
# observed (non-zero) in at least `thr` samples
data = data.replace(0,np.nan).dropna(thresh = thr)#.fillna(0)

# Node table handed to every Network_Analysis call below. It was never
# defined in this notebook, so a fresh kernel failed with NameError.
# NOTE(review): this is a bare table on the abundance-matrix index; if a
# richer annotation table was intended, load it here with the same index.
nodes = pd.DataFrame(index = data.index)


In [None]:
# Network over all samples; zeros are restored for the correlation step
name = 'ALL'
# Pass a copy: Network_Analysis adds a 'cluster' column to the node table it
# receives, which would otherwise leak into the shared `nodes` frame.
k = Network_Analysis(data.fillna(0), nodes.copy(), name = name ,respath='Networks_new/')
k.save_network()

In [None]:
# One network per value of the 'type' metadata column
for var in conds['type'].unique():
    # A missing label matches no sample (NaN != NaN), so skip it up front
    if pd.isna(var):
        continue
    conds_sub = conds[(conds['type'] == var)]
    data_sub = data[conds_sub.index]
    # Re-apply the minimum-sample filter within the subgroup
    data_sub = data_sub.dropna(thresh = thr)
    # A correlation network needs at least two rows; small groups can lose all of them
    if data_sub.shape[0] < 2:
        print('Skipping %s: fewer than 2 rows left after filtering' % var)
        continue
    # Copy the node table so one run does not modify the table used by the next
    k = Network_Analysis(data_sub.fillna(0), nodes.copy(), name = var ,respath='Networks_new/')
    k.save_network()

In [None]:
# One network per value of the 'Region' metadata column
for var in conds['Region'].unique():
    # A missing label matches no sample (NaN != NaN), so skip it up front
    if pd.isna(var):
        continue
    conds_sub = conds[(conds['Region'] == var)]
    data_sub = data[conds_sub.index]
    # Re-apply the minimum-sample filter within the subgroup
    data_sub = data_sub.dropna(thresh = thr)
    # A correlation network needs at least two rows; small groups can lose all of them
    if data_sub.shape[0] < 2:
        print('Skipping %s: fewer than 2 rows left after filtering' % var)
        continue
    # Copy the node table so one run does not modify the table used by the next
    k = Network_Analysis(data_sub.fillna(0), nodes.copy(), name = var ,respath='Networks_new/')
    k.save_network()

In [None]:
# One network per value of the 'Gender' metadata column
for var in conds['Gender'].unique():
    # Skip missing labels; pd.isna also covers None/pd.NA, which the former
    # `type(var) == float` test did not
    if pd.isna(var):
        continue
    conds_sub = conds[(conds['Gender'] == var)]
    data_sub = data[conds_sub.index]
    # Re-apply the minimum-sample filter within the subgroup
    data_sub = data_sub.dropna(thresh = thr)
    # A correlation network needs at least two rows; small groups can lose all of them
    if data_sub.shape[0] < 2:
        print('Skipping %s: fewer than 2 rows left after filtering' % var)
        continue
    # Copy the node table so one run does not modify the table used by the next
    k = Network_Analysis(data_sub.fillna(0), nodes.copy(), name = var ,respath='Networks_new/')
    k.save_network()

In [None]:
# One network per value of the 'Cohort_Arif' metadata column
for var in conds['Cohort_Arif'].unique():
    # Skip missing labels; pd.isna also covers None/pd.NA, which the former
    # `type(var) == float` test did not
    if pd.isna(var):
        continue
    conds_sub = conds[(conds['Cohort_Arif'] == var)]
    data_sub = data[conds_sub.index]
    # Re-apply the minimum-sample filter within the subgroup
    data_sub = data_sub.dropna(thresh = thr)
    # A correlation network needs at least two rows; small groups can lose all of them
    if data_sub.shape[0] < 2:
        print('Skipping %s: fewer than 2 rows left after filtering' % var)
        continue
    # Copy the node table so one run does not modify the table used by the next
    k = Network_Analysis(data_sub.fillna(0), nodes.copy(), name = var ,respath='Networks_new/')
    k.save_network()