In [1]:
%load_ext autoreload

import os, sys
import networkx as nx

sys.path.insert(0, "../")

%autoreload 2
from mikg.kgraph import *
import pandas as pd
import numpy as np

import matplotlib
import matplotlib.pyplot as plt

from collections import defaultdict, Counter

import random
random.seed(42)

import pickle

In [None]:
# Load the base knowledge graph from disk into a fresh KGraph instance.
kg = KGraph()
kg.load_kgraph("../data/initial_base_graph.out")

In [None]:
# Cell output: whatever get_edge_between_type() returns for the base graph.
kg.get_edge_between_type()

In [None]:
# Tab-separated expression table; a later cell splits it on its `group` column.
exprDF = pd.read_csv("../seq_data/expression_ctzone_mean_df.tsv", sep="\t")

In [None]:
# One independent copy of the expression rows per distinct `group` value,
# keyed by that value.
exprDFs = {
    group_name: exprDF[exprDF.group == group_name].copy()
    for group_name in exprDF.group.unique()
}

print(list(exprDFs))

In [None]:
# Build one scored knowledge graph per expression group.
exprKGs = {}

for group, group_expr in exprDFs.items():
    print(group)

    # Every group gets its own freshly loaded base graph before the group's
    # expression values are attached to it.
    group_kg = KGraph()
    group_kg.load_kgraph("../data/initial_base_graph.out")
    group_kg.add_gene_expression(group_expr)

    # A new scorer instance is created for each graph.
    MeanNetworkScorer().score(group_kg)

    exprKGs[group] = group_kg

In [None]:
import pickle

# Number of per-group graphs about to be written.
print(len(exprKGs))

# Persist all scored graphs in a single pickle; a later cell reloads this file.
with open("zone_exprkgs.pickle", 'wb') as f:
    pickle.dump(exprKGs, f)

In [None]:
# Same count again, as a standalone check cell.
print(len(exprKGs))


In [None]:
##
##
##
#  START HERE to skip the graph construction above:
#  the next cell reloads exprKGs from zone_exprkgs.pickle.
##
##
##

In [None]:
# Reload the scored per-group graphs from zone_exprkgs.pickle.
# The context manager closes the file handle, which the bare open() call leaked.
# NOTE: unpickling can execute arbitrary code; only load pickle files that were
# produced by this notebook.
with open("zone_exprkgs.pickle", "rb") as f:
    exprKGs = pickle.load(f)

In [None]:
# Nest the graphs as tissue -> "<tissue>_<zone>" -> graph.
tissue2zone2kg = defaultdict(dict)
for group_key, group_kg in exprKGs.items():
    # Keys must contain exactly one underscore, otherwise this unpacking raises.
    tissue, zone = group_key.split("_")
    zone_key = "{}_{}".format(tissue, zone)
    tissue2zone2kg[tissue][zone_key] = group_kg

In [None]:
# Per tissue: its name, the number of zone graphs, and the zone keys.
for tissue_name, zone_kgs in tissue2zone2kg.items():
    print(tissue_name, len(zone_kgs), list(zone_kgs))

In [None]:
# Run the geneset annotation twice on every zone graph: once with genes and
# once with drugs as the target type, both against disease sets.
gsa = GenesetAnnotator()
for t, zone_kgs in tissue2zone2kg.items():
    for z, zone_kg in zone_kgs.items():
        print(t, z)

        # how specific is gene for disease?
        gsa.annotate_genesets(zone_kg, settype="disease", targettype="gene")
        # how specific is drug for disease?
        gsa.annotate_genesets(zone_kg, settype="disease", targettype="drug")

In [None]:
# All tissue names, and the zone keys of the first tissue only.
tissues = list(tissue2zone2kg)
zones = list(tissue2zone2kg[tissues[0]])

print(tissues)
print(zones)

In [None]:
# Reference graph for the inspection cells below: first zone key of the first tissue.
fKG = tissue2zone2kg[tissues[0]][zones[0]]

fKG.get_edge_between_type()

In [None]:
fKG.get_edge_edge_types()

In [None]:
fKG = tissue2zone2kg[tissues[0]][zones[0]]

# Accessor reads the "disease_spec_zscore" node attribute (0 when absent); node_type="gene".
fKG.plot_node_attribute_histogram(lambda x: x.get("disease_spec_zscore", 0), node_type="gene")

In [None]:
fKG = tissue2zone2kg[tissues[0]][zones[0]]

# Same accessor as the previous cell, but with node_type="drug".
fKG.plot_node_attribute_histogram(lambda x: x.get("disease_spec_zscore", 0), node_type="drug")

In [None]:
# ("gene", "gene") edge type, accessor reads the "score" field (0 when absent).
fKG.plot_score_histogram(edge_types=[("gene", "gene")],score_accessor=lambda x: x.get("score", 0))

In [None]:
# ("gene", "gene") edge type, accessor reads the "score_zscore" field (0 when absent).
fKG.plot_score_histogram(edge_types=[("gene", "gene")],score_accessor=lambda x: x.get("score_zscore", 0))

In [None]:
# ("drug", "disease") edge type, accessor reads the "score_zscore" field (0 when absent).
fKG.plot_score_histogram(edge_types=[("drug", "disease")],score_accessor=lambda x: x.get("score_zscore", 0))

In [None]:
# Shell escape: print the kernel's working directory (the relative paths used in
# this notebook resolve against it).
! pwd

In [None]:
# NOTE(review): mNS is not referenced by any later cell visible here — confirm before removing.
mNS = MeanNetworkScorer()

In [None]:
# Detect differential gene modules per tissue and zone, extend each module with
# further nodes and disease-specific drugs, plot it, and collect the extended
# graphs in sigKGraphs (keyed by module name).
import glob  # local import: the notebook's import cell never imports glob explicitly

# NOTE(review): absolute, machine-specific output location.
basedir = "/home/ubuntu/git/mikg/MI_output"

# Clear output of a previous run. Real directories are skipped: os.remove()
# raises on them, which would abort the cell before any analysis ran.
for stale_path in glob.glob("{}/*/*".format(basedir)):
    if os.path.isdir(stale_path) and not os.path.islink(stale_path):
        continue
    #print("Removing", stale_path)
    os.remove(stale_path)

# Processing order of the zones; zone names not listed here sort first (-1).
zoneSort = {
    "CTRL": 0,
    "RZ": 1,
    "BZ": 2,
    "IZ": 3,
    "FZ": 4
}

sigKGraphs = {}

for tissue in tissue2zone2kg:
    print(tissue)

    tissueKGs = tissue2zone2kg[tissue]

    # Keys look like "<tissue>_<zone>"; order them by the zone part.
    sTZ = sorted(tissueKGs, key=lambda x: zoneSort.get(x.split("_")[1], -1))
    sTissueKGs = {x: tissueKGs[x] for x in sTZ}

    dmi = DifferentialModuleIdentifier()

    all_comms = {}
    # Remember which zone every significant community was detected in. The second
    # loop below previously reused the leftover loop variable `zone`, so every
    # community was extended and plotted against the *last* zone of the tissue.
    comm_zone = {}
    for zone in sTissueKGs:
        print(tissue, zone)
        gene_kg = sTissueKGs[zone].to_gene_kgraph()

        zone_comms = gene_kg.get_communities(minEdgeScore = 1, resolution=4, prefix="{}_mod".format(zone), score_field="score_zscore")
        sigcomm = dmi.identify_differential_communities(zone_comms, zone, sTissueKGs, verbose=False, min_enriched=0.9, minLogFC=-1, score_field="score_zscore")

        for x in sigcomm:
            all_comms[x] = zone_comms[x]
            comm_zone[x] = zone

    print(len(all_comms))

    for comm in all_comms:
        print(comm, len(all_comms[comm]))

        # Use the graph of the zone this community belongs to.
        zone = comm_zone[comm]
        zone_kg = sTissueKGs[zone]

        nwe = NetworkExtender()
        eKG = nwe.extend_network(all_comms[comm], zone_kg, verbose=False, min_children_gs=3, score_field="score_zscore")

        nwe.extend_network_force(eKG, zone_kg, "drug",
                                 acceptor=lambda n, k: k.kg.nodes[n].get("disease_spec_zscore", 0) >= 0.5, # only disease-specific drugs
                                 edge_acceptor=lambda e, k: k.kg.edges[e].get("score_zscore", 0) > 0.1 and not (k.kg.edges[e].get("evidence_status", "") in ["Withdrawn"]) # no withdrawn drugs
                                 )

        #eKG.plot_graph()

        # Output goes to <basedir>/<tissue>/<comm>; the tissue directory is created on demand.
        outdir = os.path.join(basedir, tissue)
        os.makedirs(outdir, exist_ok=True)
        outdir = os.path.join(outdir, comm)
        dmi.plot_communities(sTissueKGs, [eKG.kg.nodes], zone, main_net=[all_comms[comm]], font_size=6, titles=[comm], num_columns=5, outfile=outdir, score_accessor=lambda x: x.get("score_zscore", 0))

        sigKGraphs[comm] = eKG

            

In [None]:
# Cell output: names of all modules collected in sigKGraphs.
list(sigKGraphs)

In [None]:
import pickle

# Replace each stored graph with an independent copy *before* the output file is
# opened: opening with 'wb' truncates an existing pickle, so a failure while
# copying would otherwise leave an empty zone_sigkgs.pickle behind.
# (Presumably the copy is needed to make the graphs picklable — TODO confirm.)
for sig_kg in sigKGraphs.values():
    sig_kg.kg = sig_kg.kg.copy()

with open("zone_sigkgs.pickle", 'wb') as f:
    pickle.dump(sigKGraphs, f)

In [None]:
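##
##
##
#  OR START HERE to skip the module detection above:
#  the next cell reloads sigKGraphs from zone_sigkgs.pickle.
#  (Cells that use fKG still need the cells after START HERE to be run first.)
##
##
##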

In [None]:
# Reload the extended module graphs from zone_sigkgs.pickle.
# The context manager closes the file handle, which the bare open() call leaked.
# NOTE: unpickling can execute arbitrary code; only load pickle files that were
# produced by this notebook.
with open("zone_sigkgs.pickle", "rb") as f:
    sigKGraphs = pickle.load(f)

In [None]:
# Cell output: names of all modules available after reloading.
list(sigKGraphs)

In [None]:
# NOTE: fKG is assigned in an earlier cell from tissue2zone2kg; loading
# zone_sigkgs.pickle alone does not define it.
fKG.get_node_edges("Orphanet:59135")

In [None]:
# Stored attributes of the same node identifier.
fKG.kg.nodes["Orphanet:59135"]

In [None]:
# Stored attributes of the node "MYH7".
fKG.kg.nodes["MYH7"]

In [None]:
# Manual computation of mean / standard deviation / median over the expression
# scores of all predecessors of `node` that have type `target_ntype`.
node = "Orphanet:59135"
target_ntype = "gene"
# Reads node_attrs["expression"]["score"]; missing keys fall back to 0.
child_score_accessor = lambda x: x.get("expression", {}).get("score", 0)

# get all children of node of type target_ntype
geneChildren = fKG._get_predecessors(node, target_ntype)

# get all scores for children
childrenScores = [child_score_accessor(fKG.kg.nodes[child]) for child in geneChildren]

if len(childrenScores) == 0:
    # No children: define every statistic. nodeSD used to stay unset in this
    # branch, so it was either undefined or stale from an earlier run of the cell.
    nodeScore = 0
    nodeSD = 0
    nodeMedian = 0
else:
    nodeScore = np.mean(childrenScores)
    nodeSD = np.std(childrenScores)
    nodeMedian = np.median(childrenScores)

print(nodeMedian)

In [None]:
# Values returned by get_node_degrees for node type "disease" on the reference graph.
diseaseDegrees = fKG.get_node_degrees("disease")

In [None]:
# 100-bin histogram of those values.
plt.hist(diseaseDegrees, bins=100)

In [None]:
# NOTE(review): hard-coded module name; it must be one of the keys of sigKGraphs
# — verify against the key listing a few cells above.
skg = sigKGraphs["Adipocyte_CTRL_mod_22"]

In [None]:
def get_edges_of_node(kg, node):
    """Return the nodes at the other end of every edge touching ``node``.

    Despite the name, the result holds node identifiers rather than edges:
    first the source of each edge in ``kg.in_edges(node)``, then the target
    of each edge in ``kg.out_edges(node)``. A node appears once per edge, so
    duplicates are possible.

    Args:
        kg: object providing ``in_edges(node)`` and ``out_edges(node)``, each
            yielding indexable edges with the source at index 0 and the
            target at index 1.
        node: identifier of the node to look up.

    Returns:
        list of neighbouring node identifiers (sources first, then targets).
    """
    sources = [edge[0] for edge in kg.in_edges(node)]
    targets = [edge[1] for edge in kg.out_edges(node)]
    return sources + targets

In [None]:
# NOTE(review): skg comes from sigKGraphs, whose values are accessed through `.kg`
# elsewhere in this notebook, while the helper calls in_edges/out_edges directly
# on its argument — confirm skg exposes those methods, otherwise pass skg.kg.
get_edges_of_node(skg, "Orphanet:725")