___
### Build an edge-categorized dictionary from `netctrl_results_liu.json`
___

In [1]:
# import module

import json
import os
import csv
import pandas as pd

In [2]:
# Load the netctrl results JSON into memory
json_file_path = "../../data/result/netctrl_results_liu.json"

with open(json_file_path, mode='r', encoding='utf-8') as json_file:
    netctrl_results = json.loads(json_file.read())

# Directory listing of the inferred-GRN files, one entry per phase (phase1..phase4)
grn_root = '../../data/inferred_grn'
file_name = dict(
    (f'phase{i}', os.listdir(f"{grn_root}/phase{i}/"))
    for i in range(1, 5)
)

# Load the nD criteria table (one nD value per phase/network pair) and preview it
criteria = pd.read_csv("../../data/result/netctrl_criteria_liu.csv", sep=',')
criteria.head()

Unnamed: 0,phase,network,nD
0,phase1,Primordial_Germ_cells,214
1,phase1,Blastomeres,451
2,phase1,Enveloping_Layer,434
3,phase2,Ectoderm,394
4,phase2,Primordial_Germ_cells,293


In [3]:
# Categorize every edge of every network by comparing the nD value stored for
# that edge in `netctrl_results` with the network's criteria nD:
#   "decrease" -> edge value <  criteria nD
#   "remain"   -> edge value == criteria nD
#   "increase" -> edge value >  criteria nD
# Each stored record is [1-based edge index, edge line (stripped), edge nD value].
# NOTE(review): per the original comment the edge value is presumably the nD
# obtained after removing that edge -- confirm against the netctrl run.
results_dict = {}

for phase, networks in netctrl_results.items():
    results_dict[phase] = {}

    for net_name, edge_results in networks.items():
        # Network name without any '.ncol' suffix; used for the criteria lookup
        # AND for the edge-file path, so a key that already carries the suffix
        # does not turn into '<name>.ncol.ncol'.
        net_key = net_name.replace('.ncol', '')

        # Criteria nD for this (phase, network); networks absent from the
        # criteria table are left out of the result.
        criteria_value = criteria.loc[
            (criteria['phase'] == phase) & (criteria['network'] == net_key), 'nD'
        ]
        if criteria_value.empty:
            continue

        # .item() requires exactly one matching row and raises ValueError otherwise.
        criteria_value = criteria_value.item()

        # Locate the network's edge list before registering any result, so a
        # missing file does not leave an empty (misleading) entry behind.
        edge_file_path = os.path.join('../../data/inferred_grn', phase, f'{net_key}.ncol')
        if not os.path.exists(edge_file_path):
            print(f"[skip] edge file not found: {edge_file_path}")
            continue

        with open(edge_file_path, 'r', encoding='utf-8') as edge_file:
            edges = edge_file.readlines()

        # Edges and results are paired purely by position; zip() stops at the
        # shorter sequence, so make a length mismatch visible instead of silent.
        if len(edges) != len(edge_results):
            print(
                f"[warn] {phase}/{net_name}: {len(edges)} edge lines but "
                f"{len(edge_results)} result values; unmatched items are ignored"
            )

        categorized = {"decrease": [], "remain": [], "increase": []}

        for idx, (edge, value) in enumerate(zip(edges, edge_results.values()), start=1):
            value = int(value)

            if value < criteria_value:
                category = "decrease"
            elif value > criteria_value:
                category = "increase"
            else:
                category = "remain"

            categorized[category].append([idx, edge.strip(), value])

        # Keyed by the original JSON network name, as before.
        results_dict[phase][net_name] = categorized

# Save the categorized edges as JSON
output_json_path = "../../data/result/netctrl_results_liu_filtered.json"

with open(output_json_path, 'w', encoding='utf-8') as out_file:
    json.dump(results_dict, out_file, indent=4, ensure_ascii=False)

In [4]:
# Example: list the critical edges, i.e. the edges stored under "increase",
# printed per phase and per network as "<first node> >>> <second node>".
for phase, phase_networks in results_dict.items():
    print("==============================")
    print("===========", phase, "===========")
    print("==============================")
    for network, categories in phase_networks.items():
        print("----------", network)
        for lc in categories['increase']:
            # lc is [edge index, edge line, value]. Split the edge line once, on
            # any run of whitespace, so tab- or multi-space-separated lines work too.
            nodes = lc[1].split()
            print(nodes[0], '>>>', nodes[1])

---------- Primordial_Germ_cells
---------- Blastomeres
FOXB1A >>> GOLGB1
FOXD3 >>> ZGC:153675
FOXD5 >>> LRWD1
HOPX >>> ELL2
KLF6A >>> ITSN2B
POU5F3 >>> VGLL4L
SI:DKEY-43P13.5 >>> RBM38
SOX11A >>> EFHD1
SOX13 >>> HER5
ZEB1A >>> GOLGA4
---------- Enveloping_Layer
---------- Ectoderm
BLF >>> NNR
EVE1 >>> RGCC
HER7 >>> SI:DKEY-27I16.2
ID1 >>> FOXI1
MSX1B >>> SEPH
OTX1B >>> NOVA2
TFAP2C >>> ZGC:153409
VOX >>> DDX17
ZEB1A >>> ASPM
---------- Primordial_Germ_cells
HER5 >>> CHMP2BB
HER7 >>> SEC61G
ID2A >>> SALL4
ID3 >>> TP53
IRX7 >>> NPM2B
MYCN >>> KIAA0101
OTX1B >>> SSR4
TA >>> SI:CH211-173M16.2
XBP1 >>> P4HB
ZEB1A >>> TPM3
ZGC:113886 >>> GAPDH
---------- Other_Mesendoderm
E2F7 >>> GOLGB1
EVE1 >>> BLF
FOXA2 >>> SEPW1
HER5 >>> BTG2
MSGN1 >>> EFNB2A
SI:DKEY-261J4.3 >>> LRRC59
SOX32 >>> COQ10B
---------- Other_Axial_Mesoderm
CABZ01070258.1 >>> CAPNS1A
DRL >>> CYP2AA8
HMGB1A >>> SEPH
HOPX >>> FOPNL
MYT1A >>> TOP2A
NOTO >>> PFN1
SI:DKEY-261J4.3 >>> ZIC2A
TWIST2 >>> TUBA8L2
---------- Enveloping_L

In [5]:
# Collect the critical TFs: for every phase/network, take the first node of each
# edge stored under "increase". One row per network:
#   ["<phase>_<network>", TF1, TF2, ...]
# A TF appears once per critical edge, so it can repeat within a row.
rows = []

for phase, phase_networks in results_dict.items():
    for network, categories in phase_networks.items():

        phase_net = f"{phase}_{network}"

        lc_genes = []
        for lc in categories.get('increase', []):
            # Each record is expected to be [edge index, edge line, value].
            if not lc or len(lc) < 2:
                continue
            # Split on any whitespace (spaces or tabs); skip blank edge lines
            # instead of recording an empty-string TF.
            tokens = lc[1].split()
            if tokens:
                lc_genes.append(tokens[0])

        # Networks without critical edges contribute no row.
        if not lc_genes:
            continue

        rows.append([phase_net] + lc_genes)

# Rows have different lengths; pandas pads the shorter ones with missing values.
df = pd.DataFrame(rows)

# Save the result (no header row; the row index is written as the first column)
df.to_csv('../../data/result/critical_TF.csv', header=False)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,26,27,28,29,30,31,32,33,34,35
0,phase1_Blastomeres,FOXB1A,FOXD3,FOXD5,HOPX,KLF6A,POU5F3,SI:DKEY-43P13.5,SOX11A,SOX13,...,,,,,,,,,,
1,phase2_Ectoderm,BLF,EVE1,HER7,ID1,MSX1B,OTX1B,TFAP2C,VOX,ZEB1A,...,,,,,,,,,,
2,phase2_Primordial_Germ_cells,HER5,HER7,ID2A,ID3,IRX7,MYCN,OTX1B,TA,XBP1,...,,,,,,,,,,
3,phase2_Other_Mesendoderm,E2F7,EVE1,FOXA2,HER5,MSGN1,SI:DKEY-261J4.3,SOX32,,,...,,,,,,,,,,
4,phase2_Other_Axial_Mesoderm,CABZ01070258.1,DRL,HMGB1A,HOPX,MYT1A,NOTO,SI:DKEY-261J4.3,TWIST2,,...,,,,,,,,,,
