In [None]:
import numpy as np
import pandas as pd
import numba as nb
from minepy import MINE
from minepy import pstats, cstats

# Minimum `combined_score` a StringDB link must reach to be kept; the value is
# also embedded in the name of the CSV written at the end of the notebook.
string_limit = 850

# Sample annotation table; later cells read its "time" and "geo" columns.
anno = pd.read_csv("./annotation.csv")

In [None]:
def calc_mic(x, y, alpha=0.6, c=15):
    """Return the maximal information coefficient (MIC) of two variables.

    Parameters
    ----------
    x, y : array-like
        The two samples to compare; both are handed unchanged to
        ``MINE.compute_score``.
    alpha : float, optional
        Forwarded to ``minepy.MINE`` (default 0.6, the value this notebook
        has always used).
    c : float, optional
        Forwarded to ``minepy.MINE`` (default 15, the value this notebook
        has always used).

    Returns
    -------
    float
        The value reported by ``MINE.mic()`` after scoring ``x`` against ``y``.
    """
    # A fresh estimator per call keeps the function free of shared state.
    mine = MINE(alpha=alpha, c=c)
    mine.compute_score(x, y)
    return mine.mic()

In [None]:
# Split the "geo" identifiers by whether the row's "time" is 'Baseline'.
is_baseline = anno["time"] == 'Baseline'
ctrl = anno.loc[is_baseline, "geo"]
sample = anno.loc[~is_baseline, "geo"]

In [None]:
# Load the space-separated StringDB link table, drop the first five characters
# of both protein identifiers (presumably a taxon prefix - confirm against the
# file), keep only links scoring at least `string_limit`, and renumber the rows.
stringdb = (
    pd.read_csv("StringDB_link.csv", sep=" ")
    .assign(
        protein1=lambda links: links["protein1"].str[5:],
        protein2=lambda links: links["protein2"].str[5:],
    )
    .loc[lambda links: links["combined_score"] >= string_limit]
    .reset_index(drop=True)
)

# Identifier table (columns used here: "ensp", "id", "symbol") and the
# expression table.
dict1 = pd.read_csv("./id.csv")
fpkm = pd.read_csv("./exprSet_fine.csv")

# Side length of the adjacency mask: the number of distinct symbols.
length = np.unique(dict1["symbol"].values).size

# d : "ensp" value -> integer id;  d2 : integer id -> "symbol" value.
d = {ensp: int(idx) for ensp, idx in zip(dict1["ensp"], dict1["id"])}
d2 = {int(idx): symbol for idx, symbol in zip(dict1["id"], dict1["symbol"])}

# Adjacency mask, all zeros until the StringDB links are written into it.
string_bool = np.zeros((length, length))


# Flag every retained StringDB link whose two proteins both map to an id.
# Iterating the two columns directly avoids repeated per-row Series lookups.
for prot1, prot2 in zip(stringdb["protein1"], stringdb["protein2"]):
    if prot1 in d and prot2 in d:
        string_bool[d[prot1], d[prot2]] = 1

# A gene is never treated as interacting with itself.
np.fill_diagonal(string_bool, 0)
string_bool = string_bool == 1

# Later cells only read string_bool[i, j] with j < i and write MIC values to
# both [i, j] and [j, i], i.e. they treat links as undirected. Mirror the mask
# so a link listed in just one orientation is not silently dropped.
string_bool = string_bool | string_bool.T

# Reference expression matrix: the columns of `fpkm` named by `ctrl`
# (the 'Baseline' rows of the annotation), one row per row of `fpkm`.
origin_frame = fpkm[ctrl].to_numpy()

# Per-row sample standard deviation (ddof=1) across the reference columns.
origin_sd = origin_frame.std(axis=1, ddof=1)

# Number of rows in the reference matrix.
num = origin_frame.shape[0]

In [None]:
# MIC matrix for the reference samples, shaped like string_bool; an entry stays
# 0 unless its pair is flagged in string_bool.
origin_mic = np.zeros(string_bool.shape)

# NOTE(review): `range(row - 1)` stops at column row - 2, so pairs of the form
# (row, row - 1) are never evaluated. The other pair loops in this notebook use
# the same bound; if this is unintended, change all of them together so the
# reference and per-sample networks stay comparable.
for row in range(num):
    for col in range(row - 1):
        if not string_bool[row, col]:
            continue
        score = calc_mic(origin_frame[row, :], origin_frame[col, :])
        # Store the value in both triangles so the matrix is symmetric.
        origin_mic[row, col] = score
        origin_mic[col, row] = score

In [None]:

# Enumerate the flagged pairs in row-major order.
# NOTE(review): `range(row - 1)` excludes col == row - 1, matching the MIC
# loops elsewhere in this notebook; confirm that skipping those pairs is
# intended before changing any one of them.
edge_pairs = [
    (row, col)
    for row in range(num)
    for col in range(row - 1)
    if string_bool[row, col] != 0
]

# Symbol pair per edge, used to label the rows of the final table.
edge_origin_list = [[d2[row], d2[col]] for row, col in edge_pairs]

# Endpoint ids as numba typed lists so the jitted entropy kernel can take them.
idlist1 = nb.typed.List([row for row, _ in edge_pairs])
idlist2 = nb.typed.List([col for _, col in edge_pairs])

# Output buffers for the reference network, one slot per edge.
edge_origin_entropy = np.zeros(len(edge_pairs), dtype=np.float64)
edge_origin_sd = np.zeros(len(edge_pairs), dtype=np.float64)


In [None]:
# Entropy calculation
@nb.njit(parallel=False)
def entropy(pc, sd, idlist1, idlist2, edge_entropy, edge_sd):
    """Fill `edge_entropy` and `edge_sd` in place, one value per edge.

    For edge ``k`` with endpoints ``a = idlist1[k]`` and ``b = idlist2[k]``:

    * the neighbours of ``a`` are the column indices where row ``pc[a]`` is
      non-zero, with ``b`` removed (and symmetrically for ``b``);
    * an endpoint with at least two neighbours gets a normalised entropy:
      its ``pc`` weights are scaled to sum to 1, the base-2 Shannon entropy is
      taken and divided by ``log2(number of neighbours)``;
    * ``edge_entropy[k]`` is the neighbour-count-weighted mean of the endpoint
      entropies, an endpoint with fewer than two neighbours contributing 0;
    * ``edge_sd[k]`` is the neighbour-count-weighted mean of ``sd[a]`` and
      ``sd[b]``;
    * if both endpoints have fewer than two neighbours, both outputs are 0.

    Nothing is returned; the two output arrays are modified in place.
    """
    for k in range(len(idlist1)):
        a = idlist1[k]
        b = idlist2[k]

        # Neighbours of each endpoint, excluding the edge's other endpoint.
        nbrs_a = np.where(pc[a] != 0)[0]
        nbrs_a = np.delete(nbrs_a, np.where(nbrs_a == b)[0])
        nbrs_b = np.where(pc[b] != 0)[0]
        nbrs_b = np.delete(nbrs_b, np.where(nbrs_b == a)[0])

        n_a = len(nbrs_a)
        n_b = len(nbrs_b)
        a_usable = n_a > 1
        b_usable = n_b > 1

        # Neither side has enough neighbours: define both outputs as zero.
        if not a_usable and not b_usable:
            edge_entropy[k] = 0
            edge_sd[k] = 0
            continue

        total = n_a + n_b
        edge_sd[k] = (sd[a] * n_a + sd[b] * n_b) / total

        if a_usable and b_usable:
            prob_a = (pc[a][nbrs_a]) / np.sum((pc[a][nbrs_a]))
            prob_b = (pc[b][nbrs_b]) / np.sum((pc[b][nbrs_b]))
            h_a = -np.sum(prob_a * np.log2(prob_a)) / np.log2(n_a)
            h_b = -np.sum(prob_b * np.log2(prob_b)) / np.log2(n_b)
            edge_entropy[k] = (h_b * n_b + h_a * n_a) / total
        elif b_usable:
            # Only the second endpoint contributes entropy.
            prob_b = (pc[b][nbrs_b]) / np.sum((pc[b][nbrs_b]))
            h_b = -np.sum(prob_b * np.log2(prob_b)) / np.log2(n_b)
            edge_entropy[k] = h_b / total * n_b
        else:
            # Only the first endpoint contributes entropy.
            prob_a = (pc[a][nbrs_a]) / np.sum((pc[a][nbrs_a]))
            h_a = -np.sum(prob_a * np.log2(prob_a)) / np.log2(n_a)
            edge_entropy[k] = h_a / total * n_a

In [None]:
# Score the reference network: entropy() returns nothing and instead writes its
# per-edge results into edge_origin_entropy and edge_origin_sd in place.
entropy(origin_mic, origin_sd, idlist1, idlist2, edge_origin_entropy, edge_origin_sd)

In [None]:
# Same expression file as `fpkm`, re-read with its first column as the index
# so that every remaining column can be iterated over.
append_frame2 = pd.read_csv("./exprSet_fine.csv", index_col=0)

# Column count of the expression table and number of edges.
append_len = append_frame2.shape[1]
id_len = len(idlist1)

# Empty accumulator for the per-column edge scores.
landscape2 = pd.DataFrame()


In [None]:
# For every column of the expression table: append it to the reference matrix
# as one extra sample, recompute MIC / SD / edge entropy on the enlarged
# matrix, and score each edge as |delta SD| * |delta entropy| relative to the
# reference-only values.
#
# NOTE(review): this iterates over every column of exprSet_fine.csv, including
# the 'Baseline' ones that already make up origin_frame, and the `sample`
# selection defined earlier is never used - confirm that this is intended.

# The edges do not depend on the appended column, so reuse the pairs found once
# when the edge list was built instead of rescanning string_bool per column.
edge_pairs = list(zip(idlist1, idlist2))

# Collect one score vector per column and build the frame once at the end;
# this avoids quadratic pd.concat growth and makes the cell safe to re-run.
landscape_columns = {}

for k in append_frame2.columns:
    print(k, end="\n")

    append_frame3 = np.column_stack((origin_frame, append_frame2[k]))

    append_mic = np.zeros(string_bool.shape)
    for i, j in edge_pairs:
        mic_value = calc_mic(append_frame3[i, :], append_frame3[j, :])
        append_mic[i, j] = mic_value
        append_mic[j, i] = mic_value  # keep the matrix symmetric

    append_sd = np.std(append_frame3, ddof=1, axis=1)
    edge_append_entropy = np.zeros(len(edge_pairs), dtype=np.float64)
    edge_append_sd = np.zeros(len(edge_pairs), dtype=np.float64)

    # Fills the two buffers in place.
    entropy(append_mic, append_sd, idlist1, idlist2, edge_append_entropy, edge_append_sd)

    delta_entropy = np.abs(edge_append_entropy - edge_origin_entropy)
    delta_sd = np.abs(edge_append_sd - edge_origin_sd)

    landscape_columns[k] = delta_sd * delta_entropy

# One column per expression column, one row per edge (same order as idlist1).
landscape2 = pd.DataFrame(landscape_columns)

In [None]:
# Prefix the score table with the two symbol columns of each edge, replace
# missing values with 0 and write the result without the row index.
edge_names = pd.DataFrame(edge_origin_list, columns=['node1', 'node2'])
landscape2 = pd.concat([edge_names, landscape2], axis=1).fillna(0)
landscape2.to_csv(f"edge_entropy_logM-1_{string_limit}_mic.csv", index=False)