# set dataset

In [2]:
alp = "b"

# load dataset

In [3]:
import scipy.io

alp2data = {"a": "iJO1366", "b": "iAF1260b", "c": "iAF692", "d": "iHN637"}
datafile = "./reactionsU/" + alp2data[alp] + "/" + alp2data[alp] + ".mat"
loaded = scipy.io.loadmat(datafile)

# indices of hyperlinks and candidates in data

In [4]:
if alp == "a" or alp == "c" or alp == "d":
    hi = 10
    ci = 17
elif alp == "b":
    hi = 11
    ci = 18

# get hyperlinks and candidates

In [5]:
import numpy as np

data = loaded['Model'][0][0]
hyperlinks = np.array(data[hi].toarray())
candidates = data[ci]

# get the directed hypergraph for a stoichiometric matrix

In [6]:
from tqdm import tqdm
def getUndirectedHypergraph(stoichiometric_matrix):
    dic_dir = {}
    for j in tqdm(range(stoichiometric_matrix.shape[1])):
        pos, neg = [], []
        for i in range(stoichiometric_matrix.shape[0]):
            entry = stoichiometric_matrix[i][j]
            if entry < 0:
                neg.append(i)
            elif entry > 0:
                pos.append(i)
        k = str(j)
        dic_dir[k] = neg+pos
    return  dic_dir

udhyperlinks = getUndirectedHypergraph(hyperlinks)
udcandidates = getUndirectedHypergraph(candidates)

100%|██████████| 2388/2388 [00:02<00:00, 1084.24it/s]
100%|██████████| 3630/3630 [00:03<00:00, 1023.65it/s]


# get hits and misses of hyperlinks in candidates

In [7]:
import collections
from tqdm import tqdm

def getHitsandMisses(udhyperlinks, udcandidates):
    compare = lambda x, y: collections.Counter(x) == collections.Counter(y)
    ckeys = list(udcandidates.keys())
    hkeys = list(udhyperlinks.keys())
    hits, misses, rep, flag, hflag = [], [], [], [0]*len(ckeys), [0]*len(hkeys)

    print("getting all indices of hyperlinks in candidates")
    for j, K in enumerate(udhyperlinks.keys()):
        HL = udhyperlinks[K]

        for i, k in enumerate(ckeys):
            hl = udcandidates[k]
            if compare(HL, hl):
                if hflag[j] == 0:
                    hits.append([K,k])
                    hflag[j] = 1
                    flag[j] = 1

    for i in range(len(flag)):
        if flag[i] == 0:
            misses.append(ckeys[i])
    return hits, misses

hits, misses =  getHitsandMisses(udhyperlinks, udcandidates)          

getting all indices of hyperlinks in candidates


In [8]:
assert(len(hits)==hyperlinks.shape[1])

In [9]:
assert(len(hits) + len(misses) == candidates.shape[1])

# sanity check

In [10]:
def sanityCheck(udhyperlinks, udcandidates, hits, misses):
    compare = lambda x, y: collections.Counter(x) == collections.Counter(y)
    flag = True
    
    for m in tqdm(misses):
        HL = udcandidates[m]

        for k in udhyperlinks.keys():
            hl = udhyperlinks[k]
            if compare(hl, HL):
                flag = False
                
    for pair in tqdm(hits):
        k = pair[1]
        HL = udcandidates[k]
        notFound = True

        for k in udhyperlinks.keys():

            hl = udhyperlinks[k]
            if compare(hl, HL):
                notFound = False
                break
        if notFound:
            flag = False
    
    if flag:
        print("sanity check successfull")
    else:
        print("failed!")

sanityCheck(udhyperlinks, udcandidates, hits, misses)

100%|██████████| 1242/1242 [00:17<00:00, 69.14it/s]
100%|██████████| 2388/2388 [00:17<00:00, 138.53it/s]

failed!





# construct S and U matrices

In [11]:
def appendColumn(matrix, V, hyperedge):
    column = np.zeros(V)
    for node in hyperedge:
        column[node] = 1
    matrix.append(column)
    return matrix

In [12]:
def getHyperlinksAndCandidates(udhyperlinks, udcandidates, hits, misses):
    V = candidates.shape[0]
    compare = lambda x, y: collections.Counter(x) == collections.Counter(y)
    ST = [] # S transpose
    UT = [] # U transpose


    for pair in hits:
        K, k = pair[0], pair[1]
        HL, hl = udhyperlinks[K], udcandidates[k]

        if compare(HL, hl) :
            ST = appendColumn(ST, V, HL)
            UT = appendColumn(UT, V, hl)

        else:
            print("wrong!")


    for m in misses:
        hl = udcandidates[m]
        UT = appendColumn(UT, V, hl)
    
    
    return ST, UT

ST, UT = getHyperlinksAndCandidates(udhyperlinks, udcandidates, hits, misses)

In [13]:
import random
def addNegativeHyperlinks(UT):
    V = candidates.shape[0]
    NHL = len(UT) # number of negative hyperlinks
    print("adding", NHL, "negative hyperlinks")
    for i in range(NHL):
        size = np.random.randint(4, 6)
        sample = random.sample(range(V), size)
        UT = appendColumn(UT, V, sample)
    return UT

UT = addNegativeHyperlinks(UT)

S = np.transpose(np.array(ST))
U = np.transpose(np.array(UT))

adding 3630 negative hyperlinks


In [14]:
zero_indices = []
for i in range(U.shape[0]):
    if U[i,:].sum() == 0:
        zero_indices.append(i)
'''
SF = np.delete(S, zero_indices, 0)
UF = np.delete(U, zero_indices, 0)
'''

'\nSF = np.delete(S, zero_indices, 0)\nUF = np.delete(U, zero_indices, 0)\n'

In [15]:
len(zero_indices)

0

In [16]:
S.shape

(1668, 2388)

In [17]:
U.shape

(1668, 7260)

In [19]:
import pickle
path = "./reactionsU/" + alp2data[alp] + "/" + alp2data[alp]
with open(path + ".hyperlinks.pkl", 'wb') as handle:
    pickle.dump(S, handle, protocol=pickle.HIGHEST_PROTOCOL)

with open(path + ".candidates.pkl", 'wb') as handle:
    pickle.dump(U, handle, protocol=pickle.HIGHEST_PROTOCOL)

# "r" run all cells

In [None]:
get_ipython().run_cell_magic('javascript', '', "\nJupyter.keyboard_manager.command_shortcuts.add_shortcut('r', {\n    help : 'run all cells',\n    help_index : 'zz',\n    handler : function (event) {\n        IPython.notebook.execute_all_cells();\n        return false;\n    }}\n);")