In [12]:
import numpy as np
import pandas as pd

In [13]:
def random_walk(rna_cnt, similarity_matrix, prot_id, Wq, Wu, rq, ru, output_file_name,
                interactions=None, tol=1e-10, max_iter=1000):
    """Score every RNA against one protein and append the ranking to a file.

    RNAs already paired with ``prot_id`` in the interaction table are the
    "labelled" nodes; all others are "unlabelled". Rows of the similarity
    matrix are weighted (``Wq`` for labelled rows, ``Wu`` for unlabelled rows)
    and normalised to sum to 1. A score vector, started uniformly on the
    labelled RNAs, is then iterated until the Euclidean norm of the change
    between two successive vectors drops below ``tol``.

    Parameters
    ----------
    rna_cnt : int
        Number of RNAs; ``similarity_matrix`` is used as ``rna_cnt x rna_cnt``
        and RNA ids are used directly as row/column indices.
    similarity_matrix : array-like
        RNA-RNA similarity values. It is copied, never modified.
    prot_id
        Value matched against the ``PROT_ID`` column of the interaction table.
    Wq, Wu : float
        Multipliers applied to labelled / unlabelled rows before normalisation.
    rq, ru : float
        Coefficients on the propagated term coming from labelled / unlabelled
        rows; ``1 - rq`` and ``1 - ru`` scale the matching restart terms.
    output_file_name : str
        File opened in append mode; one section per call is added to it.
    interactions : pandas.DataFrame, optional
        Table with ``PROT_ID`` and ``RNA_ID`` columns. When omitted, the
        notebook-level variable ``inter`` is used.
    tol : float, optional
        Convergence threshold on the norm of the update.
    max_iter : int, optional
        Upper bound on the number of non-converged updates.

    Raises
    ------
    RuntimeError
        If the scores become non-finite or the iteration does not converge
        within ``max_iter`` updates (instead of looping forever).

    Side effects
    ------------
    Prints ``Iterations[<prot_id>]: <count>`` and appends the scores of all
    unlabelled RNAs, sorted in descending order, to ``output_file_name``.
    """
    if interactions is None:
        # Backward-compatible default: the interaction table loaded by the notebook.
        interactions = inter

    associated_rna = interactions[interactions.PROT_ID == prot_id].RNA_ID.to_numpy()

    # Boolean mask of labelled RNAs; replaces repeated `i in array` scans.
    is_labelled = np.zeros(rna_cnt, dtype=bool)
    is_labelled[associated_rna] = True

    # Weight each row by its label, then normalise rows to sum to 1.
    R = np.array(similarity_matrix, dtype=float)
    R = R * np.where(is_labelled, Wq, Wu)[:, np.newaxis]
    row_sum = R.sum(axis=1, keepdims=True)
    # Rows that sum to zero stay all-zero instead of turning into NaN (0/0).
    R = np.divide(R, row_sum, out=np.zeros_like(R), where=row_sum != 0)

    # Lq keeps only the labelled rows, Lu only the unlabelled rows.
    Lq = np.where(is_labelled[:, np.newaxis], R, 0.0)
    Lu = np.where(is_labelled[:, np.newaxis], 0.0, R)
    Lq_T = Lq.transpose()
    Lu_T = Lu.transpose()

    filt = is_labelled.astype(float)

    # Initial vector: uniform over the labelled RNAs, zero elsewhere.
    mod_Q = associated_rna.shape[0]
    X_init = np.zeros((rna_cnt, 1))
    if mod_Q:
        X_init[associated_rna, 0] = 1 / mod_Q

    X = X_init.copy()
    pq = np.matmul(filt, X)[0]  # score mass currently on labelled RNAs
    pu = 1 - pq
    iter_cnt = 0

    while True:
        prev = X
        X = (rq * np.matmul(Lq_T, prev) + pq * (1 - rq) * X_init
             + ru * np.matmul(Lu_T, prev) + pu * (1 - ru) * X_init)
        pq = np.matmul(filt, X)[0]
        pu = 1 - pq

        delta = np.linalg.norm(X - prev)
        if not np.isfinite(delta):
            # NaN/inf would make the `< tol` test fail forever.
            raise RuntimeError(
                f"random_walk produced non-finite scores for protein {prot_id}; "
                "check similarity_matrix for NaN/inf entries")
        if delta < tol:
            break
        # The converging update itself is not counted.
        iter_cnt += 1
        if iter_cnt >= max_iter:
            raise RuntimeError(
                f"random_walk did not converge for protein {prot_id} "
                f"within {max_iter} iterations")

    print(f"Iterations[{prot_id}]: {iter_cnt}")

    scores = pd.Series(np.reshape(X, rna_cnt)).sort_values(ascending=False)

    # `with` guarantees the handle is closed even if a write fails.
    with open(output_file_name, 'a') as f:
        f.write('\nPROTEIN ID:\t' + str(prot_id) + '\n')
        f.write('RNA\tCorr. score(Sr)\n')
        for index, value in scores.items():
            # Only RNAs not already known to interact are reported.
            if not is_labelled[index]:
                f.write(str(index) + '\t' + str(value) + '\n')

In [14]:
# Read the three tab-separated input tables from the working directory:
# RNA-RNA similarities, protein-protein similarities and the known interactions.
rna_sim, prot_sim, inter = (
    pd.read_csv(table_path, sep='\t')
    for table_path in ('rna_similarity.txt',
                       'prot_similarity.txt',
                       'list_of_interactions.txt')
)

# Number of distinct RNA / protein ids that occur in the interaction table.
rna_cnt, prot_cnt = (
    len(inter[id_column].unique())
    for id_column in ('RNA_ID', 'PROT_ID')
)

In [15]:
# Dense RNA-RNA similarity matrix filled from the long-format table `rna_sim`.
# Start from zeros rather than np.empty: np.empty returns uninitialised memory,
# so any (i, j) pair missing from `rna_sim` would otherwise hold an arbitrary value.
similarity_matrix = np.zeros((rna_cnt, rna_cnt))

# One vectorised assignment instead of a Python loop over rows; when an (i, j)
# pair is listed more than once the last listed value is kept, as before.
# NOTE(review): assumes the ids in 'RNA(i)' / 'RNA(j)' are integers in
# [0, rna_cnt) - confirm against the input files.
similarity_matrix[rna_sim['RNA(i)'].to_numpy(),
                  rna_sim['RNA(j)'].to_numpy()] = rna_sim['Sim(i,j)'].to_numpy()

In [16]:
# Run the random walk once per protein and collect all rankings in one file.
output_file_name = 'random_walk_scores.txt'
Wq = 0.8  # multiplier for rows of RNAs already linked to the protein
Wu = 0.4  # multiplier for rows of all other RNAs
rq = 0.8  # propagation coefficient for labelled rows
ru = 0.4  # propagation coefficient for unlabelled rows

# random_walk appends to the output file, so empty it first; otherwise every
# re-run of this cell would add a second copy of all protein sections.
with open(output_file_name, 'w'):
    pass

# NOTE(review): assumes protein ids are exactly 0 .. prot_cnt-1 - confirm
# against the PROT_ID column of the interaction table.
for prot_id in range(prot_cnt):
    random_walk(rna_cnt, similarity_matrix, prot_id, Wq, Wu, rq, ru, output_file_name)

Iterations[0]: 17
Iterations[1]: 15
Iterations[2]: 18
Iterations[3]: 19
Iterations[4]: 15
Iterations[5]: 20
Iterations[6]: 14
Iterations[7]: 20
Iterations[8]: 21
Iterations[9]: 19
Iterations[10]: 18
Iterations[11]: 19
Iterations[12]: 19
Iterations[13]: 18
Iterations[14]: 19
Iterations[15]: 19
Iterations[16]: 20
Iterations[17]: 21
Iterations[18]: 18
Iterations[19]: 20
Iterations[20]: 22
Iterations[21]: 21
Iterations[22]: 20
Iterations[23]: 20
Iterations[24]: 17
Iterations[25]: 21
Iterations[26]: 21
Iterations[27]: 19
Iterations[28]: 21
Iterations[29]: 19
Iterations[30]: 20
Iterations[31]: 20
Iterations[32]: 21
Iterations[33]: 20
Iterations[34]: 20
Iterations[35]: 20
Iterations[36]: 21
Iterations[37]: 20
Iterations[38]: 22
Iterations[39]: 20
Iterations[40]: 20
Iterations[41]: 22
Iterations[42]: 21
Iterations[43]: 21
Iterations[44]: 20
Iterations[45]: 22
Iterations[46]: 21
Iterations[47]: 23
Iterations[48]: 20
Iterations[49]: 23
Iterations[50]: 19
Iterations[51]: 21
Iterations[52]: 22
Ite