In [6]:
import numpy as np
import pandas as pd

In [7]:
def random_walk(inter, rna_cnt, similarity_matrix, prot_id, Wq, Wu, rq, ru, output_file_name, slice_,
                tol=1e-10, max_iter=10000):
    """Score every RNA against one protein with a restart random walk and append the ranking to a file.

    The first ``int(slice_ * n)`` RNAs listed for ``prot_id`` in ``inter`` are
    used as labelled (seed) nodes; the remaining ones are held out so the
    ranking can be checked against them.

    Parameters
    ----------
    inter : pandas.DataFrame
        Interaction table with ``PROT_ID`` and ``RNA_ID`` columns.
    rna_cnt : int
        Number of RNA nodes; RNA ids are used directly as 0-based indices.
    similarity_matrix : array-like
        RNA-RNA similarity, expected to be ``rna_cnt x rna_cnt``. Not modified.
    prot_id
        Protein whose interactions seed the walk.
    Wq, Wu : float
        Row weights for labelled / unlabelled RNAs (applied before row
        normalisation).
    rq, ru : float
        Coefficients on the labelled / unlabelled transition terms; the
        restart terms use ``1 - rq`` and ``1 - ru``.
    output_file_name : str
        File the ranking is appended to (opened in ``'a'`` mode).
    slice_ : float
        Fraction of the protein's known interactions used as seeds.
    tol : float, optional
        L1 change between successive iterates below which the walk stops.
    max_iter : int, optional
        Upper bound on the number of non-converged iterations; a
        ``RuntimeWarning`` is issued if it is reached.

    Returns
    -------
    None
        Prints the iteration count and appends one tab-separated line per
        non-seed RNA (id, score, YES/NO held-out flag), sorted by descending
        score.
    """
    import warnings

    verified_association = inter.loc[inter.PROT_ID == prot_id, 'RNA_ID'].to_numpy()
    inter_size = verified_association.shape[0]
    # Keep only the leading slice_ fraction as "known"; the rest is treated as
    # unknown so it can be used to verify the ranking.
    associated_rna = verified_association[:int(slice_ * inter_size)]

    # Boolean membership mask replaces repeated `i in ndarray` linear scans.
    labelled = np.zeros(rna_cnt, dtype=bool)
    labelled[associated_rna] = True

    # Weight each row by Wq (labelled) or Wu (unlabelled), then row-normalise.
    # NOTE(review): a row scaled by one constant and then divided by its own
    # sum is unchanged for non-zero weights, so Wq/Wu cancel out here --
    # confirm whether column weighting was intended.
    row_weight = np.where(labelled, Wq, Wu)
    R = np.asarray(similarity_matrix, dtype=float) * row_weight[:, np.newaxis]
    row_sum = R.sum(axis=1, keepdims=True)
    # Rows that sum to zero are left as all-zero instead of becoming NaN,
    # which would otherwise poison every score and prevent convergence.
    R = np.divide(R, row_sum, out=np.zeros_like(R), where=row_sum != 0)

    # Split the transition matrix by the label of the source row.
    Lq_T = np.where(labelled[:, np.newaxis], R, 0.0).transpose()
    Lu_T = np.where(labelled[:, np.newaxis], 0.0, R).transpose()

    filt = labelled.astype(float)

    # Uniform restart distribution over the seed RNAs (all zeros if no seeds).
    mod_Q = associated_rna.shape[0]
    X_init = np.zeros((rna_cnt, 1))
    if mod_Q:
        X_init[associated_rna, 0] = 1 / mod_Q

    X = X_init.copy()
    pq = np.matmul(filt, X)[0]  # probability mass currently on labelled nodes
    pu = 1 - pq
    iter_cnt = 0

    while True:
        prev = X
        X = rq * np.matmul(Lq_T, prev) + pq * (1 - rq) * X_init + ru * np.matmul(Lu_T, prev) + pu * (1 - ru) * X_init
        pq = np.matmul(filt, X)[0]
        pu = 1 - pq

        if np.linalg.norm(X - prev, ord=1) < tol:
            break
        iter_cnt += 1
        if iter_cnt >= max_iter:
            warnings.warn(
                f"random_walk did not converge for protein {prot_id} within {max_iter} iterations",
                RuntimeWarning,
            )
            break

    print(f"Iterations[{prot_id}]: {iter_cnt}")

    ser = pd.Series(np.reshape(X, rna_cnt)).sort_values(ascending=False)

    seed_ids = set(associated_rna.tolist())
    verified_ids = set(verified_association.tolist())

    # The context manager guarantees the handle is closed even if a write fails.
    with open(output_file_name, 'a') as f:
        f.write('\nPROTEIN ID:\t' + str(prot_id) + '\n')
        f.write('RNA\tCorr. score(Sr)\tInteraction verified\n')
        for index, value in ser.items():
            if index in seed_ids:
                continue  # seeds are inputs, not predictions
            flag = 'YES' if index in verified_ids else 'NO'
            f.write(str(index) + '\t' + str(value) + '\t' + flag + '\n')

In [8]:
# Read the three tab-separated input tables from the working directory,
# in the same order as before: RNA similarity, protein similarity, interactions.
rna_sim, prot_sim, inter = (
    pd.read_csv(table_path, sep='\t')
    for table_path in ('rna_similarity.txt', 'prot_similarity.txt', 'list_of_interactions.txt')
)

# Number of distinct RNA / protein ids that appear in the interaction list.
rna_cnt, prot_cnt = (
    len(inter[id_column].unique())
    for id_column in ('RNA_ID', 'PROT_ID')
)

In [9]:
# Dense RNA-RNA similarity matrix built from the (RNA(i), RNA(j), Sim(i,j)) table.
# Start from zeros rather than np.empty: any pair absent from the table would
# otherwise keep whatever uninitialised bytes the allocation happened to hold.
similarity_matrix = np.zeros((rna_cnt, rna_cnt))
# One vectorised assignment instead of a per-row Python loop; only the (i, j)
# orientation listed in the table is filled.
similarity_matrix[
    rna_sim['RNA(i)'].to_numpy(),
    rna_sim['RNA(j)'].to_numpy(),
] = rna_sim['Sim(i,j)'].to_numpy()

In [14]:
# Fraction of each protein's known interactions handed to random_walk as seeds;
# it is also embedded in the output file name.
slice_ = 0.8
output_file_name = 'random_walk_verify_slice_' + f'{slice_}' + '.txt'

# Weights and coefficients forwarded unchanged to random_walk.
Wq, Wu = 0.8, 0.4
rq, ru = 0.8, 0.4

# Truncate (or create) the output file, since random_walk appends to it.
with open(output_file_name, 'w'):
    pass

# Protein ids are taken to be 0 .. prot_cnt-1.
for i in range(prot_cnt):
    random_walk(
        inter, rna_cnt, similarity_matrix, i,
        Wq, Wu, rq, ru,
        output_file_name, slice_,
    )

Iterations[0]: 21
Iterations[1]: 16
Iterations[2]: 21
Iterations[3]: 21
Iterations[4]: 16
Iterations[5]: 22
Iterations[6]: 18
Iterations[7]: 22
Iterations[8]: 23
Iterations[9]: 21
Iterations[10]: 21
Iterations[11]: 22
Iterations[12]: 22
Iterations[13]: 21
Iterations[14]: 22
Iterations[15]: 22
Iterations[16]: 22
Iterations[17]: 23
Iterations[18]: 21
Iterations[19]: 22
Iterations[20]: 23
Iterations[21]: 23
Iterations[22]: 22
Iterations[23]: 21
Iterations[24]: 20
Iterations[25]: 23
Iterations[26]: 23
Iterations[27]: 22
Iterations[28]: 22
Iterations[29]: 21
Iterations[30]: 22
Iterations[31]: 22
Iterations[32]: 20
Iterations[33]: 22
Iterations[34]: 23
Iterations[35]: 23
Iterations[36]: 23
Iterations[37]: 22
Iterations[38]: 24
Iterations[39]: 23
Iterations[40]: 23
Iterations[41]: 24
Iterations[42]: 22
Iterations[43]: 23
Iterations[44]: 21
Iterations[45]: 23
Iterations[46]: 23
Iterations[47]: 0
Iterations[48]: 21
Iterations[49]: 0
Iterations[50]: 21
Iterations[51]: 23
Iterations[52]: 24
Itera