In [16]:
import pandas as pd
import numpy as np
import scipy as sp
import os
from scipy.sparse import csr_matrix, lil_matrix, eye
from scipy.sparse.linalg import spsolve

In [3]:
dir_name = os.path.join("..", "Data")
df_human = pd.read_csv(os.path.join(dir_name, "9606.protein.links.full.v12.0.txt"), sep=" ")
df_rvfv = pd.read_csv(os.path.join(dir_name, "string_interactions.tsv"), sep="\t")


In [4]:
df_human

Unnamed: 0,protein1,protein2,neighborhood,neighborhood_transferred,fusion,cooccurence,homology,coexpression,coexpression_transferred,experiments,experiments_transferred,database,database_transferred,textmining,textmining_transferred,combined_score
0,9606.ENSP00000000233,9606.ENSP00000356607,0,0,0,0,0,0,45,0,134,0,0,0,81,173
1,9606.ENSP00000000233,9606.ENSP00000427567,0,0,0,0,0,0,0,0,128,0,0,0,70,154
2,9606.ENSP00000000233,9606.ENSP00000253413,0,0,0,0,0,49,111,0,49,0,0,0,69,151
3,9606.ENSP00000000233,9606.ENSP00000493357,0,0,0,0,0,56,0,0,53,0,0,433,81,471
4,9606.ENSP00000000233,9606.ENSP00000324127,0,0,0,0,0,0,0,0,46,0,0,153,91,201
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13715399,9606.ENSP00000501317,9606.ENSP00000475489,0,0,0,0,0,60,0,0,99,0,0,126,0,195
13715400,9606.ENSP00000501317,9606.ENSP00000370447,0,0,0,0,0,0,55,0,111,0,0,0,79,158
13715401,9606.ENSP00000501317,9606.ENSP00000312272,0,0,0,0,0,0,0,0,0,0,0,187,88,226
13715402,9606.ENSP00000501317,9606.ENSP00000402092,0,0,0,0,0,0,0,0,67,0,0,146,0,169


In [5]:
unique_prots = set(df_human["protein1"]) | set(df_human["protein2"])
#print(len(unique_prots))
unique_prots = list(unique_prots)
prot_map = {p:i for i,p in enumerate(unique_prots)}
prot_map


{'9606.ENSP00000348708': 0,
 '9606.ENSP00000226574': 1,
 '9606.ENSP00000451030': 2,
 '9606.ENSP00000260327': 3,
 '9606.ENSP00000460683': 4,
 '9606.ENSP00000350704': 5,
 '9606.ENSP00000362139': 6,
 '9606.ENSP00000358098': 7,
 '9606.ENSP00000410396': 8,
 '9606.ENSP00000306900': 9,
 '9606.ENSP00000221132': 10,
 '9606.ENSP00000320184': 11,
 '9606.ENSP00000362888': 12,
 '9606.ENSP00000290871': 13,
 '9606.ENSP00000356584': 14,
 '9606.ENSP00000417806': 15,
 '9606.ENSP00000312129': 16,
 '9606.ENSP00000344140': 17,
 '9606.ENSP00000219139': 18,
 '9606.ENSP00000367050': 19,
 '9606.ENSP00000354090': 20,
 '9606.ENSP00000287156': 21,
 '9606.ENSP00000359490': 22,
 '9606.ENSP00000002596': 23,
 '9606.ENSP00000301454': 24,
 '9606.ENSP00000499406': 25,
 '9606.ENSP00000228468': 26,
 '9606.ENSP00000355752': 27,
 '9606.ENSP00000297029': 28,
 '9606.ENSP00000334197': 29,
 '9606.ENSP00000352233': 30,
 '9606.ENSP00000390011': 31,
 '9606.ENSP00000260653': 32,
 '9606.ENSP00000261714': 33,
 '9606.ENSP00000349541':

In [14]:
# Generate list of unique host proteins that interact with NSs
rvfv_prot_list = set(pd.concat([df_rvfv["node1_external_id"], df_rvfv["node2_external_id"]], axis=0, ignore_index=True))
rvfv_interactor_list = {p for p in rvfv_prot_list if "9606.ENSP" in p}

# Create dictionary mapping NSs interactor protein names to their array index
s_array = np.zeros(len(unique_prots))
for prot in rvfv_interactor_list:
    if prot in prot_map:
        s_array[prot_map[prot]] = 1.0
    else:
        print(f'Protein {prot} does not interact with any human proteins.')


Protein 9606.ENSP00000360286 does not interact with any human proteins.
Protein 9606.ENSP00000353622 does not interact with any human proteins.
Protein 9606.ENSP00000356348 does not interact with any human proteins.
Protein 9606.ENSP00000384144 does not interact with any human proteins.


In [9]:
# Initialize ones array corresponding to NSs interactor proteins
s_array.sum()


6.0

In [28]:
# Pair human protein columns, map protein names to matrix coordinates, change matrix value to one at that position (and inverse position)
w_sparse_matrix = lil_matrix((len(prot_map), len(prot_map)))

for prot1, prot2 in zip(df_human['protein1'], df_human['protein2']):
    w_sparse_matrix[prot_map[prot1], prot_map[prot2]] = 1
    w_sparse_matrix[prot_map[prot2], prot_map[prot1]] = 1

w_sparse_matrix = w_sparse_matrix.tocsr()



In [34]:
D = np.sqrt(w_sparse_matrix.sum(-1))
D[D < 1] = 1
w_sparse_matrix = w_sparse_matrix.multiply(1.0/D)
w_sparse_matrix = w_sparse_matrix.multiply(1.0/D.T)

#w_sparse_matrix = w_sparse_matrix / D / D.T

In [36]:
alpha = 0.2
M = (1 + alpha) * eye(len(unique_prots)) - (w_sparse_matrix * alpha)

In [None]:
y_array = spsolve(M,s_array)

In [38]:
type(s_array)

numpy.ndarray

In [32]:
w_sparse_matrix.shape

(19622, 19622)

In [37]:
type(M)

scipy.sparse.csr.csr_matrix