## FUTURE WORK:
- Analyze each network and extract interesting residues / interactions:
  - Top 5 highest-degree residue(s) and their "type", and their residue name.
  - Residues involved in a particular set of interactions (identify a motif of interest, if there is one? Design a motif? Check what a particular set of interactions might mean relative to the 3D structure.)
- Compare to a set of several random protein structures to debias from, say, secondary-structure elements?
  - A few "types", e.g.:
    - Mostly β-barrel
    - Mostly α-helix
    - Unstructured
    - Etc.
- **Randomize the networks** and check the values of the above variables! Why would they be different or stay the same?
- Analyze this "mega" network and compare it to the evolutionary relationships between the underlying proteins

In [None]:
# load in other sifs
!wget https://github.com/picodase/cs446_final/blob/main/3fli.sif
df_fxr = pd.read_csv("3fli.sif", sep='\t', names=["resA", "interacType","resB"])

!wget https://github.com/picodase/cs446_final/blob/main/1FAW.sif
df_FAW = pd.read_csv("1FAW.sif", sep='\t', names=["resA", "interacType","resB"])

!wget https://github.com/picodase/cs446_final/blob/main/1FSX.sif
df_FSX = pd.read_csv("1FSX.sif", sep='\t', names=["resA", "interacType","resB"])

In [None]:
# Read every .sif file found directly inside x_path.
# x_fileNames[k] is the file name (extension removed) behind x_rin_dfs[k].
x_path = "."

x_rin_dfs = []
x_fileNames = []
# os.scandir yields entries in arbitrary, filesystem-dependent order; sorting by
# name keeps the positions in x_rin_dfs / x_fileNames stable across runs.
for file in sorted(os.scandir(x_path), key=lambda entry: entry.name):
    if (file.path.endswith(".sif") and file.is_file()):
      # splitext removes only the trailing ".sif", so the printed name and the
      # stored name agree even when the base name itself contains a dot.
      x_name = os.path.splitext(file.name)[0]
      print(x_name)
      x_fileNames.append(x_name)
      x_rin_dfs.append(pd.read_csv(file.path, sep='\t', names=["resA", "interacType","resB"]))

In [None]:
# Accumulators: entry k of each list below is derived from x_rin_dfs[k].
x_interac_cts = []   # value_counts() of interacType
x_edgelists = []     # dict: interaction class -> list of [resA, resB] pairs
x_graphs = []        # dict: interaction class -> igraph.Graph
x_thr_motifs = []    # dict: interaction class -> motifs_randesu() result
x_four_motifs = []   # dict: interaction class -> motifs_randesu(size=4) result

for df in x_rin_dfs:
  # how often each interaction type occurs in this data frame
  x_interac_cts.append(df.interacType.value_counts())

  # edge lists: keep the rows whose interacType belongs to the class, reduced to
  # their [resA, resB] pair (the all-interactions "tot" entry is disabled)
  i = {
      key: df[df.interacType.isin(members)][['resA','resB']].values.tolist()
      for key, members in (("hbd", hbond), ("vdw", vdw), ("lig", lig), ("ππ", ππ))
  }
  x_edgelists.append(i)

  # graphs: only the "hbd" graph is built as directed, the others are undirected
  g = {
      key: igraph.Graph.TupleList(edges, directed=(key == "hbd"))
      for key, edges in i.items()
  }
  x_graphs.append(g)

  # three-motifs (motifs_randesu called without a size argument)
  t_m = {key: graph.motifs_randesu() for key, graph in g.items()}
  x_thr_motifs.append(t_m)

  # four-motifs
  f_m = {key: graph.motifs_randesu(size=4) for key, graph in g.items()}
  x_four_motifs.append(f_m)

In [None]:
#Cosine Correlation Matrix for interac_cts
# After the transpose there is one column per entry of x_interac_cts, labelled
# with x_fileNames -- the list that was filled alongside x_rin_dfs, from which
# x_interac_cts was computed.
x_rin_interact_data = pd.DataFrame(x_interac_cts, index=x_fileNames).T

def dfCosSim(n1: np.ndarray, n2: np.ndarray):
  """Cosine similarity of two arrays, for use as a DataFrame.corr method.

  Each argument is reshaped to a single row (1, -1) and the pair is passed to
  cosine_similarity; that call's return value is handed back unchanged.
  NOTE(review): cosine_similarity is presumably scikit-learn's pairwise
  function (its import is not visible here) -- confirm.
  """
  return cosine_similarity(n1.reshape(1,-1), n2.reshape(1,-1))

# pairwise dfCosSim between the columns of x_rin_interact_data
res = x_rin_interact_data.corr(dfCosSim)
res.style.background_gradient(cmap='coolwarm')

In [None]:
# Cosine correlation matrices for the size-4 motifs, one per interaction class.

# Collect each class's motif list from every per-structure dict in x_four_motifs.
x_four_motif_hbd, x_four_motif_vdw, x_four_motif_lig, x_four_motif_pipi = (
    [per_structure[kind] for per_structure in x_four_motifs]
    for kind in ('hbd', 'vdw', 'lig', 'ππ')
)

# Transposed so that each x_fileNames entry labels a column; DataFrame.corr
# compares columns pairwise.
x_rn_fourmotif_hbd_data = pd.DataFrame(x_four_motif_hbd, index=x_fileNames).transpose()
x_rn_fourmotif_vdw_data = pd.DataFrame(x_four_motif_vdw, index=x_fileNames).transpose()
x_rn_fourmotif_lig_data = pd.DataFrame(x_four_motif_lig, index=x_fileNames).transpose()
x_rn_fourmotif_pipi_data = pd.DataFrame(x_four_motif_pipi, index=x_fileNames).transpose()

# Pairwise dfCosSim between the columns of each table.
x_rn_fourmotif_hbd_corr = x_rn_fourmotif_hbd_data.corr(method=dfCosSim)
x_rn_fourmotif_vdw_corr = x_rn_fourmotif_vdw_data.corr(method=dfCosSim)
x_rn_fourmotif_lig_corr = x_rn_fourmotif_lig_data.corr(method=dfCosSim)
x_rn_fourmotif_pipi_corr = x_rn_fourmotif_pipi_data.corr(method=dfCosSim)

### Pairwise comparisons for all networks in a collection

In [None]:
# Heat map of the "hbd" four-motif similarity table; axis=None applies one
# colour scale to the whole table instead of one per column.
x_rn_fourmotif_hbd_corr.style.background_gradient(axis=None, cmap="coolwarm")

In [None]:
# Heat map of the "vdw" four-motif similarity table; axis=None applies one
# colour scale to the whole table instead of one per column.
x_rn_fourmotif_vdw_corr.style.background_gradient(axis=None, cmap="coolwarm")

In [None]:
# Heat map of the "lig" four-motif similarity table; axis=None applies one
# colour scale to the whole table instead of one per column.
x_rn_fourmotif_lig_corr.style.background_gradient(axis=None, cmap="coolwarm")

In [None]:
# Heat map of the "ππ" four-motif similarity table; axis=None applies one
# colour scale to the whole table instead of one per column.
x_rn_fourmotif_pipi_corr.style.background_gradient(axis=None, cmap="coolwarm")

In [None]:
import itertools

# h-bonding: fxr, hb1, hb2, hb3
# For every unordered pair of the four "hbd" entries, print the [0][0] element
# of tmCosSim and of qmCosSim as a two-item list.
hbds = [fxr["hbd"]] + [graphs[k]["hbd"] for k in (0, 3, 6)]
for first, second in itertools.combinations(hbds, 2):
    print([tmCosSim(first, second)[0][0], qmCosSim(first, second)[0][0]])

In [None]:
# van der Waals interactions: fxr, hb1, hb2, hb3
# For every unordered pair of the four "vdw" entries, print the [0][0] element
# of tmCosSim and of qmCosSim as a two-item list.

vdws = [fxr["vdw"]] + [graphs[k]["vdw"] for k in (0, 3, 6)]
for first, second in itertools.combinations(vdws, 2):
    print([tmCosSim(first, second)[0][0], qmCosSim(first, second)[0][0]])

In [None]:
# ligand: fxr, hb1, hb2, hb3
# For every unordered pair of the four "lig" entries, print the [0][0] element
# of tmCosSim and of qmCosSim as a two-item list.
ligs = [fxr["lig"]] + [graphs[k]["lig"] for k in (0, 3, 6)]
for first, second in itertools.combinations(ligs, 2):
    print([tmCosSim(first, second)[0][0], qmCosSim(first, second)[0][0]])

In [None]:
# ππ: fxr, hb1, hb2, hb3
# NOTE(review): this list takes graphs[1] and graphs[2], whereas the h-bond,
# van der Waals and ligand cells take graphs[3] and graphs[6] under the same
# "hb1, hb2, hb3" label -- confirm which indices are intended.
ππs = [fxr["ππ"]] + [graphs[k]["ππ"] for k in (0, 1, 2)]

# The pairwise comparison for ππ is currently disabled.
#for pair in itertools.combinations(ππs, r=2):
#    print([tmCosSim(pair[0], pair[1])[0][0], qmCosSim(pair[0], pair[1])[0][0]])

In [None]:
# fxr, hb1, hb2, hb3
# For every unordered pair of the four data frames, print the [0][0] element of
# icCosSim.
# Positions 0, 3 and 6 are the ones the h-bond, van der Waals and ligand cells
# take from `graphs` for hb1, hb2 and hb3, so the same structures are compared
# here (assumes rin_dfs and graphs are parallel lists -- TODO confirm).

dfs = [df_fxr,rin_dfs[0],rin_dfs[3],rin_dfs[6]]
for pair in itertools.combinations(dfs, r=2):
    print(icCosSim(pair[0], pair[1])[0][0])