In [None]:
import numpy as np
import pandas as pd
from cliffs import get_similarity_matrix
from json import load
from chython import smiles, GridDepict

In [None]:
# Load target sequences and drug SMILES from their respective JSON files.
# Context managers guarantee both file handles are closed once parsing is done
# (the bare `load(open(...))` form leaves them open until garbage collection).
with open('../data/KIBA/target_seq.txt') as target_file:
    targets = load(target_file)
with open('../data/KIBA/SMILES.txt') as smiles_file:
    drugs = load(smiles_file)

# Load drug-target affinity data from a tab-separated file (no header row) into a DataFrame
# The last column is excluded from the data as it has only NAs
affinity = pd.read_csv('../data/KIBA/affinity.txt', sep='\t', header=None).iloc[:, :-1].copy()

# Label rows with drug ids and columns with target ids.
# `drugs` is a mapping (its values are the SMILES strings), so list(...) yields its keys;
# `targets` is presumably an id -> sequence mapping as well -- TODO confirm.
# Both assignments require the label count to match the matrix shape, otherwise pandas raises.
affinity.index = list(drugs)
affinity.columns = list(targets)

In [None]:
# Calculate the similarity matrix for the drug molecules using SMILES strings
sim = get_similarity_matrix(list(drugs.values()))

# Positions of the non-zero entries of the similarity matrix; every (i, j) position
# is treated as one pair of similar drugs
i, j = sim.nonzero()

# Select the affinity rows for each pair d1-d2 (d1 = first drug, d2 = second drug).
# Selection is positional, so it relies on the rows of `affinity` being in the same
# order as `drugs.values()`.
d1 = affinity.iloc[i]
d2 = affinity.iloc[j]

# Keep the targets (columns) for which at least one pair differs by more than
# 1 KIBA unit. Comparisons involving NaN evaluate to False, so missing values never count.
col_slice = (np.abs(d1.values - d2.values) > 1).any(axis=0)

# Interleave the two halves so that each pair occupies two consecutive rows.
# After reset_index() both halves share the positional labels 0..n-1, so every label
# occurs twice; a stable sort is required to keep d1 ahead of d2 within each pair
# (the default quicksort gives no ordering guarantee for equal labels).
# NOTE(review): only columns are filtered here -- rows for every similar pair are kept,
# including pairs with no difference above the threshold. Confirm this is intended.
found = pd.concat(
    [d1.iloc[:, col_slice].reset_index(), d2.iloc[:, col_slice].reset_index()]
).sort_index(kind='mergesort')
found

In [None]:
# Draw the structures of the selected drugs in a two-column grid, labelled by drug id.
# The ids come from the 'index' column that reset_index() added to `found`.
pair_ids = found['index']
structures = [smiles(drugs[drug_id]) for drug_id in pair_ids]
GridDepict(structures, cols=2, labels=pair_ids)