In [None]:
import numpy as np
import pandas as pd
from cliffs import get_similarity_matrix
from json import load
from chython import smiles, GridDepict

In [None]:
# Load target sequences and drug SMILES from their respective JSON files.
# Context managers guarantee the file handles are closed once parsing is done
# (a bare `load(open(...))` leaves the handle open until garbage collection).
with open('../data/KIBA/target_seq.txt') as targets_file:
    targets = load(targets_file)
with open('../data/KIBA/SMILES.txt') as drugs_file:
    drugs = load(drugs_file)

# Load drug-target affinity data from a tab-separated file (no header row) into a DataFrame
# The last column is excluded from the data as it has only NAs
affinity = pd.read_csv('../data/KIBA/affinity.txt', sep='\t', header=None).iloc[:, :-1].copy()

# Map drug ids to indices (rows) and target ids to columns.
# `list(...)` makes explicit that only the ids are used as labels
# (iterating a dict yields its keys); `drugs` itself stays a mapping and is
# indexed by drug id later on.
affinity.index = list(drugs)
affinity.columns = list(targets)

In [None]:
# Display the wide affinity table (rows: drugs, columns: targets) for a visual check
affinity

In [None]:
# Transform the 'affinity' DataFrame from wide format to long format:
# one row per (drug, target) pair that has a measured affinity.
# The explicit dropna() keeps the result independent of the pandas version:
# the legacy stack() drops NaN entries implicitly, while the newer
# implementation keeps them. On legacy pandas the extra call is a no-op.
unpivoted = affinity.stack().dropna().reset_index()

# Rename the columns of the transformed DataFrame for clarity
unpivoted.columns = ['drug', 'target', 'affinity']
unpivoted

In [None]:
# Minimum absolute difference in affinity (KIBA values) for a pair of
# similar drugs to be kept; pairs must differ by strictly more than this.
AFFINITY_DIFF_THRESHOLD = 1

# One DataFrame of drug pairs per target; concatenated after the loop
cliff_frames = []

# Loop through each group in the DataFrame 'unpivoted' grouped by 'target'
# 'g_name' holds the name of the target, 'group' contains the corresponding rows
# (sort=False keeps targets in order of first appearance)
for g_name, group in unpivoted.groupby('target', sort=False):

    # Calculate the similarity matrix for the drug molecules of this target,
    # looked up in 'drugs' by drug id
    sim = get_similarity_matrix([drugs[x] for x in group.drug])
    # Find non-zero elements in the similarity matrix, indicating pairs of similar drugs.
    # i and j are positional indices into 'group'.
    # NOTE(review): if the matrix is symmetric, every pair shows up in both
    # orders (and a non-zero diagonal would pair a drug with itself, which the
    # threshold below filters out) - confirm against get_similarity_matrix.
    i, j = sim.nonzero()

    # Select corresponding rows from the group for these drug pairs d1-d2
    # 'd1' and 'd2' represent the first and second drug in the pair respectively
    d1 = group.iloc[i]
    d2 = group.iloc[j]

    # Boolean mask: pairs whose absolute affinity difference exceeds the threshold
    affinity_diff = np.abs(d1.affinity.values - d2.affinity.values) > AFFINITY_DIFF_THRESHOLD

    # Select rows from d1 and d2 where the affinity difference is significant
    cliff1 = d1.iloc[affinity_diff]
    cliff2 = d2.iloc[affinity_diff]

    # Pair up corresponding rows from cliff1 and cliff2 side by side.
    # Both halves belong to the same target, so it is taken from cliff1 only;
    # the columns are built directly instead of renaming and discarding a
    # duplicate 'target' column.
    paired = pd.DataFrame({
        'target': cliff1['target'].to_numpy(),
        'drug1': cliff1['drug'].to_numpy(),
        'drug2': cliff2['drug'].to_numpy(),
        'affinity1': cliff1['affinity'].to_numpy(),
        'affinity2': cliff2['affinity'].to_numpy(),
    })
    cliff_frames.append(paired)

# Concatenate all per-target DataFrames into a single DataFrame.
# ignore_index=True gives every row a unique label instead of restarting
# at 0 for each target.
groups = pd.concat(cliff_frames, ignore_index=True)

In [None]:
# Write the table of drug pairs to CSV; the row index is not written
groups.to_csv('../analysis/kiba.csv', index=False)

In [None]:
# Display the concatenated table of drug pairs
groups

In [None]:
# Spot-check: show the affinity rows (across all targets) of two drugs, selected by drug id
affinity.loc[['CHEMBL1230790','CHEMBL1241582']]