In [None]:
import numpy as np
import pandas as pd
from cliffs import get_similarity_matrix
from json import load
from chython import smiles, GridDepict

In [None]:
# Load the target and drug tables from their JSON-formatted files.
# The context managers guarantee the file handles are closed as soon as the
# data has been parsed (the bare open(...) calls used before leaked them).
with open('../data/DAVIS/target_seq.txt') as target_file:
    targets = load(target_file)

with open('../data/DAVIS/SMILES.txt') as smiles_file:
    # JSON object keys are always strings; convert the drug ids back to int
    drugs = {int(k): v for k, v in load(smiles_file).items()}

# Load the drug-target affinity matrix (space-separated, no header row)
affinity = pd.read_csv('../data/DAVIS/affinity.txt', sep=' ', header=None)

# Label rows with drug ids and columns with target ids.
# NOTE(review): this assumes the key order in the two JSON files matches the
# row/column order of affinity.txt -- confirm against the dataset description.
affinity.index = list(drugs)
affinity.columns = list(targets)

In [None]:
# Display the affinity matrix (rows labelled by drug id, columns by target id)
affinity

In [None]:
# Reshape the wide affinity matrix into long format: one row per
# (drug, target) combination with its affinity value in a third column.
unpivoted = (
    affinity
    .stack()                            # Series indexed by (row label, column label)
    .rename_axis(['drug', 'target'])    # name the two index levels
    .reset_index(name='affinity')       # index levels -> columns, values -> 'affinity'
)
unpivoted

In [None]:
# Build the drug-drug similarity matrix from the values stored in `drugs`
# (presumably SMILES strings, given the source file name).
# NOTE(review): assumes get_similarity_matrix keeps the input order, so that
# row/column k corresponds to the k-th drug -- confirm in the cliffs module.
sim = get_similarity_matrix([*drugs.values()])

# Positions (row, column) of every non-zero entry; each (i[k], j[k]) is
# treated downstream as a pair of similar drugs.
i, j = sim.nonzero()

sim

In [None]:
# Display the row (i) and column (j) positions of the non-zero similarity entries
i, j

In [None]:
# Minimum absolute difference in log10(affinity) for a pair of similar drugs
# to be reported: 1 log unit, i.e. a more than 10-fold difference in affinity.
LOG_AFFINITY_THRESHOLD = 1

# Positions of the similar-drug pairs as plain arrays (needed for masking below)
first_idx, second_idx = np.asarray(i), np.asarray(j)

# One DataFrame of qualifying pairs per target, concatenated after the loop
cliff_frames = []

# Visit every target in order of first appearance; 'group' holds that
# target's (drug, target, affinity) rows.
for target_name, group in unpivoted.groupby('target', sort=False):
    # first_idx/second_idx are *positions* in the drug list, so they are only
    # valid if this group has exactly one row per drug, in the same order.
    # (stack() can drop missing values, which would silently shift positions.)
    assert group['drug'].tolist() == list(drugs), (
        f'rows for target {target_name!r} do not line up with the drug list'
    )

    drug_ids = group['drug'].to_numpy()
    affinities = group['affinity'].to_numpy()

    # Compute log10 once per target, then compare the two members of each pair
    log_affinities = np.log10(affinities)
    is_cliff = (
        np.abs(log_affinities[first_idx] - log_affinities[second_idx])
        > LOG_AFFINITY_THRESHOLD
    )

    # Keep only the pairs whose affinity difference exceeds the threshold
    cliff_first = first_idx[is_cliff]
    cliff_second = second_idx[is_cliff]

    # NOTE(review): if the similarity matrix is symmetric, every pair appears
    # twice here (as (a, b) and (b, a)) -- confirm whether that is intended.
    cliff_frames.append(pd.DataFrame({
        'target': [target_name] * len(cliff_first),
        'drug1': drug_ids[cliff_first],
        'drug2': drug_ids[cliff_second],
        'affinity1': affinities[cliff_first],
        'affinity2': affinities[cliff_second],
    }))

# Combine all targets into one table with a clean 0..n-1 index
groups = pd.concat(cliff_frames, ignore_index=True)

In [None]:
# Write the paired-drug table to CSV, omitting the DataFrame index column
groups.to_csv('../analysis/davis.csv', index=False)

In [None]:
# Display the resulting table of drug pairs per target
groups

In [None]:
# Spot check: show the full affinity rows for two drugs (ids 5291 and 10074640)
# side by side so their values can be compared per target
affinity.loc[[5291,10074640]]