In [None]:
"""                             iSIM_MODULES
    ----------------------------------------------------------------------
    
    Miranda-Quintana Group, Department of Chemistry, University of Florida 
    
    ----------------------------------------------------------------------
    
    Please, cite the original paper on iSIM:

    ----------------------------------------------------------------------

    This notebook generates simulated binary and real-number fingerprints and
    compares the iSIM value of each simulated dataset with the average of its
    pairwise similarities.

    """

In [1]:
from isim_comp import calculate_isim
from isim_utils import pairwise_average, pairwise_average_real
from isim_real import *
import numpy as np
import random

In [2]:
# For binary fingerprints
# Simulates random binary fingerprint sets and compares, for each set, the
# value returned by calculate_isim with the one returned by pairwise_average.

# Seed both random generators used below so that re-running this cell on a
# fresh kernel reproduces the same simulated datasets.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Number of datasets to simulate
n_datasets = 10 #---> in the original paper, we used 10000 datasets

# Similarity index to use
n_ary = 'SM'

# Initialize lists to store iSIM and pairwise values
isim_values = []
pair_values = []

for rep in range(n_datasets):

    # Generate random binary fingerprints
    fp_total = random.randint(10, 100) #---> in the original paper, we used 100-1000 fingerprints
    # random.randint includes BOTH endpoints, so the upper bound must be 2048
    # (not 2049) to stay inside the documented 166-2048 bit range.
    fp_size = random.randint(166, 2048) #---> in the original paper, we used fingerprints of 166-2048 bits
    # bias is the probability of drawing a 0 bit (1 - bias for a 1 bit)
    bias = np.random.uniform(0.01, 1) #---> the bias is to cover the whole range of possible similarity values
    total_fingerprints = np.random.choice([0, 1], size=(fp_total, fp_size), p=[bias, 1 - bias])

    # Append values
    isim_values.append(calculate_isim(total_fingerprints, n_ary = n_ary))
    pair_values.append(pairwise_average(total_fingerprints, n_ary = n_ary))


# Convert once and reuse for every metric
isim_array = np.array(isim_values)
pair_array = np.array(pair_values)
differences = isim_array - pair_array

# Print results of the comparison of the iSIM and pairwise values
print('R2:', np.corrcoef(isim_array, pair_array)[0,1]**2)
print('MAE:', np.mean(np.abs(differences)))
print('RMSE:', np.sqrt(np.mean(differences**2)))


R2: 1.0
MAE: 3.3306690738754695e-17
RMSE: 7.850462293418876e-17


In [6]:
# For real-number fingerprints
# Simulates random real-valued fingerprint sets and compares, for each set,
# the value returned by the selected isim_* function with the one returned
# by pairwise_average_real.

# Seed both random generators used below so that re-running this cell on a
# fresh kernel reproduces the same simulated datasets.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Number of datasets to simulate
n_datasets = 100 #---> in the original paper, we used 10000 datasets

# Similarity index to use
n_ary = 'JT'

# Choose the iSIM function to use
if n_ary == 'RR': isim_func = isim_rr
elif n_ary == 'SM': isim_func = isim_sm
elif n_ary == 'JT': isim_func = isim_jt
else:
    # Fail loudly instead of leaving isim_func undefined (or silently reusing
    # a binding left over from an earlier run of this cell).
    raise ValueError("Unsupported similarity index {!r}; expected 'RR', 'SM' or 'JT'".format(n_ary))

# Initialize lists to store iSIM and pairwise values
isim_values = []
pair_values = []

# Generate random real-number fingerprints and compute isim and pairwise values
for rep in range(n_datasets):

    # Random number of fingerprints and size
    fp_total = random.randint(10, 100) # ---> in the original paper, we used 100-1000 fingerprints
    # random.randint includes BOTH endpoints, so the upper bound must be 2048
    # (not 2049) to stay inside the documented 166-2048 range.
    fp_size = random.randint(166, 2048) # ---> in the original paper, we used fingerprints of 166-2048 bits

    # Random matrix generation (uniform values in [0, 1))
    total_fingerprints = np.random.random((fp_total, fp_size))

    # Append values
    isim_values.append(isim_func(total_fingerprints))
    pair_values.append(pairwise_average_real(total_fingerprints, n_ary = n_ary))

# Convert once and reuse for every metric
isim_array = np.array(isim_values)
pair_array = np.array(pair_values)
differences = isim_array - pair_array

# Print results of the comparison of the iSIM and pairwise values
print('R2:', np.corrcoef(isim_array, pair_array)[0,1]**2)
print('MAE:', np.mean(np.abs(differences)))
print('RMSE:', np.sqrt(np.mean(differences**2)))

R2: 0.9994563950870287
MAE: 2.68910065286454e-05
RMSE: 4.699100306762527e-05
