In [1]:
import numpy as np
import pandas

In [2]:
def compute_jaccards_coefficient(sequence_1, sequence_2):
    """ Computes Jaccard's coefficient between two binary sequences.
    :param seqeunce_1: numpy array with first bit sequence
    :param seqeunce_2: numpy array with second bit sequence
    :returns: jaccard's coefficient
    """
    p = np.count_nonzero(np.logical_and(sequence_1, sequence_2))    
    s = np.count_nonzero(np.logical_not(np.logical_or(sequence_1, sequence_2)))
    q = np.count_nonzero(np.logical_and(sequence_1, np.logical_not(sequence_2)))
    r = np.count_nonzero(np.logical_and(np.logical_not(sequence_1), sequence_2))
    j = (q+r)/(p+q+r+s)
    return 1-j

In [3]:
# Example 1
s1 = np.array([0,0,0,0])
s2 = np.array([1,1,1,1])
compute_jaccards_coefficient(s1, s2)

0.0

In [4]:
# Example 2
s1 = np.array([1,1,1,1])
s2 = np.array([1,1,1,1])
compute_jaccards_coefficient(s1, s2)

1.0

In [5]:
df = pandas.read_excel('All_comps.xlsx', sheet_name='ExFP')

In [7]:
mol1_fp = np.asarray(df.iloc[0][1:])
mol2_fp = np.asarray(df.iloc[1][1:])
mol3_fp = np.asarray(df.iloc[2][1:])
mol4_fp = np.asarray(df.iloc[3][1:])
mol5_fp = np.asarray(df.iloc[4][1:])
mol6_fp = np.asarray(df.iloc[5][1:])
mol7_fp = np.asarray(df.iloc[6][1:])
mol8_fp = np.asarray(df.iloc[7][1:])

In [8]:
print('Jaccards coefficient', compute_jaccards_coefficient(mol1_fp, mol8_fp))
print('Jaccards coefficient', compute_jaccards_coefficient(mol2_fp, mol8_fp))
print('Jaccards coefficient', compute_jaccards_coefficient(mol3_fp, mol8_fp))
print('Jaccards coefficient', compute_jaccards_coefficient(mol4_fp, mol8_fp))
print('Jaccards coefficient', compute_jaccards_coefficient(mol5_fp, mol8_fp))
print('Jaccards coefficient', compute_jaccards_coefficient(mol6_fp, mol8_fp))
print('Jaccards coefficient', compute_jaccards_coefficient(mol7_fp, mol8_fp))

Jaccards coefficient 0.671875
Jaccards coefficient 0.5703125
Jaccards coefficient 0.6298828125
Jaccards coefficient 0.5546875
Jaccards coefficient 0.654296875
Jaccards coefficient 0.58203125
Jaccards coefficient 0.6484375
