In [12]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.DataStructs.cDataStructs import TanimotoSimilarity

# Function to convert SMILES to Morgan fingerprint
def smiles_to_fingerprint(smiles):
    """Convert a SMILES string into a Morgan fingerprint bit vector.

    Parameters
    ----------
    smiles : object
        Candidate SMILES value, typically a cell read from a spreadsheet.
        Anything that is not a non-blank ``str`` (``None``, NaN, numbers,
        lists, empty/whitespace-only strings) is treated as missing.

    Returns
    -------
    The bit vector produced by
    ``AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=2048)``, or ``None``
    when the value is missing, RDKit cannot parse it, or RDKit raises while
    processing it.
    """
    # The isinstance check alone rejects None/NaN (neither is a str), and it
    # avoids calling pd.isna on list-like values, whose array result cannot be
    # used in a boolean context.
    if not isinstance(smiles, str) or smiles.strip() == "":
        return None

    try:
        mol = Chem.MolFromSmiles(smiles)
        if not mol:
            # RDKit signals an unparsable SMILES by returning None.
            return None
        return AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=2048)
    except Exception:
        # Best-effort conversion: ordinary RDKit failures map to "no
        # fingerprint", while KeyboardInterrupt/SystemExit still propagate.
        return None

# Function to calculate Tanimoto similarity
def calculate_tanimoto_similarity(fp1, fp2):
    """Return the Tanimoto similarity of two fingerprints.

    Either argument may be ``None`` (no fingerprint available); in that case
    no comparison is attempted and ``None`` is returned. Otherwise the result
    of ``TanimotoSimilarity(fp1, fp2)`` is returned unchanged.
    """
    both_present = fp1 is not None and fp2 is not None
    return TanimotoSimilarity(fp1, fp2) if both_present else None

# Read the Excel file (change file path as needed)
file_path = "C:/Users/ganes/OneDrive/Desktop/AI/Drug/T_Test.xlsx"
df = pd.read_excel(file_path)

# Assuming the SMILES are in two columns, 'Input' and 'Output'
input_smiles_col = "Input"
output_smiles_col = "Output"

# Fail early, with a message naming the file and the columns, if the sheet
# does not have the expected layout.
missing_cols = [col for col in (input_smiles_col, output_smiles_col) if col not in df.columns]
if missing_cols:
    raise KeyError(f"Missing expected SMILES column(s) in {file_path}: {missing_cols}")

# Convert Input and Output SMILES to fingerprints (None where conversion fails)
df['Input_Fingerprint'] = df[input_smiles_col].apply(smiles_to_fingerprint)
df['Output_Fingerprint'] = df[output_smiles_col].apply(smiles_to_fingerprint)

# Rows are Input molecules, columns are Output molecules, both keyed by df's index
similarity_matrix = pd.DataFrame(index=df.index, columns=df.index)

# Pull (label, fingerprint) pairs out once so the n x n comparison below does
# not rebuild a full row Series via iterrows() on every inner iteration.
input_fps = list(zip(df.index, df['Input_Fingerprint']))
output_fps = list(zip(df.index, df['Output_Fingerprint']))

# Perform pairwise comparison: Input fingerprint of row i vs Output fingerprint of row j
for i, input_fp in input_fps:
    for j, output_fp in output_fps:
        similarity_matrix.at[i, j] = calculate_tanimoto_similarity(input_fp, output_fp)

# Save the pairwise similarity matrix to an Excel file
output_file_path = "pairwise_tanimoto_similarity_output.xlsx"
similarity_matrix.to_excel(output_file_path)

print(f"Pairwise Tanimoto similarity calculation completed. Results saved to {output_file_path}.")




Pairwise Tanimoto similarity calculation completed. Results saved to pairwise_tanimoto_similarity_output.xlsx.


In [13]:
# Pairs scoring strictly above this Tanimoto value are reported as similar.
SIMILARITY_THRESHOLD = 0.85

# Extract (label, SMILES, fingerprint) per row once; iterating these tuples
# avoids materialising a row Series n*n times through nested iterrows().
input_rows = list(zip(df.index, df[input_smiles_col], df['Input_Fingerprint']))
output_rows = list(zip(df.index, df[output_smiles_col], df['Output_Fingerprint']))

for i, input_smiles, input_fp in input_rows:
    for j, output_smiles, output_fp in output_rows:
        tanimoto_similarity = calculate_tanimoto_similarity(input_fp, output_fp)
        similarity_matrix.at[i, j] = tanimoto_similarity

        # A None score means at least one side had no fingerprint; skip those.
        if tanimoto_similarity is not None and tanimoto_similarity > SIMILARITY_THRESHOLD:
            print(f"Similar fingerprints found: Input row {i}, Output row {j}, SMILES: {input_smiles}, {output_smiles}")


Similar fingerprints found: Input row 3, Output row 261, SMILES: Cc1oncc1/C(S)=N/c1ccc(C#N)cc1, Cc1oncc1/C(S)=N/c2ccc(C#N)cc2
Similar fingerprints found: Input row 10, Output row 252, SMILES: C/C(O)=C(\C#N)C(=O)Nc1ccc(-c2ccccc2)cc1, C/C(O)=C(C#N)/C(Nc1ccc(c2ccccc2)cc1)=O
Similar fingerprints found: Input row 11, Output row 259, SMILES: CCOc1cccc(-c2ccc(NC(=O)/C(C#N)=C(/C)O)cc2)c1, CCOc1cccc(c2ccc(NC(/C(C#N)=C(C)\O)=O)cc2)c1
Similar fingerprints found: Input row 12, Output row 253, SMILES: C/C(O)=C(\C#N)C(=O)Nc1ccc(-c2ccccc2Cl)cc1, C/C(O)=C(C#N)/C(Nc1ccc(c2ccccc2Cl)cc1)=O
Similar fingerprints found: Input row 22, Output row 258, SMILES: CCOC(=O)/C(C#N)=C\Nc1ccc(-c2ccccc2)cc1, CCOC(/C(C#N)=C/Nc1ccc(c2ccccc2)cc1)=O
Similar fingerprints found: Input row 27, Output row 256, SMILES: CC/C(O)=C(\C#N)C(=O)Nc1ccc(-c2ccccc2Cl)c(C(=O)OC)c1, CC/C(O)=C(C#N)/C(Nc1ccc(c2ccccc2Cl)c(C(OC)=O)c1)=O
Similar fingerprints found: Input row 28, Output row 254, SMILES: CC/C(O)=C(\C#N)C(=O)Nc1ccc(-c2ccc(F)cc2F)c

In [14]:
import pandas as pd

# Pairs scoring strictly above this Tanimoto value are exported as similar.
SIMILARITY_THRESHOLD = 0.85

# Column layout of the exported sheet. Passing it explicitly keeps the header
# row in the Excel file even when no pair passes the threshold.
RESULT_COLUMNS = ['Input Row', 'Output Row', 'Input SMILES', 'Output SMILES', 'Tanimoto Similarity']

# List to hold similar fingerprint results (one dict per matching pair)
similar_fingerprints = []

# Extract (label, SMILES, fingerprint) per row once; iterating these tuples
# avoids materialising a row Series n*n times through nested iterrows().
input_rows = list(zip(df.index, df[input_smiles_col], df['Input_Fingerprint']))
output_rows = list(zip(df.index, df[output_smiles_col], df['Output_Fingerprint']))

for i, input_smiles, input_fp in input_rows:
    for j, output_smiles, output_fp in output_rows:
        tanimoto_similarity = calculate_tanimoto_similarity(input_fp, output_fp)
        similarity_matrix.at[i, j] = tanimoto_similarity

        # A None score means at least one side had no fingerprint; skip those.
        if tanimoto_similarity is not None and tanimoto_similarity > SIMILARITY_THRESHOLD:
            similar_fingerprints.append({
                'Input Row': i,
                'Output Row': j,
                'Input SMILES': input_smiles,
                'Output SMILES': output_smiles,
                'Tanimoto Similarity': tanimoto_similarity
            })

# Create a DataFrame from the similar fingerprints list
similar_df = pd.DataFrame(similar_fingerprints, columns=RESULT_COLUMNS)

# Save to Excel
output_file = 'similar_fingerprints.xlsx'
similar_df.to_excel(output_file, index=False)

print(f"Similar fingerprints saved to {output_file}.")


Similar fingerprints saved to similar_fingerprints.xlsx.
