# Evaluation

In [1]:
# Prediction accuracy may depend on whether stereochemistry is preserved or removed
# We should calculate both accuracies
from rdkit import Chem
from rdkit.Chem import AllChem
import numpy as np
from rdkit import RDLogger 
import re
RDLogger.DisableLog('rdApp.*')
from tqdm import tqdm

In [2]:
def calc_accuracy(path_ground_truth, path_predictions):
    """
    path_ground_truth: path to ground truth file (.txt)
    path_predictions: path to predictions file (.txt)
    """
    # Step 1: Read in the two text files
    with open(path_ground_truth, 'r') as f_gt, open(path_predictions, 'r') as f_pred:
        gt_lines = f_gt.readlines()
        pred_lines = f_pred.readlines()
    
    # Step 2: Check for equal length
    assert len(gt_lines) == len(pred_lines), "Ground truth and predictions lists must have the same length."
    
    # Step 3: Initialize variables
    acc_with_stereo = 0
    acc_wo_stereo = 0
    invalid_smiles = 0
    
    # Step 4: Compare SMILES strings
    for gt_smiles, pred_smiles in tqdm(zip(gt_lines, pred_lines), total=len(gt_lines)):
        gt_mol = Chem.MolFromSmiles(gt_smiles.strip().replace(" ", ""))
        pred_mol = Chem.MolFromSmiles(pred_smiles.strip().replace(" ", ""))
        
        if gt_mol is not None and pred_mol is not None:
            # Step 4.1: Check with stereo
            if set(Chem.MolToSmiles(gt_mol)) == set(Chem.MolToSmiles(pred_mol)):
                acc_with_stereo += 1
                acc_wo_stereo += 1
            else:
                # Step 4.2: Check without stereo
                Chem.RemoveStereochemistry(gt_mol)
                Chem.RemoveStereochemistry(pred_mol)
                if set(Chem.MolToSmiles(gt_mol)) == set(Chem.MolToSmiles(pred_mol)):
                    acc_wo_stereo += 1
        else:
            invalid_smiles += 1
    
    # Step 5: Calculate and print accuracy
    total_samples = len(gt_lines)
    accuracy_with_stereo = acc_with_stereo / total_samples
    accuracy_without_stereo = acc_wo_stereo / total_samples
    invalid_smiles_ratio = invalid_smiles / total_samples
    
    print(f"Accuracy with stereochemical information: {accuracy_with_stereo:.2%}")
    print(f"Accuracy without stereochemical information: {accuracy_without_stereo:.2%}")
    print(f"Invalid SMILES ratio: {invalid_smiles_ratio:.2%}")

# # Example usage
# calc_accuracy("ground_truth.txt", "predictions.txt")


In [4]:
parent = '/Users/dsw46/Projects_local/ORDerly_jcim_response/transformer_data/'

In [6]:

# orderly_forward_separated
ground_truth_path = parent + 'orderly_forward_separated/tgt-test.txt'
predictions_path = parent + 'orderly_forward_separated/predictions_ofs_20_on_orderly_forward_separated_test.txt'
calc_accuracy(ground_truth_path, predictions_path)

100%|██████████| 172535/172535 [01:40<00:00, 1719.08it/s]

Accuracy with stereochemical information: 83.86%
Accuracy without stereochemical information: 85.84%
Invalid SMILES ratio: 0.34%





In [10]:
# orderly_forward_mixed
ground_truth_path = parent + 'orderly_forward_mixed/tgt-test.txt'
predictions_path = parent + 'orderly_forward_mixed/predictions_ofm_20_on_orderly_forward_mixed_test.txt'
calc_accuracy(ground_truth_path, predictions_path)

100%|██████████| 172535/172535 [01:38<00:00, 1750.40it/s]

Accuracy with stereochemical information: 81.96%
Accuracy without stereochemical information: 83.99%
Invalid SMILES ratio: 0.36%





In [13]:
# orderly_retro
ground_truth_path = parent + 'orderly_retro/tgt-test.txt'
predictions_path = parent + 'orderly_retro/predictions_or_20_on_orderly_retro_test.txt'
calc_accuracy(ground_truth_path, predictions_path)

100%|██████████| 174161/174161 [02:08<00:00, 1355.50it/s]

Accuracy with stereochemical information: 51.28%
Accuracy without stereochemical information: 52.30%
Invalid SMILES ratio: 0.21%





In [25]:
# orderly_forward_separated
# non uspto
ground_truth_path = parent + 'non_uspto_forward_separated/tgt-test.txt'
predictions_path = parent + 'non_uspto_forward_separated/predictions_ofs_20_on_not_uspto_forward_separated_test.txt'
calc_accuracy(ground_truth_path, predictions_path)

100%|██████████| 50210/50210 [00:30<00:00, 1621.25it/s]

Accuracy with stereochemical information: 66.10%
Accuracy without stereochemical information: 66.92%
Invalid SMILES ratio: 0.40%





In [23]:
# orderly_forward_mixed
# non uspto
ground_truth_path = parent + 'non_uspto_forward_mixed/tgt-test.txt'
predictions_path = parent + 'non_uspto_forward_mixed/predictions_ofm_20_on_not_uspto_forward_mixed_test.txt'
calc_accuracy(ground_truth_path, predictions_path)

100%|██████████| 50210/50210 [00:28<00:00, 1754.40it/s]

Accuracy with stereochemical information: 84.12%
Accuracy without stereochemical information: 85.20%
Invalid SMILES ratio: 0.27%





In [24]:
# orderly_retro
# non uspto
ground_truth_path = parent + 'non_uspto_retro/tgt-test.txt'
predictions_path = parent + 'non_uspto_retro/predictions_or_20_on_not_uspto_retro_test.txt'
calc_accuracy(ground_truth_path, predictions_path)

100%|██████████| 49206/49206 [00:39<00:00, 1237.86it/s]

Accuracy with stereochemical information: 37.22%
Accuracy without stereochemical information: 37.42%
Invalid SMILES ratio: 0.27%



