# Evaluation

In [10]:
# Prediction accuracy may depend on whether stereochemistry is preserved or removed
# We should calculate both accuracies
from rdkit import Chem
from rdkit.Chem import AllChem
import numpy as np
from rdkit import RDLogger 
import re
RDLogger.DisableLog('rdApp.*')
from tqdm import tqdm

In [11]:
def calc_accuracy(path_ground_truth, path_predictions):
    """
    path_ground_truth: path to ground truth file (.txt)
    path_predictions: path to predictions file (.txt)
    """
    # Step 1: Read in the two text files
    with open(path_ground_truth, 'r') as f_gt, open(path_predictions, 'r') as f_pred:
        gt_lines = f_gt.readlines()
        pred_lines = f_pred.readlines()
    
    # Step 2: Check for equal length
    assert len(gt_lines) == len(pred_lines), "Ground truth and predictions lists must have the same length."
    
    # Step 3: Initialize variables
    acc_with_stereo = 0
    acc_wo_stereo = 0
    invalid_smiles = 0
    
    # Step 4: Compare SMILES strings
    for gt_smiles, pred_smiles in tqdm(zip(gt_lines, pred_lines), total=len(gt_lines)):
        gt_mol = Chem.MolFromSmiles(gt_smiles.strip().replace(" ", ""))
        pred_mol = Chem.MolFromSmiles(pred_smiles.strip().replace(" ", ""))
        
        if gt_mol is not None and pred_mol is not None:
            # Step 4.1: Check with stereo
            if set(Chem.MolToSmiles(gt_mol)) == set(Chem.MolToSmiles(pred_mol)):
                acc_with_stereo += 1
                acc_wo_stereo += 1
            else:
                # Step 4.2: Check without stereo
                Chem.RemoveStereochemistry(gt_mol)
                Chem.RemoveStereochemistry(pred_mol)
                if set(Chem.MolToSmiles(gt_mol)) == set(Chem.MolToSmiles(pred_mol)):
                    acc_wo_stereo += 1
        else:
            invalid_smiles += 1
    
    # Step 5: Calculate and print accuracy
    total_samples = len(gt_lines)
    accuracy_with_stereo = acc_with_stereo / total_samples
    accuracy_without_stereo = acc_wo_stereo / total_samples
    invalid_smiles_ratio = invalid_smiles / total_samples
    
    print(f"Accuracy with stereochemical information: {accuracy_with_stereo:.2%}")
    print(f"Accuracy without stereochemical information: {accuracy_without_stereo:.2%}")
    print(f"Invalid SMILES ratio: {invalid_smiles_ratio:.2%}")

# # Example usage
# calc_accuracy("ground_truth.txt", "predictions.txt")


In [3]:
# orderly_forward_separated
ground_truth_path = '/Users/dsw46/Projects_local/orderly_reviewer_response/orderly_transformer_datasets/orderly_forward_separated/tgt-test.txt'
predictions_path = '/Users/dsw46/Projects_local/orderly_reviewer_response/orderly_transformer_datasets/orderly_forward_separated/results/predictions_ofs_1_on_orderly_forward_separated_test.txt'
calc_accuracy(ground_truth_path, predictions_path)

100%|██████████| 172238/172238 [02:45<00:00, 1041.07it/s]

Accuracy with stereochemical information: 82.18%
Accuracy without stereochemical information: 84.31%
Invalid SMILES ratio: 0.46%





In [4]:
# orderly_forward_mixed
ground_truth_path = '/Users/dsw46/Projects_local/orderly_reviewer_response/orderly_transformer_datasets/orderly_forward_mixed/tgt-test.txt'
predictions_path = '/Users/dsw46/Projects_local/orderly_reviewer_response/orderly_transformer_datasets/orderly_forward_mixed/results/predictions_ofm_1_on_orderly_forward_mixed_test.txt'
calc_accuracy(ground_truth_path, predictions_path)

100%|██████████| 172237/172237 [02:38<00:00, 1089.62it/s]

Accuracy with stereochemical information: 80.79%
Accuracy without stereochemical information: 82.86%
Invalid SMILES ratio: 0.47%





In [5]:
# orderly_retro
ground_truth_path = '/Users/dsw46/Projects_local/orderly_reviewer_response/orderly_transformer_datasets/orderly_retro/tgt-test.txt'
predictions_path = '/Users/dsw46/Projects_local/orderly_reviewer_response/orderly_transformer_datasets/orderly_retro/results/predictions_or_1_on_orderly_retro_test.txt'
calc_accuracy(ground_truth_path, predictions_path)

100%|██████████| 174009/174009 [02:48<00:00, 1034.12it/s]

Accuracy with stereochemical information: 49.96%
Accuracy without stereochemical information: 50.99%
Invalid SMILES ratio: 0.25%





In [12]:
# orderly_forward_separated
# non uspto
ground_truth_path = '/Users/dsw46/Projects_local/orderly_reviewer_response/orderly_transformer_datasets/not_uspto_forward_separated/tgt-test.txt'
predictions_path = '/Users/dsw46/Projects_local/orderly_reviewer_response/orderly_transformer_datasets/orderly_forward_separated/results/predictions_ofs_1_on_not_uspto_forward_separated_test.txt'
calc_accuracy(ground_truth_path, predictions_path)

100%|██████████| 48368/48368 [00:39<00:00, 1212.43it/s]

Accuracy with stereochemical information: 64.75%
Accuracy without stereochemical information: 65.38%
Invalid SMILES ratio: 0.37%





In [7]:
# orderly_forward_mixed
# non uspto
ground_truth_path = '/Users/dsw46/Projects_local/orderly_reviewer_response/orderly_transformer_datasets/not_uspto_forward_mixed/tgt-test.txt'
predictions_path = '/Users/dsw46/Projects_local/orderly_reviewer_response/orderly_transformer_datasets/orderly_forward_mixed/results/predictions_ofm_1_on_not_uspto_forward_mixed_test.txt'
calc_accuracy(ground_truth_path, predictions_path)

100%|██████████| 48274/48274 [00:42<00:00, 1141.48it/s]

Accuracy with stereochemical information: 82.61%
Accuracy without stereochemical information: 83.62%
Invalid SMILES ratio: 0.31%





In [8]:
# orderly_retro
# non uspto
ground_truth_path = '/Users/dsw46/Projects_local/orderly_reviewer_response/orderly_transformer_datasets/not_uspto_retro/tgt-test.txt'
predictions_path = '/Users/dsw46/Projects_local/orderly_reviewer_response/orderly_transformer_datasets/orderly_retro/results/predictions_or_1_on_not_uspto_retro_test.txt'
calc_accuracy(ground_truth_path, predictions_path)

100%|██████████| 28601/28601 [00:30<00:00, 929.69it/s] 

Accuracy with stereochemical information: 42.28%
Accuracy without stereochemical information: 42.47%
Invalid SMILES ratio: 0.09%



