In [1]:
import json
from openai import OpenAI

# Read the OpenAI API key from a local text file instead of hardcoding it in
# the notebook; surrounding whitespace (e.g. a trailing newline) is stripped.
with open("openai/api_key.txt") as f:
    api_key = f.read().strip()

### Loading the results

In [2]:
# Load the fragment IUPAC names produced for each attribution method.
# Later cells index these as mapping[dataset_name][i] -> iterable of names.

# Saliency-map fragments.
with open("results/iupacs_saliency_map.json") as f:
    iupacs_saliency_map = json.loads(f.read())

# Grad-CAM fragments.
with open("results/iupacs_grad_cam.json") as f:
    iupacs_grad_cam = json.loads(f.read())

### Querying the LLM for fragment impact scores

In [3]:
# Empty containers for the LLM answers, one per attribution method; the
# scoring cell fills them in keyed by dataset name.
grad_cam_results, saliency_map_results = {}, {}

In [4]:
# Property names interpolated into the prompt, keyed by dataset name.
prompt_targets = {
    "herg": "HERG toxicity",
    "pampa": "PAMPA permeability",
    # NOTE(review): "CYP2A4" may be a typo for another CYP isoform
    # (e.g. CYP3A4) -- confirm against the dataset before editing the prompt.
    "cyp": "CYP2A4 inhibition"
}


def _build_prompt(iupac, target):
    """Return the user prompt asking for a 1-10 impact score for one fragment.

    The whitespace inside the literal is deliberately kept exactly as it was
    in the original inline prompt so the text sent to the model is unchanged.
    """
    return f"""
                        Rate the molecule fragment with the IUPAC name: {iupac} with respect to its impact on the {target}.
                        Output an integer score between 1 and 10, where 1 means the component has a very low impact on the {target}
                        and 10 means the component has a very high impact on the {target}.
                        Do not output anything besides the score.
                        """


def _rate_fragment(client, iupac, target, model="gpt-4o"):
    """Send one fragment to the chat model and return the raw reply text."""
    completion = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant."
            },
            {
                "role": "user",
                "content": _build_prompt(iupac, target)
            }
        ]
    )
    # The reply is stored unparsed (as returned by the API); downstream code
    # is responsible for converting it to a number.
    return completion.choices[0].message.content


def rate_fragments(client, iupacs_by_dataset, results,
                   dataset_names=("herg", "pampa", "cyp"), model="gpt-4o"):
    """Score every fragment of every dataset and store the replies in ``results``.

    Args:
        client: Object exposing ``chat.completions.create`` (an ``OpenAI`` client).
        iupacs_by_dataset: Mapping ``dataset_name -> sequence of fragment-name
            sequences``; the nesting is mirrored in the output.
        results: Dict that receives ``results[dataset_name] = [[reply, ...], ...]``.
            It is filled one dataset at a time, so datasets that finished
            before an API error are kept.
        dataset_names: Datasets to process, in order.
        model: Chat model name passed to the API.

    Returns:
        The same ``results`` dict, for convenience.

    Raises:
        KeyError: If a dataset name is missing from ``iupacs_by_dataset`` or
            ``prompt_targets``.
    """
    for dataset_name in dataset_names:
        target = prompt_targets[dataset_name]
        results[dataset_name] = [
            [_rate_fragment(client, iupac, target, model) for iupac in fragment_names]
            for fragment_names in iupacs_by_dataset[dataset_name]
        ]
    return results


# A single client is reused for every request instead of constructing a new
# one per fragment.
client = OpenAI(api_key=api_key)

# Same request order as before: all Grad-CAM fragments, then all saliency-map ones.
rate_fragments(client, iupacs_grad_cam, grad_cam_results)
rate_fragments(client, iupacs_saliency_map, saliency_map_results)

In [6]:
# Spot-check: show the raw reply strings stored for the "pampa" dataset
# (saliency-map fragments), one inner list per entry of the input.
saliency_map_results["pampa"]

[['3', '5', '5', '3', '5', '7', '3', '7'],
 ['5', '4', '5', '3', '5', '5', '5', '3', '5'],
 ['4', '5', '7', '7', '4', '4', '6', '4', '5'],
 ['5', '6', '7', '3', '5', '3', '8', '3', '5', '5'],
 ['8', '3', '6', '4', '4', '5', '5', '5', '6', '6'],
 ['2', '5', '7', '3', '4', '4', '6', '3']]

In [7]:
# Persist both result dicts as JSON, Grad-CAM first and saliency map second.
for out_path, payload in (
    ("results/llm_grad_cam_results.json", grad_cam_results),
    ("results/llm_saliency_map_results.json", saliency_map_results),
):
    with open(out_path, "w") as f:
        json.dump(payload, f)