# Two paths to controlled molecule generation — ChemGPT+SMC vs SmileyLlama

In [None]:
!pip install -r requirements.txt
import torch
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("Device:", torch.cuda.get_device_name(0))


## 1. Prepare ZINC data splits

In [None]:
from src.data_prep import run_data_prep

# Keyword arguments for the data-preparation step, collected in one mapping
# so they can be inspected or tweaked before the call.
data_prep_args = {
    "input_path": None,  # Provide path if zinc250k not under data/
    "output_dir": "data",
    "ranges_path": "data/zinc_property_ranges.json",
    "seed": 42,
    "ref_size": 240_000,
}

run_data_prep(**data_prep_args)


In [None]:
import json
import pandas as pd

# Parse the property-ranges JSON stored under data/.
with open("data/zinc_property_ranges.json", "r", encoding="utf-8") as ranges_file:
    ranges = json.loads(ranges_file.read())

# Transpose so the top-level JSON keys become rows, then label the positional
# columns 0 and 1 as "p05" and "p95".
# NOTE(review): assumes the JSON maps each name to a [low, high] pair
# (presumably 5th/95th percentiles) -- confirm against src.data_prep.
ranges_table = pd.DataFrame(ranges).T
ranges_table.rename(columns={0: "p05", 1: "p95"})


## 2. Prompts for all models

In [None]:
from src.utils import PROMPTS

# Names of every prompt; the experiment cells below pass this list as
# `prompt_names`.
prompt_names = [p.name for p in PROMPTS]

# Overview table of all prompts. The "name" column reuses `prompt_names`
# instead of rebuilding the same list, so the table and the list handed to
# the experiments always agree.
pd.DataFrame({"name": prompt_names, "prompt": [p.text for p in PROMPTS]})


## 3. Baseline ChemGPT generation

In [None]:
from src import baseline_generate

# Sampling settings for the baseline ChemGPT generation run.
baseline_settings = dict(
    n=1_000,
    temperatures=[1.0, 0.7],
    top_p=0.9,
    max_new_tokens=128,
    batch_size=64,
    seed=42,
)

# Output locations are spelled out at the call site; everything else comes
# from the settings mapping above.
baseline_generate.run_experiment(
    prompt_names=prompt_names,
    out_csv="results/baseline_results.csv",
    summary_csv="results/baseline_summary.csv",
    **baseline_settings,
)


## 4. ChemGPT + GenLM SMC decoding

In [None]:
from src import smc_generate

# Sampling and SMC settings for the ChemGPT + GenLM SMC decoding run.
smc_settings = dict(
    n=1_000,
    temperatures=[1.0, 0.7],
    top_p=0.9,
    particles=10,
    ess_threshold=0.5,
    max_new_tokens=128,
    seed=43,
)

# Output locations are spelled out at the call site; everything else comes
# from the settings mapping above.
smc_generate.run_experiment(
    prompt_names=prompt_names,
    out_csv="results/smc_results.csv",
    summary_csv="results/smc_summary.csv",
    **smc_settings,
)


## 5. SmileyLlama inference

In [None]:
from src import smiley_generate

# Sampling and model-loading settings for the SmileyLlama inference run.
# NOTE(review): device=None presumably lets the module choose the device
# itself -- confirm in src/smiley_generate.py.
smiley_settings = dict(
    n=1_000,
    temperatures=[1.0],
    top_p=0.9,
    max_new_tokens=128,
    batch_size=50,
    seed=44,
    device=None,
    quantize=True,
)

# Output locations are spelled out at the call site; everything else comes
# from the settings mapping above.
smiley_generate.run_experiment(
    prompt_names=prompt_names,
    out_csv="results/smiley_results.csv",
    summary_csv="results/smiley_summary.csv",
    **smiley_settings,
)


## 6. Evaluation

In [None]:
from src import evaluate

# Run the project's evaluation entry point with no arguments.
# NOTE(review): the next cells read results/qed_table.csv and
# results/panel_table.csv, which are presumably written by this step --
# confirm in src/evaluate.py.
evaluate.main()


In [None]:
# Load the QED table from results/ and show it as the cell output.
qed_table = pd.read_csv("results/qed_table.csv")
qed_table


In [None]:
# Load the panel table from results/ and show it as the cell output.
panel_table = pd.read_csv("results/panel_table.csv")
panel_table


## 7. Figures

In [None]:
from src import plots

# Run the project's plotting entry point with no arguments.
# NOTE(review): presumably renders the figures from the files under
# results/ -- confirm inputs and output paths in src/plots.py.
plots.main()
