In [1]:
pip install rdkit


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
from rdkit import Chem
from rdkit.Chem import AllChem

# Input SMILES for a handful of common molecules (extend this list as needed)
molecules = [
    "CCO",  # Ethanol
    "CC(=O)O",  # Acetic acid
    "C1=CC=CC=C1",  # Benzene
    "CCOC",  # Ethyl methyl ether (dimethyl ether would be "COC")
    "CCN(CC)CC",  # Triethylamine
    "C1CCCCC1",  # Cyclohexane
    "C1=CC=C(C=C1)C=O",  # Benzaldehyde
    "C1=CC=NC=C1",  # Pyridine
    "C1=CC=C(O)C=C1",  # Phenol
    "C1=CC=C(C=C1)Cl",  # Chlorobenzene
]

# Parse every input and re-emit it in RDKit's canonical SMILES form.
# MolFromSmiles returns None for input it cannot parse; such entries are
# collected and reported rather than being dropped without a trace.
smiles_list = []
invalid_smiles = []
for raw_smiles in molecules:
    parsed = Chem.MolFromSmiles(raw_smiles)
    if parsed is None:
        invalid_smiles.append(raw_smiles)
        continue
    smiles_list.append(Chem.MolToSmiles(parsed))

if invalid_smiles:
    print(f"Skipped {len(invalid_smiles)} unparsable SMILES: {invalid_smiles}")

print(smiles_list)

['CCO', 'CC(=O)O', 'c1ccccc1', 'CCOC', 'CCN(CC)CC', 'C1CCCCC1', 'O=Cc1ccccc1', 'c1ccncc1', 'Oc1ccccc1', 'Clc1ccccc1']


In [3]:
import random

# NOTE: the effectiveness values below are synthetic placeholders drawn
# uniformly from [0, 1] and rounded to two decimals; they are not measurements.
# A dedicated, seeded generator is created inside this cell so that both
# Restart & Run All and re-running the cell alone give the same values.
EFFECTIVENESS_SEED = 42
effectiveness_rng = random.Random(EFFECTIVENESS_SEED)

# Pair each canonical SMILES string with its placeholder effectiveness score
dataset = [
    (smiles, round(effectiveness_rng.uniform(0, 1), 2)) for smiles in smiles_list
]

# Print the dataset, one "SMILES, effectiveness" pair per line
for smiles, effectiveness in dataset:
    print(f"{smiles}, {effectiveness}")

CCO, 0.17
CC(=O)O, 0.76
c1ccccc1, 0.02
CCOC, 0.17
CCN(CC)CC, 0.69
C1CCCCC1, 0.37
O=Cc1ccccc1, 0.61
c1ccncc1, 0.96
Oc1ccccc1, 0.03
Clc1ccccc1, 0.63


In [4]:
import csv

# Persist the (SMILES, effectiveness) pairs to disk as CSV, header row first.
# newline="" lets the csv module control line endings itself.
output_rows = [("SMILES", "Effectiveness"), *dataset]
with open("antibiotic_dataset.csv", "w", newline="") as out_file:
    csv.writer(out_file).writerows(output_rows)