In [4]:
import urllib.request
import pandas as pd

def CAS_to_SMILES(cas_numbers):
    """Resolve CAS numbers to SMILES strings via the NCI CACTUS structure service.

    Args:
        cas_numbers: Iterable of CAS numbers (strings).

    Returns:
        dict mapping every input value to its SMILES string, or to
        'Conversion failed' when the value is not a non-empty string or the
        lookup does not succeed.
    """
    from urllib.parse import quote

    smiles_strings = {}
    for cas in cas_numbers:
        # Non-string entries (e.g. missing cells) and blank strings cannot be
        # looked up; record the failure without touching the network.
        if not isinstance(cas, str) or not cas.strip():
            smiles_strings[cas] = 'Conversion failed'
            continue
        try:
            # Percent-encode the identifier so spaces or other reserved
            # characters cannot produce a malformed URL.
            identifier = quote(cas.strip(), safe='')
            url = 'http://cactus.nci.nih.gov/chemical/structure/' + identifier + '/smiles'
            # The timeout keeps one stalled request from hanging the whole loop,
            # and the context manager closes the connection promptly.
            with urllib.request.urlopen(url, timeout=30) as response:
                smiles_strings[cas] = response.read().decode('utf8').strip()
        except Exception:
            # Best-effort per entry, but unlike a bare ``except`` this still
            # lets KeyboardInterrupt/SystemExit stop a long-running batch.
            smiles_strings[cas] = 'Conversion failed'
    return smiles_strings

# Load the CAS numbers to look up from a CSV file
cas = pd.read_csv(r'failed-smiles.csv')
cas_numbers = cas['CAS Number'].tolist()

# Get the SMILES strings
smiles_strings = CAS_to_SMILES(cas_numbers)

# Convert the results to a DataFrame
results_df = pd.DataFrame(list(smiles_strings.items()), columns=['CAS Number', 'SMILES'])

# Export the results to a CSV file
results_path = 'smiles_results_2.csv'
results_df.to_csv(results_path, index=False)

# Report the path that was actually written; the message previously named
# a different file (smiles_results.csv) than the one created.
print(f"The results have been successfully exported to {results_path}")



The results have been successfully exported to smiles_results.csv


In [7]:
import csv
import requests

def cas_to_smiles(cas_number):
    """Look up a SMILES string for a CAS number through PubChem's PUG REST API.

    The CAS number is submitted as a compound *name* query and the
    CanonicalSMILES property of the first returned record is used.

    Args:
        cas_number: Identifier to search for.

    Returns:
        The SMILES string on success; "SMILES string not found." when the
        response does not contain the expected record; "Error fetching data."
        when the request fails, returns a non-200 status, or is not valid JSON.
    """
    from urllib.parse import quote

    # Percent-encode so whitespace or reserved characters in the CSV cell
    # cannot break the URL path.
    identifier = quote(str(cas_number).strip(), safe='')
    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{identifier}/property/CanonicalSMILES/JSON"

    try:
        # requests has no default timeout; without one a stalled connection
        # would block the whole batch indefinitely.
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        # Connection problems are reported per entry instead of aborting the run.
        return "Error fetching data."

    if response.status_code != 200:
        return "Error fetching data."

    try:
        data = response.json()
    except ValueError:
        # Body was not valid JSON.
        return "Error fetching data."

    try:
        record = data['PropertyTable']['Properties'][0]
        if 'CanonicalSMILES' in record:
            return record['CanonicalSMILES']
        # NOTE(review): newer PubChem responses reportedly label this property
        # 'ConnectivitySMILES' even when 'CanonicalSMILES' is requested — confirm.
        return record['ConnectivitySMILES']
    except (IndexError, KeyError, TypeError):
        return "SMILES string not found."

def convert_csv(input_csv, output_csv, resolver=None):
    """Read CAS numbers from a CSV file and write each one with its lookup result.

    The first row of ``input_csv`` is treated as a header and skipped; the
    first column of every following row is taken as the CAS number.

    Args:
        input_csv: Path of the CSV file to read.
        output_csv: Path of the CSV file to write (overwritten). It receives a
            "CAS Number", "SMILES" header followed by one row per CAS number.
        resolver: Optional callable mapping a CAS number to the value written
            in the SMILES column. Defaults to ``cas_to_smiles``.
    """
    if resolver is None:
        # Resolved at call time so the default follows the function currently
        # defined in the notebook.
        resolver = cas_to_smiles

    # newline='' on both files is what the csv module expects for correct
    # handling of line endings and embedded newlines.
    with open(input_csv, mode='r', newline='') as infile, open(output_csv, mode='w', newline='') as outfile:
        reader = csv.reader(infile)
        writer = csv.writer(outfile)

        # Write header
        writer.writerow(["CAS Number", "SMILES"])

        # Skip header in input file; the default keeps an empty file from
        # raising StopIteration.
        next(reader, None)

        for row in reader:
            # Blank lines are parsed as empty lists and padded cells as
            # whitespace; neither holds anything to look up.
            if not row or not row[0].strip():
                continue
            cas_number = row[0].strip()
            writer.writerow([cas_number, resolver(cas_number)])

# Convert the CAS numbers listed in the input CSV and report where the results were written.
input_csv, output_csv = 'failed-smiles.csv', 'cas_to_smiles.csv'
convert_csv(input_csv=input_csv, output_csv=output_csv)

print(f"Conversion complete. The results are saved in {output_csv}.")


Conversion complete. The results are saved in cas_to_smiles.csv.


In [10]:
import csv
import cirpy

def cas_to_smiles_cirpy(cas_number):
    """Resolve a CAS number to a SMILES string with CIRpy.

    Returns whatever ``cirpy.resolve(cas_number, 'smiles')`` yields when it is
    truthy, otherwise the placeholder "SMILES string not found.".
    """
    resolved = cirpy.resolve(cas_number, 'smiles')
    if not resolved:
        return "SMILES string not found."
    return resolved

def convert_csv(input_csv, output_csv, resolver=None):
    """Read CAS numbers from a CSV file and write each one with its lookup result.

    The first row of ``input_csv`` is treated as a header and skipped; the
    first column of every following row is taken as the CAS number.

    Args:
        input_csv: Path of the CSV file to read.
        output_csv: Path of the CSV file to write (overwritten). It receives a
            "CAS Number", "SMILES" header followed by one row per CAS number.
        resolver: Optional callable mapping a CAS number to the value written
            in the SMILES column. Defaults to ``cas_to_smiles_cirpy``.
    """
    if resolver is None:
        # Resolved at call time so the default follows the function currently
        # defined in the notebook.
        resolver = cas_to_smiles_cirpy

    # newline='' on both files is what the csv module expects for correct
    # handling of line endings and embedded newlines.
    with open(input_csv, mode='r', newline='') as infile, open(output_csv, mode='w', newline='') as outfile:
        reader = csv.reader(infile)
        writer = csv.writer(outfile)

        # Write header
        writer.writerow(["CAS Number", "SMILES"])

        # Skip header in input file; the default keeps an empty file from
        # raising StopIteration.
        next(reader, None)

        for row in reader:
            # Blank lines are parsed as empty lists and padded cells as
            # whitespace; neither holds anything to look up.
            if not row or not row[0].strip():
                continue
            cas_number = row[0].strip()
            writer.writerow([cas_number, resolver(cas_number)])

# Convert the CAS numbers listed in the input CSV and report where the results were written.
input_csv, output_csv = 'failed-smiles.csv', 'cas_to_smiles.csv'
convert_csv(input_csv=input_csv, output_csv=output_csv)

print(f"Conversion complete. The results are saved in {output_csv}.")

Conversion complete. The results are saved in cas_to_smiles.csv.


In [9]:
pip install cirpy

Collecting cirpy
  Downloading CIRpy-1.0.2.tar.gz (20 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: cirpy
  Building wheel for cirpy (setup.py): started
  Building wheel for cirpy (setup.py): finished with status 'done'
  Created wheel for cirpy: filename=CIRpy-1.0.2-py3-none-any.whl size=7288 sha256=710012fe899962d6eb7d3ca611c522cc504d9f6235c602a5196ab6907ccf13e2
  Stored in directory: c:\users\savannah talledo\appdata\local\pip\cache\wheels\48\78\17\44312649d6e053ad343fc82af1dac7ba7ddd866016abe86f41
Successfully built cirpy
Installing collected packages: cirpy
Successfully installed cirpy-1.0.2
Note: you may need to restart the kernel to use updated packages.
