# Run this code
Run every code cell in this section (press each cell's play button) before attempting to collect the LCSS/SDS URLs in the second section.


In [None]:
import requests
import time
import pandas as pd
from IPython.display import display, Javascript

In [None]:
def CAS_to_CID(CAS: str, wait_time: int=0) -> int|bool:
  """
  Input: A molecules CAS number (Chemical Abstracts Service) as str in
         the format "000-00-0". Ex, for Formaldehyde CAS = 30525-89-4. wait_time
         refers to time to wait between requests (only applicable when doing
         multiple calls of this function)
  Output: an int representing the molecule's CID (PubChem's Compound Identifier)

  """
  try:
    cas_url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{CAS}/cids/JSON"
    response_cas = requests.get(cas_url, timeout=10)

    time.sleep(wait_time)
    data_cas = response_cas.json()
    CID = data_cas["IdentifierList"]["CID"][0]
    return CID
  except:
    return False

In [None]:
def CID_to_common_name(CID: int, wait_time: int=0) -> str|bool:
  """
  Input: A molecules CID (Pubchem's Compound Identifier). wait_time is the time
         between requests in seconds. Default wait_time = 0
  Output: The given molecules common name in string format. Returns False if
          unable to retrieve a name.

  """
  try:
    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound/{CID}/JSON/"
    response = requests.get(url, timeout=10)
    data = response.json()
    common_name = data["Record"]["RecordTitle"]
    return common_name
  except:
    return False

In [None]:
def generate_LCSS_url(CID: int) -> str:
  """
  Build the URL of a compound's LCSS page on PubChem.

  Input: A molecule's CID (PubChem's Compound Identifier), an int
  Output: A string holding the URL of the compound's LCSS (Laboratory
          Chemical Safety Summary) datasheet view. No request is made; the
          CID is only inserted into the URL.
  """
  url_template = "https://pubchem.ncbi.nlm.nih.gov/compound/{}#datasheet=LCSS"
  return url_template.format(CID)

# Collecting LCSS/SDS URLs

### Collecting One URL from A CAS number

All you need to do is paste your CAS number, in quotation marks, after `CAS =` in the cell below (for example, `CAS = "50-00-0"`), then run the cell.
Copy and paste the output into the inventory sheet.

In [None]:
# Set CAS to the CAS number you want to look up (keep the quotation marks).
CAS = "67-68-5"  # another valid example would be "50-00-0"


# Nothing below this line should need editing.
CAS = str(CAS)
CID = CAS_to_CID(CAS)
if not CID:
  # CAS_to_CID returns False when the lookup fails.
  print("Could not obtain CID :( \nis the CAS number correct and in quotations?")
else:
  lcss_url = generate_LCSS_url(CID)
  print(lcss_url)

Could not obtain CID :( 
is the CAS number correct and in quotations?


### Collecting Many URLs from Many CAS Numbers
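Copy and paste your CAS numbers, one per line, between the triple quotes in the first cell below. Then run both cells and copy the printed URLs into the inventory sheet.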

In [None]:
# Paste one CAS number per line between the triple quotes below.
# read_csv with header=None loads them into a single column, which the next
# cell iterates over as cas_nums[0]. Blank lines (such as the one right after
# the opening quotes) are dropped by read_csv's default skip_blank_lines=True.
# NOTE(review): "x" looks like a placeholder for inventory rows that have no
# CAS number — confirm. Each "x" is still passed to CAS_to_CID as a lookup.
# pandas is already imported in the first code cell; the re-import is harmless.
import io
import pandas as pd
cas_nums = pd.read_csv(io.StringIO('''
67-68-5
67-56-1
75-05-8
110-54-3
x
110-54-3
68-12-2
x
100-47-0
60-24-2
108-24-7
110-86-1
500-22-1
x
67-68-5
140-29-4
103-72-0
102-52-3
x
x
75-09-2
141-78-6
'''), header=None)


In [None]:
# Look up every pasted CAS number and emit one output line per input row,
# in input order. Rows whose CID lookup fails get a fixed message instead.
url_lines = []
for cas in cas_nums[0]:
  CAS = str(cas)
  CID = CAS_to_CID(CAS)
  lcss_url = generate_LCSS_url(CID) if CID else "Could not obtain LCSS"
  url_lines.append(f"{lcss_url} \n")

total_string = "".join(url_lines)
print(total_string)

https://pubchem.ncbi.nlm.nih.gov/compound/6348#datasheet=LCSS 
https://pubchem.ncbi.nlm.nih.gov/compound/24602#datasheet=LCSS 
https://pubchem.ncbi.nlm.nih.gov/compound/75151#datasheet=LCSS 
https://pubchem.ncbi.nlm.nih.gov/compound/7397#datasheet=LCSS 
https://pubchem.ncbi.nlm.nih.gov/compound/8134#datasheet=LCSS 
Could not obtain LCSS 
https://pubchem.ncbi.nlm.nih.gov/compound/75151#datasheet=LCSS 



# Collecting Common Names



## Collecting One Common Name from a CAS Number

In [None]:
# Set CAS to the CAS number you want to look up (keep the quotation marks).
CAS = "100-47-0"  # another valid example would be "50-00-0"


# Nothing below this line should need editing.
CAS = str(CAS)
CID = CAS_to_CID(CAS)
if not CID:
  # CAS_to_CID returns False when the lookup fails.
  print("Could not obtain CID :( \nis the CAS number correct and in quotations?")
else:
  common_name = CID_to_common_name(CID)
  # CID_to_common_name returns False when no name could be retrieved.
  print(common_name if common_name else "Could not obtain common name :(")

Benzonitrile


## Collecting Many Common Names from Many CAS Numbers

In [None]:
# Paste one CAS number per line between the triple quotes below.
# read_csv with header=None loads them into a single column, which the next
# cell iterates over as cas_nums[0]. Blank lines (such as the one right after
# the opening quotes) are dropped by read_csv's default skip_blank_lines=True.
# Running this cell rebinds cas_nums, replacing any list loaded earlier.
# pandas is already imported in the first code cell; the re-import is harmless.
import io
import pandas as pd
cas_nums = pd.read_csv(io.StringIO('''
75-15-0
7789-20-0
2206-27-1
98-59-9
111-77-3
12093-10-6
2206-27-1
'''), header=None)


In [None]:
# Resolve every pasted CAS number to a common name, one output line per input
# row, in input order. A failed CID lookup and a failed name lookup both
# produce the same fallback message.
name_lines = []
for cas in cas_nums[0]:
  CAS = str(cas)
  CID = CAS_to_CID(CAS)
  entry = "Could not obtain common name"
  if CID:
    common_name = CID_to_common_name(CID)
    if common_name:
      entry = common_name
  name_lines.append(entry + "\n")

total_string = "".join(name_lines)
print(total_string)

Carbon Disulfide
Deuterium Oxide
DMSO-d6
P-Toluenesulfonyl chloride
2-(2-Methoxyethoxy)ethanol
Could not obtain common name
DMSO-d6

