In [1]:
# Star-import the ChemScraper package; the cells below use the names it
# brings into scope (e.g. `VendorSources`) without a module prefix.
from ChemScraper import *
# Show the vendors ChemScraper knows about.
print(VendorSources)

('Sigma-Aldrich', 'Thermo Fisher Scientific')


In [2]:
# Find compounds in PubChem that are likely purchasable.
# One way is to find records deposited by chemical vendors.
import pandas as pd
# Single source of truth for the output path: the download writes to it and
# the read below loads from it, so changing it here keeps both in sync.
compound_sigma_csv = "compound_sigma.csv"
download_vendor_compounds((VendorSources[0],), saveas=compound_sigma_csv, count_limit=10)  # only download 10 entries
# Read back the file that was just saved (previously the file name was
# re-typed as a literal, which could drift from `compound_sigma_csv`).
df_compound_sigma = pd.read_csv(compound_sigma_csv)
print(df_compound_sigma)

2022-08-08 07:07:53.740 | INFO     | ChemScraper.pubchem.entrez:download_vendor_compounds:94 - esearch for vendors: ('Sigma-Aldrich',)
2022-08-08 07:07:53.741 | INFO     | ChemScraper.pubchem.entrez:request_entrez_query:33 - request URL: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pccompound&term="has src vendor"[Filter] AND ("Sigma-Aldrich"[SourceName])&retstart=0&retmax=10&retmode=json&usehistory=y
2022-08-08 07:07:54.078 | INFO     | ChemScraper.pubchem.entrez:download_vendor_compounds:99 - esearch success!
2022-08-08 07:07:54.079 | INFO     | ChemScraper.pubchem.entrez:download_vendor_compounds:101 - esearch saved at: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pccompound&query_key=1&webenv=MCID_62f0ee880f483d51d80c8b58
2022-08-08 07:07:54.080 | INFO     | ChemScraper.pubchem.entrez:download_vendor_compounds:103 - esearch count: 249006
2022-08-08 07:07:55.092 | INFO     | ChemScraper.pubchem.entrez:download_vendor_compounds:105 - esearch converte

         cid      mw                                          isosmiles
0  146027892  289.40  C[C@@H]1CC[C@@H](C(C1)C(=O)NC2=CC=C(C=C2)OC)C(C)C
1  137315585  178.20                        [Li+].C1CCOC1.C1CCOC1.[Al-]
2  137313644  415.30  CC1=CC(=C(C=C1)/C=C(\\C)/C(=O)N=C(N)N)/C=C(\\C...
3  137243236  385.40  C1=CC=C(C=C1)N2C=C(C(=N2)C3=CC=C(C=C3)[N+](=O)...
4  137243235  402.40  C1=CC2=C(C(=C1)N=NC3=C4C=CC=NC4=C(C=C3)O)C(=CC...
5  137243234  307.40  C1=CC=C(C=C1)/C=C/2\\C(=O)N/C(=N\\N=C\\C3=CC=C...
6  137243233  418.90  C1=CC=C(C=C1)C[N+]2=CC=CC(=C2)C(=O)N/N=C/C3=C(...
7  137243232  491.40  C1=CC=C(C=C1)C(=O)C[N+]2=CC=CC(=C2)C(=O)N/N=C/...
8  137243231  471.30  COC1=CC=C(C=C1)C(=O)C[N+]2=CC=CC(=C2)C(=O)N/N=...
9  137243230  322.76  C1=CC=C(C(=C1)/C=N/NC(=O)C2=CC=[N+](C=C2)CCO)O.Cl





In [3]:
# Given a PubChem `CID`, look up the chemical vendor links stored in PubChem.
compound_id = 137243230  # PubChem CID (appears as row 9 in the recorded vendor table output above)
# The printed result maps a vendor name to a product URL.
vendor_links = get_vendor_links(compound_id)
print(vendor_links)

{'Sigma-Aldrich': 'https://www.sigmaaldrich.com/catalog/product/aldrich/r358797'}


In [4]:
# Using this link, we can scrape the Sigma-Aldrich website for pricing info.
# `patable` is the price-and-availability table; its contents may change based on your location.
from ChemScraper.vscraper.sigma_aldrich import get_chrome_driver
# This driver is reused by the CAS-number search later in the notebook.
# NOTE(review): no cell visible here closes the driver — confirm it is shut down at the end of the notebook.
browser_driver = get_chrome_driver(headless=True)
sigma_patable = get_sigma_aldrich_patable(browser_driver, vendor_links['Sigma-Aldrich'])
print(sigma_patable)

2022-08-08 07:07:57.093 | INFO     | ChemScraper.vscraper.sigma_aldrich:get_sigma_aldrich_patable:47 - sigma-aldrich product url: https://www.sigmaaldrich.com/catalog/product/aldrich/r358797
2022-08-08 07:08:00.682 | INFO     | ChemScraper.vscraper.sigma_aldrich:get_sigma_aldrich_patable:52 - page ready after: 0.008 s


           SKU Pack Size                                       Availability  \
0  R358797-5MG            Usually ships in 2 business days. (Orders outs...   

    Price Quantity                                                url  
0  $28.60           https://www.sigmaaldrich.com/catalog/product/a...  


In [5]:
# The problems with starting from a PubChem compound record are:
# 1. the vendor link stored there may be outdated
# 2. several products can correspond to one compound (package size, purity, etc.)
# So it is better to start a new search directly through the vendor, using a CAS number.
compound = identify_compound('C(CN)N', 'smiles')
cas_number = get_cas_number(compound.cid)  # for uncommon chemicals without a CAS number this method returns None
print(compound.as_dict())
print(cas_number)

{'@module': 'ChemScraper.schema', '@class': 'Compound', '@version': None, 'cid': 3301, 'smiles': 'C(CN)N', 'inchi': 'InChI=1S/C2H8N2/c3-1-2-4/h1-4H2', 'iupac': 'ethane-1,2-diamine', 'properties': {}}
107-15-3


In [6]:
# Now we can use the vendor's search page to get all available products with the same CAS number,
# then extract their prices and availabilities.
# Both steps are wrapped in the function called below.
# NOTE(review): `cas_number` may be None for compounds without a CAS number — confirm how the function handles that.
df_pa = get_sigma_aldrich_patables(browser_driver, cas_number)
print(df_pa)

2022-08-08 07:08:01.755 | INFO     | ChemScraper.vscraper.sigma_aldrich:get_sigma_aldrich_patables:69 - sigma-aldrich search url: https://www.sigmaaldrich.com/US/en/search/107-15-3?focus=products&page=1&perpage=30&sort=relevance&term=107-15-3&type=cas_number
2022-08-08 07:08:03.939 | INFO     | ChemScraper.vscraper.sigma_aldrich:get_sigma_aldrich_patables:75 - page ready after: 0.010 s
2022-08-08 07:08:04.087 | INFO     | ChemScraper.vscraper.sigma_aldrich:get_sigma_aldrich_patable:47 - sigma-aldrich product url: https://www.sigmaaldrich.com/US/en/product/sial/e26266
2022-08-08 07:08:07.452 | INFO     | ChemScraper.vscraper.sigma_aldrich:get_sigma_aldrich_patable:52 - page ready after: 0.026 s
2022-08-08 07:08:08.105 | INFO     | ChemScraper.vscraper.sigma_aldrich:get_sigma_aldrich_patable:47 - sigma-aldrich product url: https://www.sigmaaldrich.com/US/en/product/sial/03550
2022-08-08 07:08:11.199 | INFO     | ChemScraper.vscraper.sigma_aldrich:get_sigma_aldrich_patable:52 - page ready

               SKU   Pack Size  \
0       E26266-5ML        5 ML   
1     E26266-100ML      100 ML   
2        E26266-1L         1 L   
3      E26266-2.5L       2.5 L   
4   E26266-4X100ML  4 X 100 ML   
5      03550-250ML      250 ML   
6         03550-1L         1 L   
7       03550-2.5L       2.5 L   
8       8009470100      100 ML   
9       8009470500      500 ML   
10      8009471000         1 L   
11      8009472500       2.5 L   
12      8009479051        50 L   
13    391085-100ML      100 ML   
14       391085-1L         1 L   
15     E1521-250ML      250 ML   
16        E1521-1L         1 L   
17     41008-1ML-F        1 ML   
18        15068-1L         1 L   
19     E1649-250ML      250 ML   
20        E1649-1L         1 L   

                                         Availability      Price Quantity  \
0    Available to ship on August 08, 2022\nDetails...     $36.20            
1    Available to ship on August 08, 2022\nDetails...     $47.70            
2    Available to sh