In [2]:
%matplotlib inline
from IPython.display import HTML, display
import matplotlib.pyplot as plt
import numpy as np
import pyrfume
from pyrfume import pubchem
import re
import requests
from tqdm.auto import tqdm

In [3]:
def update_results(records, results):
    """Merge the text strings from one page of annotations into ``results``.

    Parameters
    ----------
    records : dict
        One decoded response page. ``records['Annotations']['Annotation']`` is
        iterated; for each annotation every
        ``Data[*]['Value']['StringWithMarkup'][*]['String']`` is collected.
    results : dict
        Mapping of CID -> list of strings, updated in place. A CID is only
        added when the annotation contributes at least one string.

    Returns
    -------
    None

    Notes
    -----
    Annotations without ``LinkedRecords``/``CID`` are skipped, as are data
    entries that carry no ``StringWithMarkup`` value.
    """
    for annotation in records['Annotations']['Annotation']:
        try:
            cids = annotation['LinkedRecords']['CID']
        except (KeyError, TypeError):
            # No linked compound IDs: there is no key to file the strings under.
            continue

        strings = [
            markup['String']
            for datum in annotation.get('Data', [])
            for markup in datum.get('Value', {}).get('StringWithMarkup', [])
        ]
        if not strings:
            continue

        for cid in cids:
            # Each CID gets its own list. Sharing one list object between CIDs
            # would let a later in-place extension for one CID silently modify
            # the entries of the others.
            results.setdefault(cid, []).extend(strings)

In [4]:
def get_results(heading, timeout=60):
    """Download every PubChem annotation page for ``heading`` and collect its strings.

    Parameters
    ----------
    heading : str
        Annotation heading, interpolated verbatim into the query string, so it
        must already be URL-encoded (e.g. ``"Optical+Rotation"``).
    timeout : float, optional
        Seconds to wait for each HTTP request before ``requests`` gives up.

    Returns
    -------
    dict
        Mapping of CID -> list of strings, as accumulated by ``update_results``
        over all pages.

    Raises
    ------
    requests.HTTPError
        If any page request returns an error status.
    requests.Timeout
        If a page request exceeds ``timeout``.
    """
    base_url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/annotations/heading/JSON"
    results = {}
    page = 1
    # The real page count is only known after the first response; 100 is a
    # placeholder total that is replaced as soon as page 1 has been read.
    with tqdm(total=100) as pbar:
        while True:
            url = f"{base_url}?heading_type=Compound&heading={heading}&page={page}"
            response = requests.get(url, timeout=timeout)
            # Fail loudly on HTTP errors instead of trying to parse an error body.
            response.raise_for_status()
            records = response.json()
            update_results(records, results)
            total_pages = records['Annotations']['TotalPages']
            if page == 1:
                pbar.reset(total=total_pages)
            pbar.set_description("%d CIDs described" % len(results))
            pbar.update()
            page += 1
            if page > total_pages:
                break
    return results

In [5]:
# Scrape all annotations under the "Optical Rotation" heading. The space is
# written as '+' because get_results places the heading into the URL as-is.
pd_results = get_results("Optical+Rotation")

  0%|          | 0/100 [00:00<?, ?it/s]

In [6]:
# Display the scraped mapping (CID -> list of annotation strings).
# NOTE(review): this renders the entire dict; consider showing only a small sample.
pd_results

{263: ['Specific rotation: +9.8 at 20 °C/D (water)'],
 7896: ['Specific optical rotation: -18.8 deg at 25 °C/D (ethanol) /(R)-(-)isomer/'],
 5770: ['Specific optical rotation: -118 deg @ 23 °C/D (c= 1 in chloroform), -168 deg @ 26 °C/D (c= 0.624 in dimethylformamide), -164 deg @ 26 °C/D (c= 0.96 in pyridine); max absorption (chloroform): 216 nm, 267 nm, 295 nm (log e= 61,700; 17,000; 10,200)'],
 2724385: ['Prisms from methanol  + chloroform. Decomposes at 225 °C. Specific optical rotation: +18.9 °C at 20 °C/D (pyridine). Very sparingly soluble in ethyl acetate /alpha-Acetyldigoxin/',
  'Needles from alcohol + chloroform. Decomposes at 240 °C. Specific optical rotation: +30.4 °C at 20 °C/D (c = 1 in alcohol). More soluble in ethyl acetate than the alpha-form /beta-Acetyldigoxin/',
  'Specific optical rotation: +13.4 to 13.8 deg at 25 °C/Hg (c = 10 in pyridine)',
  'Specific optical rotation: +13.6 and +14.2 deg at 25 °C; 546.1 nm in anhydrous pyridine solution'],
 441207: ['Specific opt

In [8]:
# Intended cache location for the scraped optical-rotation results
# (the .pkl extension suggests a pickle file; confirm the format).
path = 'pubchem/pubchem_scrape_optical_rotation.pkl'
# TODO: save `pd_results` to `path` so the data does not have to be scraped again on every run.