Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add backwards-compatible Python 3 support #4

Merged
merged 10 commits into from
Mar 31, 2014
1 change: 0 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
language: python
python:
- "2.6"
- "2.7"
- "3.2"
- "3.3"
Expand Down
20 changes: 9 additions & 11 deletions docs/source/guide/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,25 @@
Installation
============

PubChemPy supports Python versions 2.7, 3.2, 3.3 and 3.4. There are no other dependencies.

There are a variety of ways to download and install PubChemPy.

Option 1: Use pip (recommended)
-------------------------------

The easiest and recommended way to install is using pip [#f1]_::
The easiest and recommended way to install is using pip::

pip install pubchempy

This will download the latest version of PubChemPy, and place it in your ``site-packages`` folder so it is automatically
available to all your Python scripts.

If you don't already have pip installed, you can `install it using get-pip.py`_::

curl -O https://raw.github.com/pypa/pip/master/contrib/get-pip.py
python get-pip.py

Option 2: Download the latest release
-------------------------------------

Expand All @@ -38,15 +45,6 @@ stable, but may include new features that have not yet been released. Simply clo
cd PubChemPy
python setup.py install

.. rubric:: Footnotes

.. [#f1] You can `install pip`_ using get-pip.py:
::

curl -O https://raw.github.com/pypa/pip/master/contrib/get-pip.py
python get-pip.py


.. _`install pip`: http://www.pip-installer.org/en/latest/installing.html
.. _`install it using get-pip.py`: http://www.pip-installer.org/en/latest/installing.html
.. _`download the latest release`: https://github.com/mcs07/PubChemPy/releases/
.. _`available on GitHub`: https://github.com/mcs07/PubChemPy
1 change: 1 addition & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ Features
- Download compound records as XML, ASNT/B, JSON, SDF and depiction as a PNG image.
- Construct property tables using *pandas* DataFrames.
- A complete Python wrapper around the `PubChem PUG REST web service`_.
- Supports Python versions 2.7 and 3.2 – 3.4.

User guide
----------
Expand Down
59 changes: 34 additions & 25 deletions pubchempy.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,16 @@
import json
import logging
import os
import sys
import time
import urllib
import urllib2

try:
from urllib.error import HTTPError
from urllib.parse import quote, urlencode
from urllib.request import urlopen
except ImportError:
from urllib import urlencode
from urllib2 import quote, urlopen, HTTPError


__author__ = 'Matt Swain'
Expand All @@ -26,57 +33,59 @@
log.addHandler(logging.NullHandler())


if sys.version_info[0] == 3:
text_types = str, bytes
else:
text_types = basestring,


def request(identifier, namespace='cid', domain='compound', operation=None, output='JSON', searchtype=None, **kwargs):
"""
Construct API request from parameters and return the response.

Full specification at http://pubchem.ncbi.nlm.nih.gov/pug_rest/PUG_REST.html
"""

# If identifier is a list, join with commas into string
if isinstance(identifier, int):
identifier = str(identifier)
if not isinstance(identifier, basestring):
if not isinstance(identifier, text_types):
identifier = ','.join(str(x) for x in identifier)

# Filter None values from kwargs
kwargs = dict((k, v) for k, v in kwargs.iteritems() if v is not None)

kwargs = dict((k, v) for k, v in kwargs.items() if v is not None)
# Build API URL
urlid, postdata = None, None
if namespace == 'sourceid':
identifier = identifier.replace('/', '.')
if namespace in ['listkey', 'formula', 'sourceid'] or (searchtype and namespace == 'cid') or domain == 'sources':
urlid = urllib2.quote(identifier.encode('utf8'))
urlid = quote(identifier.encode('utf8'))
else:
postdata = '%s=%s' % (namespace, urllib2.quote(identifier.encode('utf8')))
postdata = urlencode([(namespace, identifier)]).encode('utf8')
comps = filter(None, [API_BASE, domain, searchtype, namespace, urlid, operation, output])
apiurl = '/'.join(comps)
if kwargs:
apiurl += '?%s' % urllib.urlencode(kwargs)

apiurl += '?%s' % urlencode(kwargs)
# Make request
try:
log.debug('Request URL: %s', apiurl)
log.debug('Request data: %s', postdata)
response = urllib2.urlopen(apiurl, postdata).read()
response = urlopen(apiurl, postdata).read()
return response
except urllib2.HTTPError as e:
except HTTPError as e:
raise PubChemHTTPError(e)


def get(identifier, namespace='cid', domain='compound', operation=None, output='JSON', searchtype=None, **kwargs):
"""Request wrapper that automatically handles async requests."""
if searchtype or namespace in ['formula']:
response = request(identifier, namespace, domain, None, 'JSON', searchtype, **kwargs)
status = json.loads(response)
status = json.loads(response.decode())
if 'Waiting' in status and 'ListKey' in status['Waiting']:
identifier = status['Waiting']['ListKey']
namespace = 'listkey'
while 'Waiting' in status and 'ListKey' in status['Waiting']:
time.sleep(2)
response = request(identifier, namespace, domain, operation, 'JSON', **kwargs)
status = json.loads(response)
status = json.loads(response.decode())
if not output == 'JSON':
response = request(identifier, namespace, domain, operation, output, searchtype, **kwargs)
else:
Expand All @@ -87,7 +96,7 @@ def get(identifier, namespace='cid', domain='compound', operation=None, output='
def get_json(identifier, namespace='cid', domain='compound', operation=None, searchtype=None, **kwargs):
"""Request wrapper that automatically parses JSON response and suppresses NotFoundError."""
try:
return json.loads(get(identifier, namespace, domain, operation, 'JSON', searchtype, **kwargs))
return json.loads(get(identifier, namespace, domain, operation, 'JSON', searchtype, **kwargs).decode())
except NotFoundError as e:
log.info(e)
return None
Expand Down Expand Up @@ -180,7 +189,7 @@ def get_assays(identifier, namespace='aid', **kwargs):


def get_properties(properties, identifier, namespace='cid', searchtype=None, as_dataframe=False, **kwargs):
if isinstance(properties, basestring):
if isinstance(properties, text_types):
properties = properties.split(',')
properties = ','.join([PROPERTY_MAP.get(p, p) for p in properties])
properties = 'property/%s' % properties
Expand Down Expand Up @@ -229,7 +238,7 @@ def get_aids(identifier, namespace='cid', domain='compound', searchtype=None, **

def get_all_sources(domain='substance'):
"""Return a list of all current depositors of substances or assays."""
results = json.loads(get(domain, None, 'sources'))
results = json.loads(get(domain, None, 'sources').decode())
return results['InformationList']['SourceName']


Expand Down Expand Up @@ -275,7 +284,7 @@ def from_cid(cls, cid, **kwargs):

:param cid: The PubChem Compound Identifier (CID).
"""
record = json.loads(request(cid, **kwargs))['PC_Compounds'][0]
record = json.loads(request(cid, **kwargs).decode())['PC_Compounds'][0]
return cls(record)

def __repr__(self):
Expand Down Expand Up @@ -330,15 +339,15 @@ def atoms(self):
}
if 'z' in self.record['coords'][0]['conformers'][0]:
a['z'] = self.record['coords'][0]['conformers'][0]['z']
atomlist = map(dict, zip(*[[(k, v) for v in value] for k, value in a.items()]))
atomlist = list(map(dict, list(zip(*[[(k, v) for v in value] for k, value in a.items()]))))
if 'charge' in self.record['atoms']:
for charge in self.record['atoms']['charge']:
atomlist[charge['aid']]['charge'] = charge['value']
return atomlist

@property
def bonds(self):
blist = map(dict, zip(*[[(k, v) for v in value] for k, value in self.record['bonds'].items()]))
blist = list(map(dict, list(zip(*[[(k, v) for v in value] for k, value in self.record['bonds'].items()]))))
if 'style' in self.record['coords'][0]['conformers'][0]:
style = self.record['coords'][0]['conformers'][0]['style']
for i, annotation in enumerate(style['annotation']):
Expand Down Expand Up @@ -557,7 +566,7 @@ def _parse_prop(search, proplist):
"""Extract property value from record using the given urn search filter."""
props = [i for i in proplist if all(item in i['urn'].items() for item in search.items())]
if len(props) > 0:
return props[0]['value'][props[0]['value'].keys()[0]]
return props[0]['value'][list(props[0]['value'].keys())[0]]


class Substance(object):
Expand All @@ -578,7 +587,7 @@ def from_sid(cls, sid):

:param sid: The PubChem Substance Identifier (SID).
"""
record = json.loads(request(sid, 'sid', 'substance'))['PC_Substances'][0]
record = json.loads(request(sid, 'sid', 'substance').decode())['PC_Substances'][0]
return cls(record)

def __init__(self, record):
Expand Down Expand Up @@ -689,7 +698,7 @@ def __init__(self, record):

@classmethod
def from_aid(cls, aid):
record = json.loads(request(aid, 'aid', 'assay'))['PC_AssayContainer'][0]
record = json.loads(request(aid, 'aid', 'assay').decode())['PC_AssayContainer'][0]
return cls(record)

@property
Expand Down Expand Up @@ -794,4 +803,4 @@ def __init__(self, msg='Some problem on the server side'):


if __name__ == '__main__':
print __version__
print(__version__)
116 changes: 58 additions & 58 deletions pubchempy_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,82 +26,82 @@ def setUp(self):

def test_requests(self):
"""Test basic raw requests."""
print request('coumarin', 'name', record_type='3d')
print request('CCN(C1=N/C(=C/2\SC(=NC2=N)N(CC)CC)/C(=N/Nc2ccc(cc2)S(=O)(=O)C(F)(F)F)/S1)CC', 'smiles')
print request('DTP/NCI', 'sourceid', 'substance', '747285', 'SDF')
print request('coumarin', 'name', output='PNG', image_size='50x50')
print(request('coumarin', 'name', record_type='3d'))
print(request('CCN(C1=N/C(=C/2\SC(=NC2=N)N(CC)CC)/C(=N/Nc2ccc(cc2)S(=O)(=O)C(F)(F)F)/S1)CC', 'smiles'))
print(request('DTP/NCI', 'sourceid', 'substance', '747285', 'SDF'))
print(request('coumarin', 'name', output='PNG', image_size='50x50'))

def test_listkeys(self):
"""Test asynchronous listkey requests."""
print get('CC', 'smiles', operation='cids', searchtype='superstructure')
print get(self.molform, 'formula', listkey_count=3)
print(get('CC', 'smiles', operation='cids', searchtype='superstructure'))
print(get(self.molform, 'formula', listkey_count=3))

def test_properties(self):
print get_properties('IsomericSMILES', self.rucomplex, 'name')
print(get_properties('IsomericSMILES', self.rucomplex, 'name'))

def test_synonyms(self):
print get_synonyms(self.phenanthrolinesmiles, 'smiles')
print(get_synonyms(self.phenanthrolinesmiles, 'smiles'))

def test_compounds(self):
c = Compound.from_cid(1)
print c.cid
print c.record
print c.atoms
print c.bonds
print c.charge
print c.molecular_formula
print c.molecular_weight
print c.canonical_smiles
print c.isomeric_smiles
print c.inchi
print c.inchikey
print c.iupac_name
print c.xlogp
print c.exact_mass
print c.monoisotopic_mass
print c.tpsa
print c.complexity
print c.h_bond_donor_count
print c.h_bond_acceptor_count
print c.rotatable_bond_count
print c.fingerprint
print c.heavy_atom_count
print c.isotope_atom_count
print c.atom_stereo_count
print c.defined_atom_stereo_count
print c.undefined_atom_stereo_count
print c.bond_stereo_count
print c.defined_bond_stereo_count
print c.undefined_bond_stereo_count
print c.covalent_unit_count
print c.coordinate_type
print(c.cid)
print(c.record)
print(c.atoms)
print(c.bonds)
print(c.charge)
print(c.molecular_formula)
print(c.molecular_weight)
print(c.canonical_smiles)
print(c.isomeric_smiles)
print(c.inchi)
print(c.inchikey)
print(c.iupac_name)
print(c.xlogp)
print(c.exact_mass)
print(c.monoisotopic_mass)
print(c.tpsa)
print(c.complexity)
print(c.h_bond_donor_count)
print(c.h_bond_acceptor_count)
print(c.rotatable_bond_count)
print(c.fingerprint)
print(c.heavy_atom_count)
print(c.isotope_atom_count)
print(c.atom_stereo_count)
print(c.defined_atom_stereo_count)
print(c.undefined_atom_stereo_count)
print(c.bond_stereo_count)
print(c.defined_bond_stereo_count)
print(c.undefined_bond_stereo_count)
print(c.covalent_unit_count)
print(c.coordinate_type)

c = Compound.from_cid(1, record_type='3d')
print c.volume_3d
print c.multipoles_3d
print c.conformer_rmsd_3d
print c.effective_rotor_count_3d
print c.pharmacophore_features_3d
print c.mmff94_partial_charges_3d
print c.mmff94_energy_3d
print c.conformer_id_3d
print c.shape_selfoverlap_3d
print c.feature_selfoverlap_3d
print c.shape_fingerprint_3d
print c.coordinate_type
print(c.volume_3d)
print(c.multipoles_3d)
print(c.conformer_rmsd_3d)
print(c.effective_rotor_count_3d)
print(c.pharmacophore_features_3d)
print(c.mmff94_partial_charges_3d)
print(c.mmff94_energy_3d)
print(c.conformer_id_3d)
print(c.shape_selfoverlap_3d)
print(c.feature_selfoverlap_3d)
print(c.shape_fingerprint_3d)
print(c.coordinate_type)

def test_csaids(self):
print get_cids('Aspirin', 'name', 'substance')
print get_cids('Aspirin', 'name', 'compound')
print get_sids('Aspirin', 'name', 'substance')
print get_aids('Aspirin', 'name', 'substance')
print get_aids('Aspirin', 'name', 'compound')
print(get_cids('Aspirin', 'name', 'substance'))
print(get_cids('Aspirin', 'name', 'compound'))
print(get_sids('Aspirin', 'name', 'substance'))
print(get_aids('Aspirin', 'name', 'substance'))
print(get_aids('Aspirin', 'name', 'compound'))

def test_assays(self):
print get_assays(1, sid='67107,67121,67122')
print(get_assays(1, sid='67107,67121,67122'))

def test_substructure(self):
print get_compounds('C1=CC2=C(C3=C(C=CC=N3)C=C2)N=C1', 'smiles', searchtype='substructure', listkey_count=3)
print(get_compounds('C1=CC2=C(C3=C(C=CC=N3)C=C2)N=C1', 'smiles', searchtype='substructure', listkey_count=3))

def test_equality(self):
self.assertEqual(Compound.from_cid(241), Compound.from_cid(241))
Expand Down