Skip to content
Permalink
Browse files

Add functionality to retrieve specific types of structure info from PubChem (#544)

* implement the structure argument for geometry_from_pubchem

* add a test case for the argument 'structure' in geometry_from_pubchem

* add the molecule name to the error message

* explicitly check all scenarios in the if statement
  • Loading branch information
hay-k authored and ncrubin committed Dec 15, 2019
1 parent d591122 commit 6bba343b99cad8b73cd449d64cfdfc1d5f381a05
Showing with 42 additions and 22 deletions.
  1. +29 −22 src/openfermion/utils/_pubchem.py
  2. +13 −0 src/openfermion/utils/_pubchem_test.py
@@ -11,43 +11,50 @@
# limitations under the License.


def geometry_from_pubchem(name, structure=None):
    """Function to extract geometry using the molecule's name from the PubChem
    database. The 'structure' argument can be used to specify which structure
    info to use to extract the geometry. If structure=None, the geometry will
    be constructed based on 3D info, if available, otherwise on 2D (to keep
    backwards compatibility with the times when the argument 'structure'
    was not implemented).

    Args:
        name: a string giving the molecule's name as required by the PubChem
            database.
        structure: a string '2d' or '3d', to specify a specific structure
            information to be retrieved from pubchem. The default is None.
            Recommended value is '3d'.

    Returns:
        geometry: a list of tuples giving the coordinates of each atom with
            distances in Angstrom. Each entry has the form
            (element, (x, y, z)); z is 0 when the record carries no
            z-coordinate. None is returned (and a message is printed) if no
            structure info is found for the molecule.

    Raises:
        ValueError: if structure is neither None, '2d' nor '3d'.
    """
    # Validate the argument first so that a bad value is reported without
    # importing pubchempy or issuing any request.
    if structure is not None and structure not in ('2d', '3d'):
        # The one-element tuple keeps the formatting correct even when
        # 'structure' is itself a tuple.
        raise ValueError(
            'Incorrect value for the argument structure=%s' % (structure,))

    import pubchempy

    if structure is None:
        # Ideally get the 3-D geometry if available.
        pubchempy_molecule = pubchempy.get_compounds(name, 'name',
                                                     record_type='3d')

        # If the 3-D geometry isn't available, get the 2-D geometry instead.
        if not pubchempy_molecule:
            pubchempy_molecule = pubchempy.get_compounds(name, 'name',
                                                         record_type='2d')
    else:
        pubchempy_molecule = pubchempy.get_compounds(name, 'name',
                                                     record_type=structure)

    # Check if pubchempy_molecule is an empty list or None
    if not pubchempy_molecule:
        print('Unable to find structure info in the PubChem database '
              'for the specified molecule "%s".' % (name,))
        return None

    # Only the first matching compound is used.
    pubchempy_geometry = \
        pubchempy_molecule[0].to_dict(properties=['atoms'])['atoms']

    # Atoms without a 'z' entry are placed in the z = 0 plane.
    geometry = [(atom['element'], (atom['x'], atom['y'], atom.get('z', 0)))
                for atom in pubchempy_geometry]

    return geometry
@@ -76,3 +76,16 @@ def test_none(self):
none_geometry = geometry_from_pubchem('none')

self.assertIsNone(none_geometry)

def test_water_2d(self):
    """Request the '2d' structure of water; expect 3 atoms and zero z."""
    water_geometry = geometry_from_pubchem('water', structure='2d')

    # The atom count is stored on the test instance and compared with 3.
    self.water_natoms = len(water_geometry)
    expected_natoms = 3
    self.assertEqual(expected_natoms, self.water_natoms)

    # Each entry is (element, (x, y, z)); pick z for the first two atoms.
    self.oxygen_z_1 = water_geometry[0][1][2]
    self.oxygen_z_2 = water_geometry[1][1][2]
    expected_z = 0
    for observed_z in (self.oxygen_z_1, self.oxygen_z_2):
        self.assertEqual(expected_z, observed_z)

0 comments on commit 6bba343

Please sign in to comment.
You can’t perform that action at this time.