diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..be1907c --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +__pycache__/ +*.py[cod] +build/ +dist/ +*.egg-info/ +.coverage +.coverage.* +.cache +.pytest_cache/ +.ipynb_checkpoints +.idea/ diff --git a/.travis.yml b/.travis.yml index 4e84357..c43afe6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,13 +5,9 @@ python: - "3.6" env: global: - - secure: "QQlGoUF2W9DDvdhXhMMfJyA2nSlj8J0QH31b9A1JlbSoIf89H5cjEcYYWqP4WF5sVyBdTm6vbjqQijWSJAjMZkxJduUMYYsHGQZDFuYoyKherS6JbhbJOZH7TmbcepSbBkDf1IJ/JxTMw2yZdXXMO/YmGq6Tc5dmch2XKOGm4f0=" - matrix: - - OPTIONAL_DEPS=true - - OPTIONAL_DEPS=false + - secure: "N/t4txw1k9bOUsLQWQOpzdJpMAdFFzzJqN2rWiijMJPEC9E1meoKMzFYr4kgNjInhVfkud8+3fOHZL/Ns4MLWexf1vsG1NFvrXSBuBD6MlPKYe77bb9WTmRvWfLSDg6F5BP/1uFjwebj4USN14RWlxyIgmsC1+bdCFVN2Wktg4k=" install: - pip install coveralls pytest requests six - - if [ "$OPTIONAL_DEPS" = true ]; then pip install lxml; fi script: - coverage run --source=chemspipy -m pytest after_success: diff --git a/LICENSE b/LICENSE index 717ad00..fc606f2 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ The MIT License -Copyright (c) 2013 Matt Swain +Copyright (c) 2018 Matt Swain Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.rst b/README.rst index 8f4518a..ef9fdf0 100644 --- a/README.rst +++ b/README.rst @@ -17,7 +17,7 @@ ChemSpiPy provides a way to interact with ChemSpider in Python. It allows chemic depiction and retrieval of chemical properties:: >>> from chemspipy import ChemSpider - >>> cs = ChemSpider('') + >>> cs = ChemSpider('') >>> c1 = cs.get_compound(236) # Specify compound by ChemSpider ID >>> c2 = cs.search('benzene') # Search using name, SMILES, InChI, InChIKey, etc. 
diff --git a/chemspipy/__init__.py b/chemspipy/__init__.py index b018155..99c9663 100644 --- a/chemspipy/__init__.py +++ b/chemspipy/__init__.py @@ -5,8 +5,6 @@ Python wrapper for the ChemSpider API. -:copyright: Copyright 2014 by Matt Swain. -:license: MIT, see LICENSE file for more details. """ from __future__ import print_function @@ -20,7 +18,7 @@ __license__ = 'MIT' -from .api import ChemSpider, MOL2D, MOL3D, BOTH, ASCENDING, DESCENDING, CSID, MASS_DEFECT, MOLECULAR_WEIGHT +from .api import ChemSpider, MOL2D, MOL3D, BOTH, ASCENDING, DESCENDING, RECORD_ID, CSID, MASS_DEFECT, MOLECULAR_WEIGHT from .api import REFERENCE_COUNT, DATASOURCE_COUNT, PUBMED_COUNT, RSC_COUNT -from .objects import Compound, Spectrum +from .objects import Compound from .search import Results diff --git a/chemspipy/api.py b/chemspipy/api.py index 960b147..8019ff4 100644 --- a/chemspipy/api.py +++ b/chemspipy/api.py @@ -5,38 +5,32 @@ Core API for interacting with ChemSpider web services. -:copyright: Copyright 2014 by Matt Swain. -:license: MIT, see LICENSE file for more details. """ from __future__ import print_function from __future__ import unicode_literals from __future__ import division -from base64 import b64decode +import base64 import logging import sys import warnings - -try: - from lxml import etree -except ImportError: - try: - import xml.etree.cElementTree as etree - except ImportError: - import xml.etree.ElementTree as etree +import zlib import requests -import six -from . import __version__ -from .errors import ChemSpiPyError, ChemSpiPyParseError, ChemSpiPyAuthError, ChemSpiPyServerError -from .errors import ChemSpiPyNotFoundError -from .objects import Compound, Spectrum +from . import __version__, errors +from .objects import Compound from .search import Results log = logging.getLogger(__name__) + +#: Default API URL. +API_URL = 'https://api.rsc.org' +#: Default API version. 
+API_VERSION = 'v1' + #: 2D coordinate dimensions MOL2D = '2d' #: 3D coordinate dimensions @@ -49,7 +43,9 @@ #: Descending sort direction DESCENDING = 'descending' -#: CSID sort order +#: Record ID sort order +RECORD_ID = 'record_id' +#: CSID sort order (same as RECORD_ID, kept for backwards compatibility) CSID = 'csid' #: Mass defect sort order MASS_DEFECT = 'mass_defect' @@ -65,175 +61,600 @@ RSC_COUNT = 'rsc_count' -#: Coordinate dimensions -DIMENSIONS = { - MOL2D: 'e2D', - MOL3D: 'e3D', - BOTH: 'eBoth' -} - -#: Sort directions +#: Map sort directions to strings required by REST API. DIRECTIONS = { - ASCENDING: 'eAscending', - DESCENDING: 'eDescending' + ASCENDING: 'ascending', + DESCENDING: 'descending' } -#: Sort orders +#: Map sort orders to strings required by REST API. ORDERS = { - CSID: 'eCSID', - MASS_DEFECT: 'eMassDefect', - MOLECULAR_WEIGHT: 'eMolecularWeight', - REFERENCE_COUNT: 'eReferenceCount', - DATASOURCE_COUNT: 'eDataSourceCount', - PUBMED_COUNT: 'ePubMedCount', - RSC_COUNT: 'eRscCount' + RECORD_ID: 'recordId', + CSID: 'recordId', + MASS_DEFECT: 'massDefect', + MOLECULAR_WEIGHT: 'molecularWeight', + REFERENCE_COUNT: 'referenceCount', + DATASOURCE_COUNT: 'dataSourceCount', + PUBMED_COUNT: 'pubMedCount', + RSC_COUNT: 'rscCount' } -#: API to python field mappings -FIELDS = { - 'CSID': ('csid', int), - 'csid': ('csid', int), - 'MF': ('molecular_formula', six.text_type), - 'SMILES': ('smiles', six.text_type), - 'InChI': ('inchi', six.text_type), - 'InChIKey': ('inchikey', six.text_type), - 'AverageMass': ('average_mass', float), - 'MolecularWeight': ('molecular_weight', float), - 'MonoisotopicMass': ('monoisotopic_mass', float), - 'NominalMass': ('nominal_mass', float), - 'ALogP': ('alogp', float), - 'XLogP': ('xlogp', float), - 'CommonName': ('common_name', six.text_type), - 'MOL2d': ('mol_2d', six.text_type), - 'MOL3d': ('mol_3d', six.text_type), - 'ReferenceCount': ('reference_count', int), - 'DataSourceCount': ('datasource_count', int), - 
'PubMedCount': ('pubmed_count', int), - 'RSCCount': ('rsc_count', int), - 'ExternalReferences': ('external_references', list), - 'ds_name': ('datasource_name', six.text_type), - 'ds_url': ('datasource_url', six.text_type), - 'ext_id': ('external_id', six.text_type), - 'ext_url': ('external_url', six.text_type), - 'Status': ('status', six.text_type), - 'Count': ('count', int), - 'Message': ('message', six.text_type), - 'Elapsed': ('elapsed', six.text_type), - 'spc_id': ('spectrum_id', int), - 'spc_type': ('spectrum_type', six.text_type), - 'file_name': ('file_name', six.text_type), - 'comments': ('comments', six.text_type), - 'original_url': ('original_url', six.text_type), - 'submitted_date': ('submitted_date', six.text_type), -} +#: All available compound details fields. +FIELDS = [ + 'SMILES', 'Formula', 'AverageMass', 'MolecularWeight', 'MonoisotopicMass', 'NominalMass', 'CommonName', + 'ReferenceCount', 'DataSourceCount', 'PubMedCount', 'RSCCount', 'Mol2D', 'Mol3D' +] class BaseChemSpider(object): - def __init__(self, security_token=None, user_agent=None, api_url=None): + def __init__(self, api_key, user_agent=None, api_url=API_URL, api_version=API_VERSION): """ - :param string security_token: (Optional) Your ChemSpider security token. + :param string api_key: Your ChemSpider API key. :param string user_agent: (Optional) Identify your application to ChemSpider servers. - :param string api_url: (Optional) Alternative API server. + :param string api_url: (Optional) API server. Default https://api.rsc.org. + :param string api_version: (Optional) API version. Default v1. 
""" log.debug('Initializing ChemSpider') - self.api_url = api_url if api_url else 'https://www.chemspider.com' + self.api_url = api_url self.http = requests.session() - self.http.headers['User-Agent'] = user_agent if user_agent else 'ChemSpiPy/%s Python/%s ' % (__version__, sys.version.split()[0]) - self.security_token = security_token + self.http.headers['User-Agent'] = user_agent if user_agent else 'ChemSpiPy/{} Python/{} '.format( + __version__, sys.version.split()[0] + ) + self.api_key = api_key + self.api_version = api_version + + def request(self, method, api, namespace, endpoint, params=None, json=None): + """Make a request to the ChemSpider API. + + :param string method: HTTP method. + :param string api: Top-level API, e.g. compounds. + :param string namespace: API namespace, e.g. filter, lookups, records, or tools. + :param string endpoint: Web service endpoint URL. + :param dict params: Query parameters to add to the URL. + :param dict json: JSON data to send in the request body. + :return: Web Service response JSON. 
+ :rtype: dict + """ + # Construct request URL + url = '{}/{}/{}/{}/{}'.format(self.api_url, api, self.api_version, namespace, endpoint) + + # Set apikey header + headers = {'apikey': self.api_key} + + log.debug('{} : {} : {} : {}'.format(url, headers, params, json)) + + # Make request + r = self.http.request(method, url, params=params, json=json, headers=headers) + + # Raise exception for HTTP errors + if not r.ok: + err = { + 400: errors.ChemSpiPyBadRequestError, + 401: errors.ChemSpiPyAuthError, + 404: errors.ChemSpiPyNotFoundError, + 405: errors.ChemSpiPyMethodError, + 413: errors.ChemSpiPyPayloadError, + 429: errors.ChemSpiPyRateError, + 500: errors.ChemSpiPyServerError, + 503: errors.ChemSpiPyUnavailableError + }.get(r.status_code, errors.ChemSpiPyHTTPError) + raise err(message=r.reason, http_code=r.status_code) + + log.debug('Request duration: {}'.format(r.elapsed)) + return r.json() + + def get(self, api, namespace, endpoint, params=None): + """Convenience method for making GET requests. + + :param string api: Top-level API, e.g. compounds. + :param string namespace: API namespace, e.g. filter, lookups, records, or tools. + :param string endpoint: Web service endpoint URL. + :param dict params: Query parameters to add to the URL. + :return: Web Service response JSON. + :rtype: dict + """ + return self.request('GET', api=api, namespace=namespace, endpoint=endpoint, params=params) - def request(self, api, endpoint, **params): - """Construct API request and return the XML response. + def post(self, api, namespace, endpoint, json=None): + """Convenience method for making POST requests. - :param string api: The specific ChemSpider API to call (MassSpec, Search, Spectra, InChI). - :param string endpoint: ChemSpider API endpoint. - :param params: (Optional) Parameters for the ChemSpider endpoint as keyword arguments. - :rtype: xml tree + :param string api: Top-level API, e.g. compounds. + :param string namespace: API namespace, e.g. 
filter, lookups, records, or tools. + :param string endpoint: Web service endpoint URL. + :param dict json: JSON data to send in the request body. + :return: Web Service response content. + :rtype: dict or string """ - url = '%s/%s.asmx/%s' % (self.api_url, api, endpoint) - log.debug('Request: %s %s', url, params) - params['token'] = self.security_token - try: - response = self.http.post(url, data=params) - except requests.RequestException as e: - raise ChemSpiPyError(six.text_type(e)) - if response.status_code == 500: - if 'Missing parameter: token.' in response.text: - raise ChemSpiPyAuthError('Endpoint requires a security token.') - elif 'Error converting data type nvarchar to uniqueidentifier' in response.text: - # Generally when supplying a security token with incorrect format - raise ChemSpiPyAuthError('Invalid security token. Did you copy the entire token?') - elif 'Unauthorized web service usage' in response.text: - # Fake/incorrect token (but in correct format) - raise ChemSpiPyAuthError(response.text) - elif 'Unable to get record details' in response.text: - # Generally when requesting a non-existent CSID - raise ChemSpiPyNotFoundError(response.text) - elif 'Unable to get records spectra' in response.text: - # No spectra for a CSID, shouldn't be an exception - return [] - else: - raise ChemSpiPyServerError(response.text) - try: - tree = etree.fromstring(response.content) - except etree.ParseError as e: - raise ChemSpiPyParseError('Unable to parse XML response: %s' % e) - return tree + return self.request('POST', api=api, namespace=namespace, endpoint=endpoint, json=json) + + +class LookupsApi(BaseChemSpider): + """""" + + def get_datasources(self): + """Get the list of datasources in ChemSpider. + + Many other endpoints let you restrict which sources are used to lookup the requested query. Restricting the + sources makes queries faster. + + :return: List of datasources. 
+ :rtype: list[string] + """ + response = self.get(api='compounds', namespace='lookups', endpoint='datasources') + return response['dataSources'] + + +class RecordsApi(BaseChemSpider): + """""" + + def get_details(self, record_id, fields=FIELDS): + """Get details for a compound record. + + The available fields are listed in :data:`~chemspipy.api.FIELDS`. + + :param int record_id: Record ID. + :param list[string] fields: (Optional) List of fields to include in the result. + :return: Record details. + :rtype: dict + """ + params = {'fields': ','.join(fields)} + endpoint = '{}/details'.format(record_id) + response = self.get(api='compounds', namespace='records', endpoint=endpoint, params=params) + return response + + def get_details_batch(self, record_ids, fields=FIELDS): + """Get details for a list of compound records. + + The available fields are listed in :data:`~chemspipy.api.FIELDS`. + + :param list[int] record_ids: List of record IDs (up to 100). + :param list[string] fields: (Optional) List of fields to include in the results. + :return: List of record details. + :rtype: list[dict] + """ + json = {'recordIds': record_ids, 'fields': fields} + response = self.post(api='compounds', namespace='records', endpoint='batch', json=json) + return response['records'] + + def get_external_references(self, record_id, datasources=None): + """Get external references for a compound record. + + Optionally filter the results by data source. Use :meth:`~chemspipy.api.ChemSpider.get_datasources` to get the + available datasources. + + :param int record_id: Record ID. + :param list[string] datasources: (Optional) List of datasources to restrict the results to. + :return: External references. 
+ :rtype: list[dict] + """ + params = {} + if datasources is not None: + params['dataSources'] = ','.join(datasources) + endpoint = '{}/externalreferences'.format(record_id) + response = self.get(api='compounds', namespace='records', endpoint=endpoint, params=params) + return response['externalReferences'] + + def get_image(self, record_id): + """Get image for a compound record. + + :param int record_id: Record ID. + :return: Image. + :rtype: bytes + """ + endpoint = '{}/image'.format(record_id) + response = self.get(api='compounds', namespace='records', endpoint=endpoint) + return base64.b64decode(response['image']) + + def get_mol(self, record_id): + """Get MOLfile for a compound record. + + :param int record_id: Record ID. + :return: MOLfile. + :rtype: string + """ + endpoint = '{}/mol'.format(record_id) + response = self.get(api='compounds', namespace='records', endpoint=endpoint) + return response['sdf'] + - def construct_api_url(self, api, endpoint, **params): - """Construct a Chemspider API url, encoded, with parameters as a GET querystring. +class FilterApi(BaseChemSpider): + """""" - :param string api: The specific ChemSpider API to call (MassSpecAPI, Search, Spectra, InChI). - :param string endpoint: ChemSpider API endpoint. - :param params: (Optional) Parameters for the ChemSpider endpoint as keyword arguments. + def filter_element(self, include_elements, exclude_elements=None, include_all=False, complexity=None, isotopic=None, + order=None, direction=None): + """Search compounds by element. + + Set include_all to true to only consider records that contain all of the elements in ``include_elements``, + otherwise all records that contain any of the elements will be returned. + + A compound with a complexity of 'multiple' has more than one disconnected system in it or a metal atom or ion. 
+ + The accepted values for ``order`` are: :data:`~chemspipy.api.RECORD_ID`, :data:`~chemspipy.api.MASS_DEFECT`, + :data:`~chemspipy.api.MOLECULAR_WEIGHT`, :data:`~chemspipy.api.REFERENCE_COUNT`, + :data:`~chemspipy.api.DATASOURCE_COUNT`, :data:`~chemspipy.api.PUBMED_COUNT` or + :data:`~chemspipy.api.RSC_COUNT`. + + :param list[string] include_elements: List of up to 15 elements to search for compounds containing. + :param list[string] exclude_elements: List of up to 100 elements to exclude compounds containing. + :param bool include_all: (Optional) Whether to only include compounds that have all include_elements. + :param string complexity: (Optional) 'any', 'single', or 'multiple' + :param string isotopic: (Optional) 'any', 'labeled', or 'unlabeled'. + :param string order: (Optional) Field to sort the result by. + :param string direction: (Optional) :data:`~chemspipy.api.ASCENDING` or :data:`~chemspipy.api.DESCENDING`. + :return: Query ID that may be passed to ``filter_status`` and ``filter_results``. + :rtype: string + """ + json = { + 'includeElements': include_elements, + 'excludeElements': exclude_elements, + 'options': {'includeAll': include_all, 'complexity': complexity, 'isotopic': isotopic}, + 'orderBy': ORDERS.get(order), + 'orderDirection': DIRECTIONS.get(direction) + } + response = self.post(api='compounds', namespace='filter', endpoint='element', json=json) + return response['queryId'] + + def filter_formula(self, formula, datasources=None, order=None, direction=None): + """Search compounds by formula. + + Optionally filter the results by data source. Use :meth:`~chemspipy.api.ChemSpider.get_datasources` to get the + available datasources. + + The accepted values for ``order`` are: :data:`~chemspipy.api.RECORD_ID`, :data:`~chemspipy.api.MASS_DEFECT`, + :data:`~chemspipy.api.MOLECULAR_WEIGHT`, :data:`~chemspipy.api.REFERENCE_COUNT`, + :data:`~chemspipy.api.DATASOURCE_COUNT`, :data:`~chemspipy.api.PUBMED_COUNT` or + :data:`~chemspipy.api.RSC_COUNT`. 
+ + :param string formula: Molecular formula. + :param list[string] datasources: (Optional) List of datasources to restrict the results to. + :param string order: (Optional) Field to sort the result by. + :param string direction: (Optional) :data:`~chemspipy.api.ASCENDING` or :data:`~chemspipy.api.DESCENDING`. + :return: Query ID that may be passed to ``filter_status`` and ``filter_results``. + :rtype: string + """ + json = { + 'formula': formula, + 'dataSources': datasources, + 'orderBy': ORDERS.get(order), + 'orderDirection': DIRECTIONS.get(direction) + } + response = self.post(api='compounds', namespace='filter', endpoint='formula', json=json) + return response['queryId'] + + def filter_formula_batch(self, formulas, datasources=None, order=None, direction=None): + """Search compounds with a list of formulas. + + Optionally filter the results by data source. Use :meth:`~chemspipy.api.ChemSpider.get_datasources` to get the + available datasources. + + The accepted values for ``order`` are: :data:`~chemspipy.api.RECORD_ID`, :data:`~chemspipy.api.MASS_DEFECT`, + :data:`~chemspipy.api.MOLECULAR_WEIGHT`, :data:`~chemspipy.api.REFERENCE_COUNT`, + :data:`~chemspipy.api.DATASOURCE_COUNT`, :data:`~chemspipy.api.PUBMED_COUNT` or + :data:`~chemspipy.api.RSC_COUNT`. + + :param list[string] formulas: List of molecular formulas. + :param list[string] datasources: (Optional) List of datasources to restrict the results to. + :param string order: (Optional) Field to sort the result by. + :param string direction: (Optional) :data:`~chemspipy.api.ASCENDING` or :data:`~chemspipy.api.DESCENDING`. + :return: Query ID that may be passed to ``filter_formula_batch_status`` and ``filter_formula_batch_results``. 
+ :rtype: string + """ + json = { + 'formulas': formulas, + 'dataSources': datasources, + 'orderBy': ORDERS.get(order), + 'orderDirection': DIRECTIONS.get(direction) + } + response = self.post(api='compounds', namespace='filter', endpoint='formula/batch', json=json) + return response['queryId'] + + def filter_formula_batch_status(self, query_id): + """Get formula batch filter status using a query ID that was returned by a previous filter request. + + :param string query_id: Query ID from a previous formula batch filter request. + :return: Status dict with 'status', 'count', and 'message' fields. + :rtype: dict + """ + endpoint = 'formula/batch/{}/status'.format(query_id) + response = self.get(api='compounds', namespace='filter', endpoint=endpoint) + return response + + def filter_formula_batch_results(self, query_id): + """Get formula batch filter results using a query ID that was returned by a previous filter request. + + Each result is a dict containing a ``formula`` key and a ``results`` key. + + :param string query_id: Query ID from a previous formula batch filter request. + :return: List of results. + :rtype: list[dict] + """ + endpoint = 'formula/batch/{}/results'.format(query_id) + response = self.get(api='compounds', namespace='filter', endpoint=endpoint) + return response['batchResults'] + + def filter_inchi(self, inchi): + """Search compounds by InChI. + + :param string inchi: InChI. + :return: Query ID that may be passed to ``filter_status`` and ``filter_results``. + :rtype: string + """ + json = {'inchi': inchi} + response = self.post(api='compounds', namespace='filter', endpoint='inchi', json=json) + return response['queryId'] + + def filter_inchikey(self, inchikey): + """Search compounds by InChIKey. + + :param string inchikey: InChIKey. + :return: Query ID that may be passed to ``filter_status`` and ``filter_results``. 
+ :rtype: string + """ + json = {'inchikey': inchikey} + response = self.post(api='compounds', namespace='filter', endpoint='inchikey', json=json) + return response['queryId'] + + def filter_intrinsicproperty(self, formula=None, molecular_weight=None, nominal_mass=None, average_mass=None, + monoisotopic_mass=None, molecular_weight_range=None, nominal_mass_range=None, + average_mass_range=None, monoisotopic_mass_range=None, complexity=None, isotopic=None, + order=None, direction=None): + """Search compounds by intrinsic property, such as formula and mass. + + At least one of formula, molecular_weight, nominal_mass, average_mass, monoisotopic_mass must be specified. + + A compound with a complexity of 'multiple' has more than one disconnected system in it or a metal atom or ion. + + The accepted values for ``order`` are: :data:`~chemspipy.api.RECORD_ID`, :data:`~chemspipy.api.MASS_DEFECT`, + :data:`~chemspipy.api.MOLECULAR_WEIGHT`, :data:`~chemspipy.api.REFERENCE_COUNT`, + :data:`~chemspipy.api.DATASOURCE_COUNT`, :data:`~chemspipy.api.PUBMED_COUNT` or + :data:`~chemspipy.api.RSC_COUNT`. + + :param string formula: Molecular formula. + :param float molecular_weight: Molecular weight. + :param float nominal_mass: Nominal mass. + :param float average_mass: Average mass. + :param float monoisotopic_mass: Monoisotopic mass. + :param float molecular_weight_range: Molecular weight range. + :param float nominal_mass_range: Nominal mass range. + :param float average_mass_range: Average mass range. + :param float monoisotopic_mass_range: Monoisotopic mass range. + :param string complexity: (Optional) 'any', 'single', or 'multiple' + :param string isotopic: (Optional) 'any', 'labeled', or 'unlabeled'. + :param string order: (Optional) Field to sort the result by. + :param string direction: (Optional) :data:`~chemspipy.api.ASCENDING` or :data:`~chemspipy.api.DESCENDING`. + :return: Query ID that may be passed to ``filter_status`` and ``filter_results``. 
+ :rtype: string + """ + json = { + 'formula': formula, + 'options': {'complexity': complexity, 'isotopic': isotopic}, + 'orderBy': ORDERS.get(order), + 'orderDirection': DIRECTIONS.get(direction) + } + if molecular_weight is not None and molecular_weight_range is not None: + json['molecularWeight'] = {'mass': molecular_weight, 'range': molecular_weight_range} + if nominal_mass is not None and nominal_mass_range is not None: + json['nominalMass'] = {'mass': nominal_mass, 'range': nominal_mass_range} + if average_mass is not None and average_mass_range is not None: + json['averageMass'] = {'mass': average_mass, 'range': average_mass_range} + if monoisotopic_mass is not None and monoisotopic_mass_range is not None: + json['monoisotopicMass'] = {'mass': monoisotopic_mass, 'range': monoisotopic_mass_range} + response = self.post(api='compounds', namespace='filter', endpoint='intrinsicproperty', json=json) + return response['queryId'] + + def filter_mass(self, mass, mass_range, datasources=None, order=None, direction=None): + """Search compounds by mass. + + Filter to compounds within ``mass_range`` of the given ``mass``. + + Optionally filter the results by data source. Use :meth:`~chemspipy.api.ChemSpider.get_datasources` to get the + available datasources. + + The accepted values for ``order`` are: :data:`~chemspipy.api.RECORD_ID`, :data:`~chemspipy.api.MASS_DEFECT`, + :data:`~chemspipy.api.MOLECULAR_WEIGHT`, :data:`~chemspipy.api.REFERENCE_COUNT`, + :data:`~chemspipy.api.DATASOURCE_COUNT`, :data:`~chemspipy.api.PUBMED_COUNT` or + :data:`~chemspipy.api.RSC_COUNT`. + + :param float mass: Mass between 1 and 11000 Atomic Mass Units. + :param float mass_range: Mass range between 0.0001 and 100 Atomic Mass Units. + :param list[string] datasources: (Optional) List of datasources to restrict the results to. + :param string order: (Optional) Field to sort the result by. 
+ :param string direction: (Optional) :data:`~chemspipy.api.ASCENDING` or :data:`~chemspipy.api.DESCENDING`. + :return: Query ID that may be passed to ``filter_status`` and ``filter_results``. :rtype: string """ - querystring = [] - for k, v in params.items(): - querystring.append('%s=%s' % (k, six.moves.urllib.parse.quote_plus(six.text_type(v)))) - if self.security_token: - querystring.append('token=%s' % self.security_token) - return '%s/%s.asmx/%s?%s' % (self.api_url, api, endpoint, '&'.join(querystring)) - - -def xml_to_dict(t): - """Convert a ChemSpider XML response to a python dict.""" - d = {} - for child in t: - tag = child.tag.split('}')[1] - tag, rtype = FIELDS.get(tag, (tag, six.text_type)) - if rtype == list: - d[tag] = [xml_to_dict(grandchild) for grandchild in child] - elif rtype == dict: - d[tag] = xml_to_dict(child) - elif child.text is not None: - d[tag] = rtype(child.text.strip()) - return d + json = { + 'mass': mass, + 'range': mass_range, + 'dataSources': datasources, + 'orderBy': ORDERS.get(order), + 'orderDirection': DIRECTIONS.get(direction) + } + response = self.post(api='compounds', namespace='filter', endpoint='mass', json=json) + return response['queryId'] + + def filter_mass_batch(self, masses, datasources=None, order=None, direction=None): + """Search compounds with a list of masses and mass ranges. + + The ``masses`` parameter should be a list of tuples, each with two elements: A mass, and a mass range:: + + qid = cs.filter_mass_batch(masses=[(12, 0.001), (24, 0.001)]) + + Optionally filter the results by data source. Use :meth:`~chemspipy.api.ChemSpider.get_datasources` to get the + available datasources. + + The accepted values for ``order`` are: :data:`~chemspipy.api.RECORD_ID`, :data:`~chemspipy.api.MASS_DEFECT`, + :data:`~chemspipy.api.MOLECULAR_WEIGHT`, :data:`~chemspipy.api.REFERENCE_COUNT`, + :data:`~chemspipy.api.DATASOURCE_COUNT`, :data:`~chemspipy.api.PUBMED_COUNT` or + :data:`~chemspipy.api.RSC_COUNT`. 
+ + :param list[tuple[float, float]] masses: List of (mass, range) tuples. + :param list[string] datasources: (Optional) List of datasources to restrict the results to. + :param string order: (Optional) Field to sort the result by. + :param string direction: (Optional) :data:`~chemspipy.api.ASCENDING` or :data:`~chemspipy.api.DESCENDING`. + :return: Query ID that may be passed to ``filter_mass_batch_status`` and ``filter_mass_batch_results``. + :rtype: string + """ + masses = [{'mass': m, 'range': r} for m, r in masses] + json = { + 'masses': masses, + 'dataSources': datasources, + 'orderBy': ORDERS.get(order), + 'orderDirection': DIRECTIONS.get(direction) + } + response = self.post(api='compounds', namespace='filter', endpoint='mass/batch', json=json) + return response['queryId'] + + def filter_mass_batch_status(self, query_id): + """Get mass batch filter status using a query ID that was returned by a previous filter request. + + :param string query_id: Query ID from a previous mass batch filter request. + :return: Status dict with 'status', 'count', and 'message' fields. + :rtype: dict + """ + endpoint = 'mass/batch/{}/status'.format(query_id) + response = self.get(api='compounds', namespace='filter', endpoint=endpoint) + return response + + def filter_mass_batch_results(self, query_id): + """Get mass batch filter results using a query ID that was returned by a previous filter request. + + Each result is a dict containing a ``formula`` key and a ``results`` key. + + :param string query_id: Query ID from a previous mass batch filter request. + :return: List of results. + :rtype: list[dict] + """ + endpoint = 'mass/batch/{}/results'.format(query_id) + response = self.get(api='compounds', namespace='filter', endpoint=endpoint) + return response['batchResults'] + + def filter_name(self, name, order=None, direction=None): + """Search compounds by name. 
+ + The accepted values for ``order`` are: :data:`~chemspipy.api.RECORD_ID`, :data:`~chemspipy.api.MASS_DEFECT`, + :data:`~chemspipy.api.MOLECULAR_WEIGHT`, :data:`~chemspipy.api.REFERENCE_COUNT`, + :data:`~chemspipy.api.DATASOURCE_COUNT`, :data:`~chemspipy.api.PUBMED_COUNT` or + :data:`~chemspipy.api.RSC_COUNT`. + + :param string name: Compound name. + :param string order: (Optional) Field to sort the result by. + :param string direction: (Optional) :data:`~chemspipy.api.ASCENDING` or :data:`~chemspipy.api.DESCENDING`. + :return: Query ID that may be passed to ``filter_status`` and ``filter_results``. + :rtype: string + """ + json = {'name': name, 'orderBy': ORDERS.get(order), 'orderDirection': DIRECTIONS.get(direction)} + response = self.post(api='compounds', namespace='filter', endpoint='name', json=json) + return response['queryId'] + + def filter_smiles(self, smiles): + """Search compounds by SMILES. + + :param string smiles: Compound SMILES. + :return: Query ID that may be passed to ``filter_status`` and ``filter_results``. + :rtype: string + """ + json = {'smiles': smiles} + response = self.post(api='compounds', namespace='filter', endpoint='smiles', json=json) + return response['queryId'] + + def filter_status(self, query_id): + """Get filter status using a query ID that was returned by a previous filter request. + + :param string query_id: Query ID from a previous filter request. + :return: Status dict with 'status', 'count', and 'message' fields. + :rtype: dict + """ + endpoint = '{}/status'.format(query_id) + response = self.get(api='compounds', namespace='filter', endpoint=endpoint) + return response + + def filter_results(self, query_id, start=None, count=None): + """Get filter results using a query ID that was returned by a previous filter request. + + :param string query_id: Query ID from a previous filter request. + :param int start: Zero-based results offset. + :param int count: Number of results to return. + :return: List of results. 
+ :rtype: list[int] + """ + endpoint = '{}/results'.format(query_id) + params = {'start': start, 'count': count} + response = self.get(api='compounds', namespace='filter', endpoint=endpoint, params=params) + return response['results'] + + def filter_results_sdf(self, query_id): + """Get filter results as SDF file using a query ID that was returned by a previous filter request. + + :param string query_id: Query ID from a previous filter request. + :return: SDF file containing the results. + :rtype: bytes + """ + endpoint = '{}/results/sdf'.format(query_id) + response = self.get(api='compounds', namespace='filter', endpoint=endpoint) + return zlib.decompress(base64.b64decode(response['results']), 16 + zlib.MAX_WBITS) + + +class ToolsApi(BaseChemSpider): + """""" + + def convert(self, input, input_format, output_format): + """Convert a chemical from one format to another. + + Format: ``SMILES``, ``InChI``, ``InChIKey`` or ``Mol``. + + Allowed conversions: from InChI to InChIKey, from InChI to Mol file, from InChI to SMILES, from InChIKey to + InChI, from InChIKey to Mol file, from Mol file to InChI, from Mol file to InChIKey, from SMILES to InChI. + + :param string input: Input chemical. + :param string input_format: Input format. + :param string output_format: Output format. + :return: Input chemical in output format. + :rtype: string + """ + json = {'input': input, 'inputFormat': input_format, 'outputFormat': output_format} + response = self.post(api='compounds', namespace='tools', endpoint='convert', json=json) + return response['output'] + + def validate_inchikey(self, inchikey): + """Return whether ``inchikey`` is valid. + + :param string inchikey: The InChIKey to validate. + :return: Whether the InChIKey is valid. 
+ :rtype: bool + """ + json = {'inchikey': inchikey} + try: + response = self.post(api='compounds', namespace='tools', endpoint='validate/inchikey', json=json) + return response['valid'] + except errors.ChemSpiPyHTTPError: + return False class MassSpecApi(BaseChemSpider): def get_databases(self): """Get the list of datasources in ChemSpider.""" - response = self.request('MassSpecApi', 'GetDatabases') - return [el.text for el in response] + warnings.warn('Use get_datasources instead of get_databases.', DeprecationWarning) + return self.get_datasources() def get_extended_compound_info(self, csid): """Get extended record details for a CSID. Security token is required. :param string|int csid: ChemSpider ID. """ - response = self.request('MassSpecApi', 'GetExtendedCompoundInfo', csid=csid) - return xml_to_dict(response) + warnings.warn('Use get_details instead of get_extended_compound_info.', DeprecationWarning) + return self.get_details(record_id=csid) def get_extended_compound_info_list(self, csids): """Get extended record details for a list of CSIDs. Security token is required. :param list[string|int] csids: ChemSpider IDs. """ - response = self.request('MassSpecApi', 'GetExtendedCompoundInfoArray', csids=csids) - return [xml_to_dict(result) for result in response] + warnings.warn('Use get_details_batch instead of get_extended_compound_info.', DeprecationWarning) + return self.get_details_batch(record_ids=csids) def get_extended_mol_compound_info_list(self, csids, mol_type=MOL2D, include_reference_counts=False, include_external_references=False): @@ -247,11 +668,8 @@ def get_extended_mol_compound_info_list(self, csids, mol_type=MOL2D, include_ref :param bool include_reference_counts: Whether to include reference counts. :param bool include_external_references: Whether to include external references. 
""" - response = self.request('MassSpecApi', 'GetExtendedMolCompoundInfoArray', csids=csids, - eMolType=DIMENSIONS.get(mol_type, mol_type), - includeReferenceCounts=include_reference_counts, - includeExternalReferences=include_external_references) - return [xml_to_dict(result) for result in response] + warnings.warn('Use get_details_batch instead of get_extended_mol_compound_info_list.', DeprecationWarning) + return self.get_details_batch(record_ids=csids) def get_record_mol(self, csid, calc3d=False): """Get ChemSpider record in MOL format. Security token is required. @@ -259,61 +677,10 @@ def get_record_mol(self, csid, calc3d=False): :param string|int csid: ChemSpider ID. :param bool calc3d: Whether 3D coordinates should be calculated before returning record data. """ - response = self.request('MassSpecApi', 'GetRecordMol', csid=csid, calc3d=calc3d) - return response.text - - def simple_search_by_formula(self, formula): - """Search ChemSpider by molecular formula. - - :param string formula: Molecular formula - :returns: A list of Compounds. - :rtype: list[:class:`~chemspipy.Compound`] - """ - warnings.warn("Use search_by_formula instead of simple_search_by_formula.", DeprecationWarning) - response = self.request('MassSpecApi', 'SearchByFormula2', formula=formula) - return [Compound(self, el.text) for el in response] - - def simple_search_by_mass(self, mass, mass_range): - """Search ChemSpider by mass +/- range. - - :param float mass: The mass to search for. - :param float mass_range: The +/- mass range to allow. - :returns: A list of Compounds. - :rtype: list[:class:`~chemspipy.Compound`] - """ - warnings.warn("Use search_by_mass instead of simple_search_by_mass.", DeprecationWarning) - response = self.request('MassSpecApi', 'SearchByMass2', mass=mass, range=mass_range) - return [Compound(self, el.text) for el in response] - - # def get_compressed_records_sdf(self, rid): - # """Get an SDF containing all the results from a search operation. 
- # - # A maximum of 10000 records can be fetched per request. Subscriber role security token is required. - # - # Warning: This doesn't work reliably. - # - # :param string rid: A transaction ID, returned by an asynchronous search method. - # :returns: SDF containing the requested records. - # :rtype: string - # """ - # response = self.request('MassSpecApi', 'GetCompressedRecordsSdf', rid=rid, eComp='eGzip') - # if response.text: - # return zlib.decompress(b64decode(response.text.encode('utf-8')), 16+zlib.MAX_WBITS) - # - # def get_records_sdf(self, rid): - # """Get an SDF containing all the results from a search operation. - # - # A maximum of 10000 records can be fetched per request. Subscriber role security token is required. - # - # Warning: This doesn't work reliably. - # - # :param string rid: A transaction ID, returned by an asynchronous search method. - # :returns: SDF containing the requested records. - # :rtype: string - # """ - # response = self.request('MassSpecApi', 'GetRecordsSdf', rid=rid) - # if response.text: - # return response.text.encode('utf-8') + warnings.warn('Use get_mol instead of get_record_mol.', DeprecationWarning) + if calc3d: + warnings.warn('calc3d parameter for get_record_mol is no longer supported.', DeprecationWarning) + return self.get_mol(record_id=csid) class SearchApi(BaseChemSpider): @@ -326,11 +693,11 @@ def async_simple_search(self, query): Security token is required. :param string query: Search query - a name, SMILES, InChI, InChIKey, CSID, etc. - :returns: Transaction ID. + :return: Transaction ID. :rtype: string """ - response = self.request('Search', 'AsyncSimpleSearch', query=query) - return response.text + warnings.warn('Use filter_name instead of async_simple_search.', DeprecationWarning) + return self.filter_name(name=query) def async_simple_search_ordered(self, query, order=CSID, direction=ASCENDING): """Search ChemSpider with arbitrary query, returning results with a custom order. 
@@ -340,17 +707,13 @@ def async_simple_search_ordered(self, query, order=CSID, direction=ASCENDING): Security token is required. :param string query: Search query - a name, SMILES, InChI, InChIKey, CSID, etc. - :param string order: :data:`~chemspipy.api.CSID`, :data:`~chemspipy.api.MASS_DEFECT`, - :data:`~chemspipy.api.MOLECULAR_WEIGHT`, :data:`~chemspipy.api.REFERENCE_COUNT`, - :data:`~chemspipy.api.DATASOURCE_COUNT`, :data:`~chemspipy.api.PUBMED_COUNT` or - :data:`~chemspipy.api.RSC_COUNT`. - :param string direction: :data:`~chemspipy.api.ASCENDING` or :data:`~chemspipy.api.DESCENDING`. - :returns: Transaction ID. + :param string order: (Optional) Field to sort the result by. + :param string direction: (Optional) :data:`~chemspipy.api.ASCENDING` or :data:`~chemspipy.api.DESCENDING`. + :return: Transaction ID. :rtype: string """ - response = self.request('Search', 'AsyncSimpleSearchOrdered', query=query, orderBy=ORDERS[order], - orderDirection=DIRECTIONS[direction]) - return response.text + warnings.warn('Use filter_name instead of async_simple_search.', DeprecationWarning) + return self.filter_name(name=query, order=order, direction=direction) def get_async_search_status(self, rid): """Check the status of an asynchronous search operation. @@ -358,12 +721,12 @@ def get_async_search_status(self, rid): Security token is required. :param string rid: A transaction ID, returned by an asynchronous search method. 
- :returns: Unknown, Created, Scheduled, Processing, Suspended, PartialResultReady, ResultReady, Failed, + :return: Unknown, Created, Scheduled, Processing, Suspended, PartialResultReady, ResultReady, Failed, TooManyRecords :rtype: string """ - response = self.request('Search', 'GetAsyncSearchStatus', rid=rid) - return response.text + warnings.warn('Use filter_status instead of get_async_search_status.', DeprecationWarning) + return self.filter_status(query_id=rid)['status'] def get_async_search_status_and_count(self, rid): """Check the status of an asynchronous search operation. If ready, a count and message are also returned. @@ -373,18 +736,19 @@ def get_async_search_status_and_count(self, rid): :param string rid: A transaction ID, returned by an asynchronous search method. :rtype: dict """ - response = self.request('Search', 'GetAsyncSearchStatusAndCount', rid=rid) - return xml_to_dict(response) + warnings.warn('Use filter_status instead of get_async_search_status_and_count.', DeprecationWarning) + return self.filter_status(query_id=rid) def get_async_search_result(self, rid): """Get the results from a asynchronous search operation. Security token is required. :param string rid: A transaction ID, returned by an asynchronous search method. - :returns: A list of Compounds. + :return: A list of Compounds. :rtype: list[:class:`~chemspipy.Compound`] """ - response = self.request('Search', 'GetAsyncSearchResult', rid=rid) - return [Compound(self, el.text) for el in response] + warnings.warn('Use filter_results instead of get_async_search_result.', DeprecationWarning) + results = self.filter_results(query_id=rid) + return [Compound(self, record_id) for record_id in results] def get_async_search_result_part(self, rid, start=0, count=-1): """Get a slice of the results from a asynchronous search operation. Security token is required. 
@@ -392,11 +756,14 @@ def get_async_search_result_part(self, rid, start=0, count=-1): :param string rid: A transaction ID, returned by an asynchronous search method. :param int start: The number of results to skip. :param int count: The number of results to return. -1 returns all through to end. - :returns: A list of Compounds. + :return: A list of Compounds. :rtype: list[:class:`~chemspipy.Compound`] """ - response = self.request('Search', 'GetAsyncSearchResultPart', rid=rid, start=start, count=count) - return [Compound(self, el.text) for el in response] + warnings.warn('Use filter_results instead of get_async_search_result_part.', DeprecationWarning) + if count == -1: + count = None + results = self.filter_results(query_id=rid, start=start, count=count) + return [Compound(self, record_id) for record_id in results] def get_compound_info(self, csid): """Get SMILES, StdInChI and StdInChIKey for a given CSID. Security token is required. @@ -404,8 +771,8 @@ def get_compound_info(self, csid): :param string|int csid: ChemSpider ID. :rtype: dict """ - response = self.request('Search', 'GetCompoundInfo', csid=csid) - return xml_to_dict(response) + warnings.warn('Use get_details instead of get_compound_info.', DeprecationWarning) + return self.get_details(record_id=csid) def get_compound_thumbnail(self, csid): """Get PNG image as binary data. @@ -413,86 +780,21 @@ def get_compound_thumbnail(self, csid): :param string|int csid: ChemSpider ID. :rtype: bytes """ - response = self.request('Search', 'GetCompoundThumbnail', id=csid) - return b64decode(response.text.encode('utf-8')) + warnings.warn('Use get_image instead of get_compound_thumbnail.', DeprecationWarning) + return self.get_image(record_id=csid) def simple_search(self, query): """Search ChemSpider with arbitrary query. - A maximum of 100 results are returned. Security token is required. + .. deprecated:: 2.0 + Use :meth:`~chemspipy.api.ChemSpider.search` instead. 
- :param string query: Search query - a name, SMILES, InChI, InChIKey, CSID, etc. - :returns: List of :class:`Compounds `. - :rtype: list[:class:`~chemspipy.Compound`] + :param string query: Search query - a chemical name. + :return: Search Results list. + :rtype: chemspipy.search.Results """ - response = self.request('Search', 'SimpleSearch', query=query) - return [Compound(self, el.text) for el in response] - - -class SpectraApi(BaseChemSpider): - - def get_all_spectra_info(self): - """Get full list of all spectra in ChemSpider. Subscriber role security token is required. - - rtype: list[dict] - """ - response = self.request('Spectra', 'GetAllSpectraInfo') - return [xml_to_dict(result) for result in response] - - def get_spectrum_info(self, spectrum_id): - """Get information for a specific spectrum ID. Subscriber role security token is required. - - :param string|int spectrum_id: spectrum ID. - :returns: Spectrum info. - :rtype: dict - """ - response = self.request('Spectra', 'GetSpectrumInfo', spc_id=spectrum_id) - return xml_to_dict(response) - - def get_compound_spectra_info(self, csid): - """Get information about all the spectra for a ChemSpider ID. Subscriber role security token is required. - - :param string|int csid: ChemSpider ID. - :returns: List of spectrum info. - :rtype: list[dict] - """ - response = self.request('Spectra', 'GetCompoundSpectraInfo', csid=csid) - return [xml_to_dict(result) for result in response] - - def get_spectra_info_list(self, csids): - """Get information about all the spectra for a list of ChemSpider IDs. - - :param list[string|int] csids: ChemSpider IDs. - :returns: List of spectrum info. - :rtype: list[dict] - """ - response = self.request('Spectra', 'GetSpectraInfoArray', csids=csids) - return [xml_to_dict(result) for result in response] - - -class InchiApi(BaseChemSpider): - - def get_original_mol(self, csid): - """Get original submitted MOL file. Security token is required. - - :param string|int csid: ChemSpider ID. 
- """ - response = self.request('InChI', 'CSIDToMol', csid=csid) - return response.text - - # TODO - # InChIKeyToCSID - inchi_key - csid - # InChIKeyToInChI - inchi_key - InChI - # InChIKeyToMol - inchi_key - Mol - # InChIToCSID - inchi - csid - # InChIToInChIKey - inchi - inchikey - # InChIToMol - inchi - mol - # InChIToSMILES - inchi - smiles - # IsValidInChIKey - inchi_key - bool - # MolToInChI - mol - inchi - # MolToInChIKey - mol - inchi - # ResolveInChIKey - inchi_key, out_format (MOL/SDF/SMILES/InChI) - list of strings - # SMILESToInChI - smiles - inchi + warnings.warn('Use search instead of simple_search.', DeprecationWarning) + return self.search(query=query) class CustomApi(BaseChemSpider): @@ -501,7 +803,7 @@ def get_compound(self, csid): """Return a Compound object for a given ChemSpider ID. Security token is required. :param string|int csid: ChemSpider ID. - :returns: The Compound with the specified ChemSpider ID. + :return: The Compound with the specified ChemSpider ID. :rtype: :class:`~chemspipy.Compound` """ return Compound(self, csid) @@ -510,76 +812,37 @@ def get_compounds(self, csids): """Return a list of Compound objects, given a list ChemSpider IDs. Security token is required. :param list[string|int] csids: List of ChemSpider IDs. - :returns: List of Compounds with the specified ChemSpider IDs. + :return: List of Compounds with the specified ChemSpider IDs. :rtype: list[:class:`~chemspipy.Compound`] """ return [Compound(self, csid) for csid in csids] - def get_spectrum(self, spectrum_id): - """Return a :class:`~chemspipy.Spectrum` object for a given spectrum ID. Subscriber role security token is required. - - :param string|int spectrum_id: Spectrum ID. - :returns: The Spectrum with the specified spectrum ID. - :rtype: :class:`~chemspipy.Spectrum` - """ - return Spectrum(self, spectrum_id) - - def get_spectra(self, spectrum_ids): - """Return a :class:`~chemspipy.Spectrum` object for a given spectrum ID. 
Subscriber role security token is required. - - :param list[string|int] spectrum_ids: List of spectrum IDs. - :returns: List of spectra with the specified spectrum IDs. - :rtype: list[:class:`~chemspipy.Spectrum`] - """ - return [Spectrum(self, spectrum_id) for spectrum_id in spectrum_ids] - - def get_compound_spectra(self, csid): - """Return :class:`~chemspipy.Spectrum` objects for all the spectra associated with a ChemSpider ID. - - :param csid: string|int csid: ChemSpider ID. - :returns: List of spectra for the specified ChemSpider ID. - :rtype: list[:class:`~chemspipy.Spectrum`] - """ - return [Spectrum.from_info_dict(self, info) for info in self.get_spectra_info_list([csid])] - - def get_all_spectra(self): - """Return a full list of :class:`~chemspipy.Spectrum` objects for all spectra in ChemSpider. - - Subscriber role security token is required. - - :returns: Full list of spectra in ChemSpider. - :rtype: list[:class:`~chemspipy.Spectrum`] - """ - return [Spectrum.from_info_dict(self, info) for info in self.get_all_spectra_info()] - def search(self, query, order=None, direction=ASCENDING, raise_errors=False): """Search ChemSpider for the specified query and return the results. Security token is required. + The accepted values for ``order`` are: :data:`~chemspipy.api.RECORD_ID`, :data:`~chemspipy.api.MASS_DEFECT`, + :data:`~chemspipy.api.MOLECULAR_WEIGHT`, :data:`~chemspipy.api.REFERENCE_COUNT`, + :data:`~chemspipy.api.DATASOURCE_COUNT`, :data:`~chemspipy.api.PUBMED_COUNT` or + :data:`~chemspipy.api.RSC_COUNT`. + :param string|int query: Search query. - :param string order: (Optional) :data:`~chemspipy.api.CSID`, :data:`~chemspipy.api.MASS_DEFECT`, - :data:`~chemspipy.api.MOLECULAR_WEIGHT`, :data:`~chemspipy.api.REFERENCE_COUNT`, - :data:`~chemspipy.api.DATASOURCE_COUNT`, :data:`~chemspipy.api.PUBMED_COUNT` or - :data:`~chemspipy.api.RSC_COUNT`. + :param string order: (Optional) Field to sort the result by. 
:param string direction: (Optional) :data:`~chemspipy.api.ASCENDING` or :data:`~chemspipy.api.DESCENDING`. - :param bool raise_errors: If True, raise exceptions. If False, store on Results ``exception`` property. - :returns: Search Results list. - :rtype: Results + :param bool raise_errors: (Optional) If True, raise exceptions. If False, store on Results ``exception`` + property. + :return: Search Results list. + :rtype: chemspipy.search.Results """ - if order and direction: - return Results(self, self.async_simple_search_ordered, (query, order, direction), raise_errors=raise_errors) - else: - return Results(self, self.async_simple_search, (query,), raise_errors=raise_errors) - - # TODO: Wrappers for subscriber role asynchronous searches + return Results(self, self.filter_name, (query, order, direction), raise_errors=raise_errors) -class ChemSpider(CustomApi, MassSpecApi, SearchApi, SpectraApi, InchiApi): +class ChemSpider(CustomApi, FilterApi, LookupsApi, RecordsApi, ToolsApi, MassSpecApi, SearchApi): """Provides access to the ChemSpider API. Usage:: >>> from chemspipy import ChemSpider - >>> cs = ChemSpider('') + >>> cs = ChemSpider('') """ diff --git a/chemspipy/errors.py b/chemspipy/errors.py index 01a10dd..78577e1 100644 --- a/chemspipy/errors.py +++ b/chemspipy/errors.py @@ -5,8 +5,6 @@ Exceptions raised by ChemSpiPy. -:copyright: Copyright 2014 by Matt Swain. -:license: MIT, see LICENSE file for more details. """ from __future__ import print_function @@ -19,26 +17,89 @@ class ChemSpiPyError(Exception): pass -class ChemSpiPyParseError(ChemSpiPyError): - """Raised when ChemSpiPy fails to parse a response from the ChemSpider servers.""" - pass +class ChemSpiPyHTTPError(ChemSpiPyError): + """Base exception to handle HTTP errors.""" + #: Default message if none supplied. Override in subclasses. 
+ MESSAGE = 'ChemSpiPy Error' + HTTP_CODE = None -class ChemSpiPyAuthError(ChemSpiPyError): - """Raised when the security token doesn't have access to an endpoint.""" - pass + def __init__(self, message=None, http_code=None, *args, **kwargs): + """ + :param string|bytes message: Error message. + :param http_code: HTTP code. + """ -class ChemSpiPyNotFoundError(ChemSpiPyError): - """Raised when no record is present for the requested CSID.""" - pass + # Decode message to unicode if necessary + if isinstance(message, bytes): + try: + message = message.decode('utf-8') + except UnicodeDecodeError: + message = message.decode('iso-8859-1') + self.message = message if message is not None else self.MESSAGE + self.http_code = http_code if http_code is not None else self.HTTP_CODE + super(ChemSpiPyHTTPError, self).__init__(*args, **kwargs) -class ChemSpiPyTimeoutError(ChemSpiPyError): - """Raised when an asynchronous request times out.""" - pass + def __repr__(self): + args = 'message={!r}'.format(self.message) + if self.http_code is not None: + args += ', http_code={!r}'.format(self.http_code) + return '{}({})'.format(self.__class__.__name__, args) + + def __str__(self): + return self.message + + +class ChemSpiPyBadRequestError(ChemSpiPyHTTPError): + """Raised for a bad request.""" + MESSAGE = 'Bad request.' + HTTP_CODE = 400 + + +class ChemSpiPyAuthError(ChemSpiPyHTTPError): + """Raised when API key authorization fails.""" + MESSAGE = 'Unauthorized.' + HTTP_CODE = 401 + + +class ChemSpiPyNotFoundError(ChemSpiPyHTTPError): + """Raised when the requested resource was not found.""" + MESSAGE = 'Not found.' + HTTP_CODE = 404 -class ChemSpiPyServerError(ChemSpiPyError): - """Raised when ChemSpider returns a 500 status code with an error message.""" +class ChemSpiPyMethodError(ChemSpiPyHTTPError): + """Raised when an invalid HTTP method is used.""" + MESSAGE = 'Method Not Allowed.' 
+ HTTP_CODE = 405 + + +class ChemSpiPyPayloadError(ChemSpiPyHTTPError): + """Raised when a request payload is too large.""" + MESSAGE = 'Payload Too Large.' + HTTP_CODE = 413 + + +class ChemSpiPyRateError(ChemSpiPyHTTPError): + """Raised when too many requests are sent in a given amount of time.""" + MESSAGE = 'Too Many Requests.' + HTTP_CODE = 429 + + +class ChemSpiPyServerError(ChemSpiPyHTTPError): + """Raised when an internal server error occurs.""" + MESSAGE = 'Internal Server Error.' + HTTP_CODE = 500 + + +class ChemSpiPyUnavailableError(ChemSpiPyHTTPError): + """Raised when the service is temporarily unavailable.""" + MESSAGE = 'Service Unavailable.' + HTTP_CODE = 503 + + +class ChemSpiPyTimeoutError(ChemSpiPyError): + """Raised when an asynchronous request times out.""" pass diff --git a/chemspipy/objects.py b/chemspipy/objects.py index b097557..093f96f 100644 --- a/chemspipy/objects.py +++ b/chemspipy/objects.py @@ -5,15 +5,15 @@ Objects returned by ChemSpiPy API methods. -:copyright: Copyright 2014 by Matt Swain. -:license: MIT, see LICENSE file for more details. """ from __future__ import print_function from __future__ import unicode_literals from __future__ import division +import warnings -from .utils import memoized_property, timestamp + +from .utils import memoized_property class Compound(object): @@ -23,14 +23,14 @@ class Compound(object): a compound given its ChemSpider ID. Information is loaded lazily when requested, and cached for future access. """ - def __init__(self, cs, csid): + def __init__(self, cs, record_id): """ :param ChemSpider cs: ``ChemSpider`` session. - :param int|string csid: ChemSpider ID. + :param int|string record_id: Compound record ID. """ self._cs = cs - self._csid = int(csid) + self._record_id = int(record_id) # TODO: Allow optional initialize with a record-type response from the API (kwarg or class method from_dict?). 
def __eq__(self, other): @@ -43,27 +43,26 @@ def _repr_png_(self): """For IPython notebook, display 2D image.""" return self.image + @property + def record_id(self): + """Compound record ID.""" + return self._record_id + @property def csid(self): """ChemSpider ID.""" - return self._csid - - # TODO: csid setter that clears cached properties? + warnings.warn('Use record_id instead of csid.', DeprecationWarning) + return self._record_id @property def image_url(self): """Return the URL of a PNG image of the 2D chemical structure.""" - return 'http://www.chemspider.com/ImagesHandler.ashx?id=%s' % self.csid + return 'http://www.chemspider.com/ImagesHandler.ashx?id=%s' % self.record_id @memoized_property - def _compound_info(self): + def _details(self): """Request compound info and cache the result.""" - return self._cs.get_compound_info(self.csid) - - @memoized_property - def _extended_compound_info(self): - """Request extended compound info and cache the result.""" - return self._cs.get_extended_compound_info(self.csid) + return self._cs.get_details(self.record_id) @property def molecular_formula(self): @@ -71,7 +70,7 @@ def molecular_formula(self): :rtype: string """ - return self._extended_compound_info['molecular_formula'] + return self._details['formula'] @property def smiles(self): @@ -79,7 +78,9 @@ def smiles(self): :rtype: string """ - return self._compound_info['smiles'] + return self._details['smiles'] + + # TODO: Convert tool to get inchi? 
@property def stdinchi(self): @@ -87,7 +88,8 @@ def stdinchi(self): :rtype: string """ - return self._compound_info['inchi'] + warnings.warn('Use inchi instead of stdinchi.', DeprecationWarning) + return self.inchi @property def stdinchikey(self): @@ -95,7 +97,8 @@ def stdinchikey(self): :rtype: string """ - return self._compound_info['inchikey'] + warnings.warn('Use inchikey instead of stdinchikey.', DeprecationWarning) + return self.inchikey @property def inchi(self): @@ -103,7 +106,7 @@ def inchi(self): :rtype: string """ - return self._extended_compound_info['inchi'] + return self._cs.convert(self.mol_2d, 'Mol', 'InChI') @property def inchikey(self): @@ -111,7 +114,7 @@ def inchikey(self): :rtype: string """ - return self._extended_compound_info['inchikey'] + return self._cs.convert(self.mol_2d, 'Mol', 'InChIKey') @property def average_mass(self): @@ -119,7 +122,7 @@ def average_mass(self): :rtype: float """ - return self._extended_compound_info['average_mass'] + return self._details['averageMass'] @property def molecular_weight(self): @@ -127,7 +130,7 @@ def molecular_weight(self): :rtype: float """ - return self._extended_compound_info['molecular_weight'] + return self._details['molecularWeight'] @property def monoisotopic_mass(self): @@ -135,7 +138,7 @@ def monoisotopic_mass(self): :rtype: float """ - return self._extended_compound_info['monoisotopic_mass'] + return self._details['monoisotopicMass'] @property def nominal_mass(self): @@ -143,23 +146,7 @@ def nominal_mass(self): :rtype: float """ - return self._extended_compound_info['nominal_mass'] - - @property - def alogp(self): - """Return the calculated AlogP for this Compound. - - :rtype: float - """ - return self._extended_compound_info['alogp'] - - @property - def xlogp(self): - """Return the calculated XlogP for this Compound. 
- - :rtype: float - """ - return self._extended_compound_info['xlogp'] + return self._details['nominalMass'] @property def common_name(self): @@ -167,7 +154,7 @@ def common_name(self): :rtype: string """ - return self._extended_compound_info['common_name'] + return self._details['commonName'] @memoized_property def mol_2d(self): @@ -175,7 +162,7 @@ def mol_2d(self): :rtype: string """ - return self._cs.get_record_mol(self.csid, calc3d=False) + return self._details['mol2D'] @memoized_property def mol_3d(self): @@ -183,15 +170,7 @@ def mol_3d(self): :rtype: string """ - return self._cs.get_record_mol(self.csid, calc3d=True) - - @memoized_property - def mol_raw(self): - """Return unprocessed MOL file for this Compound. - - :rtype: string - """ - return self._cs.get_original_mol(self.csid) + return self._details['mol3D'] @memoized_property def image(self): @@ -199,124 +178,12 @@ def image(self): :rtype: bytes """ - return self._cs.get_compound_thumbnail(self.csid) - - @memoized_property - def spectra(self): - """Return all the available spectral data for this Compound. - - :rtype: list[:class:`~chemspipy.Spectrum`] - """ - return [Spectrum.from_info_dict(self._cs, info) for info in self._cs.get_spectra_info_list([self.csid])] - - -class Spectrum(object): - """ A class for retrieving and caching details about a Spectrum.""" - - def __init__(self, cs, spectrum_id): - """Initializing a Spectrum from a spectrum ID requires a subscriber role security token. - - :param ChemSpider cs: ``ChemSpider`` session. - :param int|string spectrum_id: Spectrum ID. 
- """ - self._cs = cs - self._spectrum_id = int(spectrum_id) - - def __eq__(self, other): - return isinstance(other, Spectrum) and self.spectrum_id == other.spectrum_id - - def __repr__(self): - return 'Spectrum(%r)' % self.spectrum_id - - @classmethod - def from_info_dict(cls, cs, info): - """Initialize a Spectrum from an info dict that has already been retrieved.""" - s = cls(cs, info['spectrum_id']) - s._info = info - return s - - @property - def _spectrum_info(self): - """Full spectrum info. - - :rtype: dict - """ - if not hasattr(self, '_info'): - self._info = self._cs.get_spectrum_info(self._spectrum_id) - return self._info - - @property - def spectrum_id(self): - """Spectrum ID. - - :rtype: int - """ - return self._spectrum_id - - @property - def csid(self): - """ChemSpider ID of related compound. - - :rtype: int - """ - return self._spectrum_info['csid'] - - @property - def spectrum_type(self): - """Spectrum type. - - Possible values include HNMR, CNMR, IR, UV-Vis, NIR, EI, 2D1H1HCOSY, 2D1H13CD, APCI+, R, MALDI+, 2D1H13CLR, - APPI-, CI+ve, ESI+, 2D1H1HOESY, FNMR, CI-ve, ESI-, PNMR. - - :rtype: string - """ - return self._spectrum_info['spectrum_type'] - - @property - def file_name(self): - """Spectrum file name. - - :rtype: string - """ - return self._spectrum_info['file_name'] - - @property - def comments(self): - """Spectrum comments. Can be None. - - :rtype: string - """ - return self._spectrum_info.get('comments') - - @property - def url(self): - """Spectrum URL. - - :rtype: string - """ - return 'https://www.chemspider.com/FilesHandler.ashx?type=blob&disp=1&id=%s' % self.spectrum_id + return self._cs.get_image(self.record_id) @memoized_property - def data(self): - """Spectrum data file contents. Requires an additional request. Result is cached. - - :rtype: string - """ - r = self._cs.http.get(self.url) - return r.text - - @property - def original_url(self): - """Original spectrum URL. Can be None. 
- - :rtype: string - """ - return self._spectrum_info.get('original_url') - - @property - def submitted_date(self): - """Spectrum submitted date. + def external_references(self): + """Return external references for this Compound. - :rtype: :py:class:`datetime.datetime` + :rtype: list[string] """ - return timestamp(self._spectrum_info['submitted_date']) + return self._cs.get_external_references(self.record_id) diff --git a/chemspipy/search.py b/chemspipy/search.py index 12a413d..d3deab2 100644 --- a/chemspipy/search.py +++ b/chemspipy/search.py @@ -5,34 +5,25 @@ A wrapper for asynchronous search requests. -:copyright: Copyright 2014 by Matt Swain. -:license: MIT, see LICENSE file for more details. """ from __future__ import print_function from __future__ import unicode_literals from __future__ import division +import datetime import logging import threading import time -try: - from lxml import etree -except ImportError: - try: - import xml.etree.cElementTree as etree - except ImportError: - import xml.etree.ElementTree as etree +from six.moves import range -import six - -from .errors import ChemSpiPyServerError, ChemSpiPyTimeoutError -from .utils import duration +from . import errors, objects, utils log = logging.getLogger(__name__) +# TODO: Use Sequence abc metaclass? class Results(object): """Container class to perform a search on a background thread and hold the results when ready.""" @@ -46,12 +37,15 @@ def __init__(self, cs, searchfunc, searchargs, raise_errors=False, max_requests= :param int max_requests: Maximum number of times to check if search results are ready. 
""" log.debug('Results init') + self._cs = cs self._raise_errors = raise_errors self._max_requests = max_requests self._status = 'Created' self._exception = None + self._qid = None self._message = None - self._duration = None + self._start = None + self._end = None self._results = [] self._searchthread = threading.Thread(name='SearchThread', target=self._search, args=(cs, searchfunc, searchargs)) self._searchthread.start() @@ -59,34 +53,36 @@ def __init__(self, cs, searchfunc, searchargs, raise_errors=False, max_requests= def _search(self, cs, searchfunc, searchargs): """Perform the search and retrieve the results.""" log.debug('Searching in background thread') + self._start = datetime.datetime.utcnow() try: - rid = searchfunc(*searchargs) - log.debug('Setting rid: %s' % rid) - for _ in six.moves.range(self._max_requests): - log.debug('Checking status: %s' % rid) - status = cs.get_async_search_status_and_count(rid) + self._qid = searchfunc(*searchargs) + log.debug('Setting qid: %s' % self._qid) + for _ in range(self._max_requests): + log.debug('Checking status: %s' % self._qid) + status = cs.filter_status(self._qid) self._status = status['status'] self._message = status.get('message', '') - self._duration = duration(status['elapsed']) log.debug(status) time.sleep(0.2) - if status['status'] == 'ResultReady': + if status['status'] == 'Complete': break - elif status['status'] in {'Failed', 'Unknown', 'Suspended'}: - raise ChemSpiPyServerError('Search Failed: %s' % status.get('message', '')) - elif status['status'] == 'TooManyRecords': - raise ChemSpiPyServerError('Too many results') + elif status['status'] in {'Failed', 'Unknown', 'Suspended', 'Not Found'}: + raise errors.ChemSpiPyServerError('Search Failed: %s' % status.get('message', '')) else: - raise ChemSpiPyTimeoutError('Search took too long') + raise errors.ChemSpiPyTimeoutError('Search took too long') log.debug('Search success!') + self._end = datetime.datetime.utcnow() if status['count'] > 0: - self._results 
= cs.get_async_search_result(rid) + self._results = [objects.Compound(cs, csid) for csid in cs.filter_results(self._qid)] log.debug('Results: %s', self._results) elif not self._message: self._message = 'No results found' except Exception as e: # Catch and store exception so we can raise it in the main thread self._exception = e + self._end = datetime.datetime.utcnow() + if self._status == 'Created': + self._status = 'Failed' def ready(self): """Return True if the search finished. @@ -113,8 +109,7 @@ def wait(self): def status(self): """Current status string returned by ChemSpider. - - :returns: 'Unknown', 'Created', 'Scheduled', 'Processing', 'Suspended', 'PartialResultReady', 'ResultReady' + :return: 'Unknown', 'Created', 'Scheduled', 'Processing', 'Suspended', 'PartialResultReady', 'ResultReady' :rtype: string """ return self._status @@ -125,6 +120,14 @@ def exception(self): self.wait() # TODO: If raise_errors=True this will raise the exception when trying to access it? return self._exception + @property + def qid(self): + """Search query ID. + + :rtype: string + """ + return self._qid + @property def message(self): """A contextual message about the search. Blocks until the search is finished. @@ -149,19 +152,17 @@ def duration(self): :rtype: :py:class:`datetime.timedelta` """ self.wait() - return self._duration - - # @memoized_property - # def sdf(self): - # """Get an SDF containing all the search results. - # - # Warning: The SDF API endpoints don't seem to work properly. - # - # :rtype: string - # :returns: SDF containing the search results. - # """ - # self.wait() - # return self._cs.get_records_sdf(self._rid) + return self._end - self._start + + @utils.memoized_property + def sdf(self): + """Get an SDF containing all the search results. + + :return: SDF containing the search results. + :rtype: bytes + """ + self.wait() + return self._cs.filter_results_sdf(self._qid) def __getitem__(self, index): """Get a single result or a slice of results. 
Blocks until the search is finished. @@ -189,9 +190,3 @@ def __repr__(self): return 'Results(%s)' % self._results else: return 'Results(%s)' % self.status - - -# TODO: fetch method that gets the property values for every Compound in the list of results. -# Do this by running get_extended_mol_compound_info_list and then inserting info into Compounds -# Do multiple requests in chunks of 250 Compounds if necessary -# Compound will need a method to insert info from JSON response diff --git a/chemspipy/utils.py b/chemspipy/utils.py index b1ac9f4..9c0ca99 100644 --- a/chemspipy/utils.py +++ b/chemspipy/utils.py @@ -5,8 +5,6 @@ Miscellaneous utility functions. -:copyright: Copyright 2014 by Matt Swain. -:license: MIT, see LICENSE file for more details. """ from __future__ import print_function diff --git a/docs/source/api.rst b/docs/source/api.rst index ce88156..bfccda0 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -14,24 +14,15 @@ This part of the documentation is automatically generated from the ChemSpiPy sou .. automethod:: get_compound(csid) .. automethod:: get_compounds(csids) - .. automethod:: get_spectrum(spectrum_id) - .. automethod:: get_spectra(spectrum_ids) - .. automethod:: get_compound_spectra(csid) - .. automethod:: get_all_spectra() .. automethod:: search(query, order=None, direction=ASCENDING, raise_errors=False) .. automethod:: simple_search(query) .. automethod:: get_record_mol(csid, calc3d=False) - .. automethod:: get_original_mol(csid) .. automethod:: get_compound_thumbnail(csid) .. automethod:: get_databases() .. automethod:: get_compound_info(csid) .. automethod:: get_extended_compound_info(csid) .. automethod:: get_extended_compound_info_list(csids) .. automethod:: get_extended_mol_compound_info_list(csids, mol_type=MOL2D, include_reference_counts=False, include_external_references=False) - .. automethod:: get_compound_spectra_info(csid) - .. automethod:: get_spectrum_info(spectrum_id) - .. 
automethod:: get_spectra_info_list(csids) - .. automethod:: get_all_spectra_info() .. automethod:: request(api, endpoint, **params) .. automethod:: construct_api_url(api, endpoint, **params) .. automethod:: async_simple_search(query) @@ -59,8 +50,6 @@ This part of the documentation is automatically generated from the ChemSpiPy sou .. automodule:: chemspipy.objects .. autoclass:: chemspipy.Compound() :members: -.. autoclass:: chemspipy.Spectrum() - :members: .. automodule:: chemspipy.search .. autoclass:: chemspipy.Results() diff --git a/docs/source/conf.py b/docs/source/conf.py index 7d406ad..e885122 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -43,7 +43,7 @@ # General information about the project. project = u'ChemSpiPy' -copyright = u'2014, Matt Swain' +copyright = u'2018, Matt Swain' # The version info for the project you're documenting, acts as replacement for |version| and |release|, also used in # various other places throughout the built documents. @@ -244,16 +244,3 @@ # Concatenate the class and __init__ docstrings together autoclass_content = 'both' - - -def process_docstring(app, what, name, obj, options, lines): - """Filter out meta fields from module docstrings when used by autodoc.""" - if not what == 'module': - return - for l in reversed(lines): - if l.startswith(':copyright:') or l.startswith(':license:'): - lines.remove(l) - - -def setup(app): - app.connect('autodoc-process-docstring', process_docstring) diff --git a/docs/source/guide/advanced.rst b/docs/source/guide/advanced.rst index b68ef1f..176f4b0 100644 --- a/docs/source/guide/advanced.rst +++ b/docs/source/guide/advanced.rst @@ -3,31 +3,30 @@ Advanced ======== -Keep your security token secret -------------------------------- +Keep your API key secret +------------------------ -Be careful not to include your security token when sharing code. 
A simple way to ensure this doesn't happen by accident -is to store your security token as an environment variable that can be specified in your `.bash_profile` or `.zshrc` -file:: +Be careful not to include your API key when sharing code. A simple way to ensure this doesn't happen by accident is to +store your API key as an environment variable that can be specified in your `.bash_profile` or `.zshrc` file:: - export CHEMSPIDER_SECURITY_TOKEN= + export CHEMSPIDER_API_KEY= This can then be retrieved in your scripts using ``os.environ``:: - >>> CST = os.environ['CHEMSPIDER_SECURITY_TOKEN'] - >>> cs = ChemSpider(security_token=CST) + >>> api_key = os.environ['CHEMSPIDER_API_KEY'] + >>> cs = ChemSpider(api_key) Specify a User Agent -------------------- -As well as using your security token, it is possible to identify your program to the ChemSpider servers using a User +As well as using your API key, it is possible to identify your program to the ChemSpider servers using a User Agent string. You can specify a custom User Agent through ChemSpiPy through the optional ``user_agent`` parameter to the ChemSpider class:: >>> from chemspipy import ChemSpider - >>> cs = ChemSpider('', user_agent='My program 1.3, ChemSpiPy 1.0.5, Python 2.7') + >>> cs = ChemSpider('', user_agent='My program 1.3, ChemSpiPy 1.0.5, Python 3.6') Logging ------- diff --git a/docs/source/guide/compound.rst b/docs/source/guide/compound.rst index ece5c6f..7c17c4a 100644 --- a/docs/source/guide/compound.rst +++ b/docs/source/guide/compound.rst @@ -61,7 +61,6 @@ Compound properties - ``mol_3d``: MOL file containing 3D coordinates. - ``mol_raw``: Unprocessed MOL file. - ``image``: 2D depiction as binary data in PNG format. -- ``spectra``: List of spectra. 
Implementation details ---------------------- diff --git a/docs/source/guide/gettingstarted.rst b/docs/source/guide/gettingstarted.rst index c571476..a3a8fe4 100644 --- a/docs/source/guide/gettingstarted.rst +++ b/docs/source/guide/gettingstarted.rst @@ -9,7 +9,7 @@ Before we start --------------- - Make sure you have :ref:`installed ChemSpiPy `. -- :ref:`Obtain a security token ` from the ChemSpider web site. +- :ref:`Obtain an API key ` from the ChemSpider web site. First steps ----------- @@ -18,9 +18,9 @@ Start by importing ChemSpider:: >>> from chemspipy import ChemSpider -Then connect to ChemSpider by creating a ``ChemSpider`` instance using your security token:: +Then connect to ChemSpider by creating a ``ChemSpider`` instance using your API key:: - >>> cs = ChemSpider('') + >>> cs = ChemSpider('') All your interaction with the ChemSpider database should now happen through this ChemSpider object, ``cs``. diff --git a/docs/source/guide/intro.rst b/docs/source/guide/intro.rst index 0186521..a61a267 100644 --- a/docs/source/guide/intro.rst +++ b/docs/source/guide/intro.rst @@ -7,29 +7,18 @@ ChemSpiPy is a Python wrapper that allows simple access to the web APIs offered interface for users to access and query the ChemSpider database using Python, facilitating programs that can automatically carry out the tasks that you might otherwise perform manually via the `ChemSpider website`_. -The ChemSpider website has `full documentation for the ChemSpider APIs`_. It can be useful to browse through this +The RSC website has `full documentation for the ChemSpider APIs`_. It can be useful to browse through this documentation before getting started with ChemSpiPy to get an idea of what sort of features are available. -.. _securitytoken: +.. _apikey: -Obtaining a security token --------------------------- +Obtaining an API key +-------------------- -Access to the ChemSpider API is free to academic users. Commercial users should contact the ChemSpider team to obtain -access. 
+The Royal Society of Chemistry web services are currently available as an Open Developer Preview. During the preview you +can make 1000 calls per month. For an increased allowance, contact `api@rsc.org`_. -Most operations require a "security token" that is issued to you automatically when you `register for a RSC ID`_ and -then sign in to ChemSpider. Once you have done this, you can find your security token on your -`ChemSpider User Profile`_. - -Some operations require a further "Service Subscriber" role. Contact the ChemSpider team to discuss upgrading your user -account for access to these features. - -.. warning:: - - Make sure you copy the entire token from the Chemspider profile page. The text field is quite narrow so you may have - to drag across to the right to select the entire token. The token format should be - ``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``. +All operations require an API key. To obtain one, `Register for a RSC Developers account`_ and then `Add a new key`_. ChemSpiPy license ----------------- @@ -37,6 +26,7 @@ ChemSpiPy license .. include:: ../../../LICENSE .. _`ChemSpider website`: http://www.chemspider.com -.. _`full documentation for the ChemSpider APIs`: http://www.chemspider.com/AboutServices.aspx -.. _`register for a RSC ID`: https://www.rsc.org/rsc-id/sign-in -.. _`ChemSpider User Profile`: http://www.chemspider.com/UserProfile.aspx +.. _`full documentation for the ChemSpider APIs`: https://developer.rsc.org/compounds-v1/apis +.. _`api@rsc.org`: api@rsc.org +.. _`Register for a RSC Developers account`: https://developer.rsc.org/user/register +.. _`Add a new key`: https://developer.rsc.org/user/me/apps diff --git a/docs/source/guide/misc.rst b/docs/source/guide/misc.rst index 8c23a66..b946bb6 100644 --- a/docs/source/guide/misc.rst +++ b/docs/source/guide/misc.rst @@ -3,22 +3,10 @@ Miscellaneous ============= -Constructing API URLs ---------------------- - -See the `ChemSpider API documentation`_ for more details. 
- - >>> cs.construct_api_url('MassSpec', 'GetExtendedCompoundInfo', csid='2157') - u'http://www.chemspider.com/MassSpec.asmx/GetExtendedCompoundInfo?csid=2157' - Data sources ------------ Get a list of data sources in ChemSpider:: - >>> cs.get_databases() + >>> cs.get_datasources() ['Abacipharm', 'Abblis Chemicals', 'Abcam', 'ABI Chemicals', 'Abmole Bioscience', 'ACB Blocks', 'Accela ChemBio', ... ] - - - -.. _`ChemSpider API documentation`: http://www.chemspider.com/AboutServices.aspx diff --git a/docs/source/guide/spectra.rst b/docs/source/guide/spectra.rst deleted file mode 100644 index f2ceb13..0000000 --- a/docs/source/guide/spectra.rst +++ /dev/null @@ -1,59 +0,0 @@ -.. _spectra: - -Spectra -======= - -Many compound records in ChemSpider have spectra associated with them. - -Retrieving spectra ------------------- - -If there are spectra available for a :class:`~chemspipy.Compound`, you can retrieve them using the ``spectra`` -property:: - - >>> compound = cs.get_compound(2157) - >>> print(compound.spectra) - [Spectrum(2303), Spectrum(2304), Spectrum(3558), Spectrum(6639), Spectrum(6640), Spectrum(6641), Spectrum(6642), Spectrum(6643), Spectrum(6644), Spectrum(6645), Spectrum(8553), Spectrum(8554)] - - - - - -Alternatively, you can get spectra directly by using either the compound ChemSpider ID or the Spectrum ID:: - - >>> cs.get_spectrum(362) - Spectrum(362) - >>> cs.get_compound_spectra(71358) - [Spectrum(360), Spectrum(361), Spectrum(3172)] - -Spectrum metadata ------------------ - -Each :class:`~chemspipy.Spectrum` object has a number of properties:: - - >>> spectrum = cs.get_spectrum(3558) - >>> print(spectrum.spectrum_id) - 3558 - >>> print(spectrum.csid) - 2157 - >>> print(spectrum.spectrum_type) - HNMR - >>> print(spectrum.file_name) - Spectrum_315.jdx - >>> print(spectrum.comments) - collected by David Bulger at Oral Roberts University on a JEOL 300 MHz NMR with methanol as the solvent - >>> print(spectrum.original_url) - 
http://onschallenge.wikispaces.com/Exp072 - >>> print(spectrum.url) - http://www.chemspider.com/FilesHandler.ashx?type=blob&disp=1&id=3558 - -Spectrum data -------------- - -The data file for each spectrum is also available using the data property:: - - >>> spectra = cs.get_compound_spectra(2424) - >>> caffeine_ir = spectra[8] - >>> print(caffeine_ir.data) - -Typically this is in JCAMP-DX format. diff --git a/docs/source/index.rst b/docs/source/index.rst index 658cdbd..b6d548b 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -9,7 +9,7 @@ ChemSpiPy downloads, depiction and retrieval of chemical properties. Here's a quick peek:: >>> from chemspipy import ChemSpider - >>> cs = ChemSpider('') + >>> cs = ChemSpider('') >>> c1 = cs.get_compound(236) # Specify compound by ChemSpider ID >>> c2 = cs.search('benzene') # Search using name, SMILES, InChI, InChIKey, etc. @@ -21,7 +21,6 @@ Features - Get identifiers and calculated properties for any compound record in ChemSpider. - Download compound records as a MOL file with 2D or 3D coordinates. - Get a 2D compound depiction as a PNG image. -- Retrieve all available spectral information for a specific compound. - Complete interface to every endpoint of the ChemSpider Web APIs. - Supports Python versions 2.7 – 3.4. @@ -38,7 +37,6 @@ A step-by-step guide to getting started with ChemSpiPy. 
guide/gettingstarted guide/compound guide/searching - guide/spectra guide/misc guide/advanced guide/contributing diff --git a/environment.yml b/environment.yml index cab9429..41f6b9e 100644 --- a/environment.yml +++ b/environment.yml @@ -3,6 +3,5 @@ channels: - mcs07 - defaults dependencies: -- lxml=3.7.3 - requests=2.13.0 - six=1.10.0 diff --git a/examples/Spectra.ipynb b/examples/Spectra.ipynb deleted file mode 100644 index 3a1369d..0000000 --- a/examples/Spectra.ipynb +++ /dev/null @@ -1,588 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true, - "run_control": { - "frozen": false, - "read_only": false - } - }, - "source": [ - "# ChemSpiPy: Spectra\n", - "\n", - "Here are some examples of using ChemSpiPy to retrieve spectra from ChemSpider.\n", - "\n", - "First we'll start by setting up our ChemSpider session in the usual way:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true, - "run_control": { - "frozen": false, - "read_only": false - } - }, - "outputs": [], - "source": [ - "import os\n", - "from chemspipy import ChemSpider" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true, - "run_control": { - "frozen": false, - "read_only": false - } - }, - "outputs": [], - "source": [ - "# Security token is retrieved from environment variables\n", - "CHEMSPIDER_SECURITY_TOKEN = os.environ['CHEMSPIDER_SECURITY_TOKEN']\n", - "cs = ChemSpider(security_token=CHEMSPIDER_SECURITY_TOKEN)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true, - "run_control": { - "frozen": false, - "read_only": false - } - }, - "source": [ - "## Retrieving Spectra\n", - "\n", - "Get the spectra for a compound:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": false, - "deletable": 
true, - "editable": true, - "run_control": { - "frozen": false, - "read_only": false - } - }, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAJYAAACWCAYAAAA8AXHiAAAAAXNSR0IArs4c6QAAAARnQU1BAACx\njwv8YQUAAAAgY0hSTQAAeiYAAICEAAD6AAAAgOgAAHUwAADqYAAAOpgAABdwnLpRPAAACo1JREFU\neF7tnTGIVD0Qx/0EwU4RBEsbwcLCwkLEQrFQO68QxEq8QkFQwQPtBEEsLS1ULLUTC7G0tBJLsRIU\ntLRQEBRc95919svt5Xbf22QyeTMTWE6z702Smd/m5U0yyX+jcdriyTVQWgMAy5NroLQGtpQW6PJc\nA+Ep6GpwDXBowMHi0KrL9B7LGeDRgPdYPHo1L9XBMo8AjwIcLB69mpfqYJlHgEcBDhaPXs1LdbDM\nI8CjAAeLR6/mpTpY5hHgUYCDxaNX81IdLPMI8CjAweLRq3mpDpZ5BHgU4GDx6NW8VAfLPAI8CnCw\nePRqXqqDZR4BHgU4WDx6NS/VwTKPAI8CHCwevZqX6mCZR4BHAQ4Wj17NS3WwzCPAowAHi0ev5qU6\nWOYR4FGAg8WjV/NSHSzzCPAowMHi0at5qQ6WeQR4FOBg8ejVvFQTYP38+XO0uro6unnz5uj27dtF\nPzdu3AiyPa3XgAmwrl69in1WWT9ra2vOVqQB9WB9/fp1tH379tG2bdtG165dK9pbofe7dOlSAHbP\nnj2jb9++OVz/NKAerAsXLgTDX79+nc3oly9fZi+DrfJMglWD9e7du2lvgnEWV6JeET3j+/fvuYoZ\nlFzVYB08eDCA9eTJk6lRAFsJyPDYiyG6f/9+KOvMmTODAoCrsmrBevr0aTA04KIEwJB37969bH3i\n0QpZr1+/DrIA6969e0Pe8+fPs+UPXYBKsGBkDKZh5Ddv3kwNT3notXJTClwARTCX6BVz6yh5v0qw\nbt26teGxRHnnzp0rpu/Dhw+Hch48eDCVeerUqZCHR6PlpA6seCCNfyOl8koYPX45IFdDnEfllyhr\naDLUgYXBM3oM+JgopfJKGSrlzqDxF6eLo1T9ueSoAgvjKXJW0hgnlVdSmSlXA/J27twZHLMlxnMl\n61tLliqwyL2AgTWlVF5p5eItc9bVYN39oAYsDKBhXAyoKZF7IXY5lIYK8uK3UHI1IG///v1m3Q8q\nwIoNS4+eVB4HVCSTXA2AiR7Dlt0PKsCiwTIG0pQ43AuLwCT3Q+xqsOp+GDxYmFbBIBkfer3/+PHj\nhrxFUJT4nlwNGLhTXZCHusV5JcpqXcbgwSJXQtxLUF6JqZu+BqSVDvhLyaL7YdBgvXr1KgyOMUdH\n4xrM3c26HPrCkXM9uR9QBxrvIa/kdFJO/WrdO1iwABK5EuJJ31ReLWVSOeRqOHbs2LRoysOYy0Ia\nLFj0yDl+/PjUToAN45nY5SBhRFrpgF6KUux+sDCPOFiwTpw4ER55Kysr69jBY6eFlQWYO5xdqky9\naTz+kgC/RpmDBau2nyrXGLFPK1fWEO4fLFhQbi3Peq4h4x8BXjgspEGDBQOllh+3ZrjUXGJrdSxd\nn8GDxb16IVfh8eoHOG6tpMGDBUNhVSgG8pjGaS3VCD9rrc2ojwqwWu0VaoWfOViMGmhxHEPjv3hN\nPKMKmhKtoseCRuM3LwrJktR0KopHsj61y1YDFhTXiq8oFX5W27DS5akCC8rE/Jx0+BUCOaxHRasD\nSzr8iivUTLoH6lu+OrCgAMndX2gtWIuuj75w5FyvEiyp8KvWnbU5oPS9VyVYUMK89U+Yr8NYbNkP\neqXUm2eNULO+Bt5wPba13Lfv/w/TNpdqwdpsIWA8wM/ZPjIO3IDMQUyIHz06domPT
Y4P4KJ/I79w\nUgtW7H6IQ7KQj3VS6HFyPvGar0Es4bl7dwISIPrwYYIR/hJs+L5gUg0W9JQKtiiovyCKgiVK7mRT\nuo7THurffl5T+fg/9WAFC1UPFoWHYXPbZ8+eZfVSqR7u8ePHYePcOPysoH3KiaJxVUrivO+WrIF6\nsKCXQ4cOsW7FjbHa2bNnlzRBpdscrLKKxvhn9+7dAazz588X34775MmTQfa+seFaWGu/qfZosO6P\nwjKA1Qi1rzGOy9ZGPHgnuPDXB+/9VVsr1H4wYfTubugPUeoOzp38ZstrNYwey3fgbvnx48ekyu4g\nzYOrdqh9i2H0sX+t9sEGat8KJULtWwujT40v0YPB38b9oqESrNTufnn9X7e7500jdZNQ7qpFu0dz\nHyilDiwoTHJnl9QOOOVw6S5p3u7RNXZzVgcWDaIl90eQdj8s2j2au7cC/qrASu3u1/03Xu5K6XrM\n2z26VsSQKrCke4oYTameM7V8R2JJjxqwUrsWl+uD+kuSGOst2j26ZlicCrBaehuLEUzt7Ncf0e53\npKKDUgdWdZe4/JUqwGrNfxSbg8Y73L1Fyr1Qa0orhV93sDZbdrFZfqWpA6nAia6/Zcwjfvr0iX0q\nJbUximTEEA9YFSc7W52jWwcesz5Sm4/UntKa/aGVB6vi2mrp4NROvVYFfaQ2n5OOGCoPVsUFZRRO\nf+TIEfa5r04QpS5i1kdqvwqa0uI+nGqeTvqBRYvu47i02YX4HZfAwreybFwf7jtw4EBYublr1y7x\nvRrmQtdRH3ijW0YfODxh69atYQUrVrO2EjEkBhYduZYT24d779y50/ZZNR3Aov3pc3WB+6kXn417\nXLrHXfLGfmBBSbNpVnEdu368zS0T13flypXQQ50+fXr08uXLUJumB/Ad9QHXwDL6oHsePnwY9IKI\noR07dkwPiVqSi+zbyoPFvLaa3AuDOauGWR8xAbTf6cWLF7PByBVQHizUiPn1mhyi8dEmtb3cvRTP\nrA+qS+p86l71LHgxD1ioIKODNJ7CSZ3/HB/aVFBXeaIY9RFXrJWzqLuDlafW4nfTazYW9bVypFzx\nRi4hkA6IwlBB8hSMwYIFnaemLGpG5ixh9yq3tHAW9aDBaunY3irE9ChE+izqQYMVuxrQU1GqEf3c\nw8Yil0pPdw0erNT+7r4d9oRlSf/e4MGCAlNzY9Y38IdeJINoVYAFJdJsfnwsLvxceDuqFUAg8sxb\nUKjUIkg1YMUhTxTeZPmQJOJNatm2GrBi90MckEnTHIDMapIIolUFVjylEYP0588fq0xN24235keP\nHo1+/fpVRReqwILGKFLl8+fPEwVWmkqpYq2MQn7//v1PHath7RY+R8dzmC9evFgnlb6bLWqz/M2q\npA4sNPTt27eT9laa/M2wd9VbARJtawlQaP1XDJeDtcgkFdaaL6pCS9/fHesDIAGuL1++THsvyqO6\nOliLrNZxgd0iMVq+p97qAx0eMG4YAFtZWVl3fIuDtcjiHZYELxKh6fuuYyR6RNL19JceoV11onKM\nFRrvYHUalKcG6fE4zMHaqKHJUR6V9jXv+kuWuo56In8U5lqg4lrz3KrWuH9tba3o4D2OKErVX++j\n0N0N6+z9/fv34Lsq4W6AjDjN/h/f6QYLLXQH6ZQBwLU61kdpB6lNsGo8Z4yWQY9De49Cowav3Wzv\nsWprXGl5PsZSatgWmmX7rbAFCxitg/63QqOGlW62gyVtAaXlO1hKDSvdLAdL2gJKy3ewlBpWulkO\nlrQFlJbvYCk1rHSzHCxpCygt38FSaljpZjlY0hZQWr6DpdSw0s1ysKQtoLR8B0upYaWb5WBJW0Bp\n+Q6WUsNKN8vBkraA0vIdLKWGlW6WgyVtAaXlO1hKDSvdLAdL2gJKy3ewlBpWulkOlrQFlJbvYCk1\nrHSzHCxpCygt38FSaljpZv0FqtR95byLG1IAA
AAASUVORK5CYII=\n", - "text/plain": [ - "Compound(2157)" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "compound = cs.get_compound(2157)\n", - "compound" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true, - "run_control": { - "frozen": false, - "read_only": false - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2157\n", - "CC(=O)Oc1ccccc1C(=O)O\n", - "[Spectrum(2303), Spectrum(2304), Spectrum(3558), Spectrum(6639), Spectrum(6640), Spectrum(6641), Spectrum(6642), Spectrum(6643), Spectrum(6644), Spectrum(6645), Spectrum(8553), Spectrum(8554)]\n" - ] - } - ], - "source": [ - "print(compound.csid)\n", - "print(compound.smiles)\n", - "print(compound.spectra)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true, - "run_control": { - "frozen": false, - "read_only": false - } - }, - "source": [ - "Or get the spectra for all the results of a search:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true, - "run_control": { - "frozen": false, - "read_only": false - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Compound(71358)\n", - "[Spectrum(360), Spectrum(361), Spectrum(3172)]\n" - ] - } - ], - "source": [ - "for result in cs.search('glucose'):\n", - " if result.spectra:\n", - " print(result)\n", - " print(result.spectra)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true, - "run_control": { - "frozen": false, - "read_only": false - } - }, - "source": [ - "Alternatively, if you aren't interested in any of the other compound properties, you can get spectra directly by using either the compound ChemSpider ID or the Spectrum ID:" - ] - }, - { - "cell_type": "code", - 
"execution_count": 6, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true, - "run_control": { - "frozen": false, - "read_only": false - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "Spectrum(362)" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cs.get_spectrum(362)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true, - "run_control": { - "frozen": false, - "read_only": false - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "[Spectrum(360), Spectrum(361), Spectrum(3172)]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cs.get_compound_spectra(71358)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true, - "run_control": { - "frozen": false, - "read_only": false - } - }, - "source": [ - "## Spectrum metadata\n", - "\n", - "Each Spectrum object has a number of properties:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true, - "run_control": { - "frozen": false, - "read_only": false - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Spectrum ID: 3558\n", - "Compound ID: 2157\n", - "Spectrum type: HNMR\n", - "File name: Spectrum_315.jdx\n", - "Comments: collected by David Bulger at Oral Roberts University on a JEOL 300 MHz NMR with methanol as the solvent\n", - "Original URL: http://onschallenge.wikispaces.com/Exp072\n", - "URL: https://www.chemspider.com/FilesHandler.ashx?type=blob&disp=1&id=3558\n" - ] - } - ], - "source": [ - "spectrum = cs.get_spectrum(3558)\n", - "print('Spectrum ID: %s' % spectrum.spectrum_id)\n", - "print('Compound ID: %s' % spectrum.csid)\n", - "print('Spectrum type: %s' % spectrum.spectrum_type)\n", - "print('File name: 
%s' % spectrum.file_name)\n", - "print('Comments: %s' % spectrum.comments)\n", - "print('Original URL: %s' % spectrum.original_url)\n", - "print('URL: %s' % spectrum.url)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true, - "run_control": { - "frozen": false, - "read_only": false - } - }, - "source": [ - "## Spectrum data\n", - "\n", - "The data file for each spectrum is also available using the `data` property:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true, - "run_control": { - "frozen": false, - "read_only": false - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "##TITLE=Caffeine\r\n", - "##JCAMP-DX=4.24\r\n", - "##DATA TYPE=INFRARED SPECTRUM\r\n", - "##ORIGIN=Sadtler Research Labs Under US-EPA Contract\r\n", - "##OWNER=NIST Standard Reference Data Program\r\n", - "##CAS REGISTRY NO=58-08-2\r\n", - "##MOLFORM=C 8 H 10 N 4 O 2\r\n", - "##XUNITS=1/CM\r\n", - "##YUNITS=ABSORBANCE\r\n", - "##XFACTOR=1.0\r\n", - "##YFACTOR=0.000149628\r\n", - "##DELTAX=4.0\r\n", - "##FIRSTX=450.0\r\n", - "##LASTX=3966.0\r\n", - "##FIRSTY=0.004489\r\n", - "##MAXX=3966\r\n", - "##MINX=450\r\n", - "##MAXY=1.49628\r\n", - "##MINY=0\r\n", - "##NPOINTS=880\r\n", - "##XYDATA=(X++(Y..Y))\r\n", - "450.0 30 46 38 120 79 193 340 391 388 250\r\n", - "490.0 145 11 40 45 17 45 22 81 7 50\r\n", - "530.0 18 68 11 30 38 26 59 38 48 6\r\n", - "570.0 41 14 30 21 41 39 50 116 118 194\r\n", - "610.0 202 155 88 71 32 33 33 29 46 29\r\n", - "650.0 35 21 33 10 37 40 17 29 27 41\r\n", - "690.0 22 40 26 39 26 33 31 17 32 41\r\n", - "730.0 91 137 209 249 296 304 406 323 280 341\r\n", - "770.0 195 156 98 83 43 39 48 43 59 52\r\n", - "810.0 82 68 112 146 194 248 244 163 116 88\r\n", - "850.0 43 50 24 39 23 26 21 31 29 16\r\n", - "890.0 34 19 34 14 38 29 41 54 70 91\r\n", - "930.0 88 80 35 43 33 45 51 80 121 181\r\n", - "970.0 296 370 
411 354 264 147 88 79 113 180\r\n", - "1010.0 298 500 693 828 838 720 504 307 191 131\r\n", - "1050.0 121 111 131 112 118 103 98 82 82 84\r\n", - "1090.0 76 76 67 73 51 66 60 63 66 72\r\n", - "1130.0 82 81 106 95 100 86 92 76 79 97\r\n", - "1170.0 111 144 171 232 277 334 351 362 338 324\r\n", - "1210.0 345 355 393 440 598 847 1067 1105 984 691\r\n", - "1250.0 438 331 276 269 250 276 330 414 455 450\r\n", - "1290.0 405 294 209 145 160 146 191 249 347 446\r\n", - "1330.0 515 540 501 490 515 639 744 806 775 658\r\n", - "1370.0 553 491 484 469 502 493 509 503 541 618\r\n", - "1410.0 754 855 872 791 703 632 623 628 661 701\r\n", - "1450.0 908 1071 1289 1301 1341 1358 1419 1496 1374 1200\r\n", - "1490.0 950 741 606 484 423 406 356 447 485 698\r\n", - "1530.0 722 929 1101 1247 1443 1469 1326 1094 715 545\r\n", - "1570.0 426 369 323 302 324 404 528 758 942 1099\r\n", - "1610.0 1042 836 568 335 233 194 213 209 266 348\r\n", - "1650.0 528 621 896 1002 1308 1864 3319 5240 7104 10000\r\n", - "1690.0 7708 7086 4064 3234 2173 2605 3092 4394 5100 5090\r\n", - "1730.0 4747 2880 1794 976 646 395 403 307 314 270\r\n", - "1770.0 253 195 155 155 91 107 71 95 64 97\r\n", - "1810.0 87 82 84 74 80 47 71 45 54 35\r\n", - "1850.0 51 47 48 59 42 43 23 35 11 22\r\n", - "1890.0 16 21 18 19 27 13 25 14 21 8\r\n", - "1930.0 19 16 14 19 17 27 14 27 14 17\r\n", - "1970.0 10 17 15 11 21 11 19 10 22 9\r\n", - "2010.0 16 14 17 13 13 22 11 18 11 17\r\n", - "2050.0 7 14 9 5 6 5 10 1 11 5\r\n", - "2090.0 9 5 10 7 8 14 5 9 1 11\r\n", - "2130.0 6 8 5 6 6 6 10 3 11 11\r\n", - "2170.0 15 10 13 14 9 13 9 14 7 16\r\n", - "2210.0 10 13 10 11 10 10 16 14 18 16\r\n", - "2250.0 22 14 17 13 14 15 10 13 7 14\r\n", - "2290.0 10 15 7 15 11 5 13 5 10 2\r\n", - "2330.0 10 1 2 0 8 14 0 9 0 0\r\n", - "2370.0 0 1 5 11 17 18 19 17 19 10\r\n", - "2410.0 15 10 14 10 11 10 3 9 5 7\r\n", - "2450.0 5 9 7 9 10 10 13 8 16 13\r\n", - "2490.0 14 10 15 8 8 11 9 10 11 16\r\n", - "2530.0 11 15 13 16 17 17 22 19 19 22\r\n", - "2570.0 
27 18 21 21 23 21 19 18 14 21\r\n", - "2610.0 15 18 15 17 13 10 13 11 15 14\r\n", - "2650.0 21 14 17 18 21 21 18 18 15 16\r\n", - "2690.0 13 17 10 17 15 13 16 16 18 16\r\n", - "2730.0 18 16 24 22 27 24 24 27 22 26\r\n", - "2770.0 22 26 25 27 26 26 26 26 34 31\r\n", - "2810.0 41 45 49 48 54 54 49 56 56 62\r\n", - "2850.0 57 65 66 74 79 89 95 99 112 118\r\n", - "2890.0 132 138 161 165 173 179 181 186 188 210\r\n", - "2930.0 224 253 288 338 395 466 536 597 630 617\r\n", - "2970.0 567 484 399 316 256 215 188 156 140 132\r\n", - "3010.0 115 107 92 87 76 70 65 56 58 56\r\n", - "3050.0 56 51 50 51 54 50 50 55 49 50\r\n", - "3090.0 49 49 45 51 45 46 46 50 51 47\r\n", - "3130.0 45 32 32 25 23 23 21 18 16 18\r\n", - "3170.0 16 21 17 16 11 16 14 15 18 16\r\n", - "3210.0 11 9 13 15 18 16 18 15 16 23\r\n", - "3250.0 17 17 11 14 15 21 17 18 19 17\r\n", - "3290.0 19 18 16 13 21 15 10 11 15 21\r\n", - "3330.0 15 13 13 17 10 18 11 10 23 21\r\n", - "3370.0 26 19 26 32 48 58 66 70 68 48\r\n", - "3410.0 39 29 16 23 18 26 22 26 8 14\r\n", - "3450.0 16 3 18 19 21 13 15 18 21 18\r\n", - "3490.0 24 19 16 18 10 22 23 16 18 21\r\n", - "3530.0 17 14 16 8 26 21 23 24 21 22\r\n", - "3570.0 19 29 15 23 24 21 19 26 16 21\r\n", - "3610.0 16 29 27 14 31 21 22 18 22 23\r\n", - "3650.0 8 26 25 26 21 32 14 30 19 23\r\n", - "3690.0 31 21 31 23 35 25 25 25 37 33\r\n", - "3730.0 25 21 30 25 35 24 19 30 25 31\r\n", - "3770.0 24 23 38 30 26 24 35 27 21 32\r\n", - "3810.0 22 29 29 37 29 33 24 27 35 31\r\n", - "3850.0 47 32 26 26 23 17 31 21 18 41\r\n", - "3890.0 32 40 13 38 15 32 33 42 26 35\r\n", - "3930.0 34 30 23 25 39 35 54 31 38 34\r\n", - "##END=\r\n", - "\n" - ] - } - ], - "source": [ - "spectra = cs.get_compound_spectra(2424)\n", - "caffeine_ir = spectra[8]\n", - "print(caffeine_ir.data)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true, - "run_control": { - "frozen": false, - "read_only": false - } - }, - "source": [ - "We can do some quick-and-dirty 
parsing of the JCAMP-JDX file:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true, - "run_control": { - "frozen": false, - "read_only": false - } - }, - "outputs": [], - "source": [ - "# See https://github.com/nzhagen/jcamp/ for more robust parsing\n", - "\n", - "import numpy as np\n", - "\n", - "xstart, xnum, y, meta = [], [], [], {}\n", - "\n", - "for line in caffeine_ir.data.split('\\n'):\n", - " if line.startswith('##'):\n", - " key, val = line.split('=', 1)\n", - " meta[key.lstrip('#')] = val.strip()\n", - " else:\n", - " numbers = [float(n) for n in line.strip().split()]\n", - " if numbers:\n", - " xstart.append(numbers[0])\n", - " xnum.append(len(numbers) - 1)\n", - " for number in numbers[1:]:\n", - " y.append(float(number))\n", - "xstart.append(float(meta['LASTX']))\n", - "x = np.array([])\n", - "for n in range(len(xnum)):\n", - " x = np.append(x, np.linspace(xstart[n], xstart[n + 1], xnum[n])) \n", - "y = np.array(y)\n", - "y = y * float(meta['YFACTOR'])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true, - "run_control": { - "frozen": false, - "read_only": false - } - }, - "source": [ - "Then we can use matplotlib to plot it:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true, - "run_control": { - "frozen": false, - "read_only": false - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "[]" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYcAAAEWCAYAAACNJFuYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XmcXHWZ7/HPU0tv6e5s3UnIHrKAbCI0m6iAjLK44IyM\ngiO4Xl54gXsdrgpevTpXZ+64jxsCkUF0dEBmRMwoO4iAECCRAAlJyEJCOlt31t7SS1U9949zqrq6\nq7u6O+nqhfq+X696pc5Sp5466a6nf7u5OyIiItkiox2AiIiMPUoOIiKSQ8lBRERyKDmIiEgOJQcR\nEcmh5CAiIjmUHEQGyczONrMNZtZiZh8ws+lm9oSZNZvZdwd47dzwddGRilfkSCg5SNExs4+Y2Yrw\ny3qnmd1vZm8bxEu/BvzY3Svd/V7gKmAPUO3u/yvfC9399fB1yWH4CCIFp+QgRcXMrge+D/w/YDow\nF7gJeP8gXj4PWNNr+xXXSFJ5A1JykKJhZhMJ/vq/xt3vcfdWd+9y99+7+xfM7HQze8bMDoQlih+b\nWUn42k3A0cB/hSWOO4GPAV8It//KzCJmdqOZbTKzvWZ2t5lNCV8/38zczGLh9uNm9nUz+3NYLfWQ\nmdVkxXqmmT0dxvKimZ07wrdLipySgxSTs4Ay4Lf9HE8Cfw/UhOeeD/x3AHdfCLwOvC+sHroc+BXw\nrXD7EeA64APAOcBMYD9BqaQ/HwE+AUwDSoDPAZjZLOAPwD8CU8L9vzGz2sP72CJDp+QgxWQqsMfd\nE30ddPeV7r7c3RPuvgW4leCLfrCuBr7k7vXu3gH8A3BpurTQh5+5+6vufgi4Gzg53P9R4D53v8/d\nU+7+MLACuHgIsYgckf5+aEXeiPYCNWYW6ytBmNkS4HtAHVBB8PuxcgjXnwf81sxSWfuSBG0bfdmV\n9bwNqMy6zt+a2fuyjseBPw4hFpEjopKDFJNngA6Cqp++3AysAxa7ezXwvwEbwvW3ARe5+6SsR5m7\nbx9inNuAf+t1nQnu/o0hXkfksCk5SNFw94PAV4CbwnEKFWYWN7OLzOxbQBXQBLSY2bHAZ4b4FrcA\n/2Rm8wDMrNbMLjmMUH8JvM/MLjCzqJmVmdm5Zjb7MK4lcliUHKSouPt3geuBLwONBH+lXwvcS9Dw\n+xGgGfgp8OshXv4HwDLgITNrBpYDZxxGjNuASwhKLukYP49+X2UEmbpoi4hIb/pLREREcig5iIhI\nDiUHERHJoeQgIiI5xt0guJqaGp8/f/5ohyEiMq6sXLlyj7sPegqWcZcc5s+fz4oVK0Y7DBGRccXM\ntg7lfFUriYhIDiUHERHJoeQgIiI5lBxERCSHkoOIiORQchARkRxKDiIikkPJQUREcig5iIhIDiUH\nERHJoeQgIiI5lBxEJOO2Jzez7MUdox2GjAHjbuI9ESmcf/zDWgDe/+aZoxyJjDaVHEREJIeSg4iI\n5FByEBGRHEoOIiKSQ8lBRERyKDmIiEgOJQcREcmh5CAiIjmUHEREJEfBkoOZ3W5mDWa2eoDzTjOz\nhJldWqhYRERkaApZcrgDuDDfCWYWBb4JPFTAOEREZIgKlhzc/Qlg3wCnXQf8BmgoVBwiIjJ0o9bm\nYGazgL8Gbh7EuVeZ2QozW9HY2Fj44EREitxoNkh/H7jB3VMDnejuS929zt3ramtrRyA0EZHiNppT\ndtcBd5kZQA1wsZkl3P3eUYxJREQYxeTg7gvSz83sDuD3SgwiImNDwZKDmd0JnAvUmFk98FUgDuDu\ntxTqfUVE5MgVLDm4++VDOPfjhYpDRESGTiOkRUQkh5KDiIjkUHIQEZEcSg4iIpJDyUFERHIoOYiI\nSA4lBxERyaHkICIiOZQcREQkh5KDiIjkUHIQEZEcSg4iIpJDyUFERHIoOYiISA4lBxERyaHkICIi\nOZQcREQkh5KDiIjkKFhyMLPbzazBzFb3c/zvzOwlM3vZzJ42s
zcXKhYRERmaQpYc7gAuzHP8NeAc\ndz8R+DqwtICxiIjIEMQKdWF3f8LM5uc5/nTW5nJgdqFiERGRoRkrbQ6fAu7v76CZXWVmK8xsRWNj\n4wiGJSJSnEY9OZjZeQTJ4Yb+znH3pe5e5+51tbW1IxeciEiRKli10mCY2UnAbcBF7r53NGMREZFu\no1ZyMLO5wD3AFe7+6mjFISIiuQpWcjCzO4FzgRozqwe+CsQB3P0W4CvAVOAnZgaQcPe6QsUjIiKD\nV8jeSpcPcPzTwKcL9f4icvjcnfCPNilSo94gLSJjj/toRyCjTclBRHKklB2KnpKDiORQahAlBxHJ\noZKDKDmISA7lBlFyEBEg6KGUppKDKDmICABdyezkMIqByJig5CAiAHQkkpnnKjmIkoOIANDelco8\nV24QJQcRAaC9q7vk4MoORU/JQUQA6Eh0lxzU5iBKDiIC9Cw5qM1BlBxEBOhdclByKHZKDiIC9Oyt\npNwgSg4iAvRMCEk1OhQ9JQcRAZQcpCclBxEBerYzqM1B+k0OZvbOrOcLeh37m0IGJSIjLzshqOQg\n+UoO38l6/ptex7480IXN7HYzazCz1f0cNzP7oZltNLOXzOyUQcQrIgWSXVhQbpB8ycH6ed7Xdl/u\nAC7Mc/wiYHH4uAq4eRDXFJECcVStJN3yJQfv53lf27kvdn8C2JfnlEuAX3hgOTDJzI4a6LoiUhip\n7mEOqlYSYnmOHW1mywhKCennhNsL+n/ZoM0CtmVt14f7dg7DtUVkiNTmINnyJYdLsp5/p9ex3tsF\nZWZXEVQ9MXfu3JF8a5GikerR5qDkUOzyJYdXgFp3fyV7p5kdBzQOw3tvB+Zkbc8O9+Vw96XAUoC6\nujr91IoUhBb7kW752hx+BNT0sX8q8INheO9lwJVhr6UzgYPuriolkVGS0iA4yZKv5LAobFTuwd2f\nNLMBexaZ2Z3AuUCNmdUDXwXi4TVuAe4DLgY2Am3AJ4YcvYgMGw2Ck2z5kkNVnmPxgS7s7pcPcNyB\nawa6joiMDJUcJFu+aqWNZnZx751mdhGwuXAhichoyF79LaXkUPTylRw+C/zBzD4ErAz31QFnAe8t\ndGAiMrJ6TLynaqWi12/Jwd03ACcCfwLmh48/ASe5+6sjEZyIjJyebQ6jGIiMCflKDrh7B/CzEYpF\nREZRj3EOyg5Fr9/kYGav0f80Ge7uCwsTkoiMBtcIacmSr+RQ12s7AnwI+BzwQsEiEpFRoTYHydZv\ncnD3vQBmFgGuAD4PrALe03vUtIiMfyn1VpIs+aqV4sAngb8HngI+4O4bRyowERlZPedWGr04ZGzI\nV630GpAAvg+8DpxkZielD7r7PQWOTURGUPZ6DqpWknzJ4RGCBuk3h49sDig5iLyBqLeSZMvX5vDx\n/o6Z2fSCRCMio0a9lSRbvukzejCzSWb2KTN7FPVWEnnDyS4tqFpJ8g6CM7NygkV/PgK8hWAyvg8A\nObO1isj41mNdYCWHotdvycHM/h14FXgXwdoO84H97v64u6f6e52IjE89Z2UdvThkbMhXrXQcsB9Y\nC6x19yT9j5gWkXGuR5uDSg5FL9/EeycTjIiuAh4xs6eAKjVGi7wxaRCcZMvbIO3u69z9q+5+LPA/\ngV8Az5vZ0yMSnYiMmB7TZyg5FL28DdLZ3H0lsNLMPge8vXAhicho6DlCWsmh2OWbPqMM+DBBu8N/\nAV8gSAqbgK+PSHQiMmK0hrRky1et9Avg3QTzKz0OzAV+DDQDdwzm4mZ2oZmtN7ONZnZjH8cnmtl/\nmdmLZrbGzD4x1A8gIsOj5yC4UQxExoR81UrHufsJZhYD6t39nHD/A2b24kAXNrMocBNBV9h6graK\nZb1mdL0GeMXd32dmtcB6M/uVu3ce3scRkcPlqlaSLPlKDp0A7p4AdvQ6lhzEtU8HNrr75vDL/i6C\nAXXZnKAHlAGVwD6Cyf5EZ
ISl1CAtWfKVHGab2Q8By3pOuD1rENeeBWzL2q4Hzuh1zo+BZQTJpwr4\ncF8D7MzsKuAqgLlz5w7irUVkqFKaW0my5EsOn896vqLXsd7bh+sCggWE3gksBB42syfdvSn7JHdf\nCiwFqKur00+tSAGk2xzMNH2G5J+V9ef9HTOzwfz5vh2Yk7U9O9yX7RPANzz4SdwYrlt9LPDcIK4v\nIsPIgYiBmWmEtOQfBGdmZ5nZpWY2Ldw+KZxz6c+DuPbzwGIzW2BmJcBlBFVI2V4Hzg+vPR04Btg8\nxM8gIsMg5U7EjKiZeitJ3on3vg3cDnwQ+IOZ/SPwEPAssHigC4cN2dcCDxLMz3S3u68xs6vN7Orw\ntK8DbzWzl4FHgRvcfc+RfCARGbz7X97J+370FNv2tZFyiJgRiai3kuRvc3gP8BZ3bzezyQSNyye4\n+5bBXtzd7wPu67XvlqznOwjGUojIKPiXR17l1d0trN3ZFCQEIyw5KDkUu3zVSu3u3g7g7vuBDUNJ\nDCIy9pXEgq+AzmQK96DNIRIxlRwkb8nhaDNLtxEYsCBrG3d/f0EjE5GCK41FAejoSuFhm0PETLOy\nSt7k0HvA2ncKGYiIjLySaHfJId3mEI2ot5Lk78r6J8hMwLco3L0xXdUkIuNfaTxIDh1dSVLuGEGC\nUG8lyddbKWZm3yIY2fxzgon4tpnZt8wsPlIBikjhZJcc3IMBcNGIFvuR/A3S3wamAAvc/VR3P4Vg\nFPMkVMUk8oZQGu/V5hAJxzmoWqno5UsO7wX+m7s3p3eE01p8Bri40IGJSOFFLfg3u83BTL2VJH9y\ncO9jghV3TxKMtBeRcS4Z/iZ3JFKZNodoRL2VJH9yeMXMruy908w+CqwrXEgiMlLSSaAzEZQcLNNb\naZQDk1GXryvrNcA9ZvZJYGW4rw4oB/660IGJSOGlR0J3JIIlWiIWPFRykHxdWbcDZ5jZO4Hjw933\nufujIxKZiBRcuuG5I5EiHol0j3NQcih6/SYHM6sAutz9MeAxMzsGuNjMqt39tyMWoYgUTCrVnRyi\ncQtLDmqQlvxtDg8A8wHMbBHwDHA0cK2Z/XPhQxORQksngew2ByUHgfzJYbK7bwiffwy4092vAy4i\n6OYqIuNcuuG5M5HC8XAQnKqVZICurFnP3wk8DODunYAG14u8AaSrlZIpD2dlNSLqrSTk7630kpl9\nh2Bpz0UEC/1gZpNGIjARKbx0CSGRSoUrwQUD49RbSfKVHP4bsIeg3eHd7t4W7j8OTZ8h8oaQ7q2U\nTHmPcQ5qc5B8XVkPAd/oY//TwNOFDEpERkYqU3Jw3IM2B9NKcEL+WVkXm9nPzOx7ZjbbzO43s1Yz\ne9HM6gZzcTO70MzWm9lGM7uxn3PONbNVZrbGzP50uB9ERIYuu+SQbnOIqreSkL9a6WcE3Vd3AM8C\ntwNTgc8BNw10YTOLhuddRFAVdbmZHdfrnEnAT4D3u/vxwN8exmcQkcOULiAkkt5jbiWVHCRfcqh0\n96Xu/h3gkLv/h7u3u/vDQOkgrn06weJAm8MeTneRu7rcR4B73P11AHdvOIzPICKHKbu3UiLlxKIR\n9VYSIH9yyO6u2pTnWH9mAduytuvDfdmWAJPN7HEzW9nXRH8AZnaVma0wsxWNjY2DeGsRGYx0CaEr\nlaIzkaIkauqtJED+rqzHmtlLgAELw+eE20cP4/ufCpxPMKHfM2a23N1fzT7J3ZcCSwHq6ur0Uysy\nTFJZbQ5dyRTxaEQjpAXInxze1Mc+A+YAXxzEtbeH56bNDvdlqwf2unsr0GpmTwBvBl5FRAouM84h\nGSSHklhYraSSQ9Hrt1rJ3bemHwTLhV4LPA58DbhvENd+HlhsZgvMrAS4DFjW65zfAW8L16uuAM4A\n1g79Y4jI4cjurdSZCEoO6q0kkH9W1iXA5eFjD/BrwNz9vMFc2N0TZnYt8CAQBW539zVmdnV
4/BZ3\nX2tmDwAvEbRj3Obuq4/oE4nIoGWPc+hMOiWxiHorCZC/Wmkd8CTwXnffCGBmfz+Ui7v7ffQqZbj7\nLb22vw18eyjXFZHh0V1ySAXVSmFvJeUGyddb6W+AncAfzeynZnY+QZuDiLxBpMJ+h4lMtVLYW0nV\nSkUvX5vDve5+GXAs8Efgs8A0M7vZzN49UgGKSOH07q1UEgt6K6laSfKVHABw91Z3/3d3fx9Bj6MX\ngBsKHpmIFFwyu80hbJCOREzjHGTg5JDN3feHo6bPL1RAIjJy0iWHRDJFZ7K7t1JS1UpFb0jJQUTe\nWNIlh5QH60iXZsY5jHJgMuqUHESKWHbbQmacQwRcJYeip+QgUsR6Ny1kGqSVHIqekoNIEevdZTU9\nt5J6K4mSg0gRS6ackmj310A8asQ0QlpQchApaikPpsxI0/QZkqbkIFLEkqmeySFokNbEe6LkIFK0\n3J2U06NaKRYxohEjoZJD0VNyEClS6e//0nhuycFdq8EVOyUHkSKVbleoKOmenDkWNkgD6s5a5JQc\nRIpUul2hqjQrOUSCEdKAGqWLnJKDSJFKJ4fKsu7kEM8uOSg5FDUlB5Eilf7yryrLrlaKEI0EXwtq\nlC5uSg4iRaqv5BCPBIv9ZB+X4lTQ5GBmF5rZejPbaGY35jnvNDNLmNmlhYxHRLp1hlOvTiyPZ/bF\nohGiYddWJYfiVrDkYGZR4CbgIuA44HIzO66f874JPFSoWEQkVyIZfPlPKi/J7IupzUFChSw5nA5s\ndPfN7t4J3AVc0sd51wG/ARoKGIuI9NIVlhyqy7OrlYLFfgASKS3qUMwKmRxmAduytuvDfRlmNgv4\na+DmfBcys6vMbIWZrWhsbBz2QEWKUVdYcijvNc4hGpYclBuK22g3SH8fuMHd8/4YhkuT1rl7XW1t\n7QiFJvLGli45xMNkAMEI6VhUJQeB2MCnHLbtwJys7dnhvmx1wF0WFGNrgIvNLOHu9xYwLhGhu80h\n3mvK7oipzUEKmxyeBxab2QKCpHAZ8JHsE9x9Qfq5md0B/F6JQWRkpHsrpUsKwfOIps8QoIDJwd0T\nZnYt8CAQBW539zVmdnV4/JZCvbeIDCxdrdRjsZ+IZabPSJcspDgVsuSAu98H3NdrX59Jwd0/XshY\nRKSn9Jd/LHvK7uySg6qVitpoN0iLyCjJNEj3qFbq7q2kaqXipuQgUqS6k0N2tVKkOzmo5FDUlBxE\nilRXH72VepQclByKmpKDSJFKj2PoUa0UMWIRza0kSg4iRasz0V2tdM6SYHCpmZEuSGjK7uJW0N5K\nIjJ2ZVcr3XrFqexv6wTIrOegNaSLm0oOIkUqXa0Uixpl8ShHTSwPttPjHJQcipqSg0iRyq5WytY9\nfYbmVipmSg4iRSpdMijplRzS02kklRuKmpKDSJHqSuTOrQTdJQfNylrclBxEilRXWHKIRXomh5jm\nVhKUHESKVlcyRTxqhFPmZ5TGg6+FTtUrFTUlB5Ei1ZVI5TRGA5TFogC0dyVHOiQZQ5QcRIpUIuU5\nVUoA5SVBcjik5FDUlBxEilRnMkVJLPcroDTc196laqVipuQgUqQSyb6rlcyMsnhE1UpFTslBpEh1\nJT2nG2taeTyq5FDklBxEilRXPyUHgLJ4lEOdR54clm/ey86Dh474OjLyCpoczOxCM1tvZhvN7MY+\njv+dmb1kZi+b2dNm9uZCxiMi3bqSKeKRvr8CyuNR2hODa3O46Y8buf7Xq3Im6kumnMuWLueCf3ni\niGOVkVew5GBmUeAm4CLgOOByMzuu12mvAee4+4nA14GlhYpHRHrqSjrxWN/VSkMpOXz7wfXc88J2\nXth2oMf+rXtbAWhqT2RWnZPxo5Alh9OBje6+2d07gbuAS7JPcPen3X1/uLkcmF3AeEQkS1cylVnY\np7eyeISOxMDJwbPWmd5xoGf10au7mzPPH13bcJhRymg
pZHKYBWzL2q4P9/XnU8D9BYxHRLJ0JVM5\nk+6llZcMruTQ2NyReb67qb3Hsdf3tQFQXRbjy/e+zJ6WDmT8GBMN0mZ2HkFyuKGf41eZ2QozW9HY\n2DiywYm8QSXy9FaqKInR0pEY8Bq7shLCroM9k8OOA+1Ulsb4+SdPZ09LJ//vvrVHFrCMqEImh+3A\nnKzt2eG+HszsJOA24BJ339vXhdx9qbvXuXtdbW1tQYIVKTb5eitNLI/TdKhrwGvsbenMPN/Rq1dS\n/f5DzJpUzlvmTuaMBVN4bU/rkQUsI6qQyeF5YLGZLTCzEuAyYFn2CWY2F7gHuMLdXy1gLCLSS1fS\n8yaHA4NIDo1hVdHC2gls3dvW49je1g5qq0oBmDmpnIYmVSuNJwVLDu6eAK4FHgTWAne7+xozu9rM\nrg5P+wowFfiJma0ysxWFikdEekrPytqXSeVx2jqTmdXi0lZu3dejETrdjlA3bwpb9rT2OOYOkXDu\nptqqUhpbOnocP1K/f2kH+1s7Bz5RDktB2xzc/T53X+LuC939n8J9t7j7LeHzT7v7ZHc/OXzUFTIe\nEemWt1qpIg7AwazSwzOb9vLBm59h6RObM/v2tXRSURLl2KOqaO1Msq+fL+tpVaV0JlI0HRq4HWMw\ntu1r49p/f4EP3frMsFxPco2JBmkRGXn5ps+YWJ6bHA51BV/sT23ck9nX3J6guizOjOoyABqa+646\nmjExON67XeJwrdsVdJPd0NAyLNeTXEoOIkUq3wjpqROCtoLs7qld4cpwrVm9mFo6ElSWxZhWnXt+\nttmTK4CgkXo4bGjoHkMxnFVV0k3JQaRItYZf7H059qgqAF7Z0ZTZly4VdGS1QzR3JKgsjTGtKiw5\nZDU6J1Ip0gWTOZPLgaA6aDhkj69oah+eqirpSclBpAh1JlK0diaZFFYf9VZTWcr06lLW7spKDmGp\nIHtt6Zb2Lqr6KTk0NndQUxnsnzKhhFjEMo3S33xgHY+8svuw4z/Y1l3dtVeD6wpCyUGkCB04FDQc\nT6roOzkATK8u6zGOIf3Fv7e1+8u4pSPBhJIYpbEokyvi7G4OzkmmnD0tnUwP2yLMjEi4VvWmxhZu\nfnwTn/7Fih6T9R1o62TFln2DjL87OexpGZ4eS9ffvYp3fe9PParNipmSg0gRSv/lPbGipN9zJlWU\ncKCt+4s3nSj2tnZmJtJrae+umppeXcbusFppb2sHyZRnShTZnt7UPdZ1857uBuX3/fgpLr3lmR5V\nRv050NbJ1AlB7MMxLYe7c89ftrOhoYUX6w8M/IIioOQgUoT2h8lhcp6Sw+SKeOY8CJICBOMX0l/I\n6TYHgGnVZZmqp3TbQ7otItufs3o7bdgdJIe2zgTb9gWN1Y+tG7i66cChLhZOqwziGobk0Jh1jc2N\nGskNSg4iRSldIphU3n/JYXJFCfuzSw6tHVSFiWB3UwcdiSQtHYlM1dS0qtJMyWFTY/ClP7+mIvP6\n0liELXtaWb55H6fMnQRAMuxp9Oru7hLE6u3d7Rz9aTrUxYKpEzCDxmGoVsoe3a3kEFBykEF5fss+\nPv3zFVrV6w0iXWefr81hckUJzVlrMexr6eRNR1UDQeP0zgPtuMOsSUFPpOnVwSjoZMrZ1NBCxGBB\nzYTM9T561jzuX72Lg4e6eOvCmh7vlV77YVJFnLU7B04OiZRTFo8wuaJkWEoO2Q3p+hkPKDnIoPzo\nsY08snY333tIU2C9EWRKDnmSw8xJQZXQ1r2ttHclae1M8qawi+vu5g62h+s3zJqcTg5lJFPO3tYO\nNjS0MG/qBEpj0cz1rjlvEX/1pum8+7jpfPDUYOmW9Eyu6aEKJ86ayLpdzYMeu1BTWdLvwLuhSFeD\nvemo6h4zzRYzJQcZlHQj4cNrd5PQql7j3oG2LqIRy7QX9OUtcycD8JetBzLTYiyeXkXEgpJDenGf\ndMkhe6zDxoYWFtZ
W9rheZWmM2z5Wx9Ir61hQM4Gjaydw6xObWbuzKTMS+8RZE2npSHDmPz/KLX/a\nNODnmDWpnO3DMLBud3M7JdEIx86oYvdBJQdQcpBB2NvSwdqdTRw/s5oDbV08v2X/wC+SMe3AoS4m\nlccx63v6DIB5U4P2gt1N7ZnkUFtVysxJ5Wzd25YZMV0WD0oH08OeSa/taWXL3lYWTavs46rdPnL6\nXBqbO7joB0/y1WVrmDe1gktOnhW+ZwffuH9d5tz2riSbG3Onypg7pYJt+9qOeJR0Q1Mwg+yMiWU0\nNHfkrIddjJQcZEDprof/++I3URKL8PARDF6SseG1xtbM5Hr9yU4b6d5JUyeUsHhaJa/ubsbp+QV6\n7IxqaipLue7OF+hKOnXzJue9/ifPXsBP/u4Ujp1RxbEzqvjWB0/imBlV3PSRU5g9uZySWCTzpX/9\n3at453f/xNNhT6d0LpgzpYLmjsQRVy3tbmpnenUpM6rLSKQ80zNr1bYDw77+dWNzB+1dg1ufezQp\nOciAntqwh6qyGGcePZWzF07loVd26S+rcey1Pa08s3kv/ZcZcqVLDlMmlLBkRhWbG1t57rV9VJRE\nqS4Lkkx5SZR3HhssxjVrUjlvX1LT7/UgmM774hOP4oHPvoMHPvsOzjh6KgDvOekorjlvEZ2JVKZ7\na/rfXz67lYNtXRw81MWMieWce8w0zOD2p17judf2HXaVZ5AcyjKD9nY3tbNmx0E+cNOf+T/3rj6s\na/alK5nitH96hOvvXjVs1+xLS0eC9/7oSb54z0uHfQ0lB8nL3Xlq4x7eunAq0Yjx/pNnUr//EMtf\n63PRPhkHbg3r8v/H+YsHdX5XyjPJYWplKafNm0JnMsXvVu3gkpNnUl7S3eh8alha+Nolx/dojB6q\nk2ZPBGBV/QGSKc9UKT27eR8vbz8IwAmzqlk0rZILj5/BrU9s5kO3PsPlP12eswbFYDQ0dzC9uiwz\ne+yug+38/qWdAPzmL/XD8sfQ3pYOFn/pfgDue3nXEV8vn1d2NLF6exN3Prct0zY0VEoOktfWvW1s\nP3CIty0K/gq86ISjqCqL8avlr49yZHI4Nja0cNfz2yiJRXj/m2fmPTcWjbCwdgIv1x9gU2MLE8vj\nVJfFOHPhVEpjwVfHlWfN7/Gavz11Dvf9j7dz/pumH1GcS6ZXURaP8OK2A2xsaKG1M8k7ltSyt7WT\nT9zxHCWxCG+eE4yV+L/vP57F0yqpqSzh+S37eXx9w5Deq7m9i+b2BNOqSzPtJrub23nutWAqj66k\nD8vU4KuYgzVUAAAOQElEQVR39OyieyRzSw0ku31mzY6Buwb3RclB8rp/dfAXztlhciiLR/nYWfP5\nw8s7eWrDnnwvlTHogdXBX8P//ukz8jZGp711YQ1/XN/IslU7OGn2RMyCHk4P/f07+P11b8uMe0iL\nRIzjZlb3c7XBi0cjnDBzIs9v2ceL24LpLL5wwTFUlsboSjofO2tepjprWnUZD19/Ds988XxqKkv5\nj5X1Q3qv9AC8JdOqKAtLO4c6k7y8/SDHzgi67ja1D7xkatqvnt3K8V95ICdJpXuGnThrIrMnl/Od\nh9YPqUQylHO3ZA3qS48hGSolB+nXul1N/PixDbxjSS1HZ3VLvPadi6ipLOXqX67kS799mXtf2M6h\nzrHfwFbsupIp/nNlPSfOmkjd/CmDes0H3hKULlo7k5yzpDazf97UCZwwa2JB4ky74PgZvFR/kG89\nuJ7qshjHHVXNsmvP5t8+dTpfes9xOefHoxEuPXU2j61ryEzjMRjpQXfpacohaJfpTKQ4fmbwGYfS\nGeo3K+tp7Uzy/Uc29OhF1RE2Qn/ugmO4/l1LWLeredDVs60dCY75P/dz25ObBz6ZYNbdqtIYkyri\nvLZHyUGG0dOb9nDlvz7HhNIY3/rgST2OlcWj/OCykzlhVjXLXtzBZ3+9ivf+6MlMM
VzGnlTK+cTP\nnmfL3jauOGveoF936rwp/OwTp/H5C47ho2cO/nXDIT1Qbk9LB29fXEskYhxdW8nbF9f2+5rLTpsD\n0KMbbH9/uDQ0tfPDRzfw5XtXM726lFmTyqkojVIWj2TaG845Jniv9bsGVzXT0NTOS/UHqS6LsWrb\ngR7dvu9esY3K0hgnzZrIhSfMoCQa4dG1g6sCW/biDrqSzj/+Ye2g2lTSPckW1Vbyq2df7zHF+WD1\nPwJmGJjZhcAPgChwm7t/o9dxC49fDLQBH3f3vwx03VTKMwuXj3fuzuPrGymNRThtwZR+1/QtlI5E\n9y/OvtZONuxu4cE1u/jVs0Gbwu+ve1umkS7b2YtqOHtRDamU85u/1PP5/3yJD936DG9bVMN7TjqK\nE2dNZOakckpjESbkGWglw8fd2d/WxeSKOOt2NfOfK+vZ1NjC4+sbM+ecMncSl54ye0jXPe+YaZx3\nzLThDndAUyaU8JO/O4XH1jVw7XmLBvWa+TUTuOodR3Pz45tYte0AOw4eoiOR4uITj+KjZ8zj9X2t\n/HFdI+2JJH/euCczVuOSk2dhZpTGopyzpJYH1+xmckWci0+YwU9mVPFvy7dy+elzue2p13jh9f18\n/ZITmFad+3vx3YdexYG7rz6Ly5cu50O3PsMj159DW2eCP7y8kw+fNofJ4Wyy5x5Tyz1/qeeGC4+l\nJNb/731XMsWvn9+W2f71im1cESbqts4EpbEo2/cfYsveVrbubeWKs+az62A7tVWlmc4CX1k29B5X\nVqgl9swsCrwKvAuoB54HLnf3V7LOuRi4jiA5nAH8wN3PyHfd6jnHeM1Hv8eFJ8zgUGeSzmSKfa1d\nrN3ZRHk8ysfeOp+qshibG1t5YPVOPnrWPHYfbGfrvjaOnVFNLGIkUinq9x/i+JkTae9KknJn6942\n5k+tIB6N0NqZJJFMsb+ti0XTKolFjOb2LhZPr+Ll+oPMmVLO1MpS3IPBOR2JFBNKo7gHA4EOdSWJ\nmJFIBlMWt3YkqSqLcfBQFxEL5oXZdbCdeDTCY+sa+NOrwS/v4mmVXHnWPKrK4kyrKiXpzp6WDmZP\nrqClPYHjdCacZMpJuVNZGmPnwXZmTS6nK5FiUkWcts4kZlASjdDQ3EFpLMLBQ11s2dvKY+saaWzu\nIB41msIRqa19/FVVFo/w7uNm8Nm/WtyjOimfbfva+OXyrTy4ZleP+k4IZvdsbk9wzXmLiEcNMyMe\nNRIpJ2LGzEnlTCyPk3IHB7NgPYCm9i6qy+I0NneweHoVpbEIr+9ro6U9QVVZjL2tncSjRlk8Smks\nQmk8SmtHgvlTJzChNEZHIklbZ5J4JEJlWYxYxJhQGgsWum/voqUjwcbdLVSWxZg6oYQ3zaymNBah\nsbmDiBkTy+PsamrH3Tl4KEHToS5qKktpD6/7+r42Zk8up7osllmvIGIQMcMsqOYoiwV/iUYiRsqD\nXj+lsSgRg2jEcA/+XbXtQGZ7UkWcVdsOMKO6jJJYhHg0wrZ9bTS2dHB0TfDZNje2EIkYVWVxdh9s\n58X6A6zb1TzgdNd/vvGdmRHNb1RN7V1c+a/PsbGhhbfMncST/bSNffCU2bz3zUdRXRbn5DmTiIZ/\ncK7d2cRnfrmSz5y7kA+fNpf7X97JZ371F6ZMKMn02qooiTKhNEZ1+Hv94dPm8OjaBtbtauacJbX8\n/JOn86NHN/Ddh7unm5lWVco9//2tmSVTf/HMFr7yuzVcdMIMOhIpHlvXQE1lCXOnVNDQ3EFJNMLm\nPcFgwo0NLXztkuP54aMbae1IcPzMalZszT8Y9cyjp3D1OQv5+M+eB2DrN9+70t3rBnsfC5kczgL+\nwd0vCLe/CODu/5x1zq3A4+5+Z7i9HjjX3Xf2d90Js5Z47RX/QsRgsO0zE0qiPb4Eh/LakXDZaXM4\ne1EN331ofc4X63CJRYxZk8uZO6WCaVVlmS/K8
pLgC8wMpkwoZX5NBSfOmkhVWf4BUv1xD3p2rN/V\nzL7WTto6kzyydjcbdjdrOccCi0eNc5bU8uSGPRxdW8nZC6fyhQuPpbUjQTRqmQbcYuPubNnbxkv1\nB2hs7uC9J83sszSczx1/fo3/WFlPWTzK/3rXEn742AaWb86tRi2LR/jplXW8fXEt7s4vl2/lL68f\nYH9bJ1efs5Azw7EcEDQUX7Z0efgHSP/vPb26lLcurOFfPnwyz2zay1eXre4xi21/rjxrHl+75AQe\nXLOLr/xuNc996V1jJjlcClzo7p8Ot68AznD3a7PO+T3wDXd/Ktx+FLjB3Vf0utZVwFXh5jHA+oIE\nXTg1wHjt2jOeY4fxHf94jh0U/2jqK/Z57t5/g00v46Iy2N2XAktHO47DZWYrhpKxx5LxHDuM7/jH\nc+yg+EfTcMReyNbP7cCcrO3Z4b6hniMiIiOskMnheWCxmS0wsxLgMmBZr3OWAVda4EzgYL72BhER\nGRkFq1Zy94SZXQs8SNCV9XZ3X2NmV4fHbwHuI+iptJGgK+snChXPKBu3VWKM79hhfMc/nmMHxT+a\njjj2gjVIi4jI+KUR0iIikkPJQUREcig5HAYzKzOz58zsRTNbY2b/N9z/D2a23cxWhY+Ls17zRTPb\naGbrzeyCrP2nmtnL4bEf2mCmyixM7FPM7GEz2xD+OznrNWMi9vA955jZH83slTD+/xnuHw/3vr/Y\nx8u9v93MGsxsdda+MX/fB4h/XNz7Pj7LljCGVWa24nA/S17urscQHwQrKFaGz+PAs8CZwD8An+vj\n/OOAF4FSYAGwCYiGx54LX2vA/cBFoxT7t4Abw/03At8ca7GH73kUcEr4vIpgipbjxsm97y/28XLv\n3wGcAqzO2jfm7/sA8Y+Le9/HZ9kC1PTaN+TPku+hksNh8EB6/Ho8fORr2b8EuMvdO9z9NYLeWaeb\n2VFAtbsv9+B/8RfAB0Yp9kuAn4f7f54Vx5iJPYx/p4eTM7p7M7AWmJXnJWMm/jyxj5d7/wQw2Kl3\nx1Ts0G/84+LeD9KQPstAF1NyOExmFjWzVUAD8LC7Pxseus7MXgqLsOli3SxgW9bL68N9s8LnvfcX\nVD+xT/fuMSa7gPRSXmMq9mxmNh94C0HpB8bBvU/rFfu4u/e9jJv73ofxeu8deMTMVlowvRAM/bPk\npeRwmNw96e4nE4zqPt3MTgBuBo4GTgZ2At8dxRD71U/s2ced/CWhUWdmlcBvgM+6exPj5N5Dn7Fn\njId738u4ue8DGWf3/m3h7/BFwDVm9o7sg8PxWZQcjpC7HwD+SDDJ4O7wizcF/JTuolt/04RsD5/3\n3j8ismMHdodFZsJ/06uQjLnYzSxO8OX6K3e/B2C83Pu+Ymcc3fvexst9z2Nc3nt33x7+2wD8luC+\nD/Wz5KXkcBjMrNbMJoXPywnWrFiX/o8J/TWQ7hWxDLjMzErNbAGwGHguLAI2mdmZYY+HK4HfjUbs\nYYwfC0/7WFYcYyb2MGYD/hVY6+7fy9o/Hu59n7EzTu59X8bDfR/AuLv3ZjbBzKrSz4F3E9z3IX2W\nAd+oEC3pb/QHcBLwAvBS+J/ylXD/vwEvh/uXAUdlveZLBL0E1pPVuwGoC6+xCfgx4aj1UYh9KvAo\nsAF4BJgy1mIP3/NtBMXll4BV4ePicXLv+4t9vNz7OwmqjroI6q0/NR7u+wDxj4t73+tzHE3Q++hF\nYA3wpXD/kD9LvoemzxARkRyqVhIRkRxKDiIikkPJQUREcig5iIhIDiUHERHJoeQgkkdfM3mG+880\ns5+Gz083syfCGS9fMLPbzKzCzD5uZm5mf5X1ug+E+y4d6c8iMhRKDiL53UEwgry3i4AHzGw68B/A\nDe5+jLu/BXiAYNZVCMYAXJb1ussJ+qeLjGlKDiJ5eP8zkZ5PMNDoGuDn7v5M1mv+0913h5tPEsxf\nFQ/nVFpEM
PhNZExTchAZIjOrAbrc/SBwArAyz+lOkEQuIJg6eVnhIxQ5ckoOIkP3buChIZx/F0HV\n0mUEUziIjHlKDiJDdxFBuwIEc9ucmu9kd38OOJFg5a5XCxybyLCIjXYAIuNJOBPnSXS3G/wYeM7M\n/uDhgk9m9jfAn3u99EagfcQCFTlCSg4ieZjZncC5QI2Z1QM/Al7wcMZKd99tZpcB3zGzaUAKeILu\nkgXhefePaOAiR0izsooMgZl9Gdjo7neNdiwihaTkICIiOdQgLSIiOZQcREQkh5KDiIjkUHIQEZEc\nSg4iIpJDyUFERHL8fyd0pTimalbKAAAAAElFTkSuQmCC\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "%matplotlib inline\n", - "import matplotlib.pyplot as plt\n", - "\n", - "plt.xlabel(meta['XUNITS'])\n", - "plt.ylabel(meta['YUNITS'])\n", - "plt.title(meta['TITLE'])\n", - "plt.axis([max(x), min(x), min(y), max(y) + 0.1])\n", - "plt.plot(x, y)" - ] - } - ], - "metadata": { - "hide_input": false, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.1" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/requirements/common.txt b/requirements/common.txt index bcfa389..2bf2942 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -1,3 +1,2 @@ -lxml==3.4.1 requests==2.5.0 six==1.8.0 diff --git a/setup.py b/setup.py index 6a6b373..046d179 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,6 @@ zip_safe=False, install_requires=['requests', 'six'], tests_require=['pytest'], - extras_require={'lxml': ['lxml']}, classifiers=[ 'Environment :: Console', 'Intended Audience :: Developers', diff --git a/tests/test_api.py b/tests/test_api.py index c5f5041..7b6514b 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -5,8 +5,6 @@ Test the core API functionality. -:copyright: Copyright 2014 by Matt Swain. -:license: MIT, see LICENSE file for more details. 
""" from __future__ import print_function @@ -15,201 +13,375 @@ import logging import os import re +import time import pytest -import requests import six -from chemspipy import ChemSpider, MOL2D, MOL3D, BOTH -from chemspipy.errors import ChemSpiPyAuthError, ChemSpiPyServerError +from chemspipy import ChemSpider, errors logging.basicConfig(level=logging.WARN) logging.getLogger('chemspipy').setLevel(logging.DEBUG) -# Security token is retrieved from environment variables -CHEMSPIDER_SECURITY_TOKEN = os.environ['CHEMSPIDER_SECURITY_TOKEN'] +# API key is retrieved from environment variables +CHEMSPIDER_API_KEY = os.environ['CHEMSPIDER_API_KEY'] -# Chemspider instances with and without a security token -cs = ChemSpider(security_token=CHEMSPIDER_SECURITY_TOKEN) -cs2 = ChemSpider() +# Chemspider instances with and without an API key +cs = ChemSpider(CHEMSPIDER_API_KEY) -def test_no_security_token(): - """Test ChemSpider can be initialized with no parameters.""" - assert cs2.security_token == None +def test_no_api_key(): + """Test ChemSpider cannot be initialized with no API key.""" + with pytest.raises(TypeError): + ChemSpider() -def test_security_token(): - """Test security token is set correctly when initializing ChemSpider""" - assert cs.security_token == CHEMSPIDER_SECURITY_TOKEN +def test_api_key(): + """Test API key is set correctly when initializing ChemSpider.""" + assert cs.api_key == CHEMSPIDER_API_KEY def test_chemspider_repr(): """Test ChemSpider object repr.""" assert repr(cs) == 'ChemSpider()' - assert repr(cs2) == 'ChemSpider()' -# MassSpecAPI +# Lookups -def test_get_databases(): - """Test get_databases returns the list of ChemSpider data sources.""" - dbs = cs.get_databases() - assert all(source in dbs for source in ['Wikipedia', 'ZINC', 'PubChem']) +def test_get_datasources(): + """Test get_datasources returns the list of ChemSpider data sources.""" + datasources = cs.get_datasources() + assert all(source in datasources for source in ['Wikipedia', 
'ZINC', 'PubChem']) -def test_get_extended_compound_info(): - """Test get_extended_compound_info returns info for a CSID.""" - info = cs.get_extended_compound_info(6543) +# Records + +def test_get_details(): + """Test get_details returns details for a record ID.""" + info = cs.get_details(6543) assert all(field in info for field in [ - 'csid', 'molecular_formula', 'smiles', 'inchi', 'inchikey', 'average_mass', 'molecular_weight', - 'monoisotopic_mass', 'nominal_mass', 'alogp', 'xlogp', 'common_name' + 'id', 'smiles', 'formula', 'averageMass', 'molecularWeight', 'monoisotopicMass', 'nominalMass', + 'commonName', 'referenceCount', 'dataSourceCount', 'pubMedCount', 'rscCount', 'mol2D' ]) assert all(isinstance(info[field], float) for field in [ - 'average_mass', 'molecular_weight', 'monoisotopic_mass', 'nominal_mass', 'alogp', 'xlogp' + 'averageMass', 'molecularWeight', 'monoisotopicMass' ]) - assert isinstance(info['csid'], int) + assert isinstance(info['id'], int) assert all(isinstance(info[field], six.text_type) for field in [ - 'molecular_formula', 'smiles', 'inchi', 'inchikey', 'common_name' + 'smiles', 'formula', 'commonName', 'mol2D' ]) -def test_get_extended_compound_info_list(): - """Test get_extended_compound_info_list returns info for a list of CSIDs.""" - info = cs.get_extended_compound_info_list([6543, 1235, 6084]) +def test_get_details_batch(): + """Test get_extended_compound_info_list returns info for a list of record IDs.""" + info = cs.get_details_batch([6543, 1235, 6084]) assert len(info) == 3 assert all(field in info[0] for field in [ - 'csid', 'molecular_formula', 'smiles', 'inchi', 'inchikey', 'average_mass', 'molecular_weight', - 'monoisotopic_mass', 'nominal_mass', 'alogp', 'xlogp', 'common_name' + 'id', 'smiles', 'formula', 'averageMass', 'molecularWeight', 'monoisotopicMass', 'nominalMass', + 'commonName', 'referenceCount', 'dataSourceCount', 'pubMedCount', 'rscCount', 'mol2D' ]) assert all(isinstance(info[0][field], float) for field in [ - 
'average_mass', 'molecular_weight', 'monoisotopic_mass', 'nominal_mass', 'alogp', 'xlogp' + 'averageMass', 'molecularWeight', 'monoisotopicMass' ]) - assert isinstance(info[0]['csid'], int) + assert isinstance(info[0]['id'], int) assert all(isinstance(info[0][field], six.text_type) for field in [ - 'molecular_formula', 'smiles', 'inchi', 'inchikey', 'common_name' + 'smiles', 'formula', 'commonName', 'mol2D' ]) -def test_get_extended_mol_compound_info_list(): - """Test get_extended_mol_compound_info_list returns info for a list of CSIDs.""" - info = cs.get_extended_mol_compound_info_list([1236], include_external_references=True, - include_reference_counts=True) - assert len(info) == 1 - assert all(field in info[0] for field in [ - 'csid', 'molecular_formula', 'smiles', 'inchi', 'inchikey', 'average_mass', 'molecular_weight', - 'monoisotopic_mass', 'nominal_mass', 'alogp', 'xlogp', 'common_name', 'reference_count', 'datasource_count', - 'mol_2d' - ]) - assert all(isinstance(info[0][field], float) for field in [ - 'average_mass', 'molecular_weight', 'monoisotopic_mass', 'nominal_mass', 'alogp', 'xlogp' - ]) - assert all(isinstance(info[0][field], int) for field in ['csid', 'reference_count', 'datasource_count']) - assert all(isinstance(info[0][field], six.text_type) for field in [ - 'molecular_formula', 'smiles', 'inchi', 'inchikey', 'common_name', 'mol_2d' - ]) +def test_get_external_references(): + """Test get_external_references returns references for a record ID.""" + refs = cs.get_external_references(125) + assert len(refs) > 5 + for ref in refs: + assert 'source' in ref + assert 'sourceUrl' in ref + assert 'externalId' in ref + assert 'externalUrl' in ref -def test_get_extended_mol_compound_info_list_dimensions(): - """Test get_extended_mol_compound_info_list returns 2D/3D/both MOL.""" - info = cs.get_extended_mol_compound_info_list([1236], mol_type=MOL2D) - assert 'mol_2d' in info[0] - info = cs.get_extended_mol_compound_info_list([1236], mol_type=MOL3D) - 
assert 'mol_3d' in info[0] - info = cs.get_extended_mol_compound_info_list([1236], mol_type=BOTH) - assert 'mol_2d' in info[0] - assert 'mol_3d' in info[0] +def test_get_image(): + """Test get_image returns image data for a record ID.""" + img = cs.get_image(123) + assert img[:8] == b'\x89PNG\x0d\x0a\x1a\x0a' # PNG magic number -def test_get_record_mol(): - """Test get_record_mol returns a MOL file.""" - mol = cs.get_record_mol(6084) +def test_get_mol(): + """Test get_mol returns a MOLfile for a record ID.""" + mol = cs.get_mol(6084) assert 'V2000' in mol assert 'M END' in mol -def test_simple_search_by_formula(): - """Test simple_search_by_formula returns a list of CSIDs.""" - assert [c.csid for c in cs.simple_search_by_formula('C2H6')] == [6084] +# Filter + +def test_filter_formula_batch(): + """Test filter_formula_batch returns a list of CSIDs.""" + qid = cs.filter_formula_batch(formulas=['C2H2', 'C3H6']) + while True: + status = cs.filter_formula_batch_status(qid) + if status['status'] in {'Suspended', 'Failed', 'Not Found', 'Complete'}: + break + time.sleep(1) + results = cs.filter_formula_batch_results(qid) + assert len(results) == 2 + for result in results: + assert 'formula' in result + assert 'results' in result + assert len(result['results']) > 1 + + +def test_filter_intrinsicproperty_formula(): + """Test filter_intrinsicproperty returns a list of CSIDs.""" + qid = cs.filter_intrinsicproperty(formula='C6H6') + while True: + status = cs.filter_status(qid) + if status['status'] in {'Suspended', 'Failed', 'Not Found', 'Complete'}: + break + time.sleep(1) + results = cs.filter_results(qid) + assert len(results) > 10 + + +def test_filter_intrinsicproperty_mass(): + """Test filter_intrinsicproperty returns a list of CSIDs.""" + qid = cs.filter_intrinsicproperty(monoisotopic_mass=500, monoisotopic_mass_range=0.001) + while True: + status = cs.filter_status(qid) + if status['status'] in {'Suspended', 'Failed', 'Not Found', 'Complete'}: + break + time.sleep(1) + 
results = cs.filter_results(qid) + assert len(results) > 10 + +def test_filter_mass(): + """Test filter_mass returns a list of CSIDs.""" + qid = cs.filter_mass(500, 0.001) + while True: + status = cs.filter_status(qid) + if status['status'] in {'Suspended', 'Failed', 'Not Found', 'Complete'}: + break + time.sleep(1) + results = cs.filter_results(qid) + assert len(results) > 10 + + +def test_filter_mass_batch(): + """Test filter_mass_batch returns a list of CSIDs.""" + qid = cs.filter_mass_batch(masses=[(12, 0.001), (24, 0.001)]) + while True: + status = cs.filter_mass_batch_status(qid) + if status['status'] in {'Suspended', 'Failed', 'Not Found', 'Complete'}: + break + time.sleep(1) + results = cs.filter_mass_batch_results(qid) + print(results) + assert len(results) == 2 + for result in results: + assert 'mass' in result + assert 'range' in result + assert 'results' in result + assert len(result['results']) > 0 + + +def test_filter_smiles(): + """Test filter_smiles returns a list of CSIDs.""" + qid = cs.filter_smiles('c1ccccc1') + while True: + status = cs.filter_status(qid) + if status['status'] in {'Suspended', 'Failed', 'Not Found', 'Complete'}: + break + time.sleep(1) + results = cs.filter_results(qid) + assert len(results) == 1 + assert results[0] == 236 # Benzene ChemSpider ID + + +def test_filter_sdf(): + """Test filter_results_sdf returns an SDF file.""" + qid = cs.filter_formula('C10H20') + while True: + status = cs.filter_status(qid) + if status['status'] in {'Suspended', 'Failed', 'Not Found', 'Complete'}: + break + time.sleep(1) + sdf = cs.filter_results_sdf(qid) + assert b'V2000' in sdf + assert b'$$$$' in sdf -def test_simple_search_by_mass(): - """Test simple_search_by_mass returns a list of CSIDs.""" - csids = [c.csid for c in cs.simple_search_by_mass(17, 0.1)] - assert len(csids) > 8 + +# Tools + +def test_convert(): + """Test convert.""" + assert cs.convert('c1ccccc1', 'SMILES', 'InChI') == 'InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H' + assert 
cs.convert('InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H', 'InChI', 'InChIKey') == 'UHOVQNZJYSORNB-UHFFFAOYSA-N' + assert cs.convert('UHOVQNZJYSORNB-UHFFFAOYSA-N', 'InChIKey', 'InChI') == 'InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H' + + +def test_validate_inchikey(): + """Test validate_inchikey.""" + assert cs.validate_inchikey('UHOVQNZJYSORNB-UHFFFAOYSA-N') is True + assert cs.validate_inchikey('UHOVQNZJYSORNB-UHFFFAOYSQ-N') is False + assert cs.validate_inchikey('UHOVQNZJYSORNB-UHFFFAOYSA') is False + + +# MassSpecAPI + +def test_get_databases(): + """Test get_databases returns the list of ChemSpider data sources.""" + with pytest.deprecated_call(): + dbs = cs.get_databases() + assert all(source in dbs for source in ['Wikipedia', 'ZINC', 'PubChem']) + + +def test_get_extended_compound_info(): + """Test get_extended_compound_info returns info for a CSID.""" + with pytest.deprecated_call(): + info = cs.get_extended_compound_info(6543) + assert all(field in info for field in [ + 'id', 'smiles', 'formula', 'averageMass', 'molecularWeight', 'monoisotopicMass', 'nominalMass', + 'commonName', 'referenceCount', 'dataSourceCount', 'pubMedCount', 'rscCount', 'mol2D' + ]) + assert all(isinstance(info[field], float) for field in [ + 'averageMass', 'molecularWeight', 'monoisotopicMass' + ]) + assert isinstance(info['id'], int) + assert all(isinstance(info[field], six.text_type) for field in [ + 'smiles', 'formula', 'commonName', 'mol2D' + ]) + + +def test_get_extended_compound_info_list(): + """Test get_extended_compound_info_list returns info for a list of CSIDs.""" + with pytest.deprecated_call(): + info = cs.get_extended_compound_info_list([6543, 1235, 6084]) + assert len(info) == 3 + assert all(field in info[0] for field in [ + 'id', 'smiles', 'formula', 'averageMass', 'molecularWeight', 'monoisotopicMass', 'nominalMass', + 'commonName', 'referenceCount', 'dataSourceCount', 'pubMedCount', 'rscCount', 'mol2D' + ]) + assert all(isinstance(info[0][field], float) for field in [ + 
'averageMass', 'molecularWeight', 'monoisotopicMass' + ]) + assert isinstance(info[0]['id'], int) + assert all(isinstance(info[0][field], six.text_type) for field in [ + 'smiles', 'formula', 'commonName', 'mol2D' + ]) + + +def test_get_extended_mol_compound_info_list(): + """Test get_extended_mol_compound_info_list returns info for a list of CSIDs.""" + with pytest.deprecated_call(): + info = cs.get_extended_mol_compound_info_list([1236], include_external_references=True, + include_reference_counts=True) + assert len(info) == 1 + assert all(field in info[0] for field in [ + 'id', 'smiles', 'formula', 'averageMass', 'molecularWeight', 'monoisotopicMass', 'nominalMass', + 'commonName', 'referenceCount', 'dataSourceCount', 'pubMedCount', 'rscCount', 'mol2D' + ]) + assert all(isinstance(info[0][field], float) for field in [ + 'averageMass', 'molecularWeight', 'monoisotopicMass' + ]) + assert all(isinstance(info[0][field], int) for field in [ + 'id', 'referenceCount', 'dataSourceCount', 'pubMedCount', 'rscCount' + ]) + assert all(isinstance(info[0][field], six.text_type) for field in [ + 'smiles', 'formula', 'commonName', 'mol2D' + ]) + + +def test_get_record_mol(): + """Test get_record_mol returns a MOL file.""" + with pytest.deprecated_call(): + mol = cs.get_record_mol(6084) + assert 'V2000' in mol + assert 'M END' in mol # Search def test_async_simple_search(): """Test async_simple_search returns a transaction ID.""" - rid = cs.async_simple_search('benzene') - assert re.compile(r'[a-f0-9\-]{20,50}').search(rid) + with pytest.deprecated_call(): + rid = cs.async_simple_search('benzene') + assert re.compile(r'[a-f0-9\-]{20,50}').search(rid) def test_async_simple_search_ordered(): """Test async_simple_search returns a transaction ID.""" - rid = cs.async_simple_search_ordered('glucose') - assert re.compile(r'[a-f0-9\-]{20,50}').search(rid) + with pytest.deprecated_call(): + rid = cs.async_simple_search_ordered('glucose') + assert 
re.compile(r'[a-f0-9\-]{20,50}').search(rid) def test_get_async_search_status(): """Test get_async_search_status returns the status for a transaction ID.""" - rid = cs.async_simple_search('benzene') - status = cs.get_async_search_status(rid) - assert status in {'Unknown', 'Created', 'Scheduled', 'Processing', 'Suspended', 'PartialResultReady', 'ResultReady'} + with pytest.deprecated_call(): + rid = cs.async_simple_search('benzene') + status = cs.get_async_search_status(rid) + assert status in { + 'Complete', 'Suspended', 'Failed', 'Not Found', 'Unknown', 'Created', 'Scheduled', 'Processing', + 'Suspended', 'PartialResultReady', 'ResultReady' + } def test_get_async_search_status_and_count(): """Test get_async_search_status_and_count returns the status for a transaction ID.""" - rid = cs.async_simple_search('benzene') - while True: - status = cs.get_async_search_status_and_count(rid) - if status['status'] in {'Created', 'Scheduled', 'Processing'}: - continue - assert status['count'] == 1 - assert status['message'] == 'Found by approved synonym' - break + with pytest.deprecated_call(): + rid = cs.async_simple_search('benzene') + while True: + status = cs.get_async_search_status_and_count(rid) + if status['status'] in {'Created', 'Scheduled', 'Processing'}: + continue + assert status['count'] == 1 + assert status['message'] == 'Found by approved synonym' + break def test_get_async_search_result(): """Test get_async_search_result returns a list of CSIDs.""" - rid = cs.async_simple_search('benzene') - while True: - status = cs.get_async_search_status(rid) - if status in {'Created', 'Scheduled', 'Processing'}: - continue - assert [c.csid for c in cs.get_async_search_result(rid)] == [236] - break + with pytest.deprecated_call(): + rid = cs.async_simple_search('benzene') + while True: + status = cs.get_async_search_status(rid) + if status in {'Created', 'Scheduled', 'Processing'}: + continue + assert [c.csid for c in cs.get_async_search_result(rid)] == [236] + break def 
test_get_async_search_result_part(): """Test get_async_search_result_part returns a list of CSIDs.""" - rid = cs.async_simple_search('glucose') - while True: - status = cs.get_async_search_status(rid) - if status in {'Created', 'Scheduled', 'Processing'}: - continue - assert len(cs.get_async_search_result_part(rid)) > 6 - assert len(cs.get_async_search_result_part(rid, start=2)) > 2 - assert len(cs.get_async_search_result_part(rid, start=2, count=2)) == 2 - assert len(cs.get_async_search_result_part(rid, start=2, count=99)) > 2 - break + with pytest.deprecated_call(): + rid = cs.async_simple_search('glucose') + while True: + status = cs.get_async_search_status(rid) + if status in {'Created', 'Scheduled', 'Processing'}: + continue + assert len(cs.get_async_search_result_part(rid)) > 6 + assert len(cs.get_async_search_result_part(rid, start=2)) > 2 + assert len(cs.get_async_search_result_part(rid, start=2, count=2)) == 2 + assert len(cs.get_async_search_result_part(rid, start=2, count=99)) > 2 + break def test_get_compound_info(): """Test get_compound_info returns info for a CSID.""" - info = cs.get_compound_info(263) - assert all(field in info for field in ['csid', 'smiles', 'inchi', 'inchikey']) - assert isinstance(info['csid'], int) - assert all(isinstance(info[field], six.text_type) for field in ['smiles', 'inchi', 'inchikey']) + with pytest.deprecated_call(): + info = cs.get_compound_info(123) + assert all(field in info for field in ['id', 'smiles']) + assert isinstance(info['id'], int) + assert isinstance(info['smiles'], six.text_type) def test_get_compound_thumbnail(): """Test get_compound_thumbnail returns image data for a CSID.""" - img = cs.get_compound_thumbnail(263) - assert img[:8] == b'\x89PNG\x0d\x0a\x1a\x0a' # PNG magic number + with pytest.deprecated_call(): + img = cs.get_compound_thumbnail(123) + assert img[:8] == b'\x89PNG\x0d\x0a\x1a\x0a' # PNG magic number def test_simple_search(): @@ -217,93 +389,34 @@ def test_simple_search(): assert all(csid 
in [c.csid for c in cs.simple_search('glucose')] for csid in [5589, 58238, 71358, 96749, 9312824, 9484839]) -# Spectra - -# This is slow... -# def test_get_all_spectra_info(): -# """Test get_all_spectra_info returns all spectra info.""" -# spectra = cs.get_all_spectra_info() -# ok_(len(spectra) > 8000) -# ok_('spectrum_id' in spectrum for spectrum in spectra) - - -def test_get_spectrum_info(): - """Test get_spectrum_info returns info for the given spectrum ID.""" - info = cs.get_spectrum_info(36) - assert info['spectrum_id'] == 36 - assert info['csid'] == 235 - assert info['spectrum_type'] == 'HNMR' - assert info['file_name'] == 'BenzaldehydeHNMR.jdx' - assert info['submitted_date'] == '2007-08-08T20:18:36.593' - - -def test_get_compound_spectra_info(): - """Test get_compound_spectra_info returns list of spectra info for the given ChemSpider ID.""" - for s in cs.get_compound_spectra_info(2157): - assert isinstance(s, dict) - assert s['csid'] == 2157 - assert isinstance(s['spectrum_id'], int) - - -def test_get_spectra_info_list(): - """Test get_spectra_info_list returns list of spectra info for a list of CSIDs.""" - assert cs.get_spectra_info_list([263]) == [] # No spectra for this compound - for s in cs.get_spectra_info_list([2157, 6084]): - assert s['csid'] in [2157, 6084] - assert isinstance(s['spectrum_id'], int) - - -# InChI - -def test_get_original_mol(): - """Test get_original_mol returns a MOL file.""" - mol = cs.get_original_mol(6084) - assert 'V2000' in mol - assert 'M END' in mol - - -# Misc - -def test_construct_api_url(): - """Test construction of API URLs.""" - url = cs.construct_api_url('MassSpecAPI', 'GetExtendedCompoundInfo', csid=2157) - response = requests.get(url) - assert response.status_code == 200 - - # Errors -def test_token_needed(): - """Test ChemSpiPyAuthError is raised for certain endpoints if no security_token provided.""" - with pytest.raises(ChemSpiPyAuthError): - cs2.get_extended_compound_info(263) - -def test_invalid_token(): +def 
test_invalid_api_key(): """Test ChemSpiPyAuthError is raised if a token with invalid format is used.""" - with pytest.raises(ChemSpiPyAuthError): + with pytest.raises(errors.ChemSpiPyAuthError): mf = ChemSpider('abcde1-1346fa-934a').get_compound(2157).molecular_formula -def test_invalid_token2(): +def test_invalid_api_key2(): """Test ChemSpiPyAuthError is raised if a fake token with correct format is used.""" - with pytest.raises(ChemSpiPyAuthError): - mf = ChemSpider('a1e22457-c835-1234-b141-347bf12fa31c').get_compound(2157).molecular_formula + with pytest.raises(errors.ChemSpiPyAuthError): + mf = ChemSpider('6qBA6lrJycPAYTTcajkkaN02brz5S6Ee').get_compound(2157).molecular_formula -def test_invalid_rid(): - """Test ChemSpiPyServerError is raised when an invalid transaction ID is used.""" - with pytest.raises(ChemSpiPyServerError): - cs.get_async_search_status('xxxxxx') +def test_invalid_query_id(): + """Test ChemSpiPyBadRequestError is raised when an invalid query ID is used.""" + with pytest.raises(errors.ChemSpiPyBadRequestError): + cs.filter_status('xxxxxx') -def test_expired_rid(): - """Test ChemSpiPyServerError is raised when a valid but expired transaction ID is used.""" - with pytest.raises(ChemSpiPyServerError): - cs.get_async_search_status('1a93ee87-acbe-4caa-bc3b-23c3ff39be0f') +def test_expired_query_id(): + """Test ChemSpiPyBadRequestError is raised when a valid but expired query ID is used.""" + with pytest.raises(errors.ChemSpiPyBadRequestError): + cs.filter_status('1a93ee87-acbe-4caa-bc3b-23c3ff39be0f') -def test_fictional_rid(): - """Test ChemSpiPyServerError is raised when a valid but made up transaction ID is used.""" - with pytest.raises(ChemSpiPyServerError): - cs.get_async_search_status('1a93ee87-acbe-4caa-bc3b-23c3ff39be0a') +def test_fictional_query_id(): + """Test ChemSpiPyBadRequestError is raised when a valid but made up query ID is used.""" + with pytest.raises(errors.ChemSpiPyBadRequestError): + 
cs.filter_status('1a93ee87-acbe-4caa-bc3b-23c3ff39be0a') diff --git a/tests/test_compound.py b/tests/test_compound.py index 36b73fa..6b5908d 100644 --- a/tests/test_compound.py +++ b/tests/test_compound.py @@ -5,8 +5,6 @@ Test the Compound object. -:copyright: Copyright 2014 by Matt Swain. -:license: MIT, see LICENSE file for more details. """ from __future__ import print_function @@ -15,27 +13,32 @@ import logging import os +import pytest import requests -from chemspipy import ChemSpider, Compound, Spectrum +from chemspipy import ChemSpider, Compound logging.basicConfig(level=logging.WARN, format='%(levelname)s:%(name)s:(%(threadName)-10s):%(message)s') logging.getLogger('chemspipy').setLevel(logging.DEBUG) -# Security token is retrieved from environment variables -CHEMSPIDER_SECURITY_TOKEN = os.environ['CHEMSPIDER_SECURITY_TOKEN'] -cs = ChemSpider(security_token=CHEMSPIDER_SECURITY_TOKEN) +# API key is retrieved from environment variables +CHEMSPIDER_API_KEY = os.environ['CHEMSPIDER_API_KEY'] +cs = ChemSpider(CHEMSPIDER_API_KEY) def test_get_compound(): """Test getting a compound by ChemSpider ID.""" compound = cs.get_compound(2157) assert isinstance(compound, Compound) - assert compound.csid == 2157 + assert compound.record_id == 2157 + with pytest.deprecated_call(): + assert compound.csid == 2157 compound = cs.get_compound('2157') assert isinstance(compound, Compound) - assert compound.csid == 2157 + assert compound.record_id == 2157 + with pytest.deprecated_call(): + assert compound.csid == 2157 def test_get_compounds(): @@ -94,33 +97,35 @@ def test_smiles(): def test_inchi(): """Test Compound property inchi.""" compound = cs.get_compound(2157) - assert compound.inchi == 'InChI=1/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)' + assert compound.inchi == 'InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)' # Ensure value is the same on subsequent access from cache - assert compound.inchi == 
'InChI=1/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)' + assert compound.inchi == 'InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)' def test_stdinchi(): """Test Compound property stdinchi.""" compound = cs.get_compound(2157) - assert compound.stdinchi == 'InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)' - # Ensure value is the same on subsequent access from cache - assert compound.stdinchi == 'InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)' + with pytest.deprecated_call(): + assert compound.stdinchi == 'InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)' + # Ensure value is the same on subsequent access from cache + assert compound.stdinchi == 'InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)' def test_inchikey(): """Test Compound property inchikey.""" compound = cs.get_compound(2157) - assert compound.inchikey == 'BSYNRYMUTXBXSQ-UHFFFAOYAW' + assert compound.inchikey == 'BSYNRYMUTXBXSQ-UHFFFAOYSA-N' # Ensure value is the same on subsequent access from cache - assert compound.inchikey == 'BSYNRYMUTXBXSQ-UHFFFAOYAW' + assert compound.inchikey == 'BSYNRYMUTXBXSQ-UHFFFAOYSA-N' def test_stdinchikey(): """Test Compound property stdinchikey.""" compound = cs.get_compound(2157) - assert compound.stdinchikey == 'BSYNRYMUTXBXSQ-UHFFFAOYSA-N' - # Ensure value is the same on subsequent access from cache - assert compound.stdinchikey == 'BSYNRYMUTXBXSQ-UHFFFAOYSA-N' + with pytest.deprecated_call(): + assert compound.stdinchikey == 'BSYNRYMUTXBXSQ-UHFFFAOYSA-N' + # Ensure value is the same on subsequent access from cache + assert compound.stdinchikey == 'BSYNRYMUTXBXSQ-UHFFFAOYSA-N' def test_masses(): @@ -132,13 +137,6 @@ def test_masses(): assert compound.nominal_mass == 180 -def test_descriptors(): - """Test Compound property alogp, xlogp.""" - compound = cs.get_compound(348191) - assert compound.alogp == 0.0 - assert compound.xlogp == 1.2 - - def test_name(): 
"""Test Compound property common_name.""" compound = cs.get_compound(2157) @@ -150,7 +148,6 @@ def test_molfiles(): compound = cs.get_compound(2157) assert 'V2000' in compound.mol_2d assert 'V2000' in compound.mol_3d - assert 'V2000' in compound.mol_raw def test_image(): @@ -159,12 +156,12 @@ def test_image(): assert compound.image[:8] == b'\x89PNG\x0d\x0a\x1a\x0a' # PNG magic number -def test_spectra(): - """Test Compound property spectra.""" - compound = cs.get_compound(2157) - for s in compound.spectra: - assert isinstance(s, Spectrum) - assert s.csid == 2157 - assert isinstance(s.spectrum_id, int) - compound = cs.get_compound(263) - assert compound.spectra == [] +def test_external_references(): + """Test Compound property external_references.""" + compound = cs.get_compound(97809) + assert len(compound.external_references) > 50 + for xref in compound.external_references: + assert 'externalId' in xref + assert 'externalUrl' in xref + assert 'source' in xref + assert 'sourceUrl' in xref diff --git a/tests/test_search.py b/tests/test_search.py index e521b8a..81688a2 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -5,8 +5,6 @@ Test the search wrapper. -:copyright: Copyright 2014 by Matt Swain. -:license: MIT, see LICENSE file for more details. 
""" from __future__ import print_function @@ -17,25 +15,24 @@ import pytest -from chemspipy import ChemSpider, ASCENDING, DESCENDING, CSID, REFERENCE_COUNT, MOLECULAR_WEIGHT -from chemspipy.errors import ChemSpiPyServerError +from chemspipy import ChemSpider, errors, ASCENDING, DESCENDING, CSID, REFERENCE_COUNT, MOLECULAR_WEIGHT logging.basicConfig(level=logging.WARN, format='%(levelname)s:%(name)s:(%(threadName)-10s):%(message)s') logging.getLogger('chemspipy').setLevel(logging.DEBUG) -# Security token is retrieved from environment variables -CHEMSPIDER_SECURITY_TOKEN = os.environ['CHEMSPIDER_SECURITY_TOKEN'] -cs = ChemSpider(security_token=CHEMSPIDER_SECURITY_TOKEN) +# API key is retrieved from environment variables +CHEMSPIDER_API_KEY = os.environ['CHEMSPIDER_API_KEY'] +cs = ChemSpider(CHEMSPIDER_API_KEY) def test_search_smiles(): """Test SMILES input to search.""" results = cs.search('O=C(OCC)C') - assert results.ready() == False + assert results.ready() is False results.wait() - assert results.ready() == True - assert results.success() == True + assert results.ready() is True + assert results.success() is True assert results.message == 'Found by conversion query string to chemical structure (full match)' assert results[0].csid == 8525 assert results.duration.total_seconds() > 0 @@ -66,7 +63,7 @@ def test_search_iter(): def test_search_ordered_csid(): """Test search results ordered by CSID.""" results = cs.search('glucose', order=CSID) - assert list(results) == sorted(results, key=lambda x: x.csid) + assert list(results) == sorted(results, key=lambda x: x.csid) def test_search_ordered_csid_descending(): @@ -91,8 +88,8 @@ def test_search_no_results(): """Test name input to search.""" results = cs.search('aergherguyaelrgiaubrfawyef') assert results.message == 'No results found' - assert results.ready() == True - assert results.success() == True + assert results.ready() is True + assert results.success() is True assert len(results) == 0 @@ -106,20 +103,17 @@ def 
test_search_failed(): """Test ChemSpiPyServerError is raised for an invalid SMILES.""" results = cs.search('O=C(OCC)C*') results.wait() - assert isinstance(results.exception, ChemSpiPyServerError) + assert isinstance(results.exception, errors.ChemSpiPyBadRequestError) assert results.status == 'Failed' assert repr(results) == 'Results(Failed)' - assert results.ready() == True - assert results.success() == False + assert results.ready() is True + assert results.success() is False assert results.count == 0 assert results.duration.total_seconds() > 0 def test_search_exception(): """Test ChemSpiPyServerError is raised for an invalid SMILES.""" - with pytest.raises(ChemSpiPyServerError): + with pytest.raises(errors.ChemSpiPyBadRequestError): results = cs.search('O=C(OCC)C*', raise_errors=True) results.wait() - - -# ordered search - ascending/descending, different sort orders diff --git a/tests/test_spectrum.py b/tests/test_spectrum.py deleted file mode 100644 index 6f088c8..0000000 --- a/tests/test_spectrum.py +++ /dev/null @@ -1,136 +0,0 @@ -# -*- coding: utf-8 -*- -""" -test_spectrum -~~~~~~~~~~~~~ - -Test the Spectrum object. - -:copyright: Copyright 2014 by Matt Swain. -:license: MIT, see LICENSE file for more details. 
-""" - -from __future__ import print_function -from __future__ import unicode_literals -from __future__ import division -import datetime -import logging -import os - -import six - -from chemspipy import ChemSpider, Spectrum - - -logging.basicConfig(level=logging.WARN, format='%(levelname)s:%(name)s:(%(threadName)-10s):%(message)s') -logging.getLogger('chemspipy').setLevel(logging.DEBUG) - -# Security token is retrieved from environment variables -CHEMSPIDER_SECURITY_TOKEN = os.environ['CHEMSPIDER_SECURITY_TOKEN'] -cs = ChemSpider(security_token=CHEMSPIDER_SECURITY_TOKEN) - - -def test_get_all_spectra(): - """Test getting all spectra in ChemSpider.""" - spectra = cs.get_all_spectra() - for s in spectra: - assert isinstance(s, Spectrum) - assert isinstance(s.spectrum_id, int) - - -def test_get_spectrum(): - """Test getting a spectrum by spectrum ID.""" - s = cs.get_spectrum(36) - assert isinstance(s, Spectrum) - assert s.spectrum_id == 36 - assert s.csid == 235 - assert s.spectrum_type == 'HNMR' - assert s.file_name == 'BenzaldehydeHNMR.jdx' - assert s.submitted_date, datetime.datetime(2007, 8, 8, 20, 18, 36 == 593000) - - -def test_get_spectra(): - """Test getting multiple spectra by spectrum ID.""" - spectra = cs.get_spectra([36, 65]) - assert len(spectra) == 2 - for s in spectra: - assert isinstance(s, Spectrum) - assert s.spectrum_id in [36, 65] - assert s.csid in [235, 172] - - -def test_get_compound_spectra(): - """Test getting all spectra for a specific ChemSpider ID.""" - spectra = cs.get_compound_spectra(2157) - assert len(spectra) > 0 - for s in spectra: - assert isinstance(s, Spectrum) - assert isinstance(s.spectrum_id, int) - assert s.csid == 2157 - - -def test_spectrum_init(): - """Test instantiating a Spectrum directly.""" - s = Spectrum(cs, 36) - assert isinstance(s, Spectrum) - assert s.spectrum_id == 36 - assert s.csid == 235 - assert s.spectrum_type == 'HNMR' - assert s.file_name == 'BenzaldehydeHNMR.jdx' - assert s.submitted_date, 
datetime.datetime(2007, 8, 8, 20, 18, 36 == 593000) - - -def test_spectrum_equality(): - """Test equality test by spectrum ID.""" - s1 = cs.get_spectrum(65) - s2 = cs.get_spectrum(87) - s3 = cs.get_spectrum(87) - assert s1 != s2 - assert s2 == s3 - - -def test_spectrum_repr(): - """Test Spectrum object repr.""" - assert repr(cs.get_spectrum(65)) == 'Spectrum(65)' - - -def test_comments(): - """Test retrieving comments about a spectrum.""" - s = cs.get_spectrum(36) - assert isinstance(s.comments, six.text_type) - assert 'Benzaldehyde' in s.comments - - -def test_no_comments(): - """Test spectrum with no comments.""" - s = cs.get_spectrum(87) - assert s.comments is None - - -def test_original_url(): - """Test retrieving original_url for spectrum.""" - s = cs.get_spectrum(65) - assert isinstance(s.original_url, six.text_type) - assert 'http://' in s.original_url - - -def test_no_original_url(): - """Test spectrum with no original_url.""" - s = cs.get_spectrum(36) - assert s.original_url is None - - -def test_url(): - """Test retrieving spectrum url.""" - s = cs.get_spectrum(3558) - assert s.url == 'https://www.chemspider.com/FilesHandler.ashx?type=blob&disp=1&id=3558' - for compound in cs.search('Aspirin'): - for spectrum in compound.spectra: - assert spectrum.url.startswith('https://www.chemspider.com/FilesHandler.ashx?type=blob&disp=1&id=') - - -def test_data(): - """Test downloading spectrum.""" - s = cs.get_spectrum(3558) - assert 'JCAMP-DX' in s.data - assert 'NMR SPECTRUM' in s.data - assert len(s.data) > 500 diff --git a/tests/test_utils.py b/tests/test_utils.py index 78e2f76..e103abf 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -5,8 +5,6 @@ Test miscellaneous utility functions. -:copyright: Copyright 2014 by Matt Swain. -:license: MIT, see LICENSE file for more details. """ from __future__ import print_function