Skip to content

Commit

Permalink
niggli db format changed from pickle to numpy compressed. Script and …
Browse files Browse the repository at this point in the history
…read/write adapted
  • Loading branch information
Felix Simkovic committed May 3, 2017
1 parent fa16b2e commit c870969
Show file tree
Hide file tree
Showing 9 changed files with 178 additions and 1,510,500 deletions.
44 changes: 44 additions & 0 deletions simbad/command_line/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""Command line facility for SIMBAD scripts"""

__author__ = "Felix Simkovic"
__date__ = "14 Apr 2017"
__version__ = "0.1"

import logging


def get_logger(name, level='info'):
"""Return an initialized logging handler
Parameters
----------
name : str
The logger name
level : str, optional
The logging level to be used [default: info]
To change, use one of
[ notset | info | debug | warning | error | critical ]
Returns
-------
logger
Instance of a :obj:`logger <logging.Logger>`
Raises
------
ValueError
Unknown logging level
"""
# Define the logging level handlers
logger_levels = {'notset': logging.NOTSET, 'info': logging.INFO, 'debug': logging.DEBUG,
'warning': logging.WARNING, 'error': logging.ERROR, 'critical': logging.CRITICAL}
if level in logger_levels:
LEVEL = logger_levels[level]
else:
raise ValueError("Unknown logging level: %s" % level)

FORMAT = "%(message)s"
logging.basicConfig(format=FORMAT, level=LEVEL)
return logging.getLogger(name)

13 changes: 7 additions & 6 deletions simbad/command_line/simbad_contaminant.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,19 @@
__version__ = "0.1"

import argparse
import logging
import os
import sys
import time

import simbad.command_line
import simbad.constants
import simbad.rotsearch.amore_search
import simbad.util.exit_util
import simbad.util.mr_util
import simbad.util.simbad_util

logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.DEBUG)
logger = simbad.command_line.get_logger('contaminant_search', level='info')


def main():
p = argparse.ArgumentParser()
Expand Down Expand Up @@ -52,14 +53,14 @@ def main():
args = p.parse_args()

if args.work_dir:
logging.info('Making a named work directory: %s', args.work_dir)
logger.info('Making a named work directory: %s', args.work_dir)
try:
os.mkdir(args.work_dir)
except OSError:
msg = "Cannot create work_dir {0}".format(args.work_dir)
simbad.util.exit_util.exit_error(msg, sys.exc_info()[2])
else:
logging.info('Making a run_directory: checking for previous runs...')
logger.info('Making a run_directory: checking for previous runs...')
args.work_dir = simbad.util.simbad_util.make_workdir(os.getcwd())


Expand Down Expand Up @@ -97,7 +98,7 @@ def main():
run_in_min = elapsed_time / 60
run_in_hours = run_in_min / 60
msg = os.linesep + 'All processing completed (in {0:6.2F} hours)'.format(run_in_hours) + os.linesep
logging.info(msg)
logger.info(msg)

if __name__ == "__main__":
main()
main()
115 changes: 115 additions & 0 deletions simbad/command_line/simbad_create_lattice_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
"""Script to update the database for the lattice parameter search"""

__author__ = "Adam Simpkin"
__date__ = "28 Apr 2017"
__version__ = "0.1"

import argparse
import cctbx.crystal
import numpy as np
import urllib2

import simbad.constants
import simbad.command_line

logger = simbad.command_line.get_logger('create_lattice_db', level='info')


def rcsb_custom_report():
"""Create a custom report from the PDB
Returns
-------
list
A list of results
"""
base = 'http://www.rcsb.org/pdb/rest/'
report = 'customReport.csv?pdbids=*&customReportColumns=lengthOfUnitCellLatticeA,'\
+ 'lengthOfUnitCellLatticeB,lengthOfUnitCellLatticeC,unitCellAngleAlpha,'\
+ 'unitCellAngleBeta,unitCellAngleGamma,spaceGroup,experimentalTechnique'\
+ '&service=wsfile&format=csv'

# Space group conversions
sg_conversion = {
'A1': 'P1', 'B2': 'B112', 'C1211': 'C2', 'F422': 'I422',
'I21': 'I2', 'I1211': 'I2', 'P21212A' : 'P212121',
'R3': 'R3:R', 'C4212': 'P422',
}

# Obtain the data by querying the RCSB server
logger.info('Querying the RCSB Protein DataBank')
results, errors = [], []
for line in urllib2.urlopen(base + report):

# Ignore the header
if line.startswith('structureId'):
continue

pdb_code, rest = line.replace('"', "").split(',', 1)
unit_cell, space_group, exp_tech = rest.rsplit(',', 2)

# Ignore non-xtal structures
if exp_tech.strip().upper() != "X-RAY DIFFRACTION":
continue

# Some entries do not have stored unit cell parameters
try:
unit_cell = map(float, unit_cell.split(','))
except ValueError as e:
logger.debug('Skipping pdb entry {0}\t-\t{1}'.format(pdb_code, e))
errors.append(pdb_code)
continue

space_group = space_group.replace(" ", "").strip()
space_group = sg_conversion.get(space_group, space_group)

try:
symmetry = cctbx.crystal.symmetry(unit_cell=unit_cell, space_group=space_group)
except Exception as e:
logger.debug('Skipping pdb entry {0}\t-\t{1}'.format(pdb_code, e))
errors.append(pdb_code)
continue

results.append((pdb_code, symmetry))

logger.info('\tError with {0} pdb entries'.format(len(errors)))
return results


def create_niggli_cell_data(crystal_data):
"""Generate a new data for the lattice parameter search
Parameters
----------
crystal_data : list, tuple, generator
A list of lists for PDB entries with the pdb code as
first and the crystal info as second argument.
"""
logger.info('Calculating the Niggli cells')
data_ascii_encoded = np.zeros((0, 10))
for i, xtal_data in enumerate(crystal_data):
d = np.zeros(10)
d[:4] = np.fromstring(xtal_data[0], dtype='uint8')
d[4:] = np.asarray(xtal_data[1].niggli_cell().unit_cell().parameters())
data_ascii_encoded = np.concatenate((data_ascii_encoded, d[np.newaxis, :]), axis=0)
logger.info("Total Niggli cells loaded: {0}".format(i + 1))
return data_ascii_encoded


if __name__ == "__main__":
p = argparse.ArgumentParser()
p.add_argument('-db', '--database', help='File to store the database in. Default overwrites the general SIMBAD database')
args = p.parse_args()

if args.database:
logger.info('Creating a new database file: {0}'.format(args.database))
lattice_db = args.database
else:
logger.info('Overwriting the default SIMBAD Niggli database {0}'.format(simbad.constants.SIMBAD_LATTICE_DB))
lattice_db = simbad.constants.SIMBAD_LATTICE_DB

niggli_data = create_niggli_cell_data(rcsb_custom_report())
np.savez_compressed(lattice_db, niggli_data)

4 changes: 2 additions & 2 deletions simbad/command_line/simbad_lattice.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@
__version__ = "0.1"

import argparse
import logging
import os

import simbad.command_line
import simbad.constants
import simbad.lattice.search
import simbad.util.mtz_util
import simbad.util.mr_util

logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.DEBUG)
logger = simbad.command_line.get_logger('lattice_search', level='info')


def main():
Expand Down
2 changes: 1 addition & 1 deletion simbad/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
SIMBAD_CONFIG_FILE = os.path.join(SIMBAD_EGG_ROOT, 'static', 'simbad.ini')

# SIMBAD database for lattice search
SIMBAD_LATTICE_DB = os.path.join(SIMBAD_EGG_ROOT, 'static', 'niggli_database.cpk')
SIMBAD_LATTICE_DB = os.path.join(SIMBAD_EGG_ROOT, 'static', 'niggli_database.npz')

# SIMBAD database of contaminant models
CONTAMINANT_MODELS = os.path.join(SIMBAD_EGG_ROOT, 'static', 'contaminant_models')
19 changes: 9 additions & 10 deletions simbad/lattice/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@ def __init__(self, unit_cell, space_group, lattice_db_fname, max_to_keep=20):
self._space_group = None

self._niggli_cell = None
self._lattice_db = None
self._lattice_db_fname = None
self._search_results = None
self._max_to_keep = 0
Expand All @@ -116,8 +115,6 @@ def lattice_db_fname(self):
def lattice_db_fname(self, lattice_db_fname):
"""Define the lattice database filename"""
self._lattice_db_fname = lattice_db_fname
self._lattice_db = cPickle.load(open(lattice_db_fname))
logger.info('Lattice database successfully cached')

@property
def max_to_keep(self):
Expand Down Expand Up @@ -279,13 +276,15 @@ def search(self, tolerance=0.05):
tol_niggli_cell = niggli_cell * tolerance

results = []
for i, db_cell in enumerate(self._lattice_db[1]):
pdb_code = self._lattice_db[0][i]

if LatticeSearch.cell_within_tolerance(niggli_cell, db_cell, tol_niggli_cell):
total_pen, length_pen, angle_pen = LatticeSearch.calculate_penalty(niggli_cell, db_cell)
score = _LatticeParameterScore(pdb_code, db_cell, total_pen, length_pen, angle_pen)
results.append(score)
with numpy.load(self.lattice_db_fname) as compressed:
for entry in compressed['arr_0']:
pdb_code = "".join(chr(c) for c in entry[:4].astype('uint8'))
db_cell = entry[4:]

if LatticeSearch.cell_within_tolerance(niggli_cell, db_cell, tol_niggli_cell):
total_pen, length_pen, angle_pen = LatticeSearch.calculate_penalty(niggli_cell, db_cell)
score = _LatticeParameterScore(pdb_code, db_cell, total_pen, length_pen, angle_pen)
results.append(score)

self._search_results = results

Expand Down

0 comments on commit c870969

Please sign in to comment.