Finish io

tclick · Aug 12, 2017 · c6f15f4 · c6f15f4
2 parents c36cdb1 + 1fb4bc2
commit c6f15f4
Show file tree

Hide file tree

Showing 11 changed files with 1,097 additions and 0 deletions.
diff --git a/src/fluctmatch/coordinates/CRD.py b/src/fluctmatch/coordinates/CRD.py
@@ -0,0 +1,182 @@
+# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding: utf-8 -*-
+# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
+#
+# MDAnalysis --- http://www.mdanalysis.org
+# Copyright (c) 2006-2016 The MDAnalysis Development Team and contributors
+# (see the file AUTHORS for the full list of names)
+#
+# Released under the GNU Public Licence, v2 or any higher version
+#
+# Please cite your use of MDAnalysis in published work:
+#
+# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
+# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
+# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
+# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
+# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
+#
+# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
+# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
+# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
+#
+
+"""CRD structure files in MDAnalysis --- :mod:`MDAnalysis.coordinates.CRD`
+===========================================================================
+
+Read and write coordinates in CHARMM CARD coordinate format (suffix
+"crd"). The CHARMM "extended format" is handled automatically.
+"""
+
+from __future__ import (
+    absolute_import,
+    division,
+    print_function,
+    unicode_literals,
+)
+
+=from future.builtins import (
+    range,
+    str,
+    super,
+    zip,
+)
+
+import itertools
+import warnings
+
+import numpy as np
+
+from MDAnalysis.coordinates import CRD
+from MDAnalysis.exceptions import NoDataError
+from MDAnalysis.lib import util
+
+
+class CRDWriter(CRD.CRDWriter):
+    """CRD writer that implements the CHARMM CRD coordinate format.
+
+    It automatically writes the CHARMM EXT extended format if there
+    are more than 99,999 atoms.
+
+    Requires the following attributes:
+    - resids
+    - resnames
+    - names
+    - chainIDs
+    - tempfactors
+
+    .. versionchanged:: 0.11.0
+       Frames now 0-based instead of 1-based
+    """
+    format = "CRD"
+    units = {"time": None, "length": "Angstrom"}
+
+    fmt = {
+        # crdtype = "extended"
+        # fortran_format = "(2I10,2X,A8,2X,A8,3F20.10,2X,A8,2X,A8,F20.10)"
+        "ATOM_EXT"    : ("{serial:10d}{totRes:10d}  {resname:<8.8s}  {name:<8.8s}"
+                         "{pos[0]:20.10f}{pos[1]:20.10f}{pos[2]:20.10f}  "
+                         "{chainID:<8.8s}  {resSeq:<8d}{tempfactor:20.10f}\n"),
+        "NUMATOMS_EXT": "{0:10d} EXT\n",
+        # crdtype = "standard"
+        # fortran_format = "(2I5,1X,A4,1X,A4,3F10.5,1X,A4,1X,A4,F10.5)"
+        "ATOM"        : ("{serial:5d}{totRes:5d} {resname:<4.4s} {name:<4.4s}"
+                         "{pos[0]:10.5f}{pos[1]:10.5f}{pos[2]:10.5f} "
+                         "{chainID:<4.4s} {resSeq:<4d}{tempfactor:10.5f}\n"),
+        "TITLE"       : "* FRAME {frame} FROM {where}\n",
+        "NUMATOMS"    : "{0:5d}\n",
+    }
+
+    def __init__(self, filename, **kwargs):
+        self.filename = util.filename(filename, ext="crd")
+        super().__init__(self.filename, **kwargs)
+        self.crd = None
+
+    def write(self, selection, frame=None):
+        # type: (object, object) -> object
+        """Write selection at current trajectory frame to file.
+
+        write(selection,frame=FRAME)
+
+        selection         MDAnalysis AtomGroup
+        frame             optionally move to frame FRAME
+        """
+        u = selection.universe
+        if frame is not None:
+            u.trajectory[frame]  # advance to frame
+        else:
+            try:
+                frame = u.trajectory.ts.frame
+            except AttributeError:
+                frame = 0  # should catch cases when we are analyzing a single PDB (?)
+
+        atoms = selection.atoms  # make sure to use atoms (Issue 46)
+        coor = atoms.positions  # can write from selection == Universe (Issue 49)
+
+        n_atoms = len(atoms)
+        # Detect which format string we"re using to output (EXT or not)
+        # *len refers to how to truncate various things,
+        # depending on output format!
+        at_fmt = self.fmt["ATOM_EXT"]
+        serial_len = 10
+        resid_len = 8
+        totres_len = 10
+
+        # Check for attributes, use defaults for missing ones
+        attrs = {}
+        missing_topology = []
+        for attr, default in (
+            ("resnames", itertools.cycle(("UNK",))),
+            # Resids *must* be an array because we index it later
+            ("resids", np.ones(n_atoms, dtype=np.int)),
+            ("names", itertools.cycle(("X",))),
+            ("tempfactors", itertools.cycle((0.0,))),
+        ):
+            try:
+                attrs[attr] = getattr(atoms, attr)
+            except (NoDataError, AttributeError):
+                attrs[attr] = default
+                missing_topology.append(attr)
+        # ChainIDs - Try ChainIDs first, fall back to Segids
+        try:
+            attrs["chainIDs"] = atoms.chainIDs
+        except (NoDataError, AttributeError):
+            # try looking for segids instead
+            try:
+                attrs["chainIDs"] = atoms.segids
+            except (NoDataError, AttributeError):
+                attrs["chainIDs"] = itertools.cycle(("",))
+                missing_topology.append(attr)
+        if missing_topology:
+            warnings.warn(
+                "Supplied AtomGroup was missing the following attributes: "
+                "{miss}. These will be written with default values. "
+                "".format(miss=", ".join(missing_topology)))
+
+        with util.openany(self.filename, "w") as crd:
+            # Write Title
+            crd.write(self.fmt["TITLE"].format(
+                frame=frame, where=u.trajectory.filename))
+            crd.write("*\n")
+
+            # Write NUMATOMS
+            crd.write(self.fmt["NUMATOMS_EXT"].format(n_atoms))
+
+            # Write all atoms
+
+            current_resid = 1
+            resids = attrs["resids"]
+            for i, pos, resname, name, chainID, resid, tempfactor in zip(
+                range(n_atoms), coor, attrs["resnames"], attrs["names"],
+                attrs["chainIDs"], attrs["resids"], attrs["tempfactors"]):
+                if not i == 0 and resids[i] != resids[i - 1]:
+                    current_resid += 1
+
+                # Truncate numbers
+                serial = int(str(i + 1)[-serial_len:])
+                resid = int(str(resid)[-resid_len:])
+                current_resid = int(str(current_resid)[-totres_len:])
+
+                crd.write(at_fmt.format(
+                    serial=serial, totRes=current_resid, resname=resname,
+                    name=name, pos=pos, chainID=chainID,
+                    resSeq=resid, tempfactor=tempfactor))
diff --git a/src/fluctmatch/coordinates/__init__.py b/src/fluctmatch/coordinates/__init__.py
@@ -0,0 +1,40 @@
+# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding: utf-8 -*-
+# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
+#
+
+from __future__ import (
+    absolute_import,
+    division,
+    print_function,
+    unicode_literals,
+)
+
+from future.utils import (
+    native_str,
+    raise_from,
+    with_metaclass,
+)
+from future.builtins import (
+    ascii,
+    bytes,
+    chr,
+    dict,
+    filter,
+    hex,
+    input,
+    map,
+    next,
+    oct,
+    open,
+    pow,
+    range,
+    round,
+    str,
+    super,
+    zip,
+)
+
+import numpy as np
+import pandas as pd
+
+
diff --git a/src/fluctmatch/intcor/IC.py b/src/fluctmatch/intcor/IC.py
@@ -0,0 +1,154 @@
+# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding: utf-8 -*-
+# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
+#
+
+from __future__ import (
+    absolute_import,
+    division,
+    print_function,
+    unicode_literals,
+)
+
+from future.utils import (
+    native_str,
+)
+from future.builtins import (
+    dict,
+    open,
+    super,
+)
+
+import time
+from os import environ
+
+import numpy as np
+import pandas as pd
+
+from MDAnalysis.lib import util
+from ..topology.base import (TopologyReaderBase, TopologyWriterBase)
+
+
+class IntcorReader(TopologyReaderBase):
+    format = "IC"
+    units = dict(time=None, length="Angstrom")
+
+    fmt = dict(
+        # fortran_format = "(I5,1X,4(I3,1X,A4),F9.4,3F8.2,F9.4)"
+        STD = "I5,1X,I3,1X,A4,I3,1X,A4,I3,1X,A4,I3,1X,A4,F9.4,3F8.2,F9.4",
+        # fortran_format = "(I9,1X,4(I5,1X,A8),F9.4,3F8.2,F9.4)"
+        STD_EXT = ("I9,1X,I5,1X,A8,I5,1X,A8,I5,1X,A8,I5,1X,A8,F9.4,"
+                   "3F8.2,F9.4"),
+        # fortran_format = "(I5,4(1X,A4,1X,A4,1X,A4,"":""),F12.6,3F12.4,F12.6)"
+        RESID = ("I5,1X,A4,1X,A4,1X,A4,A1,1X,A4,1X,A4,1X,A4,A1,1X,A4,"
+                 "1X,A4,1X,A4,A1,1X,A4,1X,A4,1X,A4,A1,F12.6,3F12.4,F12.6"),
+        # fortran_format = "(I10,4(1X,A8,1X,A8,1X,A8,"":""),F12.6,3F12.4,F12.6)"
+        RESID_EXT = ("I10,1X,A8,1X,A8,1X,A8,A1,1X,A8,1X,A8,1X,A8,A1,1X,"
+                      "A8,1X,A8,1X,A8,A1,1X,A8,1X,A8,1X,A8,A1,F12.6,"
+                      "3F12.4,F12.6"),
+    )
+    cols = np.asarray(["segidI", "resI", "I", "segidJ", "resJ", "J",
+                       "segidK", "resK", "K", "segidL", "resL", "L",
+                       "r_IJ", "T_IJK", "P_IJKL", "T_JKL", "r_KL"],)
+
+    def __init__(self, filename, **kwargs):
+        """
+        Parameters
+        ----------
+        filename : str or :class:`~MDAnalysis.lib.util.NamedStream`
+             name of the output file or a stream
+        """
+
+        self.filename = util.filename(filename, ext="ic")
+        super().__init__(self.filename, **kwargs)
+
+    def read(self):
+        """Read the internal coordinates file.
+
+        :return: pandas.DataFrame
+        """
+        with open(self.filename, "rb") as icfile:
+            for line in icfile:
+                line = bytes_to_native_str(line).split("!")[0].strip()
+                if line.startswith("*") or not line:
+                    continue       # ignore TITLE and empty lines
+                line = np.fromiter(line.strip().split(), dtype=np.int)
+                key = "RESID" if line[1] == 2 else "STD"
+                if line[0] == 30:
+                    key += "_EXT"
+                break
+
+            TableParser = util.FORTRANReader(self.fmt[key])
+            nlines, resid = np.array(icfile.readline().strip().split(), dtype=np.int)
+            if resid != line[1]:
+                raise IOError("A mismatch has occurred on determining the IC format.")
+
+            table = pd.DataFrame([TableParser.read(bytes_to_native_str(line)) for line in icfile], dtype=np.unicode)
+            table = table[table != ":"].dropna(axis=1).apply(pd.to_numeric, errors="ignore")
+            table.set_index(0, inplace=True)
+            if nlines != table.shape[0]:
+                raise IOError(("A mismatch has occurred between the number "
+                               "of lines expected and the number of lines "
+                               "read. (%d != %d)") % (nlines, len(table)))
+
+            if key == "STD":
+                idx = np.where((self.cols != "segidI") & (self.cols != "segidJ") &
+                               (self.cols != "segidK") & (self.cols != "segidL"))
+                columns = self.cols[idx]
+            else:
+                columns = self.cols
+            table.columns = columns
+        return table
+
+
+class IntcorWriter(TopologyWriterBase):
+    format = "IC"
+    units = {"time": "picosecond", "length": "Angstrom"}
+
+    fmt = dict(
+        # fortran_format = "(I5,1X,4(I3,1X,A4),F9.4,3F8.2,F9.4)"
+        STD="%5d %3s %-4s%3s %-4%3s %-4%3s %-4%9.4f%8.2f%8.2f%8.2f%9.4f",
+        # fortran_format = "(I9,1X,4(I5,1X,A8),F9.4,3F8.2,F9.4)"
+        STD_EXT="%9d %5s %-8s%5s %-8s%5s %-8s%5s %-8s%9.4f%8.2f%8.2f%8.2f%9.4f",
+        # fortran_format = "(I5,4(1X,A4,1X,A4,1X,A4,"":""),F12.6,3F12.4,F12.6)"
+        RESID=("%5d %-4s %-4s %-4s: %-4s %-4s %-4s: %-4s %-4s %-4s:"
+               " %-4s %-4s %-4s:%12.6f%12.4f%12.4f%12.4f%12.6f"),
+        # fortran_format = "(I10,4(1X,A8,1X,A8,1X,A8,"":""),F12.6,3F12.4,F12.6)"
+        RESID_EXT=("%10d %-8s %-8s %-8s: %-8s %-8s %-8s: "
+                   "%-8s %-8s %-8s: %-8s %-8s %-8s:"
+                   "%12.6f%12.4f%12.4f%12.4f%12.6f"),)
+
+    def __init__(self, filename, extended=True, resid=True, title=None, **kwargs):
+        self.filename = util.filename(filename, ext="ic")
+        super().__init__(self.filename, **kwargs)
+
+        self.intcor = None
+        self.extended = extended
+        self.resid = resid
+        self.key = "RESID" if self.resid else "STD"
+        if self.extended:
+            self.key += "_EXT"
+        self.title = ("* Created by fluctmatch on {}".format(time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime())),
+                      "* User: {}".format(environ["USER"]), "*") if title is None else title
+
+    def write(self, table):
+        # type: (object) -> object
+        """Write an internal coordinates table.
+
+        :param table: table of internal coordinates
+        """
+        ictable = table.copy(deep=True)
+        rescol = ["resI", "resJ", "resK", "resL", ]
+        ictable[rescol] = ictable[rescol].astype(np.unicode)
+
+        with open(self.filename, "wb") as icfile:
+            for _ in self.title:
+                icfile.write((_ + "\n").encode())
+            line = np.zeros(20, dtype=np.int)
+            line[0] = 30 if self.extended else 20
+            line[1] = 2 if self.resid else 1
+            np.savetxt(icfile, line[np.newaxis, :], fmt=native_str("%4d"), delimiter=native_str(""))
+            line = np.zeros(2, dtype=np.int)
+            line[0] = ictable.shape[0]
+            line[1] = 2 if self.resid else 1
+            np.savetxt(icfile, line[np.newaxis, :], fmt=native_str("%4d"), delimiter=native_str(""))
+            np.savetxt(icfile, ictable.reset_index(), fmt=native_str(self.fmt[self.key]))