diff --git a/.gitignore b/.gitignore index 77a6522..2bbf215 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ *.project *.swp release.* +mmtf-update/pathname diff --git a/mmtf-api/pom.xml b/mmtf-api/pom.xml index c267eca..515363f 100644 --- a/mmtf-api/pom.xml +++ b/mmtf-api/pom.xml @@ -19,16 +19,9 @@ - - org.rcsb - mmtf-common - 0.0.1-alpha4-SNAPSHOT - junit junit - 4.11 - test diff --git a/mmtf-api/src/main/java/org/rcsb/mmtf/api/DataApiInterface.java b/mmtf-api/src/main/java/org/rcsb/mmtf/api/DataApiInterface.java deleted file mode 100644 index 6e3356a..0000000 --- a/mmtf-api/src/main/java/org/rcsb/mmtf/api/DataApiInterface.java +++ /dev/null @@ -1,263 +0,0 @@ -package org.rcsb.mmtf.api; - -import java.util.List; -import java.util.Map; - -import org.rcsb.mmtf.dataholders.BioAssemblyData; -import org.rcsb.mmtf.dataholders.Entity; -import org.rcsb.mmtf.dataholders.MmtfBean; -import org.rcsb.mmtf.dataholders.PDBGroup; - -/** - * An interface describing the data API. - * - *

- * The structural data is accessible through this interface via - * a flat structure, instead of the usual hierarchical - * data encountered in PDB structures: structure -> model -> chain -> group -> atom. - * Going back to a hierarchical view of the structure can be achieved by - * using the {@link #getChainsPerModel()}, {@link #getGroupsPerChain()} and - * {@link #getGroupMap()} methods so that the flat arrays can be reconstructed into - * a hierarchy. - *

- * - *

- * Please refer to the full MMTF specification available at - * http://mmtf.rcsb.org. - * Further reference can be found in the mmCIF dictionary. - *

- * - * @author Anthony Bradley - * @author Jose Duarte - */ -public interface DataApiInterface { - - /** - * Returns an array containing the X coordinates of the atoms in Angstroms. - * @return an array of length the number of atoms in the structure, obtainable with {@link #getNumAtoms()} - */ - float[] getXcoords(); - - /** - * Returns an array containing the Y coordinates of the atoms in Angstroms. - * @return an array of length the number of atoms in the structure, obtainable with {@link #getNumAtoms()} - */ - float[] getYcoords(); - - /** - * Returns an array containing the Z coordinates of the atoms in Angstroms. - * @return an array of length the number of atoms in the structure, obtainable with {@link #getNumAtoms()} - */ - float[] getZcoords(); - - /** - * Returns an array containing the B-factors (temperature factors) of the atoms in Angstroms^2. - * @return an array of length the number of atoms in the structure, obtainable with {@link #getNumAtoms()} - */ - float[] getBfactors(); - - /** - * Returns an array containing the occupancy values of the atoms. - * @return an array of length the number of atoms in the structure, obtainable with {@link #getNumAtoms()} - */ - float[] getOccupancies(); - - /** - * Returns an array of atom serial ids (_atom_site.id in mmCIF dictionary). - * @return an array of length the number of atoms in the structure, obtainable with {@link #getNumAtoms()} - */ - int[] getAtomIds(); - - /** - * Returns an array of location ids of the atoms. - * '?' specifies a lack of alt id. - * @return an array of length the number of atoms in the structure, obtainable with {@link #getNumAtoms()} - */ - char[] getAltLocIds(); - - /** - * Returns an array containing the insertion codes (pdbx_PDB_ins_code in mmCIF dictionary) for each residue (group). - * '?' specifies a lack of insertion code. 
- * @return an array with insertion codes, of size {@link #getNumResidues()} - * @see #getResidueNums() - */ - char[] getInsCodes(); - - /** - * Returns an array containing residue numbers (auth_seq_id in mmCIF dictionary) for each residue (group). - * @return an array with with residue numbers, of size {@link #getNumResidues()} - * @see #getInsCodes() - */ - int[] getResidueNums(); - - /** - * Returns the group map, mapping the numbers from indices specified in {@link #getGroupIndices()} - * to {@link PDBGroup} objects, which specify the atom names, - * elements, bonds and charges for each group. - * @return a map of group indices to {@link PDBGroup} objects - */ - Map getGroupMap(); - - /** - * Returns an array containing indices of all groups in the structure as used in {@link #getGroupMap()}. - * @return an array of length the number of groups (residues) in the structure, obtainable with {@link #getNumResidues()} - */ - int[] getGroupIndices(); - - /** - * Returns an array containing the indices of groups (residues) in their corresponding sequences, - * obtainable through {@link #getEntityList()} from the {@link Entity} objects. - * The indices are 0-based and specified per entity, -1 indicates the group is not present in the sequence. - * @return an array of length the number of groups (residues) in the structure, obtainable with {@link #getNumResidues()} - */ - int[] getSeqResGroupIndices(); - - /** - * Returns an array of internal chain identifiers (asym_ids in mmCIF dictionary), of length the - * number of chains (polymeric, non-polymeric and water) in the structure. - * - * The ids have a maximum of 4 chars. - * @return an array of length the number of chains in the structure, obtainable with {@link #getNumChains()} - */ - String[] getChainIds(); - - /** - * Returns an array of public chain identifiers (auth_ids in mmCIF dictionary), of length the - * number of chains (polymeric, non-polymeric and water) in the structure. 
- * - * @return an array of length the number of chains in the structure, obtainable with {@link #getNumChains()} - */ - String[] getChainNames(); - - /** - * Returns an array containing the number of chains (polymeric/non-polymeric/water) in each model. - * @return an array of length the number of models in the structure, obtainable with {@link #getNumModels()} - */ - int[] getChainsPerModel(); - - /** - * Returns an array containing the number of groups (residues) in each chain. - * @return an array of length the number of chains in the structure, obtainable with {@link #getNumChains()} - */ - int[] getGroupsPerChain(); - - /** - * Returns the space group of the structure. - * - * @return the space group name (e.g. "P 21 21 21") or null if the structure is not crystallographic - */ - String getSpaceGroup(); - - /** - * Returns the 6 floats that describe the unit cell. - * @return an array of size 6 with the unit cell parameters in order: a, b, c, alpha, beta, gamma - */ - float[] getUnitCell(); - - /** - * Returns a list of {@link BioAssemblyData}s corresponding to the structure. - * @return - */ - List getBioAssemblyList(); - - /** - * Returns an array of inter-group bonds represented with 2 consecutive atom - * indices in the array. - * @return an array of length 2 * number of inter-group bonds - */ - int[] getInterGroupBondIndices(); - - /** - * Returns an array of bond orders (1,2,3) of inter-group bonds with length number of inter-group bonds - * @return - */ - int[] getInterGroupBondOrders(); - - /** - * Returns the MMTF version number (from the specification). - * @return - */ - String getMmtfVersion(); - - /** - * Returns a string describing the producer of the MMTF process. - * @return - */ - String getMmtfProducer(); - - /** - * Returns an array with all {@link Entity} objects for the structure. - * The sequences can be obtained from the Entities. 
- * @return - */ - Entity[] getEntityList(); - - /** - * Returns the four character PDB id of the structure. - * @return the PDB identifier - */ - String getPdbId(); - - /** - * Returns the number of models in the structure. - * @return the number of models - */ - int getNumModels(); - - /** - * Returns the number of chains (for all models) in the structure. - * @return the number of chains for all models - * @see #getChainsPerModel() - */ - int getNumChains(); - - /** - * Returns the number of groups (residues) in the structure that have - * experimentally determined 3D coordinates. - * @return the number of residues in the structure, counting all models and chains - */ - int getNumResidues(); - - - /** - * Returns the number of atoms in the structure. - * @return the number of atoms in the structure, counting all models and chains - */ - int getNumAtoms(); - - - /** - * Returns the Rfree (if available) of the dataset. - * @return the Rfree value or {@value MmtfBean#UNAVAILABLE_R_VALUE} if unavailable - */ - float getRfree(); - - /** - * Returns the Resolution (if available) of the dataset. - * @return the resolution value in Angstroms or {@value MmtfBean#UNAVAILABLE_R_VALUE} if unavailable - */ - float getResolution(); - - /** - * Returns the Rwork (if available) of the dataset. - * @return the Rwork value or {@value MmtfBean#UNAVAILABLE_R_VALUE} if unavailable - */ - float getRwork(); - - - /** - * Returns the title of the structure. - * @return - */ - String getTitle(); - - /** - * Returns the experimental methods as a list of strings. Normally only one - * experimental method is available, but structures solved with hybrid methods will - * have more than one method. 
- * The possible experimental method values are described in - * data item _exptl.method of the mmCIF dictionary - * @return the list of experimental methods - */ - List getExperimentalMethods(); -} \ No newline at end of file diff --git a/mmtf-api/src/main/java/org/rcsb/mmtf/api/DataTransferInterface.java b/mmtf-api/src/main/java/org/rcsb/mmtf/api/DataTransferInterface.java new file mode 100644 index 0000000..a384578 --- /dev/null +++ b/mmtf-api/src/main/java/org/rcsb/mmtf/api/DataTransferInterface.java @@ -0,0 +1,138 @@ +package org.rcsb.mmtf.api; + +/** + * Interface to inflate a given MMTF data source. + * + * @author Anthony Bradley + */ +public interface DataTransferInterface { + + /** + * Used before any additions to do any required pre-processing. + * For example the user could use this to specify the amount of memory to be allocated. + * @param totalNumBonds the total number of bonds in the structure + * @param totalNumAtoms the total number of atoms found in the data. + * @param totalNumGroups the total number of groups found in the data. + * @param totalNumChains the total number of chains found in the data. + * @param totalNumModels the total number of models found in the data. + * @param structureId an identifier for the structure (e.g. PDB id). + */ + void initStructure(int totalNumBonds, int totalNumAtoms, int totalNumGroups, int totalNumChains, + int totalNumModels, String structureId); + + /** + * A generic function to be used at the end of all data addition to do required cleanup on the structure + */ + void finalizeStructure(); + + /** + * Sets the number of chains for a given model. + * @param modelId identifier of the model within the structure + * @param chainCount total number of chains within this model + */ + void setModelInfo(int modelId, int chainCount); + + /** + * Sets the information for a given chain. 
+ * @param chainId chain identifier - length of one to four + * @param chainName chain name - public chain id + * @param groupCount number of groups/residues in chain + */ + void setChainInfo(String chainId, String chainName, int groupCount); + + /** + * Sets the entity level annotation for a chain(s). chainIndices is a list of integers that indicate the chains this information + * refers to. Sequence is the one letter amino acid sequence. Description is a free-form string describing the entity and + * type indicates the kind of entity. + * @param chainIndices the indices of the chains this refers to. + * @param sequence the full sequence of the entity + * @param description the text description of the entity + * @param type the entity type as a string (POLYMER/NON-POLYMER and WATER) + */ + void setEntityInfo(int[] chainIndices, String sequence, String description, String type); + + /** + * Sets the information for a given group / residue with atomic data. + * @param groupName 3 letter code name of this group/residue + * @param groupNumber sequence position of this group + * @param insertionCode the one letter insertion code + * @param groupType a string indicating the type of group (as found in the chemcomp dictionary). Empty string if none available. + * @param atomCount the number of atoms in the group + * @param bondCount the number of unique bonds in the group + * @param singleLetterCode the single letter code of the group + * @param sequenceIndex the index of this group in the sequence + * @param secondaryStructureType the type of secondary structure used (types are according to DSSP and number to + * type mappings are defined in the specification) + */ + void setGroupInfo(String groupName, int groupNumber, char insertionCode, + String groupType, int atomCount, int bondCount, char singleLetterCode, + int sequenceIndex, int secondaryStructureType); + + + /** + * Sets the atom level information for a given atom.
+ * @param atomName 1-3 long string of the unique name of the atom + * @param serialNumber a number counting atoms in a structure + * @param alternativeLocationId a character indicating the alternate + * location of the atom + * @param x the x cartesian coordinate + * @param y the y cartesian coordinate + * @param z the z cartesian coordinate + * @param occupancy the atomic occupancy + * @param temperatureFactor the B factor (temperature factor) + * @param element a 1-3 long string indicating the chemical element of the atom + * @param charge the atomic charge + */ + void setAtomInfo(String atomName, int serialNumber, char alternativeLocationId, + float x, float y, float z, float occupancy, float temperatureFactor, String element, int charge); + + /** + * Sets a single Bioassembly transformation to a structure. bioAssemblyId indicates the index of the bioassembly. + * @param bioAssemblyIndex An integer index of this bioassembly. + * @param inputChainIndices The integer indices of the chains involved in this bioassembly. + * @param inputTransform A list of doubles indicating the transform for this bioassembly. + */ + void setBioAssemblyTrans(int bioAssemblyIndex, int[] inputChainIndices, double[] inputTransform); + + /** + * Sets the space group and unit cell information. + * + * @param spaceGroup the space group name, e.g. "P 21 21 21" + * @param unitCell an array of length 6 with the unit cell parameters in order: a, b, c, alpha, beta, gamma + */ + void setXtalInfo(String spaceGroup, float[] unitCell); + + /** + * Sets an intra-group bond. + * + * @param atomIndexOne the atom index of the first partner in the bond + * @param atomIndexTwo the atom index of the second partner in the bond + * @param bondOrder the bond order + */ + void setGroupBond(int atomIndexOne, int atomIndexTwo, int bondOrder); + + /** + * Sets an inter-group bond. 
+ * @param atomIndexOne the atom index of the first partner in the bond + * @param atomIndexTwo the atom index of the second partner in the bond + * @param bondOrder the bond order + */ + void setInterGroupBond(int atomIndexOne, int atomIndexTwo, int bondOrder); + + + /** + * Sets the header information. + * @param rFree the measured R-Free for the structure + * @param rWork the measured R-Work for the structure + * @param resolution the resolution of the structure + * @param title the title of the structure + * @param depositionDate the deposition date of the structure + * @param releaseDate the release date of the structure + * @param experimnetalMethods the list of experimental methods in the structure + */ + void setHeaderInfo(float rFree, float rWork, float resolution, String title, String depositionDate, + String releaseDate, String[] experimnetalMethods); + + + +} diff --git a/mmtf-api/src/main/java/org/rcsb/mmtf/api/DecodedDataInterface.java b/mmtf-api/src/main/java/org/rcsb/mmtf/api/DecodedDataInterface.java new file mode 100644 index 0000000..dcd5d5d --- /dev/null +++ b/mmtf-api/src/main/java/org/rcsb/mmtf/api/DecodedDataInterface.java @@ -0,0 +1,401 @@ +package org.rcsb.mmtf.api; + +/** + * An interface describing the data API. + * + *

+ * The structural data is accessible through this interface via + * a flat structure, instead of the usual hierarchical + * data encountered in PDB structures: structure -> model -> chain -> group -> atom. + * Going back to a hierarchical view of the structure can be achieved by + * using the {@link #getChainsPerModel()}, {@link #getGroupsPerChain()} and + * {@link #getGroupMap()} methods so that the flat arrays can be reconstructed into + * a hierarchy. + *

+ * + *

+ * Please refer to the full MMTF specification available at + * http://mmtf.rcsb.org. + * Further reference can be found in the mmCIF dictionary. + *

+ * + * @author Anthony Bradley + * @author Jose Duarte + */ +public interface DecodedDataInterface { + + /** + * Returns an array containing the X coordinates of the atoms in Angstroms. + * @return an array of length the number of atoms in the structure, obtainable with {@link #getNumAtoms()} + */ + float[] getxCoords(); + + /** + * Returns an array containing the Y coordinates of the atoms in Angstroms. + * @return an array of length the number of atoms in the structure, obtainable with {@link #getNumAtoms()} + */ + float[] getyCoords(); + + /** + * Returns an array containing the Z coordinates of the atoms in Angstroms. + * @return an array of length the number of atoms in the structure, obtainable with {@link #getNumAtoms()} + */ + float[] getzCoords(); + + /** + * Returns an array containing the B-factors (temperature factors) of the atoms in Angstroms^2. + * @return an array of length the number of atoms in the structure, obtainable with {@link #getNumAtoms()} + */ + float[] getbFactors(); + + /** + * Returns an array containing the occupancy values of the atoms. + * @return an array of length the number of atoms in the structure, obtainable with {@link #getNumAtoms()} + */ + float[] getOccupancies(); + + /** + * Returns an array of atom serial ids (_atom_site.id in mmCIF dictionary). + * @return an array of length the number of atoms in the structure, obtainable with {@link #getNumAtoms()} + */ + int[] getAtomIds(); + + /** + * Returns an array of location ids of the atoms. + * '\0' specifies a lack of alt id. + * @return an array of length the number of atoms in the structure, obtainable with {@link #getNumAtoms()} + */ + char[] getAltLocIds(); + + /** + * Returns an array containing the insertion codes (pdbx_PDB_ins_code in mmCIF dictionary) for each residue (group). + * '\0' specifies a lack of insertion code. 
+ * @return an array with insertion codes, of size {@link #getNumGroups()} + * @see #getGroupIds() + */ + char[] getInsCodes(); + + /** + * Returns an array containing residue numbers (auth_seq_id in mmCIF dictionary) for each residue (group). + * @return an array with residue numbers, of size {@link #getNumGroups()} + * @see #getInsCodes() + */ + int[] getGroupIds(); + + /** + * Returns the group name for the group specified in {@link #getGroupTypeIndices()}. + * Used to link groups to the 3 letter group name, e.g. HIS. + * @param groupInd The index of the group specified in {@link #getGroupTypeIndices()}. + * @return a 3 letter string specifying the group name. + */ + String getGroupName(int groupInd); + + /** + * Returns the number of atoms in the group specified in {@link #getGroupTypeIndices()}. + * @param groupInd The index of the group specified in {@link #getGroupTypeIndices()}. + * @return The number of atoms in the group + */ + int getNumAtomsInGroup(int groupInd); + + + /** + * Returns the atom names (e.g. CB) for the group specified in {@link #getGroupTypeIndices()}. + * Atom names are unique for each unique atom in a group. + * @param groupInd The index of the group specified in {@link #getGroupTypeIndices()}. + * @return An array of strings for the atom names. + * */ + String[] getGroupAtomNames(int groupInd); + + /** + * Returns the IUPAC element names (e.g. Ca is calcium) for the group specified in {@link #getGroupTypeIndices()}. + * @param groupInd the index of the group specified in {@link #getGroupTypeIndices()}. + * @return an array of strings for the element information. + * */ + String[] getGroupElementNames(int groupInd); + + /** + * Returns the bond orders for the group specified in {@link #getGroupTypeIndices()}. + * A list of integers indicating the bond orders + * @param groupInd the index of the group specified in {@link #getGroupTypeIndices()}. + * @return an array of integers (1,2 or 3) indicating the bond orders.
+ * */ + int[] getGroupBondOrders(int groupInd); + + /** + * Returns the zero-based bond indices (in pairs) for the group specified in {@link #getGroupTypeIndices()}. + * (e.g. 0,1 means there is bond between atom 0 and 1). + * @param groupInd the index of the group specified in {@link #getGroupTypeIndices()}. + * @return an array of integers specifying the bond indices (within the group). Indices are zero indexed. + * */ + int[] getGroupBondIndices(int groupInd); + + /** + * Returns the atom charges for the group specified in {@link #getGroupTypeIndices()}. + * @param groupInd the index of the group specified in {@link #getGroupTypeIndices()}. + * @return an array of integers indicating the atomic charge for each atom in the group. + */ + int[] getGroupAtomCharges(int groupInd); + + /** + * Returns the single letter amino acid code or nucleotide code for the + * group specified in {@link #getGroupTypeIndices()}. + * @param groupInd the index of the group specified in {@link #getGroupTypeIndices()}. + * @return the single letter amino acid or nucleotide, 'X' if non-standard amino acid or nucleotide + */ + char getGroupSingleLetterCode(int groupInd); + + /** + * Returns the chemical component type for the group specified in {@link #getGroupTypeIndices()}. + * @param groupInd The index of the group specified in {@link #getGroupTypeIndices()}. + * @return a string (taken from the chemical component dictionary) indicating + * the type of the group. Corresponds to + * http://mmcif.wwpdb.org/dictionaries/mmcif_pdbx.dic/Items/_chem_comp.type.html + */ + String getGroupChemCompType(int groupInd); + + /** + * Returns an array containing indices to be used to obtain group level information, + * e.g. through {@link #getGroupAtomCharges(int)}. 
+ * @return an array of length the number of groups (residues) in the structure, obtainable with {@link #getNumGroups()} + */ + int[] getGroupTypeIndices(); + + /** + * Returns an array containing the indices of groups (residues) in their corresponding sequences, + * obtainable through {@link #getEntityList()} from the {@link Entity} objects. + * The indices are 0-based and specified per entity, -1 indicates the group is not present in the sequence. + * @return an array of length the number of groups (residues) in the structure, obtainable with {@link #getNumGroups()} + */ + int[] getGroupSequenceIndices(); + + /** + * Returns an array of internal chain identifiers (asym_ids in mmCIF dictionary), of length the + * number of chains (polymeric, non-polymeric and water) in the structure. + * @return an array of length the number of chains in the structure, obtainable with {@link #getNumChains()} + * @see #getChainNames() + */ + String[] getChainIds(); + + /** + * Returns an array of public chain identifiers (auth_ids in mmCIF dictionary), of length the + * number of chains (polymeric, non-polymeric and water) in the structure. + * @return an array of length the number of chains in the structure, obtainable with {@link #getNumChains()} + * @see #getChainIds() + */ + String[] getChainNames(); + + /** + * Returns an array containing the number of chains (polymeric/non-polymeric/water) in each model. + * @return an array of length the number of models in the structure, obtainable with {@link #getNumModels()} + */ + int[] getChainsPerModel(); + + /** + * Returns an array containing the number of groups (residues) in each chain. + * @return an array of length the number of chains in the structure, obtainable with {@link #getNumChains()} + */ + int[] getGroupsPerChain(); + + /** + * Returns the space group of the structure. + * + * @return the space group name (e.g. 
"P 21 21 21") or null if the structure is not crystallographic + */ + String getSpaceGroup(); + + /** + * Returns the 6 floats that describe the unit cell. + * @return an array of size 6 with the unit cell parameters in order: a, b, c, alpha, beta, gamma + */ + float[] getUnitCell(); + + /** + * Returns the number of bioassemblies in this structure. + * @return the number of bioassemblies. + */ + int getNumBioassemblies(); + + /** + * Returns the number of transformations in a given bioassembly. + * @param bioassemblyIndex an integer specifying the bioassembly index (zero indexed). + * @return an integer specifying of transformations in a given bioassembly. + */ + int getNumTransInBioassembly(int bioassemblyIndex); + + /** + * Returns the list of chain indices for the given transformation for the given bioassembly. + * @param bioassemblyIndex an integer specifying the bioassembly index (zero indexed). + * @param transformationIndex an integer specifying the index (zero indexed) for the desired transformation. + * @return a list of indices showing the chains involved in this transformation. + */ + int[] getChainIndexListForTransform(int bioassemblyIndex, int transformationIndex); + + + /** + * Returns a 4x4 transformation matrix for the given transformation for the given bioassembly. + * It is row-packed as per the convention of vecmath. (The first four elements are in the first row of the + * overall matrix). + * @param bioassemblyIndex an integer specifying the bioassembly index (zero indexed). + * @param transformationIndex an integer specifying the index for the desired transformation (zero indexed). + * @return the transformation matrix for this transformation. + */ + double[] getMatrixForTransform(int bioassemblyIndex, int transformationIndex); + + + /** + * Returns the zero-based bond indices (in pairs) for the structure. + * (e.g. 0,1 means there is bond between atom 0 and 1). 
+ * @return an array of integers specifying the bond indices (within the structure). Indices are zero-based. + */ + int[] getInterGroupBondIndices(); + + /** + * Returns an array of bond orders (1,2,3) of inter-group bonds with length number of inter-group bonds + * @return the bond orders for bonds between groups + */ + int[] getInterGroupBondOrders(); + + /** + * Returns the MMTF version number (from the specification). + * @return the version + */ + String getMmtfVersion(); + + /** + * Returns a string describing the producer of the MMTF file. + * e.g. "RCSB-PDB Generator---version: 6b8635f8d319beea9cd7cc7f5dd2649578ac01a0" + * @return a string describing the producer + */ + String getMmtfProducer(); + + /** + * Returns the number of entities (as defined in mmCIF dictionary) in the structure + * @return the number of entities in the structure + */ + int getNumEntities(); + + /** + * Returns the entity description (as defined in mmCIF dictionary) + * for the entity specified by the index. + * @param entityInd the index of the specified entity. + * @return the description of the entity + */ + String getEntityDescription(int entityInd); + + /** + * Returns the entity type (polymer, non-polymer, water) for the entity specified by the index. + * @param entityInd the index of the specified entity. + * @return the entity type (polymer, non-polymer, water) + */ + String getEntityType(int entityInd); + + /** + * Returns the chain indices for the entity specified by the index. + * @param entityInd the index of the specified entity. + * @return the chain index list - referencing the entity to the chains. + */ + int[] getEntityChainIndexList(int entityInd); + + /** + * Returns the sequence for the entity specified by the index. + * @param entityInd the index of the specified entity. + * @return the one letter sequence for this entity. Empty string if no sequence is applicable.
+ */ + String getEntitySequence(int entityInd); + + /** + * Returns the identifier of the structure. + * For instance the 4-letter PDB id + * @return the identifier + */ + String getStructureId(); + + /** + * Returns the number of models in the structure. + * @return the number of models + */ + int getNumModels(); + + /** + * Returns the total number of bonds in the structure + * @return the number of bonds + */ + int getNumBonds(); + + /** + * Returns the number of chains (for all models) in the structure. + * @return the number of chains for all models + * @see #getChainsPerModel() + */ + int getNumChains(); + + /** + * Returns the number of groups (residues) in the structure that have + * experimentally determined 3D coordinates. + * @return the number of residues in the structure, for all models and chains + */ + int getNumGroups(); + + + /** + * Returns the number of atoms in the structure. + * @return the number of atoms in the structure, for all models and chains + */ + int getNumAtoms(); + + + /** + * Returns the Rfree of the dataset. + * @return the Rfree value + */ + float getRfree(); + + /** + * Returns the Rwork of the dataset. + * @return the Rwork value + */ + float getRwork(); + + /** + * Returns the resolution of the dataset. + * @return the resolution value in Angstroms + */ + float getResolution(); + + /** + * Returns the title of the structure. + * @return the title of the structure. + */ + String getTitle(); + + /** + * Returns the experimental methods as an array of strings. Normally only one + * experimental method is available, but structures solved with hybrid methods will + * have more than one method. + * The possible experimental method values are described in + * data item _exptl.method of the mmCIF dictionary + * @return the list of experimental methods + */ + String[] getExperimentalMethods(); + + /** + * Returns the deposition date of the structure as a string + * in ISO time standard format.
https://www.cl.cam.ac.uk/~mgk25/iso-time.html + * @return the deposition date of the structure. + */ + String getDepositionDate(); + + /** + * Returns the release date of the structure as a string + * in ISO time standard format. https://www.cl.cam.ac.uk/~mgk25/iso-time.html + * @return the release date of the structure. + */ + String getReleaseDate(); + + /** + * The secondary structure information for the structure as a list of integers + * @return the array of secondary structure informations + */ + int[] getSecStructList(); + +} \ No newline at end of file diff --git a/mmtf-api/src/main/java/org/rcsb/mmtf/api/StructureDecoderInterface.java b/mmtf-api/src/main/java/org/rcsb/mmtf/api/StructureDecoderInterface.java deleted file mode 100644 index 58f7ead..0000000 --- a/mmtf-api/src/main/java/org/rcsb/mmtf/api/StructureDecoderInterface.java +++ /dev/null @@ -1,135 +0,0 @@ -package org.rcsb.mmtf.api; - -import java.util.List; - -import org.rcsb.mmtf.dataholders.BioAssemblyData; - -/** - * Interface to inflate a given MMTF data source. - * - * @author Anthony Bradley - */ -public interface StructureDecoderInterface { - - /** - * Sets the number of models. - * @param modelCount Number of models - */ - void setModelCount(int modelCount); - - /** - * Sets the number of chains for a given model. - * @param modelId identifier of the model within the structure - * @param chainCount total number of chains within this model - */ - void setModelInfo(int modelId, int chainCount); - - /** - * Sets the information for a given chain. - * @param chainId chain identifier - length of one to four - * @param groupCount number of groups/residues in chain - */ - void setChainInfo(String chainId, int groupCount); - - /** - * Sets the entity level annotation for a chain(s). ChainIds is a list of strings that indicate the list of chains this information - * refers to. Sequence is the one letter amino acid sequence. 
Description and title are both free forms strings describing the entity and - * acting as a title for the entity. - * @param chainIds - * @param sequence - * @param description - * @param title - */ - void setEntityInfo(String[] chainIds, String sequence, String description, String title); - - /** - * Sets the information for a given group / residue with atomic data. - * @param groupName 3 letter code name of this group/residue - * @param groupNumber sequence position of this group - * @param insertionCode the one letter insertion code - * @param polymerType a string indicating the type of group (as found in the chemcomp dictionary. Empty string if none available. - * @param atomCount the number of atoms in the group - */ - void setGroupInfo(String groupName, int groupNumber, char insertionCode, - String polymerType, int atomCount); - - - /** - * Sets the atom level information for a given atom. - * @param atomName 1-3 long string of the unique name of the atom - * @param serialNumber a number counting atoms in a structure - * @param alternativeLocationId a character indicating the alternate - * location of the atom - * @param x the x cartesian coordinate - * @param y the y cartesian coordinate - * @param z the z cartesian coordinate - * @param occupancy the atomic occupancy - * @param temperatureFactor the B factor (temperature factor) - * @param element a 1-3 long string indicating the chemical element of the atom - * @param charge the atomic charge - */ - void setAtomInfo(String atomName, int serialNumber, char alternativeLocationId, - float x, float y, float z, float occupancy, float temperatureFactor, String element, int charge); - - - /** - * Sets the Bioassembly information for the structure. - * - * @param inputBioassemblies - */ - void setBioAssemblyList(List inputBioAssemblies); - - /** - * Sets the space group and unit cell information. - * - * @param spaceGroup the space group name, e.g. 
"P 21 21 21" - * @param unitCell an array of length 6 with the unit cell parameters in order: a, b, c, alpha, beta, gamma - */ - void setXtalInfo(String spaceGroup, float[] unitCell); - - /** - * Sets an intra-group bond. - * - * @param thisBondIndOne the atom index of the first partner in the bond - * @param thisBondIndTwo the atom index of the second partner in the bond - * @param thisBondOrder the bond order - */ - void setGroupBond(int thisBondIndOne, int thisBondIndTwo, int thisBondOrder); - - /** - * Sets an inter-group bond. - * - * @param thisBondIndOne the atom index of the first partner in the bond - * @param thisBondIndTwo the atom index of the second partner in the bond - * @param thisBondOrder the bond order - */ - void setInterGroupBond(int thisBondIndOne, int thisBondIndTwo, int thisBondOrder); - - - /** - * Sets the header information. - * @param rFree - * @param rWork - * @param resolution - * @param title - * @param experimnetalMethods - */ - void setHeaderInfo(float rFree, float rWork, float resolution, String title, List experimnetalMethods); - - /** - * Used before any additions to do any required pre-processing. - * For example the user could use this to specify the amount of memory to be allocated. - * @param totalNumAtoms The total number of atoms found in the data. - * @param totalNumGroups The total number of groups found in the data. - * @param totalNumChains The total number of chains found in the data. - * @param totalNumModels The total number of models found in the data. - * @param modelCode An identifier for this model (e.g. PDB id). 
- */ - void prepareStructure(int totalNumAtoms, int totalNumGroups, int totalNumChains, int totalNumModels, String modelCode); - - /** - * A generic function to be used at the end of all data addition to do required cleanup on the structure - */ - void cleanUpStructure(); - -} diff --git a/mmtf-common/pom.xml b/mmtf-common/pom.xml index 3094fc7..3591fb8 100644 --- a/mmtf-common/pom.xml +++ b/mmtf-common/pom.xml @@ -1,4 +1,5 @@ - + 4.0.0 org.rcsb @@ -24,23 +25,250 @@ junit junit - 4.11 - test + + + uk.co.jemos.podam + podam + + + org.unitils + unitils-core + + + commons-beanutils + commons-beanutils - - - - maven-compiler-plugin - 3.3 - - 1.8 - 1.8 - - - - + + + org.apache.maven.plugins + maven-jar-plugin + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + true + + + + pl.project13.maven + git-commit-id-plugin + 2.2.0 + + + + revision + + + + + + + ${project.basedir}/.git + + + git + + + dd.MM.yyyy '@' HH:mm:ss z + + + + ${user.timezone} + + + false + + + + + + true + + + ${project.build.outputDirectory}/git.properties + + + properties + + + true + + + + false + + + + true + + + + true + + + + false + + + + false + + + + + + + + + + + + + + + + false + + + + 7 + + + + + flat + + + + + + + false + + + false + + 7 + + + -dirty + + + * + + + false + + + + + true + + + ${project.build.outputDirectory}/git.properties + + + + + + + diff --git a/mmtf-common/src/main/java/org/rcsb/mmtf/dataholders/BioAssemblyData.java b/mmtf-common/src/main/java/org/rcsb/mmtf/dataholders/BioAssemblyData.java index 52dac78..651d8dd 100644 --- a/mmtf-common/src/main/java/org/rcsb/mmtf/dataholders/BioAssemblyData.java +++ b/mmtf-common/src/main/java/org/rcsb/mmtf/dataholders/BioAssemblyData.java @@ -21,7 +21,7 @@ public class BioAssemblyData implements Serializable { /** * The specific transformations of this bioassembly. 
*/ - private List transforms; + private List transformList; /** @@ -29,8 +29,8 @@ public class BioAssemblyData implements Serializable { * * @return the transforms */ - public final List getTransforms() { - return transforms; + public final List getTransformList() { + return transformList; } /** @@ -38,9 +38,9 @@ public final List getTransforms() { * * @param inputTransforms the new transforms */ - public final void setTransforms(final + public final void setTransformList(final List inputTransforms) { - this.transforms = inputTransforms; + this.transformList = inputTransforms; } } diff --git a/mmtf-common/src/main/java/org/rcsb/mmtf/dataholders/BioAssemblyTrans.java b/mmtf-common/src/main/java/org/rcsb/mmtf/dataholders/BioAssemblyTrans.java index 54f451c..3017a4a 100644 --- a/mmtf-common/src/main/java/org/rcsb/mmtf/dataholders/BioAssemblyTrans.java +++ b/mmtf-common/src/main/java/org/rcsb/mmtf/dataholders/BioAssemblyTrans.java @@ -1,11 +1,6 @@ package org.rcsb.mmtf.dataholders; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; - /** * The transformation needed for generation of biological assemblies * from the contents of a PDB/mmCIF file. It contains both the actual @@ -14,18 +9,14 @@ * * @author Anthony Bradley */ -public class BioAssemblyTrans implements Serializable { - - - /** Serial id for this version of the format. */ - private static final long serialVersionUID = -8109941242652091495L; +public class BioAssemblyTrans { - /** The chain id. */ - private List chainIdList = new ArrayList(); + /** The indices of the chains this bioassembly references. */ + private int[] chainIndexList; /** The 4x4 matrix transformation specifying a rotation and a translation. 
*/ - private double[] transformation; + private double[] matrix; /** @@ -35,8 +26,8 @@ public class BioAssemblyTrans implements Serializable { * * @return the transformation */ - public final double[] getTransformation() { - return transformation; + public double[] getMatrix() { + return matrix; } /** @@ -46,26 +37,26 @@ public final double[] getTransformation() { * * @param inputTransformation the new transformation */ - public final void setTransformation(final double[] inputTransformation) { - this.transformation = inputTransformation; + public void setMatrix(double[] transformation) { + this.matrix = transformation; } /** - * Gets the chain id list. + * Gets the indices of the chains this bioassembly refers to. * - * @return the chain id list + * @return a list of integers indicating the indices (zero indexed) of the chains this bioassembly refers to. */ - public final List getChainIdList() { - return chainIdList; + public int[] getChainIndexList() { + return chainIndexList; } /** * Sets the chain id. * - * @param inputChainId the new chain id + * @param a list of integers indicating the indices (zero indexed) of the chains this bioassembly refers to. */ - public final void setChainIdList(final List inputChainId) { - this.chainIdList = inputChainId; + public void setChainIndexList(int[] inputChainId) { + this.chainIndexList = inputChainId; } } diff --git a/mmtf-common/src/main/java/org/rcsb/mmtf/dataholders/CodeHolders.java b/mmtf-common/src/main/java/org/rcsb/mmtf/dataholders/CodeHolders.java deleted file mode 100644 index 8b02f6b..0000000 --- a/mmtf-common/src/main/java/org/rcsb/mmtf/dataholders/CodeHolders.java +++ /dev/null @@ -1,85 +0,0 @@ -package org.rcsb.mmtf.dataholders; - -import java.io.Serializable; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; - -/** - * The encoding of secondary structure types. 
- * @author Anthony Bradley - * - */ -public class CodeHolders implements Serializable { - - /** Serial id for this version of the format. */ - private static final long serialVersionUID = 8214511379505123391L; - - /** The Constant PI_HELIX_IND. */ - private static final int PI_HELIX_IND = 0; - - /** The Constant BEND_IND. */ - private static final int BEND_IND = 1; - - /** The Constant ALPHA_HELIX_IND. */ - private static final int ALPHA_HELIX_IND = 2; - - /** The Constant EXTENDED_IND. */ - private static final int EXTENDED_IND = 3; - - /** The Constant HELIX_3_10_IND. */ - private static final int HELIX_3_10_IND = 4; - - /** The Constant BRIDGE_IND. */ - private static final int BRIDGE_IND = 5; - - /** The Constant TURN_IND. */ - private static final int TURN_IND = 6; - - /** The Constant COIL_IND. */ - private static final int COIL_IND = 7; - - /** The Constant NULL_ENTRY_IND. */ - private static final int NULL_ENTRY_IND = -1; - - - /** - * A map to store secondary structure to integer mappings. - */ - private Map dsspMap; - - /** - * Constructor initialises the map. - */ - public CodeHolders() { - Map aMap = new HashMap(); - aMap.put("pi Helix", PI_HELIX_IND); - aMap.put("Bend", BEND_IND); - aMap.put("alpha Helix", ALPHA_HELIX_IND); - aMap.put("Extended", EXTENDED_IND); - aMap.put("3-10 Helix", HELIX_3_10_IND); - aMap.put("Bridge", BRIDGE_IND); - aMap.put("Turn", TURN_IND); - aMap.put("Coil", COIL_IND); - aMap.put("NA", NULL_ENTRY_IND); - setDsspMap(Collections.unmodifiableMap(aMap)); - } - - /** - * Gets the dssp map. - * - * @return the dsspMap - */ - public final Map getDsspMap() { - return dsspMap; - } - - /** - * Sets the dssp map. 
- * - * @param inputDsspMap the dsspMap to set - */ - public final void setDsspMap(final Map inputDsspMap) { - this.dsspMap = inputDsspMap; - } -} diff --git a/mmtf-common/src/main/java/org/rcsb/mmtf/dataholders/DsspType.java b/mmtf-common/src/main/java/org/rcsb/mmtf/dataholders/DsspType.java new file mode 100644 index 0000000..bc5e50d --- /dev/null +++ b/mmtf-common/src/main/java/org/rcsb/mmtf/dataholders/DsspType.java @@ -0,0 +1,121 @@ +package org.rcsb.mmtf.dataholders; + + +/** + * The encoding of secondary structure types. + * Based on DSSP: Kabsch W, Sander C (1983). + * "Dictionary of protein secondary structure: pattern recognition of hydrogen-bonded and geometrical features". + * Biopolymers 22 (12): 2577–637. doi:10.1002/bip.360221211. PMID 6667333. + * @author Anthony Bradley + */ +public enum DsspType { + + PI_HELIX("pi Helix",0), + BEND("Bend",1), + ALPHA_HELIX("alpha Helix",2), + EXTENDED("Extended",3), + HELIX_3_10("3-10 Helix",4), + BRIDGE("Bridge",5), + TURN("Turn",6), + COIL("Coil",7), + NULL_ENTRY("NA",-1); + + /** The String type of the DSSP*/ + private String dsspType; + + /** + * The index used for encoding secondary structure + * information. One for each type + * */ + private int dsspIndex; + + /** + * Constructor (private) of DSSP type + * @param inputDsspType The string type of the DSSP input. + * @param inputDsspIndex The integer index of hte DSSP input. + */ + private DsspType(String inputDsspType, int inputDsspIndex) { + this.setDsspType(inputDsspType); + this.setDsspIndex(inputDsspIndex); + + } + + /** + * Sets the DSSP index given a particular input value. + * @param inputDsspIndex The input index (as specified above). + */ + private void setDsspIndex(int inputDsspIndex) { + this.dsspIndex = inputDsspIndex; + } + + /** + * Returns the type of the DSSP as a String + * @return String representation of the DSSP type. 
+ */ + public String getDsspType() { + return dsspType; + } + + + /** Returns the index of the Dssp as an integer + * @return String representation of the Dssp type. + */ + public int getDsspIndex() { + return dsspIndex; + } + + /** + * Sets the DSSP type using an input String. + * @param inputDsspType The input string used to define the DSSP type. + */ + private void setDsspType(String inputDsspType) { + this.dsspType = inputDsspType; + } + + /** + * Creates a new DsspType from a String value. + * Returns DsspType.NULL_ENTRY if dsspType is "NA" or not one of the supported + * standard types. + * @param dsspType String value , should be one of "pi Helix","Bend","alpha Helix","Extended", + * "3-10 Helix","Bridge","Turn","Coil","NA" + * @return an DsspType object + */ + public static DsspType dsspTypeFromString(String dsspType) + { + + if (dsspType == null) + return DsspType.NULL_ENTRY; + + for(DsspType et : DsspType.values()) + { + if(dsspType.equals(et.dsspType)) + { + return et; + } + } + // Return a null entry. + return DsspType.NULL_ENTRY; + } + + /** + * Creates a new DsspType from an int val + * Returns DsspType.NULL_ENTRY if dsspType is -1 or not one of the supported + * standard types. + * @param dsspType int value , should be an integer value from -1 to 7 + * @return an DsspType object + */ + public static DsspType dsspTypeFromInt(int dsspType) + { + + for(DsspType et : DsspType.values()) + { + if(dsspType==et.dsspIndex) + { + return et; + } + } + // Return a null entry. 
+ return DsspType.NULL_ENTRY; + } + +} \ No newline at end of file diff --git a/mmtf-common/src/main/java/org/rcsb/mmtf/dataholders/Entity.java b/mmtf-common/src/main/java/org/rcsb/mmtf/dataholders/Entity.java index 74600e8..d15f726 100644 --- a/mmtf-common/src/main/java/org/rcsb/mmtf/dataholders/Entity.java +++ b/mmtf-common/src/main/java/org/rcsb/mmtf/dataholders/Entity.java @@ -24,7 +24,6 @@ public class Entity implements Serializable { private String sequence; - public String getDescription() { return description; } diff --git a/mmtf-common/src/main/java/org/rcsb/mmtf/dataholders/MmtfBean.java b/mmtf-common/src/main/java/org/rcsb/mmtf/dataholders/MmtfBean.java index 312d388..be01cc3 100644 --- a/mmtf-common/src/main/java/org/rcsb/mmtf/dataholders/MmtfBean.java +++ b/mmtf-common/src/main/java/org/rcsb/mmtf/dataholders/MmtfBean.java @@ -2,7 +2,6 @@ import java.io.Serializable; import java.util.List; -import java.util.Map; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; @@ -13,17 +12,20 @@ */ @JsonIgnoreProperties(ignoreUnknown = true) public class MmtfBean implements Serializable { - + + /** The number to divide coordinate int values by. */ public static final float COORD_DIVIDER = 1000.0f; /** The number to divide occupancy and bfactor int values by. */ public static final float OCCUPANCY_BFACTOR_DIVIDER = 100.0f; - /** - * The default value for Rfree, Rwork and resolution when not available or not applicable - */ - public static final float UNAVAILABLE_R_VALUE = -1.0f; + /** The default value for Rfree and Rwork */ + public static final float UNAVAILABLE_R_VALUE = 1.0f; + /** The default value for resolution when not available or not applicable */ + public static final float UNAVAILABLE_RESOLUTION_VALUE = 99.0f; + /** The default value for a missing or null string field */ + public static final char UNAVAILABLE_CHAR_VALUE = '\0'; + - /** Serial id for this version of the format. 
*/ private static final long serialVersionUID = 384559979573830324L; @@ -36,8 +38,8 @@ public class MmtfBean implements Serializable { /** The number of bonds. */ private int numBonds; - /** The pdb id. */ - private String pdbId; + /** The structure Id. Most commonly this will be the four character PDB id. */ + private String structureId; /** The title. */ private String title; @@ -72,9 +74,8 @@ public class MmtfBean implements Serializable { /** The bond order list. */ private byte[] bondOrderList; - /** The group map. */ - // Map of all the data - private Map groupMap; + /** The list of different PDBGroups in the structure. */ + private PDBGroup[] groupList; /** The x coord big. 4 byte integers in pairs. */ private byte[] xCoordBig; @@ -104,13 +105,13 @@ public class MmtfBean implements Serializable { private byte[] secStructList; /** The occupancy list. */ - private byte[] occList; + private byte[] occupancyList; - /** The alt label list. */ - private List altLabelList; + /** The list of alternate location ids. */ + private byte[] altLocList; /** The insertion code list. */ - private List insCodeList; + private byte[] insCodeList; /** The group type list. */ private byte[] groupTypeList; @@ -122,24 +123,29 @@ public class MmtfBean implements Serializable { private byte[] atomIdList; /** The SeqRes group ids. */ - private byte[] seqResIdList; + private byte[] sequenceIndexList; /** The experimental method(s). */ - private List experimentalMethods; + private String[] experimentalMethods; - /** The resolution in Angstrom. -1.0 if not applicable*/ + /** The resolution in Angstrom. Null if not applicable*/ private float resolution; - /** The rfree. -1.0 if not applicable */ + /** The rfree. Null if not applicable */ private float rFree; - /** The r-work. -1.0 if not applicable */ + /** The r-work. Null if not applicable */ private float rWork; - + /** The list of entities in this structure. 
*/ private Entity[] entityList; - - + + /** The deposition date of the structure in ISO time standard format. https://www.cl.cam.ac.uk/~mgk25/iso-time.html */ + private String depositionDate; + + /** The release data of the structure in ISO time standard format. https://www.cl.cam.ac.uk/~mgk25/iso-time.html */ + private String releaseDate; + /** Constructor to set the default values for floats */ public MmtfBean() { @@ -150,11 +156,11 @@ public MmtfBean() { mmtfProducer = "NA"; /** The resolution in Angstrom. -1.0 if not applicable*/ - resolution = UNAVAILABLE_R_VALUE; + resolution = UNAVAILABLE_RESOLUTION_VALUE; - /** The rfree. -1.0 if not applicable */ + /** The rfree. 1.0 if not applicable */ rFree = UNAVAILABLE_R_VALUE; - + rWork = UNAVAILABLE_R_VALUE; /** The number of atoms. Default is -1 indicates error */ @@ -168,7 +174,10 @@ public MmtfBean() { /** * @return the resolution */ - public float getResolution() { + public Float getResolution() { + if (resolution==UNAVAILABLE_RESOLUTION_VALUE) { + return null; + } return resolution; } @@ -176,13 +185,21 @@ public float getResolution() { * @param resolution the resolution to set */ public void setResolution(float resolution) { - this.resolution = resolution; + if (resolution==0.0) { + this.resolution = UNAVAILABLE_RESOLUTION_VALUE; + } + else{ + this.resolution = resolution; + } } /** * @return the rFree */ - public float getrFree() { + public Float getrFree() { + if (rFree==UNAVAILABLE_R_VALUE) { + return null; + } return rFree; } @@ -190,13 +207,21 @@ public float getrFree() { * @param rFree the rFree to set */ public void setrFree(float rFree) { - this.rFree = rFree; + if (rFree==0.0){ + this.rFree = UNAVAILABLE_R_VALUE; + } + else{ + this.rFree = rFree; + } } /** * @return the rWork */ - public float getrWork() { + public Float getrWork() { + if (rWork==UNAVAILABLE_R_VALUE) { + return null; + } return rWork; } @@ -204,11 +229,16 @@ public float getrWork() { * @param rWork the rWork to set */ public void 
setrWork(float rWork) { - this.rWork = rWork; + if (rWork==0.0){ + this.rWork = UNAVAILABLE_R_VALUE; + } + else{ + this.rWork = rWork; + } } - /** + /** * Gets the space group. * * @return the space group @@ -407,21 +437,21 @@ public final void setbFactorSmall(final byte[] inputSmallBFactor) { } /** - * Gets the alt label list. + * Gets the alternate location list. * - * @return the alt label list + * @return the alternate location list */ - public final List getAltLabelList() { - return altLabelList; + public final byte[] getAltLocList() { + return altLocList; } /** * Sets the alt label list. * - * @param inputAltIdList the new alt id label list + * @param inputAltLocList the new alternation location label list */ - public final void setAltLabelList(final List inputAltIdList) { - this.altLabelList = inputAltIdList; + public final void setAltLocList(final byte[] inputAltLocList) { + this.altLocList = inputAltLocList; } /** @@ -479,21 +509,21 @@ public final void setNumAtoms(final int inputNumAtoms) { } /** - * Gets the occ list. + * Gets the occupancy list - an encoded per atom list of occupancy values. * - * @return the occ list + * @return the occupancy list - an encoded per atom list of occupancy values. */ - public final byte[] getOccList() { - return occList; + public final byte[] getOccupancyList() { + return occupancyList; } /** - * Sets the occ list. + * Sets the occupancy list - an encoded per atom list of occupancy values. * - * @param occupancy the new occ list + * @param inputOccupancyList the occupancy list - an encoded per atom list of occupancy values. 
*/ - public final void setOccList(final byte[] occupancy) { - this.occList = occupancy; + public final void setOccupancyList(final byte[] inputOccupancyList) { + this.occupancyList = inputOccupancyList; } /** @@ -501,7 +531,7 @@ public final void setOccList(final byte[] occupancy) { * * @return the insertion code list */ - public final List getInsCodeList() { + public final byte[] getInsCodeList() { return insCodeList; } @@ -510,7 +540,7 @@ public final List getInsCodeList() { * * @param inputInsertionCodeList the new insertion code list */ - public final void setInsCodeList(final List inputInsertionCodeList) { + public final void setInsCodeList(final byte[] inputInsertionCodeList) { this.insCodeList = inputInsertionCodeList; } @@ -519,8 +549,8 @@ public final void setInsCodeList(final List inputInsertionCodeList) { * * @return the group map */ - public final Map getGroupMap() { - return groupMap; + public final PDBGroup[] getGroupList() { + return groupList; } /** @@ -528,8 +558,8 @@ public final Map getGroupMap() { * * @param inputGroupMap the group map */ - public final void setGroupMap(final Map inputGroupMap) { - this.groupMap = inputGroupMap; + public void setGroupList(PDBGroup[] inputGroupMap) { + this.groupList = inputGroupMap; } /** @@ -537,7 +567,7 @@ public final void setGroupMap(final Map inputGroupMap) { * * @return the sec struct list */ - public final byte[] getSecStructList() { + public byte[] getSecStructList() { return secStructList; } @@ -546,7 +576,7 @@ public final byte[] getSecStructList() { * * @param secStruct the new sec struct list */ - public final void setSecStructList(final byte[] secStruct) { + public void setSecStructList(byte[] secStruct) { this.secStructList = secStruct; } @@ -555,7 +585,7 @@ public final void setSecStructList(final byte[] secStruct) { * * @return the group type list */ - public final byte[] getGroupTypeList() { + public byte[] getGroupTypeList() { return groupTypeList; } @@ -564,7 +594,7 @@ public final byte[] 
getGroupTypeList() { * * @param resOrder the new group type list */ - public final void setGroupTypeList(final byte[] resOrder) { + public void setGroupTypeList(byte[] resOrder) { this.groupTypeList = resOrder; } @@ -573,7 +603,7 @@ public final void setGroupTypeList(final byte[] resOrder) { * * @return the atom id list */ - public final byte[] getAtomIdList() { + public byte[] getAtomIdList() { return atomIdList; } @@ -582,7 +612,7 @@ public final byte[] getAtomIdList() { * * @param inputAtomIdList the new atom id list */ - public final void setAtomIdList(final byte[] inputAtomIdList) { + public void setAtomIdList(byte[] inputAtomIdList) { this.atomIdList = inputAtomIdList; } @@ -591,7 +621,7 @@ public final void setAtomIdList(final byte[] inputAtomIdList) { * * @return the title */ - public final String getTitle() { + public String getTitle() { return title; } @@ -600,26 +630,26 @@ public final String getTitle() { * * @param inputTitle the new title */ - public final void setTitle(final String inputTitle) { + public void setTitle(String inputTitle) { this.title = inputTitle; } /** - * Gets the pdb id. + * Gets the structure id. Should be used as a unique identifier of this structure. * - * @return the pdb id + * @return the the structure id a unique String id of this structure. */ - public final String getPdbId() { - return pdbId; + public String getStructureId() { + return structureId; } /** - * Sets the pdb id. + * Sets the structure id. Should be used as a unique identifier of this structure. * - * @param pdbCode the new pdb id + * @param inputId a unique String id of this structure. 
*/ - public final void setPdbId(final String pdbCode) { - this.pdbId = pdbCode; + public void setStructureId(String inputId) { + this.structureId = inputId; } /** @@ -627,7 +657,7 @@ public final void setPdbId(final String pdbCode) { * * @return the mmtf producer */ - public final String getMmtfProducer() { + public String getMmtfProducer() { return mmtfProducer; } @@ -636,7 +666,7 @@ public final String getMmtfProducer() { * * @param inputMmtfProducer the new mmtf producer */ - public final void setMmtfProducer(final String inputMmtfProducer) { + public void setMmtfProducer(String inputMmtfProducer) { this.mmtfProducer = inputMmtfProducer; } @@ -645,7 +675,7 @@ public final void setMmtfProducer(final String inputMmtfProducer) { * * @return the mmtf version */ - public final String getMmtfVersion() { + public String getMmtfVersion() { return mmtfVersion; } @@ -654,7 +684,7 @@ public final String getMmtfVersion() { * * @param inputMmtfVersion the new mmtf version */ - public final void setMmtfVersion(final String inputMmtfVersion) { + public void setMmtfVersion(String inputMmtfVersion) { this.mmtfVersion = inputMmtfVersion; } @@ -663,7 +693,7 @@ public final void setMmtfVersion(final String inputMmtfVersion) { * * @return the num bonds */ - public final int getNumBonds() { + public int getNumBonds() { return numBonds; } @@ -672,7 +702,7 @@ public final int getNumBonds() { * * @param inputNumBonds the new num bonds */ - public final void setNumBonds(final int inputNumBonds) { + public void setNumBonds(int inputNumBonds) { this.numBonds = inputNumBonds; } @@ -681,7 +711,7 @@ public final void setNumBonds(final int inputNumBonds) { * * @return the bond atom list */ - public final byte[] getBondAtomList() { + public byte[] getBondAtomList() { return bondAtomList; } @@ -690,7 +720,7 @@ public final byte[] getBondAtomList() { * * @param inputBondAtomList the new bond atom list */ - public final void setBondAtomList(final byte[] inputBondAtomList) { + public void 
setBondAtomList(byte[] inputBondAtomList) { this.bondAtomList = inputBondAtomList; } @@ -699,7 +729,7 @@ public final void setBondAtomList(final byte[] inputBondAtomList) { * * @return the bond order list */ - public final byte[] getBondOrderList() { + public byte[] getBondOrderList() { return bondOrderList; } @@ -708,7 +738,7 @@ public final byte[] getBondOrderList() { * * @param inputBondOrderList the new bond order list */ - public final void setBondOrderList(final byte[] inputBondOrderList) { + public void setBondOrderList(byte[] inputBondOrderList) { this.bondOrderList = inputBondOrderList; } @@ -717,7 +747,7 @@ public final void setBondOrderList(final byte[] inputBondOrderList) { * * @return the list of chains per model. */ - public final int[] getChainsPerModel() { + public int[] getChainsPerModel() { return chainsPerModel; } @@ -726,7 +756,7 @@ public final int[] getChainsPerModel() { * * @param inputInternalChainsPerModel the new list of chains per model. */ - public final void setChainsPerModel(final int[] + public void setChainsPerModel(int[] inputInternalChainsPerModel) { this.chainsPerModel = inputInternalChainsPerModel; } @@ -736,7 +766,7 @@ public final void setChainsPerModel(final int[] * * @return the internal groups per chain */ - public final int[] getGroupsPerChain() { + public int[] getGroupsPerChain() { return groupsPerChain; } @@ -745,7 +775,7 @@ public final int[] getGroupsPerChain() { * * @param inputGroupsPerChain the new internal groups per chain */ - public final void setGroupsPerChain(final int[] + public void setGroupsPerChain(int[] inputGroupsPerChain) { this.groupsPerChain = inputGroupsPerChain; } @@ -755,7 +785,7 @@ public final void setGroupsPerChain(final int[] * * @return the internal chain list */ - public final byte[] getChainIdList() { + public byte[] getChainIdList() { return chainIdList; } @@ -764,52 +794,82 @@ public final byte[] getChainIdList() { * * @param inputInternalChainList the new internal chain list */ - public 
final void setChainIdList(final byte[] inputInternalChainList) { + public void setChainIdList(byte[] inputInternalChainList) { this.chainIdList = inputInternalChainList; } /** * @return the experimental methods */ - public List getExperimentalMethods() { + public String[] getExperimentalMethods() { return experimentalMethods; } /** * @param experimentalMethods the experimental methods to set */ - public void setExperimentalMethods(List experimentalMethods) { + public void setExperimentalMethods(String[] experimentalMethods) { this.experimentalMethods = experimentalMethods; } /** - * @return the seqResGroupIds + * @return the seqResGroupIds */ - public byte[] getSeqResIdList() { - return seqResIdList; + public byte[] getSequenceIndexList() { + return sequenceIndexList; } /** * @param seqResGroupIds the seqResGroupIds to set */ - public void setSeqResIdList(byte[] seqResGroupIds) { - this.seqResIdList = seqResGroupIds; - } - - /** - * Get the entity list - * @return - */ - public Entity[] getEntityList() { - return entityList; - } - - /** - * Set the entity list - * @param entityList - */ - public void setEntityList(Entity[] entityList) { - this.entityList = entityList; - } + public void setSequenceIndexList(byte[] seqResGroupIds) { + this.sequenceIndexList = seqResGroupIds; + } + + /** + * Get the entity list + * @return + */ + public Entity[] getEntityList() { + return entityList; + } + + /** + * Set the entity list + * @param entityList + */ + public void setEntityList(Entity[] entityList) { + this.entityList = entityList; + } + + /** + * @return the deposition date of the structure in ISO time standard. + */ + public String getDepositionDate() { + return depositionDate; + } + + /** + * @param depositionDate a string indicating the deposition date to set. + */ + public void setDepositionDate(String depositionDate) { + this.depositionDate = depositionDate; + } + + /** + * @return the release date of the structure in ISO time standard. 
+ */ + public String getReleaseDate() { + return releaseDate; + } + + /** + * @param releaseDate a string indicating the deposition date to set. + */ + public void setReleaseDate(String releaseDate) { + this.releaseDate = releaseDate; + } + + } diff --git a/mmtf-common/src/main/java/org/rcsb/mmtf/dataholders/PDBGroup.java b/mmtf-common/src/main/java/org/rcsb/mmtf/dataholders/PDBGroup.java index 3c62349..e74c7c7 100644 --- a/mmtf-common/src/main/java/org/rcsb/mmtf/dataholders/PDBGroup.java +++ b/mmtf-common/src/main/java/org/rcsb/mmtf/dataholders/PDBGroup.java @@ -1,8 +1,7 @@ package org.rcsb.mmtf.dataholders; import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; +import java.util.Arrays; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; @@ -14,165 +13,227 @@ @JsonIgnoreProperties(ignoreUnknown = true) public class PDBGroup implements Serializable { - /** Serial id for this version of the format. */ - private static final long serialVersionUID = 2880633780569899800L; - - /** The group name. (e.g. HIS) */ - private String groupName; - - /** The atom info. A list of strings indicating - * the atominfo (Atom name and element name). */ - private List atomInfo; - - /** The bond orders. A list of integers indicating the bond orders*/ - private List bondOrders; - - /** The bond indices (in pairs). - * (e.g. 0,1 means there is bond between atom 0 and 1).*/ - private List bondIndices; - - /** The atom charges. */ - private List atomCharges; - - /** The single letter code. */ - private String singleLetterCode; - - /** A string (taken from the chemical component dictionary) indicating - * the type of the group. Corresponds to -> http://mmcif.wwpdb.org/dictionaries/mmcif_pdbx.dic/Items/_chem_comp.type.html - */ - private String chemCompType; - - /** - * Constructor for the PDB group. Makes empty lists. 
- */ - public PDBGroup(){ - groupName = new String(); - atomInfo = new ArrayList(); - bondOrders = new ArrayList(); - bondIndices = new ArrayList(); - atomCharges = new ArrayList(); - } - - /** - * Gets the atom info. - * - * @return the atom info - */ - // Generic getter and setter functions - public final List getAtomInfo() { - return atomInfo; - } - - /** - * Sets the atom info. - * - * @param inputAtomInfo the new atom info - */ - public final void setAtomInfo(final List inputAtomInfo) { - this.atomInfo = inputAtomInfo; - } - - /** - * Gets the bond orders. - * - * @return the bond orders - */ - public final List getBondOrders() { - return bondOrders; - } - - /** - * Sets the bond orders. - * - * @param inputBondOrders the new bond orders - */ - public final void setBondOrders(final List inputBondOrders) { - this.bondOrders = inputBondOrders; - } - - /** - * Gets the bond indices. - * - * @return the bond indices - */ - public final List getBondIndices() { - return bondIndices; - } - - /** - * Sets the bond indices. - * - * @param inputBondIndices the new bond indices - */ - public final void setBondIndices(final List inputBondIndices) { - this.bondIndices = inputBondIndices; - } - - /** - * Gets the group name. - * - * @return the group name - */ - public final String getGroupName() { - return groupName; - } - - /** - * Sets the group name. - * - * @param resName the new group name - */ - public final void setGroupName(final String resName) { - this.groupName = resName; - } - - /** - * Gets the atom charges. - * - * @return the atom charges - */ - public final List getAtomCharges() { - return atomCharges; - } - - /** - * Sets the atom charges. - * - * @param inputAtomCharges the new atom charges - */ - public final void setAtomCharges(final List inputAtomCharges) { - this.atomCharges = inputAtomCharges; - } - - /** - * Gets the single letter code. 
- * - * @return the single letter code - */ - public final String getSingleLetterCode() { - return singleLetterCode; - } - - /** - * Sets the single letter code. - * - * @param inputSingleLetterCode the new single letter code - */ - public final void setSingleLetterCode(final String inputSingleLetterCode) { - this.singleLetterCode = inputSingleLetterCode; - } - - /** - * @return the groupType - corresponds to _chem_comp.type - */ - public String getChemCompType() { - return chemCompType; - } - - /** - * @param groupType the groupType (corresponds to _chem_comp.type) to set - */ - public void setChemCompType(String groupType) { - this.chemCompType = groupType; - } + /** Serial id for this version of the format. */ + private static final long serialVersionUID = 2880633780569899800L; + + /** The group name. (e.g. HIS) */ + private String groupName; + + /** The atom names. A list of strings indicating + * the atom names (e.g. CA for C-alpha). */ + private String[] atomNameList; + + /** The element names. A list of strings indicating + * the element names (e.g. Ca for Calcium). */ + private String[] elementList; + + /** The bond orders. A list of integers indicating the bond orders*/ + private int[] bondOrderList; + + /** The bond indices (in pairs). + * (e.g. 0,1 means there is bond between atom 0 and 1).*/ + private int[] bondAtomList; + + /** The atom charges. */ + private int[] atomChargeList; + + /** The single letter code. */ + private char singleLetterCode; + + /** A string (taken from the chemical component dictionary) indicating + * the type of the group. 
Corresponds to -> http://mmcif.wwpdb.org/dictionaries/mmcif_pdbx.dic/Items/_chem_comp.type.html + */ + private String chemCompType; + + /* (non-Javadoc) + * @see java.lang.Object#hashCode() + */ + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + Arrays.hashCode(atomChargeList); + result = prime * result + Arrays.hashCode(atomNameList); + result = prime * result + Arrays.hashCode(bondAtomList); + result = prime * result + Arrays.hashCode(bondOrderList); + result = prime * result + ((chemCompType == null) ? 0 : chemCompType.hashCode()); + result = prime * result + Arrays.hashCode(elementList); + result = prime * result + ((groupName == null) ? 0 : groupName.hashCode()); + result = prime * result + singleLetterCode; + return result; + } + + /* (non-Javadoc) + * @see java.lang.Object#equals(java.lang.Object) + */ + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + PDBGroup other = (PDBGroup) obj; + if (!Arrays.equals(atomChargeList, other.atomChargeList)) + return false; + if (!Arrays.equals(atomNameList, other.atomNameList)) + return false; + if (!Arrays.equals(bondAtomList, other.bondAtomList)) + return false; + if (!Arrays.equals(bondOrderList, other.bondOrderList)) + return false; + if (chemCompType == null) { + if (other.chemCompType != null) + return false; + } else if (!chemCompType.equals(other.chemCompType)) + return false; + if (!Arrays.equals(elementList, other.elementList)) + return false; + if (groupName == null) { + if (other.groupName != null) + return false; + } else if (!groupName.equals(other.groupName)) + return false; + if (singleLetterCode != other.singleLetterCode) + return false; + return true; + } + + /** + * Gets the atom info. 
+ * + * @return the atom info + */ + // Generic getter and setter functions + public String[] getAtomNameList() { + return atomNameList; + } + + /** + * Sets the atom info. + * + * @param inputAtomInfo the new atom info + */ + public void setAtomNameList(String[] inputAtomInfo) { + this.atomNameList = inputAtomInfo; + } + + /** + * Gets the bond orders. + * + * @return the bond orders + */ + public int[] getBondOrderList() { + return bondOrderList; + } + + /** + * Sets the bond orders. + * + * @param inputBondOrders the new bond orders + */ + public void setBondOrderList(int[] inputBondOrders) { + this.bondOrderList = inputBondOrders; + } + + /** + * Gets the bond indices. + * + * @return the bond indices + */ + public int[] getBondAtomList() { + return bondAtomList; + } + + /** + * Sets the bond indices. + * + * @param inputBondIndices the new bond indices + */ + public void setBondAtomList(int[] inputBondIndices) { + this.bondAtomList = inputBondIndices; + } + + /** + * Gets the group name. + * + * @return the group name + */ + public String getGroupName() { + return groupName; + } + + /** + * Sets the group name. + * + * @param resName the new group name + */ + public void setGroupName(String resName) { + this.groupName = resName; + } + + /** + * Gets the atom charges. + * + * @return the atom charges + */ + public int[] getAtomChargeList() { + return atomChargeList; + } + + /** + * Sets the atom charges. + * + * @param inputAtomCharges the new atom charges + */ + public void setAtomChargeList(int[] inputAtomCharges) { + this.atomChargeList = inputAtomCharges; + } + + /** + * Gets the single letter code. + * + * @return the single letter code + */ + public char getSingleLetterCode() { + return singleLetterCode; + } + + /** + * Sets the single letter code. 
+ * + * @param inputSingleLetterCode the new single letter code + */ + public void setSingleLetterCode(char inputSingleLetterCode) { + this.singleLetterCode = inputSingleLetterCode; + } + + /** + * @return the groupType - corresponds to _chem_comp.type + */ + public String getChemCompType() { + return chemCompType; + } + + /** + * @param groupType the groupType (corresponds to _chem_comp.type) to set + */ + public void setChemCompType(String groupType) { + this.chemCompType = groupType; + } + + /** + * @return the elementList + */ + public String[] getElementList() { + return elementList; + } + + /** + * @param elementList the elementList to set + */ + public void setElementList(String[] elementList) { + this.elementList = elementList; + } } diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/gitversion/GetRepoState.java b/mmtf-common/src/main/java/org/rcsb/mmtf/gitversion/GetRepoState.java similarity index 96% rename from mmtf-encoder/src/main/java/org/rcsb/mmtf/gitversion/GetRepoState.java rename to mmtf-common/src/main/java/org/rcsb/mmtf/gitversion/GetRepoState.java index 2e2127f..9d9092b 100644 --- a/mmtf-encoder/src/main/java/org/rcsb/mmtf/gitversion/GetRepoState.java +++ b/mmtf-common/src/main/java/org/rcsb/mmtf/gitversion/GetRepoState.java @@ -29,7 +29,7 @@ public GitRepositoryState getGitRepositoryState() throws IOException * @return * @throws IOException */ - public String getCurrentVersion(){ + public static String getCurrentVersion(){ GetRepoState grs = new GetRepoState(); try{ GitRepositoryState repoState = grs.getGitRepositoryState(); diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/gitversion/GitRepositoryState.java b/mmtf-common/src/main/java/org/rcsb/mmtf/gitversion/GitRepositoryState.java similarity index 100% rename from mmtf-encoder/src/main/java/org/rcsb/mmtf/gitversion/GitRepositoryState.java rename to mmtf-common/src/main/java/org/rcsb/mmtf/gitversion/GitRepositoryState.java diff --git 
a/mmtf-common/src/main/java/org/rcsb/mmtf/utils/CodecUtils.java b/mmtf-common/src/main/java/org/rcsb/mmtf/utils/CodecUtils.java new file mode 100644 index 0000000..231520f --- /dev/null +++ b/mmtf-common/src/main/java/org/rcsb/mmtf/utils/CodecUtils.java @@ -0,0 +1,35 @@ +package org.rcsb.mmtf.utils; + +import java.util.List; + +public class CodecUtils { + + /** + * Convert an integer list to an integer array + * @param integerList the input list + * @return the output array + */ + public static int[] convertToIntArray(List integerList) { + int[] integerArray = new int[integerList.size()]; + for(int i=0; i max) { + max = intArray[i]; + } + } + return max; + } +} diff --git a/mmtf-common/src/test/java/org/rcsb/mmtf/dataholders/TestDataHolders.java b/mmtf-common/src/test/java/org/rcsb/mmtf/dataholders/TestDataHolders.java new file mode 100644 index 0000000..7b7e166 --- /dev/null +++ b/mmtf-common/src/test/java/org/rcsb/mmtf/dataholders/TestDataHolders.java @@ -0,0 +1,54 @@ +package org.rcsb.mmtf.dataholders; + + +import static org.junit.Assert.assertNotNull; + +import java.beans.IntrospectionException; +import java.beans.Introspector; +import java.beans.PropertyDescriptor; +import java.lang.reflect.InvocationTargetException; + +import org.junit.Test; +import org.unitils.reflectionassert.ReflectionAssert; + +import uk.co.jemos.podam.api.PodamFactory; +import uk.co.jemos.podam.api.PodamFactoryImpl; + +public class TestDataHolders { + + @Test + public void testBeans() throws IllegalAccessException, IllegalArgumentException, InvocationTargetException, IntrospectionException{ + PodamFactory factory = new PodamFactoryImpl(); + // Tests if setters are set appropriately + ReflectionAssert.assertPropertiesNotNull("Some properties null.", + factory.manufacturePojo(MmtfBean.class)); + testData(MmtfBean.class, factory.manufacturePojo(MmtfBean.class)); + + ReflectionAssert.assertPropertiesNotNull("Some properties null.", + factory.manufacturePojo(BioAssemblyData.class)); + 
testData(BioAssemblyData.class, factory.manufacturePojo(BioAssemblyData.class)); + + ReflectionAssert.assertPropertiesNotNull("Some properties null.", + factory.manufacturePojo(BioAssemblyTrans.class)); + testData(BioAssemblyTrans.class, factory.manufacturePojo(BioAssemblyTrans.class)); + + ReflectionAssert.assertPropertiesNotNull("Some properties null.", + factory.manufacturePojo(Entity.class)); + testData(Entity.class, factory.manufacturePojo(Entity.class)); + + ReflectionAssert.assertPropertiesNotNull("Some properties null.", + factory.manufacturePojo(PDBGroup.class)); + testData(PDBGroup.class, factory.manufacturePojo(PDBGroup.class)); + + } + + + private void testData(@SuppressWarnings("rawtypes") Class beanClass, Object object) throws IllegalAccessException, IllegalArgumentException, InvocationTargetException, IntrospectionException { + for(PropertyDescriptor propertyDescriptor : + Introspector.getBeanInfo(beanClass).getPropertyDescriptors()){ + assertNotNull(propertyDescriptor.getReadMethod().invoke(object)); + } + } + + +} diff --git a/mmtf-common/src/test/java/org/rcsb/mmtf/dataholders/TestDsspType.java b/mmtf-common/src/test/java/org/rcsb/mmtf/dataholders/TestDsspType.java new file mode 100644 index 0000000..a75b6a6 --- /dev/null +++ b/mmtf-common/src/test/java/org/rcsb/mmtf/dataholders/TestDsspType.java @@ -0,0 +1,43 @@ +package org.rcsb.mmtf.dataholders; + +import static org.junit.Assert.assertEquals; + +import org.junit.Test; + +public class TestDsspType { + @Test + public void testDsspType() { + + assertEquals(DsspType.dsspTypeFromString("pi Helix"), DsspType.dsspTypeFromInt(0)); + assertEquals(DsspType.PI_HELIX, DsspType.dsspTypeFromInt(0)); + + assertEquals(DsspType.dsspTypeFromString("Bend"), DsspType.dsspTypeFromInt(1)); + assertEquals(DsspType.BEND, DsspType.dsspTypeFromInt(1)); + + assertEquals(DsspType.dsspTypeFromString("alpha Helix"), DsspType.dsspTypeFromInt(2)); + assertEquals(DsspType.ALPHA_HELIX, DsspType.dsspTypeFromInt(2)); + + 
assertEquals(DsspType.dsspTypeFromString("Extended"), DsspType.dsspTypeFromInt(3)); + assertEquals(DsspType.EXTENDED, DsspType.dsspTypeFromInt(3)); + + assertEquals(DsspType.dsspTypeFromString("3-10 Helix"), DsspType.dsspTypeFromInt(4)); + assertEquals(DsspType.HELIX_3_10, DsspType.dsspTypeFromInt(4)); + + assertEquals(DsspType.dsspTypeFromString("Bridge"), DsspType.dsspTypeFromInt(5)); + assertEquals(DsspType.BRIDGE, DsspType.dsspTypeFromInt(5)); + + assertEquals(DsspType.dsspTypeFromString("Turn"), DsspType.dsspTypeFromInt(6)); + assertEquals(DsspType.TURN, DsspType.dsspTypeFromInt(6)); + + assertEquals(DsspType.dsspTypeFromString("Coil"), DsspType.dsspTypeFromInt(7)); + assertEquals(DsspType.COIL, DsspType.dsspTypeFromInt(7)); + + assertEquals(DsspType.dsspTypeFromString("NA"), DsspType.dsspTypeFromInt(-1)); + assertEquals(DsspType.NULL_ENTRY, DsspType.dsspTypeFromInt(-1)); + + assertEquals(DsspType.NULL_ENTRY, DsspType.dsspTypeFromString(null)); + assertEquals(DsspType.NULL_ENTRY, DsspType.dsspTypeFromString("MMTF")); + assertEquals(DsspType.NULL_ENTRY, DsspType.dsspTypeFromInt(100)); + + } +} diff --git a/mmtf-common/src/test/java/org/rcsb/mmtf/dataholders/TestPdbGroup.java b/mmtf-common/src/test/java/org/rcsb/mmtf/dataholders/TestPdbGroup.java new file mode 100644 index 0000000..c5cc535 --- /dev/null +++ b/mmtf-common/src/test/java/org/rcsb/mmtf/dataholders/TestPdbGroup.java @@ -0,0 +1,119 @@ +package org.rcsb.mmtf.dataholders; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.util.ArrayList; + +import org.junit.Before; +import org.junit.Test; + +public class TestPdbGroup { + + + private PDBGroup pdbGroupOne; + private PDBGroup pdbGroupTwo; + + @Before + public void beforeTest() { + + pdbGroupOne = makePdbGroup(); + pdbGroupTwo = makePdbGroup(); + + } + + + private PDBGroup makePdbGroup() { + PDBGroup pdbGroup = new PDBGroup(); + pdbGroup.setAtomChargeList(new int[] {1,1}); + pdbGroup.setAtomNameList(new 
String[] {"A","B"}); + pdbGroup.setBondAtomList(new int[] {0,1}); + pdbGroup.setBondOrderList(new int[] {1}); + pdbGroup.setChemCompType("POLT"); + pdbGroup.setElementList(new String[] {"A","B"}); + pdbGroup.setGroupName("MET"); + pdbGroup.setSingleLetterCode('A'); + return pdbGroup; + } + + + @Test + public void testPdbGroupEqualsItstelf() { + assertTrue(pdbGroupOne.equals(pdbGroupOne)); + } + + + @Test + public void testPdbGroupEquals() { + assertTrue(pdbGroupOne.equals(pdbGroupTwo)); + } + + @Test + public void testPdbGroupNotEqualsCharge() { + pdbGroupOne.setAtomChargeList(new int[] {0,1}); + assertFalse(pdbGroupOne.equals(pdbGroupTwo)); + } + + @Test + public void testPdbGroupNotEqualsName() { + pdbGroupOne.setAtomNameList(new String[] {"A","C"}); + assertFalse(pdbGroupOne.equals(pdbGroupTwo)); + } + + @Test + public void testPdbGroupNotEqualsBond() { + pdbGroupOne.setBondAtomList(new int[] {1,1}); + assertFalse(pdbGroupOne.equals(pdbGroupTwo)); + } + + @Test + public void testPdbGroupNotEqualsOrder() { + pdbGroupOne.setBondOrderList(new int[] {0}); + assertFalse(pdbGroupOne.equals(pdbGroupTwo)); + } + + @Test + public void testPdbGroupNotEqualsChem() { + pdbGroupOne.setChemCompType("LL"); + assertFalse(pdbGroupOne.equals(pdbGroupTwo)); + } + + + @Test + public void testPdbGroupNotEqualsEle() { + pdbGroupOne.setElementList(new String[] {"A","BC"}); + assertFalse(pdbGroupOne.equals(pdbGroupTwo)); + } + @Test + public void testPdbGroupNotEqualsGroupName() { + pdbGroupOne.setGroupName("MES"); + assertFalse(pdbGroupOne.equals(pdbGroupTwo)); + } + @Test + public void testPdbGroupNotEqualsSingle() { + pdbGroupOne.setSingleLetterCode('B'); + assertFalse(pdbGroupOne.equals(pdbGroupTwo)); + } + + @Test + public void testPdbGroupNotEqualsNull() { + assertFalse(pdbGroupOne.equals(null)); + } + + @Test + public void testPdbGroupNotEqulasOtherObj() { + assertFalse(pdbGroupOne.equals(new ArrayList())); + } + + @Test + public void testPdbGroupNotEqualsNullChem() { + 
pdbGroupTwo.setChemCompType(null); + assertFalse(pdbGroupOne.equals(pdbGroupTwo)); + } + + @Test + public void testPdbGroupNotEqulasNullType() { + pdbGroupTwo.setGroupName(null); + assertFalse(pdbGroupOne.equals(pdbGroupTwo)); + } +} diff --git a/mmtf-common/src/test/java/org/rcsb/mmtf/gitversion/TestGitVersion.java b/mmtf-common/src/test/java/org/rcsb/mmtf/gitversion/TestGitVersion.java new file mode 100644 index 0000000..087a71c --- /dev/null +++ b/mmtf-common/src/test/java/org/rcsb/mmtf/gitversion/TestGitVersion.java @@ -0,0 +1,18 @@ +package org.rcsb.mmtf.gitversion; + + + +import static org.junit.Assert.assertNotEquals; + +import org.junit.Test; + + +public class TestGitVersion { + + @Test + public void doesGitVersionExist(){ + assertNotEquals("NA", GetRepoState.getCurrentVersion()); + } + + +} diff --git a/mmtf-common/src/test/java/org/rcsb/mmtf/utils/TestCodecUtils.java b/mmtf-common/src/test/java/org/rcsb/mmtf/utils/TestCodecUtils.java new file mode 100644 index 0000000..57876d0 --- /dev/null +++ b/mmtf-common/src/test/java/org/rcsb/mmtf/utils/TestCodecUtils.java @@ -0,0 +1,33 @@ +package org.rcsb.mmtf.utils; + +import org.junit.Test; +import static org.junit.Assert.*; + +import java.util.ArrayList; +import java.util.List; + +/** + * Class of functions to test if codec utils wor + * @author Anthony Bradley + * + */ +public class TestCodecUtils { + + @Test + public void testConvertToIntArray() { + int[] testIntegerArray = {1,4,3,2,6,7,2,200}; + List integerList = new ArrayList<>(); + for (int inputInt : testIntegerArray) { + integerList.add(inputInt); + } + int[] integerArray = CodecUtils.convertToIntArray(integerList); + assertArrayEquals(testIntegerArray, integerArray); + } + + @Test + public void testFindMaxInIntArray() { + int[] intArray = {1,2,2020,1,2,567,203,-10200,-304,20}; + int maxValue = CodecUtils.findMaxInIntArray(intArray); + assertEquals(2020, maxValue); + } +} diff --git a/mmtf-decoder/pom.xml b/mmtf-decoder/pom.xml index 69b2094..253be8c 
100644 --- a/mmtf-decoder/pom.xml +++ b/mmtf-decoder/pom.xml @@ -1,4 +1,5 @@ - + 4.0.0 org.rcsb @@ -19,7 +20,6 @@ commons-beanutils commons-beanutils - 1.8.3 org.rcsb @@ -36,12 +36,40 @@ jackson-dataformat-msgpack 0.7.1 + + uk.co.jemos.podam + podam + + + org.unitils + unitils-core + + junit junit - 4.11 - test + + + + org.slf4j + slf4j-api + + + + org.apache.logging.log4j + log4j-slf4j-impl + + + org.apache.logging.log4j + log4j-api + + + org.apache.logging.log4j + log4j-core + + diff --git a/mmtf-decoder/src/main/java/org/rcsb/mmtf/arraydecompressors/DeltaDeCompress.java b/mmtf-decoder/src/main/java/org/rcsb/mmtf/arraydecompressors/DeltaDeCompress.java deleted file mode 100644 index 69e357c..0000000 --- a/mmtf-decoder/src/main/java/org/rcsb/mmtf/arraydecompressors/DeltaDeCompress.java +++ /dev/null @@ -1,75 +0,0 @@ -package org.rcsb.mmtf.arraydecompressors; - -import java.io.ByteArrayInputStream; -import java.io.DataInputStream; -import java.io.IOException; - -/** - * Perform delta decompression on byte arrays. - * The first array is of four byte integers found in pairs. The first in - * each pair is a number to be used in the output array. The second indicates - * the number of two byte integers to read from the second array. - * @author Anthony Bradley - * - */ -public class DeltaDeCompress { - - /** - * The number of bytes in a four byte integers. - */ - private static final int BIG_INT_BYTES = 4; - /** - * Decompress two byte arrays - one containing 4 byte and one 2 byte integers. - * @param fourByteInts An array of four byte integers. - * @param twoByteInts An array of two byte integers. - * @return A decompressed integer array. - * @throws IOException The byte array does not contain - * the information requested. 
- */ - public final int[] decompressByteArray(final byte[] fourByteInts, - final byte[] twoByteInts) throws IOException { - // Get these data streams - DataInputStream bigStream = new DataInputStream(new - ByteArrayInputStream(fourByteInts)); - // Get the length of the array - int lengthOfBigIntArray = fourByteInts.length / (BIG_INT_BYTES * 2); - // Integers used in the output - int[] fourByteIntArr = new int[lengthOfBigIntArray]; - // Integers used to count from the two byte array - int[] counterInts = new int[lengthOfBigIntArray]; - DataInputStream smallStream = new DataInputStream(new - ByteArrayInputStream(twoByteInts)); - int totNum = 0; - // Loop through these and take every other int - for (int i = 0; i < lengthOfBigIntArray; i++) { - int bigNum = bigStream.readInt(); - int counterNum = bigStream.readInt(); - // Now writ thei sout - totNum++; - // Now add to the counter - totNum += counterNum; - fourByteIntArr[i] = bigNum; - counterInts[i] = counterNum; - } - // Now loop over the total number of ints - int[] outArr = new int[totNum]; - int totCounter = 0; - for (int i = 0; i < fourByteIntArr.length; i++) { - // Now add this to the out array - if (i == 0) { - outArr[totCounter] = fourByteIntArr[i]; - } else { - outArr[totCounter] = outArr[totCounter - 1] + fourByteIntArr[i]; - } - totCounter++; - // Now loop through this - for (int j = 0; j < counterInts[i]; j++) { - // Now add this as a short - int currentInt = (int) smallStream.readShort(); - outArr[totCounter] = outArr[totCounter - 1] + currentInt; - totCounter++; - } - } - return outArr; - } -} diff --git a/mmtf-decoder/src/main/java/org/rcsb/mmtf/arraydecompressors/IntArrayDeCompressorInterface.java b/mmtf-decoder/src/main/java/org/rcsb/mmtf/arraydecompressors/IntArrayDeCompressorInterface.java deleted file mode 100644 index f90435b..0000000 --- a/mmtf-decoder/src/main/java/org/rcsb/mmtf/arraydecompressors/IntArrayDeCompressorInterface.java +++ /dev/null @@ -1,18 +0,0 @@ -package 
org.rcsb.mmtf.arraydecompressors; - -import java.util.List; - -/** - * Specify the functions in an decompressort of integer arrays. - * @author Anthony Bradley - * - */ -public interface IntArrayDeCompressorInterface { - - /** - * Decompress an integer array and return an other integer array. - * @param inArray An input list of integers. - * @return A list of Integers. - */ - List decompressIntArray(List inArray); -} diff --git a/mmtf-decoder/src/main/java/org/rcsb/mmtf/arraydecompressors/RunLengthDecodeInt.java b/mmtf-decoder/src/main/java/org/rcsb/mmtf/arraydecompressors/RunLengthDecodeInt.java deleted file mode 100644 index 144735c..0000000 --- a/mmtf-decoder/src/main/java/org/rcsb/mmtf/arraydecompressors/RunLengthDecodeInt.java +++ /dev/null @@ -1,81 +0,0 @@ -package org.rcsb.mmtf.arraydecompressors; - -import java.io.ByteArrayInputStream; -import java.io.DataInputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -/** - * Run length decode a list of integers. - * @author Anthony Bradley - * - */ -public class RunLengthDecodeInt implements IntArrayDeCompressorInterface { - - /** - * The number of bytes in a four byte integers. - */ - private static final int BIG_INT_BYTES = 4; - - /** - * Decompress a byte array that is run length encoded. - * @param inArray The input byte array. Integers as 4 bytes long - * @return The decompressed integer array - * @throws IOException The byte array does not contain the - * information requested. 
- */ - public final int[] decompressByteArray(final byte[] inArray) - throws IOException { - // The length of each of the 4 byte integer arrays - int lengthOfBigIntArr = inArray.length / (BIG_INT_BYTES * 2); - // Array to store all the different numbers - int[] numArr = new int[lengthOfBigIntArr]; - int[] countArr = new int[lengthOfBigIntArr]; - // Get the size - int totCount = 0; - DataInputStream bis = new DataInputStream(new - ByteArrayInputStream(inArray)); - for (int i = 0; i < lengthOfBigIntArr; i++) { - // Get the number - int getNum = bis.readInt(); - int getCount = bis.readInt(); - totCount += getCount; - numArr[i] = getNum; - countArr[i] = getCount; - } - // Now set this output array - int[] outArr = new int[totCount]; - int totCounter = 0; - for (int i = 0; i < numArr.length; i++) { - int thisAns = numArr[i]; - for (int j = 0; j < countArr[i]; j++) { - // Annd then add t is to the array - outArr[totCounter] = thisAns; - // Now add to the counter - totCounter++; - - } - - } - return outArr; - } - - @Override - public final List decompressIntArray(final List inArray) { - // Make the output array - List outArray = new ArrayList(); - // Loop through the vals - for (int i = 0; i < inArray.size(); i += 2) { - // Get the value out here - int intIn = inArray.get(i); - int numOfInt = inArray.get(i + 1); - // Now add these to the array - for (int j = 0; j < numOfInt; j++) { - outArray.add(intIn); - } - } - return outArray; - } - -} diff --git a/mmtf-decoder/src/main/java/org/rcsb/mmtf/arraydecompressors/RunLengthDecodeString.java b/mmtf-decoder/src/main/java/org/rcsb/mmtf/arraydecompressors/RunLengthDecodeString.java deleted file mode 100644 index 8f2131b..0000000 --- a/mmtf-decoder/src/main/java/org/rcsb/mmtf/arraydecompressors/RunLengthDecodeString.java +++ /dev/null @@ -1,83 +0,0 @@ -package org.rcsb.mmtf.arraydecompressors; - -import java.util.ArrayList; -import java.util.List; - -/** - * Decode string arrays that have been run length encoded. 
- * The input values are in pairs. The first value in each pair - * is the string to be used in the output array. The second value - * is the number of repetitions of that value. - * @author Anthony Bradley - * - */ -public class RunLengthDecodeString implements StringArrayDeCompressorInterface { - - - /** - * Runlength decode a string list and return as a char[]. - * @param inputArray An input string array. - * @return a char array of the strings - * rather than a string array. - */ - public final char[] stringArrayToChar(final ArrayList inputArray) { - int totNum = 0; - // Define an array to hold chars - char[] outChars = new char[1]; - char outChar = "l".charAt(0); - // If it's only one long - just take the char - if (inputArray.size() == 1) { - char[] outArray = new char[1]; - if (inputArray.get(0) == null) { - outChar = "?".charAt(0); - } else { - String outString = inputArray.get(0); - outString.getChars(0, 1, outChars, 0); - outChar = outChars[0]; - } - outArray[0] = outChar; - return outArray; - } - for (int i = 0; i < inputArray.size(); i += 2) { - totNum += Integer.parseInt(inputArray.get(i + 1)); - } - - char[] outArray = new char[totNum]; - int totCounter = 0; - - - for (int i = 0; i < inputArray.size(); i += 2) { - if (inputArray.get(i) == null) { - outChar = "?".charAt(0); - } else { - String outString = inputArray.get(i); - outString.getChars(0, 1, outChars, 0); - outChar = outChars[0]; - } - int numString = Integer.parseInt(inputArray.get(i + 1)); - for (int j = 0; j < numString; j++) { - outArray[totCounter] = outChar; - totCounter++; - } - } - return outArray; - } - - @Override - public final List deCompressStringArray(final List inArray) { - // Make the output array - List outArray = new ArrayList(); - for (int i = 0; i < inArray.size(); i += 2) { - String outString = inArray.get(i); - int numString = Integer.parseInt(inArray.get(i + 1)); - - for (int j = 0; j < numString; j++) { - outArray.add(outString); - } - } - return outArray; - } - - - -} 
diff --git a/mmtf-decoder/src/main/java/org/rcsb/mmtf/arraydecompressors/RunLengthDelta.java b/mmtf-decoder/src/main/java/org/rcsb/mmtf/arraydecompressors/RunLengthDelta.java deleted file mode 100644 index 4fe4d79..0000000 --- a/mmtf-decoder/src/main/java/org/rcsb/mmtf/arraydecompressors/RunLengthDelta.java +++ /dev/null @@ -1,75 +0,0 @@ -package org.rcsb.mmtf.arraydecompressors; - -import java.io.ByteArrayInputStream; -import java.io.DataInputStream; -import java.io.IOException; - -/** - * Functions to decompress input arrays using delta and run - * length decoding. - * The input is first run length decoded and then delta decoded. - * This is particularly useful for sequential numbers. - * @author Anthony Bradley - * - */ -public class RunLengthDelta { - - - /** - * The number of bytes in a four byte integers. - */ - private static final int BIG_INT_BYTES = 4; - - /** - * Decompress a byte a byte array using run length and delta decoding. - * @param inArray The input byte array - * @return A decompressed array of integers. - * @throws IOException If no more data can be read from the byte array. 
- */ - public final int[] decompressByteArray(final byte[] inArray) - throws IOException { - // The length of the array - int lengthOfBigIntArr = inArray.length / (BIG_INT_BYTES * 2); - // Array to store all the different numbers - int[] numArr = new int[lengthOfBigIntArr]; - int[] countArr = new int[lengthOfBigIntArr]; - // Get the size - int totCount = 0; - DataInputStream bis = new DataInputStream(new - ByteArrayInputStream(inArray)); - for (int i = 0; i < lengthOfBigIntArr; i++) { - // Get the numbers - int getNum = bis.readInt(); - // Get the number of repeats - int getCount = bis.readInt(); - if (getCount < 0) { - System.out.println(getCount); - } - // Get the total count - totCount += getCount; - // Fill the number array - numArr[i] = getNum; - // Fill the count array - countArr[i] = getCount; - } - // Now set this output array - int[] outArr = new int[totCount]; - int totCounter = 0; - int totAns = 0; - for (int i = 0; i < numArr.length; i++) { - // Get the number that is to be repeared - int thisAns = numArr[i]; - // Get the number of repeats - for (int j = 0; j < countArr[i]; j++) { - // Add the delta to get this answer - totAns += thisAns; - // And then add t is to the array - outArr[totCounter] = totAns; - // Now add to the counter - totCounter++; - } - } - return outArr; - } - -} diff --git a/mmtf-decoder/src/main/java/org/rcsb/mmtf/arraydecompressors/StringArrayDeCompressorInterface.java b/mmtf-decoder/src/main/java/org/rcsb/mmtf/arraydecompressors/StringArrayDeCompressorInterface.java deleted file mode 100644 index 56c4ef4..0000000 --- a/mmtf-decoder/src/main/java/org/rcsb/mmtf/arraydecompressors/StringArrayDeCompressorInterface.java +++ /dev/null @@ -1,20 +0,0 @@ -package org.rcsb.mmtf.arraydecompressors; - -import java.util.List; - -/** - * Generic functions to decompress a list of Strings to another list of Strings. 
- * @author Anthony Bradley - * - */ -public interface StringArrayDeCompressorInterface { - - /** - * Generic function to decompress a list of Strings to another - * list of Strings. - * @param inArray An input list of strings - * @return A list of strings - */ - List deCompressStringArray(List inArray); - -} diff --git a/mmtf-decoder/src/main/java/org/rcsb/mmtf/arraydecompressors/package-info.java b/mmtf-decoder/src/main/java/org/rcsb/mmtf/arraydecompressors/package-info.java deleted file mode 100644 index f6ad2df..0000000 --- a/mmtf-decoder/src/main/java/org/rcsb/mmtf/arraydecompressors/package-info.java +++ /dev/null @@ -1,8 +0,0 @@ -/** - * Decompress integer, byte and string arrays. - */ -/** - * @author Anthony Bradley - * - */ -package org.rcsb.mmtf.arraydecompressors; diff --git a/mmtf-decoder/src/main/java/org/rcsb/mmtf/decoder/ArrayConverters.java b/mmtf-decoder/src/main/java/org/rcsb/mmtf/decoder/ArrayConverters.java new file mode 100644 index 0000000..8edd232 --- /dev/null +++ b/mmtf-decoder/src/main/java/org/rcsb/mmtf/decoder/ArrayConverters.java @@ -0,0 +1,166 @@ +package org.rcsb.mmtf.decoder; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; + +/** + * Class of functions to convert arrays to readable types. + * e.g. byte arrays to integer arrays. + * @author Anthony Bradley + * + */ +public class ArrayConverters { + + /** The maximum number of chars in a chain entry. */ + private static final int MAX_CHARS_PER_CHAIN_ENTRY = 4; + + /** + * Find all the chain ids from a single byte array. Each byte encodes a different ASCII character. + * @param currentChainList the byte array of the chain list input. Each chain takes up 4 bytes. 
+ * @return the string array of the parsed chain ids + */ + public static String[] decodeChainList(byte[] currentChainList) { + int outputLength = currentChainList.length/4; + String[] outArray = new String[outputLength]; + for (int i = 0; i < outputLength; i++){ + outArray[i] = getChainId(currentChainList, i); + } + return outArray; + } + + /** + * Convert an integer array to a float array by dividing by a float. + * @param intArray the input integer array to be divided + * @param floatDivider the float divider to divide the integers by. + * @return a float array converted from the input. + */ + public static float[] convertIntsToFloats(int[] intArray, float floatDivider) { + // Assign the output array to write + float[] outArray = new float[intArray.length]; + for (int i=0; i outList = new ArrayList<>(); + for (int i=0; i chainIdSet; - private String[] chainList; - - /** - * The constructor requires a byte array to fill the data. This will decompress the arrays using our bespoke methods. - * @param inputByteArr An unentropy encoded byte array with the data as found in the MMTF format - */ - public DecodeStructure(byte[] inputByteArr) { - - // Data api - dataApi = new SimpleDataApi(inputByteArr); - - } - - - /** - * Generate a structure from bytes using a structure inflator. 
- * - * @param myInBytes the my in bytes - * @param inputStructInflator the input struct inflator - * @param parsingParams the parsing params - */ - public final void getStructFromByteArray(final StructureDecoderInterface inputStructInflator, final ParsingParams parsingParams) { - // Set the inflator - structInflator = inputStructInflator; - // Do any required preparation - inputStructInflator.prepareStructure(dataApi.getNumAtoms(), dataApi.getNumResidues(), dataApi.getNumChains(), dataApi.getNumModels(), dataApi.getPdbId()); - // Now get the parsing parameters to do their thing - useParseParams(parsingParams); - // Now add the atom information - addAtomicInformation(); - // Now add the header information. - addHeaderInfo(); - // Now set the crystallographic information - addXtalographicInfo(); - /// Now get the bioassembly information - generateBioAssembly(); - // Now add the other bonds between groups - addInterGroupBonds(); - // Now add the entity info - addEntityInfo(); - // Now do any required cleanup - structInflator.cleanUpStructure(); - } - - /** - * Add the main atomic information to the data model - */ - private void addAtomicInformation() { - for (int modelChains: dataApi.getChainsPerModel()) { - structInflator.setModelInfo(modelCounter, modelChains); - // A list to check if we need to set or update the chains - chainIdSet = new HashSet<>(); - int totChainsThisModel = chainCounter + modelChains; - for (int chainIndex = chainCounter; chainIndex < totChainsThisModel; chainIndex++) { - addOrUpdateChainInfo(chainIndex); - } - modelCounter++; - } - } - - - /** - * Add the entity information to a structure. 
- */ - private void addEntityInfo() { - for (Entity entity : dataApi.getEntityList()) { - String[] chainIdList = new String[entity.getChainIndexList().length]; - int counter = 0; - for (int chainInd : entity.getChainIndexList()) { - chainIdList[counter] = chainList[chainInd]; - counter++; - } - structInflator.setEntityInfo(chainIdList, entity.getSequence(), entity.getDescription(), entity.getType()); - } - } - - - /** - * Function to add ancilliary header information to the structure - */ - private void addHeaderInfo() { - structInflator.setHeaderInfo(dataApi.getRfree(),dataApi.getRwork(), dataApi.getResolution(), dataApi.getTitle(), dataApi.getExperimentalMethods()); - } - - - /** - * Use the parsing parameters to set the scene. - * @param parsingParams - */ - private void useParseParams(ParsingParams parsingParams) { - if (parsingParams.isParseInternal()) { - System.out.println("Using asym ids"); - chainList = dataApi.getChainIds(); - } else { - System.out.println("Using auth ids"); - chainList = dataApi.getChainNames(); - } - } - - - /** - * Set the chain level information and then loop through the groups - * @param chainIndex - */ - private void addOrUpdateChainInfo(int chainIndex) { - // Get the current c - String currentChainId = chainList[chainIndex]; - int groupsThisChain = dataApi.getGroupsPerChain()[chainIndex]; - // If we've already seen this chain -> just update it - if (chainIdSet.contains(currentChainId)) { - structInflator.setChainInfo(currentChainId, groupsThisChain); - } else { - structInflator.setChainInfo(currentChainId, groupsThisChain); - chainIdSet.add(currentChainId); - } - int nextInd = groupCounter + groupsThisChain; - // Now iteratr over the group - for (int currentGroupNumber = groupCounter; currentGroupNumber < nextInd; currentGroupNumber++) { - groupCounter++; - int atomCount = addGroup(currentGroupNumber); - lastAtomCount += atomCount; - } - chainCounter++; - } - - /** - * Adds the group. 
- * - * @param thisGroupNum the this group num - * @param nucAcidList the nuc acid list - * @return the int - */ - private int addGroup(final int thisGroupNum) { - // Now get the group - int g = dataApi.getGroupIndices()[thisGroupNum]; - // Get this info - PDBGroup currentGroup = dataApi.getGroupMap().get(g); - List atomInfo = currentGroup.getAtomInfo(); - int atomCount = atomInfo.size() / 2; - int currentGroupNumber = dataApi.getResidueNums()[thisGroupNum]; - char insertionCode = dataApi.getInsCodes()[thisGroupNum]; - structInflator.setGroupInfo(currentGroup.getGroupName(), currentGroupNumber, insertionCode, - currentGroup.getChemCompType(), atomCount); - // A counter for the atom information - atomCounter = 0; - // Now read the next atoms - for (int i = lastAtomCount; i < lastAtomCount + atomCount; i++) { - addAtomData(currentGroup, atomInfo, i); - } - addGroupBonds(currentGroup.getBondIndices(), currentGroup.getBondOrders()); - return atomCount; - } - - - /** - * Add atom level data for a given atom. - * @param currentPdbGroup The group being considered. - * @param atomInfo The list of strings containing atom level information. 
- * @param currentAtomIndex The index of the current Atom - */ - private void addAtomData(PDBGroup currentPdbGroup, List atomInfo, int currentAtomIndex) { - // Now get all the relevant atom level information here - String atomName = atomInfo.get(atomCounter * 2 + 1); - String element = atomInfo.get(atomCounter * 2); - int charge = currentPdbGroup.getAtomCharges().get(atomCounter); - int serialNumber = dataApi.getAtomIds()[currentAtomIndex]; - char alternativeLocationId = dataApi.getAltLocIds()[currentAtomIndex]; - float x = dataApi.getXcoords()[currentAtomIndex]; - float z = dataApi.getZcoords()[currentAtomIndex]; - float y = dataApi.getYcoords()[currentAtomIndex]; - float occupancy = dataApi.getOccupancies()[currentAtomIndex]; - float temperatureFactor = dataApi.getBfactors()[currentAtomIndex]; - structInflator.setAtomInfo(atomName, serialNumber, alternativeLocationId, - x, y, z, occupancy, temperatureFactor, element, charge); - // Now increment the atom counter for this group - atomCounter++; - } - - /** - * Adds bond information for a group (residue). - * @param bondInds A list of integer pairs. Each pair indicates the indices for the bonds. - * Bond indices are specified internally within the group and start at 0. - * @param bondOrders A list of integers specifying the bond orders for each bond. 
- */ - private void addGroupBonds(List bondInds, List bondOrders) { - // Now add the bond information for this group - for (int thisBond = 0; thisBond < bondOrders.size(); thisBond++) { - int thisBondOrder = bondOrders.get(thisBond); - int thisBondIndOne = bondInds.get(thisBond * 2); - int thisBondIndTwo = bondInds.get(thisBond * 2 + 1); - structInflator.setGroupBond(thisBondIndOne, thisBondIndTwo, - thisBondOrder); - } - } - - /** - * Generate inter group bonds - */ - private void addInterGroupBonds() { - for (int i = 0; i < dataApi.getInterGroupBondOrders().length; i++) { - structInflator.setInterGroupBond(dataApi.getInterGroupBondIndices()[i * 2], - dataApi.getInterGroupBondIndices()[i * 2 + 1], dataApi.getInterGroupBondOrders()[i]); - } - } - - /** - * Adds the crystallographic info to the structure - */ - private void addXtalographicInfo() { - if(dataApi.getUnitCell()!=null){ - structInflator.setXtalInfo(dataApi.getSpaceGroup(), dataApi.getUnitCell()); - } - } - - /** - * Parses the bioassembly data and inputs it to the structure inflator - */ - private void generateBioAssembly() { - structInflator.setBioAssemblyList(dataApi.getBioAssemblyList()); - } - - - -} diff --git a/mmtf-decoder/src/main/java/org/rcsb/mmtf/decoder/DecoderToReader.java b/mmtf-decoder/src/main/java/org/rcsb/mmtf/decoder/DecoderToReader.java new file mode 100644 index 0000000..c92a8f9 --- /dev/null +++ b/mmtf-decoder/src/main/java/org/rcsb/mmtf/decoder/DecoderToReader.java @@ -0,0 +1,175 @@ +package org.rcsb.mmtf.decoder; + +import java.util.HashSet; +import java.util.Set; + +import org.rcsb.mmtf.api.DecodedDataInterface; +import org.rcsb.mmtf.api.DataTransferInterface; + +/** + * Decode an MMTF structure using a structure inflator. + * The class also allows access to the unconsumed but parsed and inflated underlying data. + * @author Anthony Bradley + * + */ +public class DecoderToReader { + + /** The struct inflator. 
*/ + private DataTransferInterface structInflator; + + /** The api to the data */ + private DecodedDataInterface dataApi; + + // Intialises the counters. + private int modelCounter = 0; + private int chainCounter = 0; + private int groupCounter = 0; + private int atomCounter = 0; + private int currentAtomIndex = 0; + private Set chainIdSet; + + public DecoderToReader(){ + + } + + /** + * Passes data from the data interface to the inflator interface. + */ + public void read(DecodedDataInterface inputApi, DataTransferInterface inputInflator){ + // Set the api and the inflator + dataApi = inputApi; + structInflator = inputInflator; + // Do any required preparation + structInflator.initStructure(dataApi.getNumBonds(), dataApi.getNumAtoms(), dataApi.getNumGroups(), + dataApi.getNumChains(), dataApi.getNumModels(), dataApi.getStructureId()); + // Now add the atom information + addAtomicInformation(); + // Now add the header information. + DecoderUtils.addHeaderInfo(dataApi, structInflator); + // Now set the crystallographic information + DecoderUtils.addXtalographicInfo(dataApi, structInflator); + /// Now get the bioassembly information - only if parsing using AsymId + DecoderUtils.generateBioAssembly(dataApi, structInflator); + // Now add the other bonds between groups + DecoderUtils.addInterGroupBonds(dataApi, structInflator); + // Now add the entity info + DecoderUtils.addEntityInfo(dataApi, structInflator); + // Now do any required cleanup + structInflator.finalizeStructure(); + } + + /** + * Add the main atomic information to the data model + */ + private final void addAtomicInformation() { + for (int modelChains: dataApi.getChainsPerModel()) { + structInflator.setModelInfo(modelCounter, modelChains); + // A list to check if we need to set or update the chains + chainIdSet = new HashSet<>(); + int totChainsThisModel = chainCounter + modelChains; + int lastChainCounter = chainCounter; + for (int chainIndex = lastChainCounter; chainIndex < totChainsThisModel; 
chainIndex++) { + addOrUpdateChainInfo(chainIndex); + } + modelCounter++; + } + } + + /** + * Set the chain level information and then loop through the groups + * @param chainIndex the chain index to be created or updated. + */ + private void addOrUpdateChainInfo(int chainIndex) { + // Get the current c + String currentChainId = dataApi.getChainIds()[chainIndex]; + String currentChainName = dataApi.getChainNames()[chainIndex]; + int groupsThisChain = dataApi.getGroupsPerChain()[chainIndex]; + // If we've already seen this chain -> just update it + if (chainIdSet.contains(currentChainId)) { + structInflator.setChainInfo(currentChainId, currentChainName, groupsThisChain); + } else { + structInflator.setChainInfo(currentChainId, currentChainName, groupsThisChain); + chainIdSet.add(currentChainId); + } + int nextInd = groupCounter + groupsThisChain; + int lastGroupCount = groupCounter; + // Now iteratr over the group + for (int currentGroupNumber = lastGroupCount; currentGroupNumber < nextInd; currentGroupNumber++) { + addGroup(currentGroupNumber); + groupCounter++; + } + chainCounter++; + } + + /** + * Add a group to the structure - return the number of atoms in the structure. + * @param currentGroupIndex the integer indicating the index of the group to be added. + * @return an integer for the number of atoms in the structure. 
+ */ + private int addGroup(int currentGroupIndex) { + // Now get the group + int groupInd = dataApi.getGroupTypeIndices()[currentGroupIndex]; + // Get this info + int atomCount = dataApi.getNumAtomsInGroup(groupInd); + int currentGroupNumber = dataApi.getGroupIds()[currentGroupIndex]; + char insertionCode = dataApi.getInsCodes()[currentGroupIndex]; + structInflator.setGroupInfo(dataApi.getGroupName(groupInd), currentGroupNumber, insertionCode, + dataApi.getGroupChemCompType(groupInd), atomCount, dataApi.getNumBonds(), dataApi.getGroupSingleLetterCode(groupInd), + dataApi.getGroupSequenceIndices()[currentGroupIndex], dataApi.getSecStructList()[currentGroupIndex]); + // A counter for the atom information + atomCounter = 0; + // Now read the next atoms + for (int i = 0; i < atomCount; i++) { + addAtomData(dataApi.getGroupAtomNames(groupInd), dataApi.getGroupElementNames(groupInd), dataApi.getGroupAtomCharges(groupInd), currentAtomIndex); + currentAtomIndex++; + // Now increment the atom counter for this group + atomCounter++; + } + addGroupBonds(dataApi.getGroupBondIndices(groupInd), dataApi.getGroupBondOrders(groupInd)); + return atomCount; + } + + + /** + * Add atom level data for a given atom. + * @param currentPdbGroup the group being considered. + * @param atomInfo the list of strings containing atom level information. 
+ * @param currentAtomIndex the index of the current Atom + */ + private void addAtomData(String[] atomNames, String[] elementNames, int[] atomCharges, + int currentAtomIndex) { + // Now get all the relevant atom level information here + String atomName = atomNames[atomCounter]; + String element = elementNames[atomCounter]; + int charge = atomCharges[atomCounter]; + char alternativeLocationId = dataApi.getAltLocIds()[currentAtomIndex]; + int serialNumber = dataApi.getAtomIds()[currentAtomIndex]; + float x = dataApi.getxCoords()[currentAtomIndex]; + float z = dataApi.getzCoords()[currentAtomIndex]; + float y = dataApi.getyCoords()[currentAtomIndex]; + float occupancy = dataApi.getOccupancies()[currentAtomIndex]; + float temperatureFactor = dataApi.getbFactors()[currentAtomIndex]; + structInflator.setAtomInfo(atomName, serialNumber, alternativeLocationId, + x, y, z, occupancy, temperatureFactor, element, charge); + } + + /** + * Adds bond information for a group (residue). + * @param bondInds A list of integer pairs. Each pair indicates the indices for the bonds. + * Bond indices are specified internally within the group and start at 0. + * @param bondOrders A list of integers specifying the bond orders for each bond. 
+ */ + private void addGroupBonds(int[] bondInds, int[] bondOrders) { + // Now add the bond information for this group + for (int thisBond = 0; thisBond < bondOrders.length; thisBond++) { + int thisBondOrder = bondOrders[thisBond]; + int thisBondIndOne = bondInds[thisBond * 2]; + int thisBondIndTwo = bondInds[thisBond * 2 + 1]; + structInflator.setGroupBond(thisBondIndOne, thisBondIndTwo, + thisBondOrder); + } + } + + + +} diff --git a/mmtf-decoder/src/main/java/org/rcsb/mmtf/decoder/DecoderUtils.java b/mmtf-decoder/src/main/java/org/rcsb/mmtf/decoder/DecoderUtils.java index 975d9e6..f9d5bcb 100644 --- a/mmtf-decoder/src/main/java/org/rcsb/mmtf/decoder/DecoderUtils.java +++ b/mmtf-decoder/src/main/java/org/rcsb/mmtf/decoder/DecoderUtils.java @@ -1,121 +1,68 @@ package org.rcsb.mmtf.decoder; -import java.io.ByteArrayInputStream; -import java.io.DataInputStream; -import java.io.IOException; +import org.rcsb.mmtf.api.DecodedDataInterface; +import org.rcsb.mmtf.api.DataTransferInterface; public class DecoderUtils { - - /** The number of bytes in an integer. */ - private static final int NUM_BYTES_IN_INT = 4; - /** The maximum number of chars in a chain entry. */ - private static final int MAX_CHARS_PER_CHAIN_ENTRY = 4; - /** - * Function to get the chain id for this chain. 
- * - * @param chainList the chain list - * @param thisChain the this chain - * @return the chain id + * Parses the bioassembly data and inputs it to the structure inflator */ - public final String getChainId(final byte[] chainList, final int thisChain) { - - int incrementor = 0; - StringBuilder sb = new StringBuilder(); - byte chainIdOne = chainList[thisChain - * - MAX_CHARS_PER_CHAIN_ENTRY + incrementor]; - sb.append((char) chainIdOne); - // Now get the next byte - incrementor += 1; - byte chainIdTwo = chainList[thisChain - * MAX_CHARS_PER_CHAIN_ENTRY + incrementor]; - if (chainIdTwo != (byte) 0) { - sb.append((char) chainIdTwo); - } - incrementor += 1; - byte chainIdThree = chainList[thisChain - * - MAX_CHARS_PER_CHAIN_ENTRY + incrementor]; - if (chainIdThree != (byte) 0) { - sb.append((char) chainIdThree); + public static void generateBioAssembly(DecodedDataInterface dataApi, DataTransferInterface structInflator) { + for (int i=0; i bioAssembly; + + /** The bond indices for bonds between groups*/ + private int[] interGroupBondIndices; + + /** The bond orders for bonds between groups*/ + private int[] interGroupBondOrders; + + /** The chosen list of chain ids */ + private String[] chainList; + + /** The mmtf version */ + private String mmtfVersion; + + /** The mmtf prodcuer */ + private String mmtfProducer; + + /** A list containing pdb group names for nucleic acids */ + List nucAcidList = new ArrayList<>(); + + /** The list of entities in this structure. */ + private Entity[] entityList; + + /** The PDB id */ + private String pdbId; + + /** The reported resolution of the dataset. */ + private Float resolution; + + /** The reported R Free of the model. */ + private Float rFree; + + /** The reported R Work of the model. */ + private Float rWork; + + /** The title of the model. */ + private String title; + + /** The list of experimental methods. 
*/ + private String[] experimentalMethods; + + /** The deposition date of the structure */ + private String depositionDate; + + /** The release date of the structure */ + private String releaseDate; + + private int[] secStructInfo; + + + @Override + public float[] getxCoords() { + return cartnX; + } + + @Override + public float[] getyCoords() { + return cartnY; + } + + @Override + public float[] getzCoords() { + return cartnZ; + } + + @Override + public float[] getbFactors() { + return bFactor; + } + + @Override + public float[] getOccupancies() { + return occupancy; + } + + @Override + public int[] getAtomIds() { + return atomId; + } + + @Override + public char[] getAltLocIds() { + return altId; + } + + @Override + public char[] getInsCodes() { + return insertionCodeList; + } + + @Override + public int[] getGroupIds() { + return groupNum; + } + + @Override + public int[] getGroupTypeIndices() { + return groupList; + } + + @Override + public int[] getGroupSequenceIndices() { + return seqResGroupList; + } + + @Override + public String[] getChainNames() { + return publicChainIds; + } + + @Override + public int[] getChainsPerModel() { + return chainsPerModel; + } + + @Override + public int[] getGroupsPerChain() { + return groupsPerChain; + } + + @Override + public String getSpaceGroup() { + return spaceGroup; + } + + @Override + public float[] getUnitCell() { + return unitCell; + } + + @Override + public int[] getInterGroupBondIndices() { + return interGroupBondIndices; + } + + @Override + public int[] getInterGroupBondOrders() { + return interGroupBondOrders; + } + + @Override + public String[] getChainIds() { + return chainList; + } + + @Override + public String getMmtfVersion() { + return mmtfVersion; + } + + @Override + public String getMmtfProducer() { + return mmtfProducer; + } + + @Override + public String getStructureId() { + return pdbId; + } + + @Override + public int getNumGroups() { + return this.groupList.length; + } + + @Override + public int 
getNumChains() { + return this.chainList.length; + } + + @Override + public int getNumModels() { + return this.chainsPerModel.length; + } + + @Override + public int getNumAtoms() { + return this.cartnX.length; + } + + @Override + public float getRfree() { + if (rFree==null|| rFree ==0.0f) { + return MmtfBean.UNAVAILABLE_R_VALUE; + } + return rFree; + } + + @Override + public float getResolution() { + if (resolution==null || resolution==0.0f) { + return MmtfBean.UNAVAILABLE_RESOLUTION_VALUE; + } + return resolution; + } + + @Override + public float getRwork() { + if (rWork==null|| rWork ==0.0f) { + return MmtfBean.UNAVAILABLE_R_VALUE; + } + return rWork; + } + + @Override + public String getTitle() { + return title; + } + + @Override + public String[] getExperimentalMethods() { + return experimentalMethods; + } + + @Override + public String getGroupName(int groupInd) { + return groupMap[groupInd].getGroupName(); + } + + public int getNumAtomsInGroup(int groupInd) { + return groupMap[groupInd].getAtomChargeList().length; + } + + @Override + public String[] getGroupAtomNames(int groupInd) { + return groupMap[groupInd].getAtomNameList(); + } + + @Override + public String[] getGroupElementNames(int groupInd) { + return groupMap[groupInd].getElementList(); + + } + + @Override + public int[] getGroupBondOrders(int groupInd) { + return groupMap[groupInd].getBondOrderList(); + + } + + @Override + public int[] getGroupBondIndices(int groupInd) { + return groupMap[groupInd].getBondAtomList(); + } + + @Override + public int[] getGroupAtomCharges(int groupInd) { + return groupMap[groupInd].getAtomChargeList(); + } + + @Override + public char getGroupSingleLetterCode(int groupInd) { + return groupMap[groupInd].getSingleLetterCode(); + } + + @Override + public String getGroupChemCompType(int groupInd) { + return groupMap[groupInd].getChemCompType(); + } + + @Override + public String getEntityDescription(int entityInd) { + return entityList[entityInd].getDescription(); + } + + 
@Override + public String getEntityType(int entityInd) { + return entityList[entityInd].getType(); + + } + + @Override + public int[] getEntityChainIndexList(int entityInd) { + return entityList[entityInd].getChainIndexList(); + + } + + @Override + public String getEntitySequence(int entityInd) { + return entityList[entityInd].getSequence(); + + } + + @Override + public int getNumEntities() { + return entityList.length; + } + + @Override + public int getNumBioassemblies() { + return bioAssembly.size(); + } + + @Override + public int getNumTransInBioassembly(int bioassemblyIndex) { + return bioAssembly.get(bioassemblyIndex).getTransformList().size(); + } + + @Override + public int[] getChainIndexListForTransform(int bioassemblyIndex, int transformationIndex) { + return bioAssembly.get(bioassemblyIndex).getTransformList().get(transformationIndex).getChainIndexList(); + } + + @Override + public double[] getMatrixForTransform(int bioassemblyIndex, int transformationIndex) { + return bioAssembly.get(bioassemblyIndex).getTransformList().get(transformationIndex).getMatrix(); + } + + @Override + public String getDepositionDate() { + return depositionDate; + } + + @Override + public int getNumBonds() { + int numIntergroupBonds = interGroupBondOrders.length; + for(int groupIndex : groupList) { + numIntergroupBonds+=groupMap[groupIndex].getBondOrderList().length; + } + return numIntergroupBonds; + } + + @Override + public int[] getSecStructList() { + return secStructInfo; + } + + @Override + public String getReleaseDate() { + return releaseDate; + } + + +} diff --git a/mmtf-decoder/src/main/java/org/rcsb/mmtf/decoder/ParsingParams.java b/mmtf-decoder/src/main/java/org/rcsb/mmtf/decoder/ParsingParams.java deleted file mode 100644 index dab58bb..0000000 --- a/mmtf-decoder/src/main/java/org/rcsb/mmtf/decoder/ParsingParams.java +++ /dev/null @@ -1,38 +0,0 @@ -package org.rcsb.mmtf.decoder; - -/** - * The Class ParsingParams. 
- */ -public class ParsingParams { - - /** Whether to use internal chain ids or not. */ - private boolean parseInternal; - - /** - * Instantiates a new parsing params. - */ - public ParsingParams() { - parseInternal = false; - } - - /** - * Checks if is parses the internal chain ids. - * - * @return true, if is parses the internal - */ - public final boolean isParseInternal() { - return parseInternal; - } - - /** - * Sets whether to parse the internal chain ids. - * - * @param ifParseInternal the new parses the internal - */ - public final void setParseInternal(final boolean ifParseInternal) { - this.parseInternal = ifParseInternal; - } - - - -} diff --git a/mmtf-decoder/src/main/java/org/rcsb/mmtf/decoder/ReaderUtils.java b/mmtf-decoder/src/main/java/org/rcsb/mmtf/decoder/ReaderUtils.java new file mode 100644 index 0000000..421a54b --- /dev/null +++ b/mmtf-decoder/src/main/java/org/rcsb/mmtf/decoder/ReaderUtils.java @@ -0,0 +1,128 @@ +package org.rcsb.mmtf.decoder; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.zip.GZIPInputStream; + +import org.rcsb.mmtf.dataholders.MmtfBean; +import org.rcsb.mmtf.deserializers.MessagePackDeserializer; + +public class ReaderUtils { + + /** The base url. */ + public static final String BASE_URL = "http://mmtf.rcsb.org/full/"; + /** The size of a chunk for a byte buffer. */ + private static final int BYTE_BUFFER_CHUNK_SIZE = 4096; + + /** + * Find the message pack byte array from the web using the input code and a base url. + * Caches the file if possible. 
+ * @param inputCode + * @return the byte array + * @throws IOException + */ + public static MmtfBean getDataFromUrl(String inputCode) throws IOException { + // Get these as an inputstream + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + InputStream is = null; + URL url = new URL(BASE_URL + inputCode); + try { + is = url.openStream(); + byte[] byteChunk = new byte[BYTE_BUFFER_CHUNK_SIZE]; // Or whatever size you want to read in at a time. + int n; + while ( (n = is.read(byteChunk)) > 0 ) { + baos.write(byteChunk, 0, n); + } + } + catch (IOException e) { + System.err.printf ("Failed while reading bytes from %s: %s", url.toExternalForm(), e.getMessage()); + e.printStackTrace (); + } + finally { + if (is != null) { is.close(); } + } + byte[] b = baos.toByteArray(); + // Now return the gzip deflated and deserialized byte array + MessagePackDeserializer messagePackDeserializer = new MessagePackDeserializer(); + return messagePackDeserializer.deserialize(deflateGzip(b)); + } + + /** + * Deflate a gzip byte array. + * @param inputBytes -> gzip compressed byte + * array + * @return A deflated byte array + * @throws IOException Signals that an I/O exception has occurred. 
+ */ + public static byte[] deflateGzip(final byte[] inputBytes){ + // Start the byte input stream + ByteArrayInputStream bis = new ByteArrayInputStream(inputBytes); + GZIPInputStream gis; + try { + gis = new GZIPInputStream(bis); + } catch (IOException e) { + System.err.println("Error in opening byte array."); + e.printStackTrace(); + return null; + } + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + // Make a buffer + byte[] tmp = new byte[BYTE_BUFFER_CHUNK_SIZE]; + try { + while (gis.available() == 1) { + int size = gis.read(tmp); + if(size==-1){ + break; + } + baos.write(tmp, 0, size); + } + } + catch (Exception ex) { + ex.printStackTrace(); + return null; + } + finally { + try { + if (baos != null) { + baos.close(); + } + } catch (Exception ex) { + ex.printStackTrace(); + return null; + } + } + // Get the bytes + byte[] outArr = baos.toByteArray(); + return outArr; + } + + /** + * A function to get MMTF data from a file path + * @param filePath + * @return the deserialized mmtfBean + * @throws IOException + */ + public static MmtfBean getDataFromFile(String filePath) throws IOException { + // Now return the gzip deflated and deserialized byte array + MessagePackDeserializer messagePackDeserializer = new MessagePackDeserializer(); + return messagePackDeserializer.deserialize(readFile(filePath)); + } + + /** + * Read a byte array from a file + * @param filePath the input file path + * @return the returned byte array + * @throws IOException + */ + private static byte[] readFile(String filePath) throws IOException { + Path path = Paths.get(filePath); + byte[] data = Files.readAllBytes(path); + return data; + } +} diff --git a/mmtf-decoder/src/main/java/org/rcsb/mmtf/decoder/SimpleDataApi.java b/mmtf-decoder/src/main/java/org/rcsb/mmtf/decoder/SimpleDataApi.java deleted file mode 100644 index 843df4b..0000000 --- a/mmtf-decoder/src/main/java/org/rcsb/mmtf/decoder/SimpleDataApi.java +++ /dev/null @@ -1,362 +0,0 @@ -package org.rcsb.mmtf.decoder; - 
-import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import org.msgpack.jackson.dataformat.MessagePackFactory; -import org.rcsb.mmtf.api.DataApiInterface; -import org.rcsb.mmtf.arraydecompressors.DeltaDeCompress; -import org.rcsb.mmtf.arraydecompressors.RunLengthDecodeInt; -import org.rcsb.mmtf.arraydecompressors.RunLengthDecodeString; -import org.rcsb.mmtf.arraydecompressors.RunLengthDelta; -import org.rcsb.mmtf.dataholders.BioAssemblyData; -import org.rcsb.mmtf.dataholders.Entity; -import org.rcsb.mmtf.dataholders.MmtfBean; -import org.rcsb.mmtf.dataholders.PDBGroup; - -import com.fasterxml.jackson.databind.ObjectMapper; - -public class SimpleDataApi implements DataApiInterface { - - - public SimpleDataApi(byte[] inputByteArr) { - - - MmtfBean inputData = null; - try { - inputData = new ObjectMapper(new MessagePackFactory()).readValue(inputByteArr, MmtfBean.class); - } catch (IOException e) { - // - System.err.println("Error converting Byte array to message pack. 
IOError"); - e.printStackTrace(); - throw new RuntimeException(); - } - - // Get the decompressors to build in the data structure - DeltaDeCompress deltaDecompress = new DeltaDeCompress(); - RunLengthDelta intRunLengthDelta = new RunLengthDelta(); - RunLengthDecodeInt intRunLength = new RunLengthDecodeInt(); - RunLengthDecodeString stringRunlength = new RunLengthDecodeString(); - DecoderUtils decoderUtils = new DecoderUtils(); - - // Get the data - try { - groupList = decoderUtils.bytesToInts(inputData.getGroupTypeList()); - // Read the byte arrays as int arrays - cartnX = decoderUtils.decodeIntsToFloats(deltaDecompress.decompressByteArray(inputData.getxCoordBig(), inputData.getxCoordSmall()), MmtfBean.COORD_DIVIDER); - cartnY = decoderUtils.decodeIntsToFloats(deltaDecompress.decompressByteArray(inputData.getyCoordBig(), inputData.getyCoordSmall()), MmtfBean.COORD_DIVIDER); - cartnZ = decoderUtils.decodeIntsToFloats(deltaDecompress.decompressByteArray(inputData.getzCoordBig(), inputData.getzCoordSmall()), MmtfBean.COORD_DIVIDER); - bFactor = decoderUtils.decodeIntsToFloats(deltaDecompress.decompressByteArray(inputData.getbFactorBig(),inputData.getbFactorSmall()), MmtfBean.OCCUPANCY_BFACTOR_DIVIDER); - occupancy = decoderUtils.decodeIntsToFloats(intRunLength.decompressByteArray(inputData.getOccList()), MmtfBean.OCCUPANCY_BFACTOR_DIVIDER); - atomId = intRunLengthDelta.decompressByteArray(inputData.getAtomIdList()); - altId = stringRunlength.stringArrayToChar( - (ArrayList) inputData.getAltLabelList()); - // Get the insertion code - insertionCodeList = stringRunlength.stringArrayToChar( - (ArrayList) inputData.getInsCodeList()); - // Get the groupNumber - groupNum = intRunLengthDelta.decompressByteArray( - inputData.getGroupIdList()); - groupMap = inputData.getGroupMap(); - // Get the seqRes groups - seqResGroupList = intRunLengthDelta.decompressByteArray(inputData.getSeqResIdList()); - // Get the number of chains per model - chainsPerModel = 
inputData.getChainsPerModel(); - groupsPerChain = inputData.getGroupsPerChain(); - // Get the internal and public facing chain ids - publicChainIds = decoderUtils.decodeChainList(inputData.getChainNameList()); - chainList = decoderUtils.decodeChainList(inputData.getChainIdList()); - spaceGroup = inputData.getSpaceGroup(); - unitCell = inputData.getUnitCell(); - bioAssembly = inputData.getBioAssemblyList(); - interGroupBondIndices = decoderUtils.bytesToInts(inputData.getBondAtomList()); - interGroupBondOrders = decoderUtils.bytesToByteInts(inputData.getBondOrderList()); - mmtfVersion = inputData.getMmtfVersion(); - mmtfProducer = inputData.getMmtfProducer(); - entityList = inputData.getEntityList(); - pdbId = inputData.getPdbId(); - // Now get the header data - rFree = inputData.getrFree(); - rWork = inputData.getrWork(); - resolution = inputData.getResolution(); - title = inputData.getTitle(); - experimentalMethods = inputData.getExperimentalMethods(); - - - } - catch (IOException ioException){ - System.err.println("Error reading in byte arrays from message pack"); - ioException.printStackTrace(); - throw new RuntimeException(); - } - } - - - /** The X coordinates */ - private float[] cartnX; - - /** The Y coordinates */ - private float[] cartnY; - - /** The Z coordinates */ - private float[] cartnZ; - - /** The X coordinates */ - private float[] bFactor; - - /** The Y coordinates */ - private float[] occupancy; - - /** The atom id. */ - private int[] atomId; - - /** The alt id. */ - private char[] altId; - - /** The ins code. */ - private char[] insertionCodeList; - - /** The group num. */ - private int[] groupNum; - - /** The group map. */ - private Map groupMap; - - /** The group list. 
*/ - private int[] groupList; - - /** The sequence ids of the groups */ - private int[] seqResGroupList; - - /** The public facing chain ids*/ - private String[] publicChainIds; - - /** The number of chains per model*/ - private int[] chainsPerModel; - - /** The number of groups per (internal) chain*/ - private int[] groupsPerChain; - - /** The space group of the structure*/ - private String spaceGroup; - - /** The unit cell of the structure*/ - private float[] unitCell; - - /** The bioassembly information for the structure*/ - private List bioAssembly; - - /** The bond indices for bonds between groups*/ - private int[] interGroupBondIndices; - - /** The bond orders for bonds between groups*/ - private int[] interGroupBondOrders; - - /** The chosen list of chain ids */ - private String[] chainList; - - /** The mmtf version */ - private String mmtfVersion; - - /** The mmtf prodcuer */ - private String mmtfProducer; - - /** A list containing pdb group names for nucleic acids */ - List nucAcidList = new ArrayList<>(); - - /** The list of entities in this structure. */ - private Entity[] entityList; - - /** The PDB id */ - private String pdbId; - - /** The reported resolution of the dataset. */ - private float resolution; - - /** The reported R Free of the model. */ - private float rFree; - - /** The reported R Work of the model. */ - private float rWork; - - /** The title of the model. */ - private String title; - - /** The list of experimental methods. 
*/ - private List experimentalMethods; - - @Override - public float[] getXcoords() { - return cartnX; - } - - @Override - public float[] getYcoords() { - return cartnY; - } - - @Override - public float[] getZcoords() { - return cartnZ; - } - - @Override - public float[] getBfactors() { - return bFactor; - } - - @Override - public float[] getOccupancies() { - return occupancy; - } - - @Override - public int[] getAtomIds() { - return atomId; - } - - @Override - public char[] getAltLocIds() { - return altId; - } - - @Override - public char[] getInsCodes() { - return insertionCodeList; - } - - @Override - public int[] getResidueNums() { - return groupNum; - } - - @Override - public Map getGroupMap() { - return groupMap; - } - - @Override - public int[] getGroupIndices() { - return groupList; - } - - @Override - public int[] getSeqResGroupIndices() { - return seqResGroupList; - } - - @Override - public String[] getChainNames() { - return publicChainIds; - } - - @Override - public int[] getChainsPerModel() { - return chainsPerModel; - } - - @Override - public int[] getGroupsPerChain() { - return groupsPerChain; - } - - @Override - public String getSpaceGroup() { - return spaceGroup; - } - - @Override - public float[] getUnitCell() { - return unitCell; - } - - @Override - public List getBioAssemblyList() { - return bioAssembly; - } - - @Override - public int[] getInterGroupBondIndices() { - return interGroupBondIndices; - } - - @Override - public int[] getInterGroupBondOrders() { - return interGroupBondOrders; - } - - @Override - public String[] getChainIds() { - return chainList; - } - - @Override - public String getMmtfVersion() { - return mmtfVersion; - } - - @Override - public String getMmtfProducer() { - return mmtfProducer; - } - - @Override - public Entity[] getEntityList() { - return entityList; - } - - @Override - public String getPdbId() { - return pdbId; - } - - @Override - public int getNumResidues() { - return this.groupList.length; - } - - @Override - public 
int getNumChains() { - return this.chainList.length; - } - - @Override - public int getNumModels() { - return this.chainsPerModel.length; - } - - @Override - public int getNumAtoms() { - return this.cartnX.length; - } - - @Override - public float getRfree() { - return rFree; - } - - @Override - public float getResolution() { - return resolution; - } - - @Override - public float getRwork() { - return rWork; - } - - @Override - public String getTitle() { - return title; - } - - @Override - public List getExperimentalMethods() { - return experimentalMethods; - } - - - -} diff --git a/mmtf-decoder/src/main/java/org/rcsb/mmtf/deserializers/MessagePackDeserializer.java b/mmtf-decoder/src/main/java/org/rcsb/mmtf/deserializers/MessagePackDeserializer.java new file mode 100644 index 0000000..271224b --- /dev/null +++ b/mmtf-decoder/src/main/java/org/rcsb/mmtf/deserializers/MessagePackDeserializer.java @@ -0,0 +1,24 @@ +package org.rcsb.mmtf.deserializers; + +import java.io.IOException; + +import org.msgpack.jackson.dataformat.MessagePackFactory; +import org.rcsb.mmtf.dataholders.MmtfBean; + +import com.fasterxml.jackson.core.JsonParseException; +import com.fasterxml.jackson.databind.JsonMappingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +/** + * A class to deserialize messaepack data into an mmtfBean + * @author Anthony Bradley + * + */ +public class MessagePackDeserializer { + + public MmtfBean deserialize(byte[] byteArray) throws JsonParseException, JsonMappingException, IOException { + MmtfBean mmtfBean = null; + mmtfBean = new ObjectMapper(new MessagePackFactory()).readValue(byteArray, MmtfBean.class); + return mmtfBean; + } +} diff --git a/mmtf-decoder/src/main/java/org/rcsb/mmtf/examples/HandleIO.java b/mmtf-decoder/src/main/java/org/rcsb/mmtf/examples/HandleIO.java deleted file mode 100644 index 5c81191..0000000 --- a/mmtf-decoder/src/main/java/org/rcsb/mmtf/examples/HandleIO.java +++ /dev/null @@ -1,308 +0,0 @@ -package org.rcsb.mmtf.examples; 
- -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.net.URL; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.zip.GZIPInputStream; - -import org.rcsb.mmtf.api.DataApiInterface; -import org.rcsb.mmtf.decoder.SimpleDataApi; - -/** - * Some helper functions and utility functions to get structures from BioJava. - * Really just for canary release and testing. - * @author Anthony Bradley - * - */ -public class HandleIO { - - /** The base url. */ - public static final String BASE_URL = "http://mmtf.rcsb.org/full/"; - /** The index to get the middle two characters of a PDB id. */ - private static final int END_ID_FOR_MID_PDB = 3; - /** The size of a chunk for a byte buffer. */ - private static final int BYTE_BUFFER_CHUNK_SIZE = 4096; - - /** - * Gets the biojava structure from a url. - * - * @param inputCode the input code - * @return A biojava structure object - */ - public final byte[] getByteArrFromUrlOrFile(final String inputCode) { - String basePath = getBasePath(); - boolean isFile = getFile(basePath, inputCode); - // If it's a file on the file system - get it - if (isFile) { - return getFromFileSystem(basePath, inputCode); - } - try { - return getFromUrl(inputCode); - } catch (IOException e) { - e.printStackTrace(); - throw new RuntimeException(e); - } - } - - - /** - * Gets the data API from a url. 
- * @param inputCode the input code - * @return - */ - public final DataApiInterface getDataApiFromUrlOrFile(final String inputCode) { - String basePath = getBasePath(); - boolean isFile = getFile(basePath, inputCode); - // If it's a file on the file system - get it - if (isFile) { - return new SimpleDataApi(getFromFileSystem(basePath, inputCode)); - } - try { - return new SimpleDataApi(getFromUrl(inputCode)); - } catch (IOException e) { - e.printStackTrace(); - throw new RuntimeException(e); - } - } - - - - /** - * Get from a cached file on the file system. - * @param inputCode The four letter pdb code to fine - * @return The message pack byte array - */ - public final byte[] getFromFile(final String inputCode) { - String basePath = getBasePath(); - String fullPath = constructPath(basePath, inputCode); - return getFromFileSystem(fullPath); - - - } - - /** - * Find the message pack byte array from the web using the input code. - * Uses the server specified in BASE_URL param. - * Caches the file if possible. - * @param inputCode - * @return - * @throws IOException - */ - public final byte[] getFromUrl(final String inputCode) throws IOException { - return getFromUrl(inputCode, BASE_URL); - } - - /** - * Find the message pack byte array from the web using the input code and a base url. - * Caches the file if possible. - * @param inputCode - * @param baseUrl - * @return - * @throws IOException - */ - public final byte[] getFromUrl(final String inputCode, String baseUrl) throws IOException { - // Get the base path - String basePath = getBasePath(); - // Get these as an inputstream - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - InputStream is = null; - URL url = new URL(baseUrl + inputCode); - try { - is = url.openStream(); - byte[] byteChunk = new byte[4096]; // Or whatever size you want to read in at a time. 
- int n; - - while ( (n = is.read(byteChunk)) > 0 ) { - baos.write(byteChunk, 0, n); - } - } - catch (IOException e) { - System.err.printf ("Failed while reading bytes from %s: %s", url.toExternalForm(), e.getMessage()); - e.printStackTrace (); - // Perform any other exception handling that's appropriate. - } - finally { - if (is != null) { is.close(); } - } - byte[] b = baos.toByteArray(); - // Cache the data on the file system - cacheFile(b, basePath, inputCode); - // Now return the gzip deflated byte array - return deflateGzip(b); - } - - /** - * Gets the file from the file system. Specify the pdb id and the base path - * - * @param basePath the base path - * @param pdbCode the pdb code - * @return the from file system - * @throws FileNotFoundException the file not found exception - * @throws IOException Signals that an I/O exception has occurred. - */ - private final byte[] getFromFileSystem(final String basePath, final String pdbCode) { - - String fullFilePath = constructPath(basePath, pdbCode); - // Now return the message pack byte[] - return getFromFileSystem(fullFilePath); - } - - /** - * Construct the full path for the file to be stored on the file system. - * @param basePath The base directory (PDB_DIR in Biojava) - * @param pdbCode The four letter pdb code - * @return The full path, including suffic to be written out. - */ - private String constructPath(String basePath, String pdbCode) { - return basePath + "/data/structures/divided/msgpack" + "/" + pdbCode.substring(1, END_ID_FOR_MID_PDB) + "/" + pdbCode + ".mmtf"; - } - - /** - * Function to get a file from the file system - full path supplied. - * - * @param fullPath the full path - * @return the from file system - * @throws FileNotFoundException the file not found exception - * @throws IOException Signals that an I/O exception has occurred. 
- */ - private final byte[] getFromFileSystem(final String fullPath) { - // Get these as an inputstream - byte[] inputByteArr; - try { - inputByteArr = Files.readAllBytes(Paths.get(fullPath)); - } catch (IOException e) { - System.err.println("Could not find file: "+fullPath); - return null; - } - // Now return it - return deflateGzip(inputByteArr); - } - - /** - * Gets the file form the file system - * - * @param basePath the base path - * @param pdbId the pdb id - * @return the file - */ - private boolean getFile(final String basePath, final String pdbId) { - // Set the path for the file - if (basePath == null) { - System.out.println("Can't get - PDB_DIR and PDB_CACHE_DIR not specified"); - return false; - } - String dirPath = basePath - + "/data/structures/divided/msgpack/" - + pdbId.substring(1, END_ID_FOR_MID_PDB) + "/"; - String filePath = dirPath + pdbId + ".mmtf"; - File thisFile = new File(filePath); - return thisFile.exists(); - } - - /** - * Cache file. - * - * @param b the b - * @param basePath the base path - * @param pdbId the pdb id - * @throws IOException Signals that an I/O exception has occurred. - */ - private void cacheFile(final byte[] b, final String basePath, final String pdbId) { - // Set the path for the file - if (basePath == null) { - System.out.println("Not caching - PDB_DIR and PDB_CACHE_DIR not specified"); - return; - } - String dirPath = basePath - + "/data/structures/divided/msgpack/" - + pdbId.substring(1, END_ID_FOR_MID_PDB) + "/"; - String filePath = dirPath + pdbId + ".mmtf"; - - File thisFile = new File(dirPath); - boolean success = thisFile.mkdirs(); - if(success){ - System.out.println("Made base files"); - } - try { - FileOutputStream fos = new FileOutputStream(filePath); - fos.write(b); - fos.close(); - } catch (IOException e) { - // Error in caching the file - System.err.println("Error in caching file on file system: " + filePath); - return; - } - - } - - /** - * Deflate a gzip byte array. 
- * - * @param inputBytes -> gzip compressed byte - * array - * @return A deflated byte array - * @throws IOException Signals that an I/O exception has occurred. - */ - private byte[] deflateGzip(final byte[] inputBytes){ - // Start the byte input stream - ByteArrayInputStream bis = new ByteArrayInputStream(inputBytes); - GZIPInputStream gis; - try { - gis = new GZIPInputStream(bis); - } catch (IOException e) { - System.err.println("Error in opening byte array."); - e.printStackTrace(); - return null; - } - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - // Make a buffer - byte[] tmp = new byte[BYTE_BUFFER_CHUNK_SIZE]; - try { - while (gis.available() == 1) { - int size = gis.read(tmp); - if(size==-1){ - break; - } - baos.write(tmp, 0, size); - } - } - catch (Exception ex) { - ex.printStackTrace(); - return null; - } - finally { - try { - if (baos != null) { - baos.close(); - } - } catch (Exception ex) { - ex.printStackTrace(); - return null; - } - } - // Get the bytes - byte[] outArr = baos.toByteArray(); - return outArr; - } - - /** - * Get the base path to cache the data in - * @return The base path to store the data in. Defined by environment variables. 
- */ - private String getBasePath() { - // First try to get it from a local file - String basePath = System.getProperty("PDB_CACHE_DIR"); - if (basePath == null) { - System.out.println("PDB_CACHE_DIR not available"); - basePath = System.getProperty("PDB_DIR"); - } - return basePath; - } -} diff --git a/mmtf-decoder/src/main/java/org/rcsb/mmtf/examples/HelloWorld.java b/mmtf-decoder/src/main/java/org/rcsb/mmtf/examples/HelloWorld.java deleted file mode 100644 index 4ca2b69..0000000 --- a/mmtf-decoder/src/main/java/org/rcsb/mmtf/examples/HelloWorld.java +++ /dev/null @@ -1,17 +0,0 @@ -package org.rcsb.mmtf.examples; - -import org.rcsb.mmtf.api.DataApiInterface; -import org.rcsb.mmtf.dataholders.PDBGroup; - -public class HelloWorld { - - public static void main(String[] args) { - HandleIO handleIO = new HandleIO(); - DataApiInterface dataApi = handleIO.getDataApiFromUrlOrFile("4cup"); - System.out.println("PDB Code: "+dataApi.getPdbId()+" has "+dataApi.getNumChains()+" chains"); - PDBGroup pdbGroup = dataApi.getGroupMap().get(0); - System.out.println("HET group "+pdbGroup.getGroupName()+" has the following atomic charges: "+pdbGroup.getAtomCharges()); - System.out.println("PDB Code: "+dataApi.getPdbId()+" has "+dataApi.getBioAssemblyList().size()+" bioassemblies"); - } - -} diff --git a/mmtf-decoder/src/main/java/org/rcsb/mmtf/examples/package-info.java b/mmtf-decoder/src/main/java/org/rcsb/mmtf/examples/package-info.java deleted file mode 100644 index ae4a2d6..0000000 --- a/mmtf-decoder/src/main/java/org/rcsb/mmtf/examples/package-info.java +++ /dev/null @@ -1,8 +0,0 @@ -/** - * Example functions using the decoder. 
- */ -/** - * @author Anthony Bradley - * - */ -package org.rcsb.mmtf.examples; diff --git a/mmtf-decoder/src/test/java/org/rcsb/mmtf/arraydecompressors/TestArrayDecompressor.java b/mmtf-decoder/src/test/java/org/rcsb/mmtf/arraydecompressors/TestArrayDecompressor.java deleted file mode 100644 index b1927f8..0000000 --- a/mmtf-decoder/src/test/java/org/rcsb/mmtf/arraydecompressors/TestArrayDecompressor.java +++ /dev/null @@ -1,123 +0,0 @@ -package org.rcsb.mmtf.arraydecompressors; - - -import java.io.ByteArrayOutputStream; -import java.io.DataOutputStream; -import java.io.IOException; -import java.util.ArrayList; - -import org.junit.Test; -import org.rcsb.mmtf.arraydecompressors.DeltaDeCompress; -import org.rcsb.mmtf.arraydecompressors.RunLengthDecodeInt; -import org.rcsb.mmtf.arraydecompressors.RunLengthDecodeString; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertArrayEquals; - -/** - * Tests for the array decompressor library. - * @author Anthony Bradley - * - */ -public class TestArrayDecompressor { - - - /** The Constant NUMBER_INTS. */ - private static final int NUMBER_INTS = 100; - - /** The Constant REPEITITONS. */ - private static final int REPEITITONS = 1000; - - /** The Constant NUMBER_CHARS. */ - private static final int NUMBER_CHARS = 100; - - /** The Constant INITIAL_INT. */ - private static final int INITIAL_INT = 3000; - - /** The Constant TOTAL_LENGTH. */ - private static final int TOTAL_LENGTH = 100; - /** - * Run length decode int test. 
- */ - @Test - public final void runLengthDecodeIntTest() { - - RunLengthDecodeInt rlds = new RunLengthDecodeInt(); - // Set the size and character - - // Build the test array - ArrayList testArray = new ArrayList(); - for (int i = 0; i < NUMBER_INTS; i++) { - testArray.add(REPEITITONS); - } - // Add the lists - ArrayList otherArray = new ArrayList(); - otherArray.add(REPEITITONS); - otherArray.add(NUMBER_INTS); - // Check theyr'e the same - assertEquals(testArray, rlds.decompressIntArray(otherArray)); - } - - /** - * Run lenght decode string test. - */ - @Test - public final void runLenghtDecodeStringTest() { - - RunLengthDecodeString rlds = new RunLengthDecodeString(); - // Set the size and character - String charRep = "A"; - // Build the test array - ArrayList testArray = new ArrayList(); - for (int i = 0; i < NUMBER_CHARS; i++) { - testArray.add(charRep); - } - // Add the lists - ArrayList otherArray = new ArrayList(); - otherArray.add(charRep); - otherArray.add(Integer.toString(NUMBER_CHARS)); - // Check theyr'e the same - assertEquals(testArray, rlds.deCompressStringArray(otherArray)); - } - - /** - * Delta decompressor test. - * - * @throws IOException Signals that an I/O exception has occurred. 
- */ - @Test - public final void deltaDecompressorTest() throws IOException { - - DeltaDeCompress ddc = new DeltaDeCompress(); - // Now let's generate the byte arrays for the test data - ByteArrayOutputStream bigBos = new ByteArrayOutputStream(); - DataOutputStream bigDos = new DataOutputStream(bigBos); - ByteArrayOutputStream littleBos = new ByteArrayOutputStream(); - DataOutputStream littleDos = new DataOutputStream(littleBos); - - // Set the size of the start and lenght of the aray - - // Make the big byte array - bigDos.writeInt(INITIAL_INT); - bigDos.writeInt(TOTAL_LENGTH); - // Now write the shorts - for (int i = 0; i < TOTAL_LENGTH; i++) { - littleDos.writeShort(1); - } - - // Get the test array - int[] testArray = new int[TOTAL_LENGTH + 1]; - testArray[0] = INITIAL_INT; - int incrementorInt = INITIAL_INT; - for (int i = 1; i < TOTAL_LENGTH + 1; i++) { - incrementorInt += 1; - testArray[i] = incrementorInt; - } - // Now proccess these - int[] outArray = ddc.decompressByteArray(bigBos.toByteArray(), - littleBos.toByteArray()); - // Check if there the same - assertArrayEquals(outArray, testArray); - } - -} diff --git a/mmtf-decoder/src/test/java/org/rcsb/mmtf/decoder/DummyApiImpl.java b/mmtf-decoder/src/test/java/org/rcsb/mmtf/decoder/DummyApiImpl.java new file mode 100644 index 0000000..19d474f --- /dev/null +++ b/mmtf-decoder/src/test/java/org/rcsb/mmtf/decoder/DummyApiImpl.java @@ -0,0 +1,288 @@ +package org.rcsb.mmtf.decoder; + +import org.rcsb.mmtf.api.DecodedDataInterface; + +public class DummyApiImpl implements DecodedDataInterface { + + + public final int numAtoms = 1; + public final int numGroups = 1; + public final int atomsPerGroup = 1; + public final int bondsPerGroup = 1; + public final int interGroupBonds = 1; + public final int numChains = 1; + public final int numModels = 1; + + + @Override + public float[] getxCoords() { + return new float[numAtoms]; + } + + @Override + public float[] getyCoords() { + return new float[numAtoms]; + } + + 
@Override + public float[] getzCoords() { + return new float[numAtoms]; + } + + @Override + public float[] getbFactors() { + return new float[numAtoms]; + } + + @Override + public float[] getOccupancies() { + return new float[numAtoms]; + } + + @Override + public int[] getAtomIds() { + return new int[numAtoms]; + } + + @Override + public char[] getAltLocIds() { + return new char[numAtoms]; + } + + @Override + public char[] getInsCodes() { + return new char[numGroups]; + } + + @Override + public int[] getGroupIds() { + return new int[numGroups]; + } + + @Override + public String getGroupName(int groupInd) { + return "NAME"; + } + + @Override + public int getNumAtomsInGroup(int groupInd) { + return atomsPerGroup; + } + + @Override + public String[] getGroupAtomNames(int groupInd) { + return new String[atomsPerGroup]; + } + + @Override + public String[] getGroupElementNames(int groupInd) { + return new String[atomsPerGroup]; + } + + @Override + public int[] getGroupBondOrders(int groupInd) { + return new int[bondsPerGroup]; + } + + @Override + public int[] getGroupBondIndices(int groupInd) { + return new int[bondsPerGroup*2]; + } + + @Override + public int[] getGroupAtomCharges(int groupInd) { + return new int[atomsPerGroup]; + } + + @Override + public char getGroupSingleLetterCode(int groupInd) { + return 0; + } + + @Override + public String getGroupChemCompType(int groupInd) { + return "CHEM"; + } + + @Override + public int[] getGroupTypeIndices() { + return new int[numGroups]; + } + + @Override + public int[] getGroupSequenceIndices() { + return new int[numGroups]; + } + + @Override + public String[] getChainIds() { + return new String[numChains]; + } + + @Override + public String[] getChainNames() { + return new String[numChains]; + } + + @Override + public int[] getChainsPerModel() { + + return new int[] {numChains}; + } + + @Override + public int[] getGroupsPerChain() { + return new int[] {numGroups}; + } + + @Override + public String getSpaceGroup() { + return 
"SPACE"; + } + + @Override + public float[] getUnitCell() { + return new float[6]; + } + + @Override + public int getNumBioassemblies() { + return 1; + } + + @Override + public int getNumTransInBioassembly(int bioassemblyIndex) { + return 1; + } + + @Override + public int[] getChainIndexListForTransform(int bioassemblyIndex, int transformationIndex) { + return new int[1]; + } + + @Override + public double[] getMatrixForTransform(int bioassemblyIndex, int transformationIndex) { + return new double[1]; + } + + @Override + public int[] getInterGroupBondIndices() { + return new int[interGroupBonds*2]; + + } + + @Override + public int[] getInterGroupBondOrders() { + return new int[interGroupBonds]; + } + + @Override + public String getMmtfVersion() { + return "VERS"; + } + + @Override + public String getMmtfProducer() { + return "PROD"; + } + + @Override + public int getNumEntities() { + return 1; + } + + @Override + public String getEntityDescription(int entityInd) { + return "DESC"; + } + + @Override + public String getEntityType(int entityInd) { + return "TYPE"; + } + + @Override + public int[] getEntityChainIndexList(int entityInd) { + return new int[] {0}; + } + + @Override + public String getEntitySequence(int entityInd) { + return "SEQ"; + } + + @Override + public String getStructureId() { + return "1EG1"; + } + + @Override + public int getNumModels() { + return 1; + } + + @Override + public int getNumBonds() { + return bondsPerGroup*numGroups+interGroupBonds; + } + + @Override + public int getNumChains() { + return numChains; + } + + @Override + public int getNumGroups() { + return numGroups; + } + + @Override + public int getNumAtoms() { + return numAtoms; + } + + @Override + public float getRfree() { + return 1.0f; + + } + + @Override + public float getRwork() { + return 1.0f; + + } + + @Override + public float getResolution() { + return 1.0f; + } + + @Override + public String getTitle() { + return "NA"; + } + + @Override + public String[] 
getExperimentalMethods() { + return new String[] {"NA"}; + + } + + @Override + public String getDepositionDate() { + return "NA"; + + } + + @Override + public String getReleaseDate() { + return "NA"; + } + + @Override + public int[] getSecStructList() { + return new int[numGroups]; + } + +} diff --git a/mmtf-decoder/src/test/java/org/rcsb/mmtf/decoder/DummyTransferImpl.java b/mmtf-decoder/src/test/java/org/rcsb/mmtf/decoder/DummyTransferImpl.java new file mode 100644 index 0000000..380dd0a --- /dev/null +++ b/mmtf-decoder/src/test/java/org/rcsb/mmtf/decoder/DummyTransferImpl.java @@ -0,0 +1,71 @@ +package org.rcsb.mmtf.decoder; + +import org.rcsb.mmtf.api.DataTransferInterface; + +public class DummyTransferImpl implements DataTransferInterface { + + @Override + public void initStructure(int totalNumBonds, int totalNumAtoms, int totalNumGroups, int totalNumChains, + int totalNumModels, String structureId) { + + } + + @Override + public void finalizeStructure() { + + } + + @Override + public void setModelInfo(int modelId, int chainCount) { + + } + + @Override + public void setChainInfo(String chainId, String chainName, int groupCount) { + + } + + @Override + public void setEntityInfo(int[] chainIndices, String sequence, String description, String title) { + + } + + @Override + public void setGroupInfo(String groupName, int groupNumber, char insertionCode, String groupType, int atomCount, + int boundCount, char singleLetterCode, int sequenceIndex, int secondaryStructureType) { + + } + + @Override + public void setAtomInfo(String atomName, int serialNumber, char alternativeLocationId, float x, float y, float z, + float occupancy, float temperatureFactor, String element, int charge) { + + } + + @Override + public void setBioAssemblyTrans(int bioAssemblyIndex, int[] inputChainIndices, double[] inputTransform) { + + } + + @Override + public void setXtalInfo(String spaceGroup, float[] unitCell) { + + } + + @Override + public void setGroupBond(int atomIndexOne, int 
atomIndexTwo, int bondOrder) { + + } + + @Override + public void setInterGroupBond(int atomIndexOne, int atomIndexTwo, int bondOrder) { + + } + + @Override + public void setHeaderInfo(float rFree, float rWork, float resolution, String title, String depositionDate, + String releaseDate, String[] experimnetalMethods) { + + } + +} diff --git a/mmtf-decoder/src/test/java/org/rcsb/mmtf/decoder/TestArrayConverters.java b/mmtf-decoder/src/test/java/org/rcsb/mmtf/decoder/TestArrayConverters.java new file mode 100644 index 0000000..7509d1a --- /dev/null +++ b/mmtf-decoder/src/test/java/org/rcsb/mmtf/decoder/TestArrayConverters.java @@ -0,0 +1,132 @@ +package org.rcsb.mmtf.decoder; + +import org.junit.Test; + +import static org.junit.Assert.*; + +import java.io.IOException; +import java.nio.ByteBuffer; + + +public class TestArrayConverters { + + + /** + * Test the decoding of the chain list + */ + @Test + public final void testConvertChainList() { + byte[] byteArray = {'A','\0','\0','\0', + 'A','B','C','\0', + 'a','b','c','\0'}; + String[] testStringList = {"A","ABC","abc"}; + String[] stringList = ArrayConverters.decodeChainList(byteArray); + assertArrayEquals(testStringList, stringList); + } + + + /** + * Test the conversion of the integer array to a float + */ + @Test + public final void testConvertIntToFloat() { + int[] intArray = {10001,100203,124542}; + float[] testFloatArray = {10.001f,100.203f,124.542f}; + float divider = 1000.0f; + float[] floatArray = ArrayConverters.convertIntsToFloats(intArray,divider); + assertArrayEquals(testFloatArray, floatArray, 0.0f); + } + + /** + * Test the conversion of byte arrays to one byte integer arrays + * @throws IOException + */ + @Test + public final void oneByteToIntegersTest() throws IOException { + int[] testIntArray = {12,123,24}; + byte[] byteArray = {(byte) 12, (byte) 123, (byte) 24}; + int[] intArray = ArrayConverters.convertByteToIntegers(byteArray); + assertArrayEquals(testIntArray, intArray); + } + + /** + * Test the 
conversion of byte arrays to two byte integer arrays + * @throws IOException + */ + @Test + public final void twoByteToIntegersTest() throws IOException { + int[] testIntArray = {1000,1002,546}; + byte[] byteArray = getByteArray(testIntArray,2); + int[] intArray = ArrayConverters.convertTwoByteToIntegers(byteArray); + assertArrayEquals(testIntArray, intArray); + } + + /** + * Test the conversion of byte arrays to four byte integer arrays + * @throws IOException + */ + @Test + public final void fourByteToIntegersTest() throws IOException { + int[] testIntArray = {32403,11200,100090}; + byte[] byteArray = getByteArray(testIntArray,4); + int[] intArray = ArrayConverters.convertFourByteToIntegers(byteArray); + assertArrayEquals(testIntArray, intArray); + } + + /** + * Utiliy function to get a byte array. I don't really like this but at least + * it's an orthogonal approach. + * @param inArray the input int array + * @param numBytes the number of bytes per integer + * @return the output byte array + */ + private byte[] getByteArray(int[] inArray, int numBytes) { + byte[] outBytes = new byte[inArray.length*numBytes]; + for(int i=0; i testList = new ArrayList<>(); - //Loop through and add one, two, three and four character strings - for(int i = 0; i < NUM_EXAMPLES; i++){ - int numChars = randGenerator.nextInt(MAX_CHARS_PER_CHAIN); - if(numChars==0){ - numChars = 1; - } - int numBlank = MAX_CHARS_PER_CHAIN - numChars; - StringBuilder stringBuild = new StringBuilder(); - for(int j=0; j< numChars; j++){ - char c = (char)(randGenerator.nextInt(26) + 'a'); - stringBuild.append(c); - bos.write(c); - } - for(int j=0; j< numBlank; j++){ - - bos.write((byte) 0); - - } - testList.add(stringBuild.toString()); - } - int counter = 0; - byte[] testByteArr = bos.toByteArray(); - for(String testChainId : testList){ - assertEquals(testChainId, decoderUtils.getChainId(testByteArr, counter)); - counter += 1; - } - - } - - @Test - public void bytesToIntsTests() throws IOException { - - // 
The input byte array of one byte integers - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - int[] testIntArr = new int[NUM_EXAMPLES]; - // Get the int stream of random numbers - for(int i = 0; i < NUM_EXAMPLES; i++){ - int currInt = randGenerator.nextInt(); - bos.write(ByteBuffer.allocate(4).putInt(currInt).array()); - testIntArr[i] = currInt; - } - // Now check they are the same - assertArrayEquals(testIntArr, decoderUtils.bytesToInts(bos.toByteArray())); - - - } - - @Test - public void bytesToByteIntsTests() throws IOException { - - // The input byte array of one byte integers - byte[] inputByteArr = new byte[NUM_EXAMPLES]; - int[] testIntArr = new int[NUM_EXAMPLES]; - // Get the int stream of random numbers - for(int i = 0; i < NUM_EXAMPLES; i++){ - int currInt = randGenerator.nextInt(Byte.MAX_VALUE); - inputByteArr[i] = (byte) currInt; - testIntArr[i] = currInt; - } - assertArrayEquals(testIntArr, decoderUtils.bytesToByteInts(inputByteArr)); - } - - @Test - public void convertIntToFloatTest() { - int[] inputData = {10213, 20303, 102, 183, 1021}; - float[] outPutData = {(float) 10.213, (float) 20.303, (float) 0.102, (float) 0.183, (float) 1.021}; - float [] outPutToTest = decoderUtils.decodeIntsToFloats(inputData, (float) 1000.0); - // Test they are the same length - assertEquals(outPutData.length, outPutToTest.length); - assertTrue(Arrays.equals(outPutData, outPutToTest)); - - } -} diff --git a/mmtf-decoder/src/test/java/org/rcsb/mmtf/decoder/TestDefaultDecoder.java b/mmtf-decoder/src/test/java/org/rcsb/mmtf/decoder/TestDefaultDecoder.java new file mode 100644 index 0000000..eb3c1d2 --- /dev/null +++ b/mmtf-decoder/src/test/java/org/rcsb/mmtf/decoder/TestDefaultDecoder.java @@ -0,0 +1,49 @@ +package org.rcsb.mmtf.decoder; + +import static org.junit.Assert.assertNotNull; + +import java.beans.IntrospectionException; +import java.beans.Introspector; +import java.beans.PropertyDescriptor; +import java.io.IOException; +import 
java.lang.reflect.InvocationTargetException; + +import org.junit.Test; +import org.rcsb.mmtf.api.DataTransferInterface; +import org.rcsb.mmtf.dataholders.MmtfBean; +import org.unitils.reflectionassert.ReflectionAssert; + +import uk.co.jemos.podam.api.PodamFactory; +import uk.co.jemos.podam.api.PodamFactoryImpl; + +public class TestDefaultDecoder { + + @Test + public void testDecodeAllFields() throws IOException, IntrospectionException, IllegalAccessException, IllegalArgumentException, InvocationTargetException { + PodamFactory factory = new PodamFactoryImpl(); + MmtfBean mmtfBean = factory.manufacturePojo(MmtfBean.class); + DefaultDecoder defaultDecoder = new DefaultDecoder(mmtfBean); + ReflectionAssert.assertPropertiesNotNull("Some properties null after decoding", defaultDecoder); + for(PropertyDescriptor propertyDescriptor : + Introspector.getBeanInfo(MmtfBean.class).getPropertyDescriptors()){ + assertNotNull(propertyDescriptor.getReadMethod().invoke(mmtfBean)); + } + // Check the decoder has been populated to + for(PropertyDescriptor propertyDescriptor : + Introspector.getBeanInfo(DefaultDecoder.class).getPropertyDescriptors()){ + if(propertyDescriptor.getReadMethod()!=null){ + assertNotNull(propertyDescriptor.getReadMethod().invoke(defaultDecoder)); + } + } + } + + + @Test + public void testReader() { + DummyApiImpl dummyApiImpl = new DummyApiImpl(); + DecoderToReader decoderToReader = new DecoderToReader(); + DataTransferInterface inputInflator = new DummyTransferImpl(); + decoderToReader.read(dummyApiImpl, inputInflator); + } + +} diff --git a/mmtf-decoder/src/test/java/org/rcsb/mmtf/deserializers/TestMessagePack.java b/mmtf-decoder/src/test/java/org/rcsb/mmtf/deserializers/TestMessagePack.java new file mode 100644 index 0000000..3252ff1 --- /dev/null +++ b/mmtf-decoder/src/test/java/org/rcsb/mmtf/deserializers/TestMessagePack.java @@ -0,0 +1,22 @@ +package org.rcsb.mmtf.deserializers; + +import static org.junit.Assert.assertNotNull; + +import 
java.io.IOException; + +import org.junit.Test; +import org.rcsb.mmtf.dataholders.MmtfBean; + +import com.fasterxml.jackson.core.JsonParseException; +import com.fasterxml.jackson.databind.JsonMappingException; + +public class TestMessagePack { + + + @Test + public void testBasic() throws JsonParseException, JsonMappingException, IOException { + MessagePackDeserializer messagePackDeserializer = new MessagePackDeserializer(); + MmtfBean mmtfBean = messagePackDeserializer.deserialize(new byte[] {(byte) (char) 129, (byte) (char)162, (byte) (char)100, (byte) (char)111, (byte) (char)1}); + assertNotNull(mmtfBean); + } +} diff --git a/mmtf-encoder/pom.xml b/mmtf-encoder/pom.xml index f1c6c7b..a540297 100644 --- a/mmtf-encoder/pom.xml +++ b/mmtf-encoder/pom.xml @@ -1,4 +1,5 @@ - + 4.0.0 org.rcsb @@ -17,275 +18,62 @@ - commons-beanutils - commons-beanutils - 1.8.3 + org.rcsb + mmtf-common + 0.0.1-alpha4-SNAPSHOT org.rcsb - mmtf-decoder + mmtf-api 0.0.1-alpha4-SNAPSHOT - org.biojava - biojava-structure - 5.0.0-alpha3 + org.rcsb + mmtf-decoder + 0.0.1-alpha4-SNAPSHOT org.msgpack jackson-dataformat-msgpack 0.7.1 + junit junit - 4.11 - test + + + uk.co.jemos.podam + podam org.unitils unitils-core - 3.4.2 - test + - uk.co.jemos.podam - podam - 2.3.5.RELEASE - test + org.slf4j + slf4j-api + + + + org.apache.logging.log4j + log4j-slf4j-impl + + + org.apache.logging.log4j + log4j-api + + + org.apache.logging.log4j + log4j-core + + + commons-beanutils + commons-beanutils - - - - org.apache.maven.plugins - maven-jar-plugin - - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - true - - - - pl.project13.maven - git-commit-id-plugin - 2.2.0 - - - - revision - - - - - - - ${project.basedir}/.git - - - git - - - dd.MM.yyyy '@' HH:mm:ss z - - - - ${user.timezone} - - - false - - - - - - true - - - ${project.build.outputDirectory}/git.properties - - - properties - - - true - - - - false - - - - true - - - - true - - - - false - - - - false - - - - - - - - - - - - - - - - false - 
- - - 7 - - - - - flat - - - - - - - false - - - false - - 7 - - - -dirty - - - * - - - false - - - - - true - - - ${project.build.outputDirectory}/git.properties - - - - - - - - diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/arraycompressors/FindDeltas.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/arraycompressors/FindDeltas.java deleted file mode 100644 index fe9ab08..0000000 --- a/mmtf-encoder/src/main/java/org/rcsb/mmtf/arraycompressors/FindDeltas.java +++ /dev/null @@ -1,42 +0,0 @@ -package org.rcsb.mmtf.arraycompressors; - -import java.io.Serializable; -import java.util.ArrayList; - -/** - * Class to encode an integer array with deltas. - * - * @author Anthony Bradley - */ -public class FindDeltas implements IntArrayCompressor, Serializable { - - - /** The Constant serialVersionUID. */ - private static final long serialVersionUID = -8404400061650470813L; - - /* (non-Javadoc) - * @see org.rcsb.mmtf.arraycompressors.IntArray - * Compressor#compressIntArray(java.util.ArrayList) - */ - public final ArrayList compressIntArray(final ArrayList inArray) { - // - ArrayList outArray = new ArrayList(); - int oldInt = 0; - for (int i = 0; i < inArray.size(); i++) { - // Get the value out here - int numInt = inArray.get(i); - if (i==0){ - oldInt = numInt; - outArray.add(numInt); - } - else{ - int this_int = numInt - oldInt; - outArray.add((int) this_int); - oldInt = numInt; - } - } - return outArray; - } - - -} diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/arraycompressors/IntArrayCompressor.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/arraycompressors/IntArrayCompressor.java deleted file mode 100644 index e3bd07e..0000000 --- a/mmtf-encoder/src/main/java/org/rcsb/mmtf/arraycompressors/IntArrayCompressor.java +++ /dev/null @@ -1,18 +0,0 @@ -package org.rcsb.mmtf.arraycompressors; - -import java.util.ArrayList; - - -/** - * The Interface IntArrayCompressor. 
- */ -public interface IntArrayCompressor { - - /** - * Generic function to compress an integer array. - * - * @param inArray the in array - * @return the array list - */ - public ArrayList compressIntArray(ArrayList inArray); -} diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/arraycompressors/RunLengthEncode.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/arraycompressors/RunLengthEncode.java deleted file mode 100644 index 97d4a3b..0000000 --- a/mmtf-encoder/src/main/java/org/rcsb/mmtf/arraycompressors/RunLengthEncode.java +++ /dev/null @@ -1,60 +0,0 @@ -package org.rcsb.mmtf.arraycompressors; - -import java.io.Serializable; -import java.util.ArrayList; - -/** - * Class to run length encode an integer array. - * @author Anthony Bradley - * - */ -public class RunLengthEncode implements IntArrayCompressor, Serializable { - - - /** The Constant serialVersionUID. */ - private static final long serialVersionUID = -793325266722283046L; - - /* (non-Javadoc) - * @see org.rcsb.mmtf.arraycompressors.IntArrayCompressor#compressIntArray(java.util.ArrayList) - */ - public ArrayList compressIntArray(ArrayList inArray) { - - ArrayList outArray = new ArrayList(); - int oldVal = 0; - boolean inSwitch = false; - int counter = 0; - // Loop through the vals - for (int i = 0; i < inArray.size(); i++) { - // Get the value out here - int num_int = inArray.get(i); - - - if(inSwitch==false){ - inSwitch=true; - // If it's a new number add it to the array - outArray.add(num_int); - counter=1; - oldVal=num_int; - } - else if (num_int!=oldVal){ - // Add the counter to the array - outArray.add(counter); - if(counter<0){ - System.out.println("THIS ERROR - "+counter); - } - // If it's a new number add it to the array - outArray.add(num_int); - counter=1; - oldVal=num_int; - } - else{ - counter+=1; - } - } - outArray.add(counter); - return outArray; - } - - - -} diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/arraycompressors/RunLengthEncodeString.java 
b/mmtf-encoder/src/main/java/org/rcsb/mmtf/arraycompressors/RunLengthEncodeString.java deleted file mode 100644 index 5bc011a..0000000 --- a/mmtf-encoder/src/main/java/org/rcsb/mmtf/arraycompressors/RunLengthEncodeString.java +++ /dev/null @@ -1,41 +0,0 @@ -package org.rcsb.mmtf.arraycompressors; - -import java.util.ArrayList; - -/** - * Class to run length encode a string array. - * @author Anthony Bradley - * - */ -public class RunLengthEncodeString implements StringArrayCompressor { - - - /* (non-Javadoc) - * @see org.rcsb.mmtf.arraycompressors.StringArrayCompressor#compressStringArray(java.util.ArrayList) - */ - public ArrayList compressStringArray(ArrayList inArray) { - ArrayList outArray = new ArrayList(); - String oldVal = ""; - int counter = 0; - // Loop through the vals - for (int i = 0; i < inArray.size(); i++) { - // Get the value out here - String inString = inArray.get(i); - if (inString != oldVal){ - if(oldVal != ""){ - // Add the counter to the array - outArray.add(Integer.toString(counter)); - } - // If it's a new number add it to the array - outArray.add(inString); - counter = 1; - oldVal = inString; - } else { - counter += 1; - } - } - outArray.add(Integer.toString(counter)); - return outArray; - } - -} diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/arraycompressors/StringArrayCompressor.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/arraycompressors/StringArrayCompressor.java deleted file mode 100644 index 72e4923..0000000 --- a/mmtf-encoder/src/main/java/org/rcsb/mmtf/arraycompressors/StringArrayCompressor.java +++ /dev/null @@ -1,17 +0,0 @@ -package org.rcsb.mmtf.arraycompressors; - -import java.util.ArrayList; - -/** - * The Interface StringArrayCompressor. - */ -public interface StringArrayCompressor { - - /** - * Generic function to compress a string array. 
- * - * @param inArray the in array - * @return the array list - */ - ArrayList compressStringArray(ArrayList inArray); -} diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/arraycompressors/package-info.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/arraycompressors/package-info.java deleted file mode 100644 index 845718a..0000000 --- a/mmtf-encoder/src/main/java/org/rcsb/mmtf/arraycompressors/package-info.java +++ /dev/null @@ -1,8 +0,0 @@ -/** - * Package to compress arrays - */ -/** - * @author Anthony Bradley - * - */ -package org.rcsb.mmtf.arraycompressors; diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/biocompressors/BioCompressor.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/biocompressors/BioCompressor.java deleted file mode 100644 index ef3c755..0000000 --- a/mmtf-encoder/src/main/java/org/rcsb/mmtf/biocompressors/BioCompressor.java +++ /dev/null @@ -1,18 +0,0 @@ -package org.rcsb.mmtf.biocompressors; - -import java.lang.reflect.InvocationTargetException; - -import org.rcsb.mmtf.dataholders.CoreSingleStructure; - -public interface BioCompressor { - - /** - * Generic function to modify the data structure of a protein to enhance compression - * @param coress - * @return The updated data structure - * @throws IllegalAccessException - * @throws InvocationTargetException - */ - public CoreSingleStructure compresStructure(CoreSingleStructure coress); - -} diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/biocompressors/CompressDoubles.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/biocompressors/CompressDoubles.java deleted file mode 100644 index c1afd96..0000000 --- a/mmtf-encoder/src/main/java/org/rcsb/mmtf/biocompressors/CompressDoubles.java +++ /dev/null @@ -1,90 +0,0 @@ -package org.rcsb.mmtf.biocompressors; - -import java.io.Serializable; -import java.lang.reflect.InvocationTargetException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.commons.beanutils.BeanUtils; -import 
org.rcsb.mmtf.dataholders.BioDataStruct; -import org.rcsb.mmtf.dataholders.CoreSingleStructure; -import org.rcsb.mmtf.dataholders.NoFloatDataStruct; - -/** - * Class to compress a structure by turning doubles to integers. - * @author Anthony Bradley - * - */ -public class CompressDoubles implements BioCompressor, Serializable { - - /** The Constant serialVersionUID. */ - private static final long serialVersionUID = -8942639615818134183L; - - /* (non-Javadoc) - * @see org.rcsb.mmtf.biocompressors.BioCompressor#compresStructure(org.rcsb.mmtf.dataholders.CoreSingleStructure) - */ - public CoreSingleStructure compresStructure(CoreSingleStructure coress) { - // Take in the appropriate arrays - BioDataStruct bioDataS = (BioDataStruct) coress; - NoFloatDataStruct noFloatDataS = new NoFloatDataStruct(); - - try { - BeanUtils.copyProperties(noFloatDataS, bioDataS); - } catch (IllegalAccessException e) { - System.err.println("Unknown bug - copying bean data. Report as bug."); - e.printStackTrace(); - throw new RuntimeException(e); - } catch (InvocationTargetException e) { - System.err.println("Unknown bug - copying bean data. 
Report as bug."); - e.printStackTrace(); - throw new RuntimeException(e); - } - // Get all the arrays we want to compress - // Set the coordinates - noFloatDataS.set_atom_site_Cartn_xInt(getIntArrayFromDouble(bioDataS.get_atom_site_Cartn_x(),1000.0)); - noFloatDataS.set_atom_site_Cartn_yInt(getIntArrayFromDouble(bioDataS.get_atom_site_Cartn_y(),1000.0)); - noFloatDataS.set_atom_site_Cartn_zInt(getIntArrayFromDouble(bioDataS.get_atom_site_Cartn_z(),1000.0)); - // Now set the temperature factors and occupancy - noFloatDataS.set_atom_site_B_iso_or_equivInt(getIntArrayFromFloat(bioDataS.get_atom_site_B_iso_or_equiv(),(float) 100.0)); - noFloatDataS.set_atom_site_occupancyInt(getIntArrayFromFloat(bioDataS.get_atom_site_occupancy(),(float) 100.0)); - // Now assign these to the new dataStructure - return noFloatDataS; - } - - /** - * Function to return an int array from a float array. - * - * @param inArray the input array of floats - * @param multiplier - the multiplication factor for conversion - * @return the integer array after conversion - */ - public List getIntArrayFromFloat(List inArray, float multiplier) { - // Initialise the out array - List outArray = new ArrayList(inArray.size()); - for(Float oldDouble: inArray){ - Integer newInt = (int) Math.round(oldDouble * multiplier); - outArray.add(newInt); - } - return outArray; - - } - - /** - * Function to return an int array from a double array. 
- * - * @param inArray the input array of doubles - * @param multiplier the multiplier - * @return the int array from double - */ - public List getIntArrayFromDouble(List inArray, Double multiplier){ - // Initialise the out array - List outArray = new ArrayList(inArray.size()); - for(Double oldDouble: inArray){ - Integer newInt = (int) Math.round(oldDouble * multiplier); - outArray.add(newInt); - } - return outArray; - - } - -} diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/biojavaencoder/CustomChemCompProvider.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/biojavaencoder/CustomChemCompProvider.java deleted file mode 100644 index ed0a042..0000000 --- a/mmtf-encoder/src/main/java/org/rcsb/mmtf/biojavaencoder/CustomChemCompProvider.java +++ /dev/null @@ -1,469 +0,0 @@ -package org.rcsb.mmtf.biojavaencoder; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileOutputStream; -import java.io.FilenameFilter; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.PrintWriter; -import java.io.StringWriter; -import java.net.HttpURLConnection; -import java.net.URL; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.zip.GZIPOutputStream; - -import org.biojava.nbio.core.util.InputStreamProvider; -import org.biojava.nbio.structure.align.util.HTTPConnectionTools; -import org.biojava.nbio.structure.align.util.UserConfiguration; -import org.biojava.nbio.structure.io.mmcif.AllChemCompProvider; -import org.biojava.nbio.structure.io.mmcif.ChemCompConsumer; -import org.biojava.nbio.structure.io.mmcif.ChemCompProvider; -import org.biojava.nbio.structure.io.mmcif.ChemicalComponentDictionary; -import org.biojava.nbio.structure.io.mmcif.DownloadChemCompProvider; -import org.biojava.nbio.structure.io.mmcif.MMcifParser; -import org.biojava.nbio.structure.io.mmcif.ReducedChemCompProvider; -import 
org.biojava.nbio.structure.io.mmcif.SimpleMMcifParser; -import org.biojava.nbio.structure.io.mmcif.model.ChemComp; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - - - -/** This is a custom provider of chem comp information - to provide data from custom urls (specified at run time). - * - * - * @author Anthony Bradley - * @author Andreas Prlic - * - */ -public class CustomChemCompProvider implements ChemCompProvider { - - private static final Logger logger = LoggerFactory.getLogger(DownloadChemCompProvider.class); - - public static final String CHEM_COMP_CACHE_DIRECTORY = "chemcomp"; - - public static final String SERVER_LOCATION = "http://www.rcsb.org/pdb/files/ligand/"; - - - private static String extraServerLocation; - private static File path; - //private static final String FILE_SEPARATOR = System.getProperty("file.separator"); - private static final String NEWLINE = System.getProperty("line.separator"); - - - // flags to make sure there is only one thread running that is loading the dictionary - static AtomicBoolean loading = new AtomicBoolean(false); - - static final List protectedIDs = new ArrayList (); - static { - protectedIDs.add("CON"); - protectedIDs.add("PRN"); - protectedIDs.add("AUX"); - protectedIDs.add("NUL"); - } - - /** by default we will download only some of the files. User has to request that all files should be downloaded... 
- * - */ - boolean downloadAll = false; - public CustomChemCompProvider(){ - logger.debug("Initialising DownloadChemCompProvider"); - extraServerLocation = null; - - // note that path is static, so this is just to make sure that all non-static methods will have path initialised - initPath(); - } - public CustomChemCompProvider(String extraUrl){ - logger.debug("Initialising DownloadChemCompProvider"); - extraServerLocation = extraUrl; - - // note that path is static, so this is just to make sure that all non-static methods will have path initialised - initPath(); - } - - public CustomChemCompProvider(String cacheFilePath, String extraUrl){ - logger.debug("Initialising DownloadChemCompProvider"); - - // note that path is static, so this is just to make sure that all non-static methods will have path initialised - path = new File(cacheFilePath); - extraServerLocation = extraUrl; - } - - private static void initPath(){ - - if (path==null) { - UserConfiguration config = new UserConfiguration(); - path = new File(config.getCacheFilePath()); - } - } - - /** - * Checks if the chemical components already have been installed into the PDB directory. - * If not, will download the chemical components definitions file and split it up into small - * subfiles. - */ - public void checkDoFirstInstall(){ - - if ( ! downloadAll ) { - return; - } - - - // this makes sure there is a file separator between every component, - // if path has a trailing file separator or not, it will work for both cases - File dir = new File(path, CHEM_COMP_CACHE_DIRECTORY); - File f = new File(dir, "components.cif.gz"); - - if ( ! f.exists()) { - - downloadAllDefinitions(); - - } else { - // file exists.. did it get extracted? 
- - FilenameFilter filter =new FilenameFilter() { - - @Override - public boolean accept(File dir, String file) { - return file.endsWith(".cif.gz"); - } - }; - String[] files = dir.list(filter); - if ( files.length < 500) { - // not all did get unpacked - try { - split(); - } catch (IOException e) { - logger.error("Could not split file {} into individual chemical component files. Error: {}", - f.toString(), e.getMessage()); - } - } - } - } - - private void split() throws IOException { - - logger.info("Installing individual chem comp files ..."); - - File dir = new File(path, CHEM_COMP_CACHE_DIRECTORY); - File f = new File(dir, "components.cif.gz"); - - - int counter = 0; - InputStreamProvider prov = new InputStreamProvider(); - - try( BufferedReader buf = new BufferedReader (new InputStreamReader (prov.getInputStream(f))); - ) { - String line = null; - line = buf.readLine (); - StringWriter writer = new StringWriter(); - - String currentID = null; - while (line != null){ - - if ( line.startsWith("data_")) { - // a new record found! - - if ( currentID != null) { - writeID(writer.toString(), currentID); - counter++; - } - - currentID = line.substring(5); - writer = new StringWriter(); - } - - writer.append(line); - writer.append(NEWLINE); - - line = buf.readLine (); - } - - // write the last record... 
- writeID(writer.toString(),currentID); - counter++; - - } - - logger.info("Created " + counter + " chemical component files."); - } - - /** - * Output chemical contents to a file - * @param contents File contents - * @param currentID Chemical ID, used to determine the filename - * @throws IOException - */ - private void writeID(String contents, String currentID) throws IOException{ - - String localName = DownloadChemCompProvider.getLocalFileName(currentID); - - try ( PrintWriter pw = new PrintWriter(new GZIPOutputStream(new FileOutputStream(localName))) ) { - - pw.print(contents.toString()); - pw.flush(); - } - } - - /** - * Loads the definitions for this {@link ChemComp} from a local file and instantiates a new object. - * - * @param recordName the ID of the {@link ChemComp} - * @return a new {@link ChemComp} definition. - */ - @Override - public ChemComp getChemComp(String recordName) { - - // make sure we work with upper case records - recordName = recordName.toUpperCase().trim(); - - boolean haveFile = true; - if ( recordName.equals("?")){ - return null; - } - - if ( ! fileExists(recordName)) { - // check if we should install all components - checkDoFirstInstall(); - } - if ( ! fileExists(recordName)) { - // we previously have installed already the definitions, - // just do an incrememntal update - haveFile = downloadChemCompRecord(recordName); - } - - // Added check that download was successful and chemical component is available. - if (haveFile) { - String filename = getLocalFileName(recordName); - InputStream inStream = null; - try { - - InputStreamProvider isp = new InputStreamProvider(); - - inStream = isp.getInputStream(filename); - - MMcifParser parser = new SimpleMMcifParser(); - - ChemCompConsumer consumer = new ChemCompConsumer(); - - // The Consumer builds up the BioJava - structure object. - // you could also hook in your own and build up you own data model. 
- parser.addMMcifConsumer(consumer); - - parser.parse(new BufferedReader(new InputStreamReader(inStream))); - - ChemicalComponentDictionary dict = consumer.getDictionary(); - - ChemComp chemComp = dict.getChemComp(recordName); - - return chemComp; - - } catch (IOException e) { - - logger.error("Could not parse chemical component file {}. Error: {}. " - + "There will be no chemical component info available for {}", filename, e.getMessage(), recordName); - - } - finally{ - // Now close it - if(inStream!=null){ - try { - inStream.close(); - } catch (IOException e) { - // This would be weird... - logger.error("Could not close chemical component file {}. A resource leak could occur!!", filename); - } - } - - } - } - - // see https://github.com/biojava/biojava/issues/315 - // probably a network error happened. Try to use the ReducedChemCOmpProvider - ReducedChemCompProvider reduced = new ReducedChemCompProvider(); - - return reduced.getChemComp(recordName); - - } - - /** Returns the file name that contains the definition for this {@link ChemComp} - * - * @param recordName the ID of the {@link ChemComp} - * @return full path to the file - */ - public static String getLocalFileName(String recordName){ - - if ( protectedIDs.contains(recordName)){ - recordName = "_" + recordName; - } - - initPath(); - - File f = new File(path, CHEM_COMP_CACHE_DIRECTORY); - if (! 
f.exists()){ - logger.info("Creating directory " + f); - - boolean success = f.mkdir(); - // we've checked in initPath that path is writable, so there's no need to check if it succeeds - // in the unlikely case that in the meantime it isn't writable at least we log an error - if (!success) logger.error("Directory {} could not be created",f); - - } - - File theFile = new File(f,recordName + ".cif.gz"); - - return theFile.toString(); - } - - private static boolean fileExists(String recordName){ - - String fileName = getLocalFileName(recordName); - - File f = new File(fileName); - - return f.exists(); - - } - - /** - * @param recordName : three-letter name - * @return true if successful download - */ - private static boolean downloadChemCompRecord(String recordName) { - - String localName = getLocalFileName(recordName); - File newFile; - try{ - newFile = File.createTempFile("chemcomp"+recordName, "cif"); - } - catch(IOException e){ - logger.error("Could not write to temp directory {} to create the chemical component download temp file", System.getProperty("java.io.tmpdir")); - return false; - } - // If there is no input server - just leave - if (extraServerLocation == null) { - return false; - } - String inputUrl = extraServerLocation + recordName.charAt(0) + "/" + recordName + "/" + recordName + ".cif"; - - logger.debug("downloading " + inputUrl); - - URL url = null; - - - try { - url = new URL(inputUrl); - - HttpURLConnection uconn = HTTPConnectionTools.openHttpURLConnection(url); - - try( PrintWriter pw = new PrintWriter(new GZIPOutputStream(new FileOutputStream(newFile))); - BufferedReader fileBuffer = new BufferedReader(new InputStreamReader(uconn.getInputStream())); - ) { - - String line; - - while ((line = fileBuffer.readLine()) != null) { - pw.println(line); - } - - pw.flush(); - // Now we move this across to where it actually wants to be - boolean couldRename = newFile.renameTo(new File(localName)); - - if (!couldRename) { - - throw new IOException("Could not 
rename temp file "+newFile.toString()+" to file " + localName); - } - - return true; - } - } catch (IOException e){ - logger.error("Could not download "+url.toString()+" OR store locally to "+localName+" Error ="+e.getMessage()); - newFile.delete(); - } - return false; - } - - private void downloadAllDefinitions() { - - if ( loading.get()){ - logger.info("Waiting for other thread to install chemical components..."); - } - - while ( loading.get() ) { - - // another thread is already downloading the components definitions - // wait for the other thread to finish... - - try { - // wait half a second - - Thread.sleep(500); - } catch (InterruptedException e) { - //e.printStackTrace(); - logger.error("Thread interrupted "+e.getMessage()); - } - - logger.info("Another thread installed the chemical components."); - return; - - } - - loading.set(true); - long timeS = System.currentTimeMillis(); - - logger.info("Performing first installation of chemical components."); - logger.info("Downloading components.cif.gz ..."); - - - try { - AllChemCompProvider.downloadFile(); - } catch (IOException e){ - logger.error("Could not download the all chemical components file. Error: {}. " - + "Chemical components information won't be available", e.getMessage()); - // no point in trying to split if the file could not be downloaded - loading.set(false); - return; - } - try { - split(); - } catch (IOException e) { - logger.error("Could not split all chem comp file into individual chemical component files. Error: {}", - e.getMessage()); - // no point in reporting time - loading.set(false); - return; - } - long timeE = System.currentTimeMillis(); - logger.info("time to install chem comp dictionary: " + (timeE - timeS) / 1000 + " sec."); - loading.set(false); - - } - - /** By default this provider will download only some of the {@link ChemComp} files. - * The user has to request that all files should be downloaded by setting this parameter to true. 
- * - * @return flag if the all components should be downloaded and installed at startup. (default: false) - */ - public boolean isDownloadAll() { - return downloadAll; - } - - /** By default this provider will download only some of the {@link ChemComp} files. - * The user has to request that all files should be downloaded by setting this parameter to true. - * - * @param flag if the all components should be downloaded and installed at startup. (default: false) - */ - public void setDownloadAll(boolean downloadAll) { - this.downloadAll = downloadAll; - } - - - - -} - diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/biojavaencoder/EncodeStructure.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/biojavaencoder/EncodeStructure.java deleted file mode 100644 index 8d47404..0000000 --- a/mmtf-encoder/src/main/java/org/rcsb/mmtf/biojavaencoder/EncodeStructure.java +++ /dev/null @@ -1,86 +0,0 @@ -package org.rcsb.mmtf.biojavaencoder; - -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; - -import org.biojava.nbio.structure.Structure; -import org.rcsb.mmtf.dataholders.MmtfBean; -import org.rcsb.mmtf.dataholders.PDBGroup; - -public class EncodeStructure { - - /** - * Get a byte array of the compressed messagepack MMTF data - * from an input PDB id - * @param pdbId - * @return a byte array of compressed data - */ - public byte[] getCompressedMessagePackFromPdbId(String pdbId) { - // Get the utility class to get the strucutes - ParseFromBiojava parsedDataStruct = new ParseFromBiojava(); - Map totMap = new HashMap(); - // Parse the data into the basic data structure - parsedDataStruct.createFromJavaStruct(pdbId, totMap); - // Compress the data and get it back out - return buildFromDataStructure(parsedDataStruct); - } - - /** - * Generate the compressed messagepack MMTF data from a biojava structure - * @param bioJavaStruct - * @return a byte array of compressed data - */ - public byte[] encodeFromBiojava(Structure bioJavaStruct){ - // Get the 
utility class to get the strucutes - ParseFromBiojava parsedDataStruct = new ParseFromBiojava(); - Map totMap = new HashMap(); - // Parse the data into the basic data structure - parsedDataStruct.generateDataStructuresFromBioJavaStructure(bioJavaStruct, totMap); - return buildFromDataStructure(parsedDataStruct); - } - - /** - * Build up a byte array from the parsed data - * @param parsedDataStruct - * @return a byte array of compressed data - */ - private byte[] buildFromDataStructure(ParseFromBiojava parsedDataStruct) { - EncoderUtils eu = new EncoderUtils(); - // Compress the data and get it back out - try { - MmtfBean mmtfBean = eu.compressMainData(parsedDataStruct.getBioStruct(), parsedDataStruct.getHeaderStruct()); - return eu.getMessagePack(mmtfBean); - } catch (IOException e) { - // Here we've failed to read or write a byte array - e.printStackTrace(); - System.err.println("Error reading or writing byte array - file bug report"); - throw new RuntimeException(e); - } - } - - /** - * Generate the compressed messagepack of the calpha, phospohate and ligand data. 
- * @param pdbId The input pdb id - * @return a byte array of compressed calpha data - */ - public byte[] encodeBackBoneFromPdbId(String pdbId){ - // Get the two utility classes - EncoderUtils eu = new EncoderUtils(); - ParseFromBiojava cbs = new ParseFromBiojava(); - Map totMap = new HashMap(); - // Parse the data into the basic data structure - cbs.createFromJavaStruct(pdbId, totMap); - // Compress the data and get it back out - try { - return eu.getMessagePack(eu.compCAlpha(cbs.getCalphaStruct(), cbs.getHeaderStruct())); - } catch (IOException e) { - e.printStackTrace(); - System.err.println("Error in reading or writing byte array"); - throw new RuntimeException(e); - } - } - - - -} diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/biojavaencoder/EncoderUtils.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/biojavaencoder/EncoderUtils.java deleted file mode 100644 index 7314048..0000000 --- a/mmtf-encoder/src/main/java/org/rcsb/mmtf/biojavaencoder/EncoderUtils.java +++ /dev/null @@ -1,490 +0,0 @@ -package org.rcsb.mmtf.biojavaencoder; - - - - -import java.io.ByteArrayOutputStream; -import java.io.DataOutputStream; -import java.io.IOException; -import java.io.Serializable; -import java.lang.reflect.InvocationTargetException; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.zip.GZIPOutputStream; - -import org.biojava.nbio.structure.Atom; -import org.biojava.nbio.structure.Chain; -import org.biojava.nbio.structure.Group; -import org.biojava.nbio.structure.Structure; -import org.biojava.nbio.structure.StructureIO; -import org.biojava.nbio.structure.align.util.AtomCache; -import org.biojava.nbio.structure.io.FileParsingParameters; -import org.biojava.nbio.structure.io.mmcif.ChemCompGroupFactory; -import org.msgpack.jackson.dataformat.MessagePackFactory; -import org.rcsb.mmtf.arraycompressors.FindDeltas; -import org.rcsb.mmtf.arraycompressors.IntArrayCompressor; -import 
org.rcsb.mmtf.arraycompressors.RunLengthEncode; -import org.rcsb.mmtf.arraycompressors.RunLengthEncodeString; -import org.rcsb.mmtf.arraycompressors.StringArrayCompressor; -import org.rcsb.mmtf.biocompressors.BioCompressor; -import org.rcsb.mmtf.biocompressors.CompressDoubles; -import org.rcsb.mmtf.dataholders.BioDataStruct; -import org.rcsb.mmtf.dataholders.CalphaBean; -import org.rcsb.mmtf.dataholders.CalphaDistBean; -import org.rcsb.mmtf.dataholders.CoreSingleStructure; -import org.rcsb.mmtf.dataholders.HeaderBean; -import org.rcsb.mmtf.dataholders.MmtfBean; -import org.rcsb.mmtf.dataholders.NoFloatDataStruct; -import org.rcsb.mmtf.dataholders.NoFloatDataStructBean; -import org.rcsb.mmtf.gitversion.GetRepoState; - -import com.fasterxml.jackson.core.JsonProcessingException; - - -/** - * This class finds an mmCIF file and saves it as a csv file . - * - * @author Anthony Bradley - */ -public class EncoderUtils implements Serializable { - - /** The Constant serialVersionUID. */ - private static final long serialVersionUID = 376413981384858130L; - - /** The class to get the git repo start */ - private GetRepoState grs = new GetRepoState(); - - /** A converter of doubles to ints. */ - private BioCompressor doublesToInts = new CompressDoubles(); - - /** The delta compressor of arrays. */ - private IntArrayCompressor deltaComp = new FindDeltas(); - - /** The run length compressor of arrays. */ - private IntArrayCompressor runLengthComp = new RunLengthEncode(); - - /** - * Take a list of integers (as List). - * - * @param inputList the input integer array - * @return the byte[] output - * @throws IOException Occurred writing the int to the stream. - */ - public byte[] integersToBytes(List inputList) throws IOException - { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - DataOutputStream dos = new DataOutputStream(baos); - for(Integer i: inputList) - { - dos.writeInt(i); - } - return baos.toByteArray(); - } - - - /** - * Get a messagepack from a bean. 
- * - * @param inputObject the input object - * @return the message pack - * @throws JsonProcessingException the json processing exception - most likely related - * to serialization. - */ - public byte[] getMessagePack(Object inputObject) throws JsonProcessingException{ - com.fasterxml.jackson.databind.ObjectMapper objectMapper = new com.fasterxml.jackson.databind.ObjectMapper(new MessagePackFactory()); - byte[] inBuf = objectMapper.writeValueAsBytes(inputObject); - return inBuf; - } - - - /** - * Compress the biological and header data into a combined data structure. - * - * @param inStruct the in struct - * @param inHeader the in header - * @return the byte array for the compressed data - * @throws IOException reading byte array - * @throws Exception - */ - public MmtfBean compressMainData(BioDataStruct inStruct, HeaderBean inHeader) throws IOException { - EncoderUtils cm = new EncoderUtils(); - // Compress the protein - CoreSingleStructure strucureData = compressHadoopStruct(inStruct); - // NOW SET UP THE - MmtfBean thisDistBeanTot = new MmtfBean(); - NoFloatDataStructBean bioBean = (NoFloatDataStructBean) strucureData.findDataAsBean(); - // Copt these things - thisDistBeanTot.setPdbId(bioBean.getPdbCode()); - thisDistBeanTot.setInsCodeList(bioBean.get_atom_site_pdbx_PDB_ins_code()); - thisDistBeanTot.setAltLabelList(bioBean.get_atom_site_label_alt_id()); - // Set this experimental data - thisDistBeanTot.setResolution(inHeader.getResolution()); - thisDistBeanTot.setrFree(inHeader.getrFree()); - thisDistBeanTot.setrWork(inHeader.getrWork()); - // Copy the asym data - thisDistBeanTot.setChainIdList(inHeader.getAsymChainList()); - thisDistBeanTot.setChainsPerModel(inHeader.getAsymChainsPerModel()); - thisDistBeanTot.setGroupsPerChain(inHeader.getAsymGroupsPerChain()); - // Get - thisDistBeanTot.setEntityList(inHeader.getEntityList()); - // Get the seqres information - 
thisDistBeanTot.setSeqResIdList(cm.integersToBytes(runLengthComp.compressIntArray(deltaComp.compressIntArray((ArrayList) inHeader.getSeqResGroupIds())))); - thisDistBeanTot.setExperimentalMethods(inHeader.getExperimentalMethods()); - // Now get this list - thisDistBeanTot.setBondAtomList(cm.integersToBytes(inStruct.getInterGroupBondInds())); - thisDistBeanTot.setBondOrderList(cm.integersToSmallBytes(inStruct.getInterGroupBondOrders())); - // Now get these from the headers - thisDistBeanTot.setChainNameList(inHeader.getChainList()); - thisDistBeanTot.setNumAtoms(inHeader.getNumAtoms()); - thisDistBeanTot.setNumBonds(inHeader.getNumBonds()); - // Now get the Xtalographic info from this header - thisDistBeanTot.setSpaceGroup(inHeader.getSpaceGroup()); - thisDistBeanTot.setGroupMap(inStruct.getGroupMap()); - thisDistBeanTot.setUnitCell(inHeader.getUnitCell()); - thisDistBeanTot.setBioAssemblyList(inHeader.getBioAssembly()); - // Now set this extra header information - thisDistBeanTot.setTitle(inHeader.getTitle()); - // Now add the byte arrays to the bean - addByteArrs(thisDistBeanTot, bioBean); - // Now set the version - thisDistBeanTot.setMmtfProducer("RCSB-PDB Generator---version: "+grs.getCurrentVersion()); - return thisDistBeanTot; - } - - /** - * Add the required bytearrays to an mmtfbean. - * - * @param thisDistBeanTot the this dist bean tot - * @param bioBean the bio bean - * @throws IOException Signals that an I/O exception has occurred. 
- */ - private void addByteArrs(MmtfBean thisDistBeanTot, NoFloatDataStructBean bioBean) throws IOException { - EncoderUtils cm = new EncoderUtils(); - // X,Y and Z and Bfactors - set these arrays - List retArr = getBigAndLittle(bioBean.get_atom_site_Cartn_xInt()); - thisDistBeanTot.setxCoordBig(retArr.get(0)); - thisDistBeanTot.setxCoordSmall(retArr.get(1)); - retArr = getBigAndLittle(bioBean.get_atom_site_Cartn_yInt()); - thisDistBeanTot.setyCoordBig(retArr.get(0)); - thisDistBeanTot.setyCoordSmall(retArr.get(1)); - retArr = getBigAndLittle(bioBean.get_atom_site_Cartn_zInt()); - thisDistBeanTot.setzCoordBig(retArr.get(0)); - thisDistBeanTot.setzCoordSmall(retArr.get(1)); - retArr = getBigAndLittle(bioBean.get_atom_site_B_iso_or_equivInt()); - thisDistBeanTot.setbFactorBig(retArr.get(0)); - thisDistBeanTot.setbFactorSmall(retArr.get(1)); - // Now the occupancy - thisDistBeanTot.setOccList(cm.integersToBytes(bioBean.get_atom_site_occupancyInt())); - // System.out.println(Collections.max(bioBean.getResOrder())); - thisDistBeanTot.setGroupTypeList((cm.integersToBytes(bioBean.getResOrder()))); - thisDistBeanTot.setAtomIdList(cm.integersToBytes(bioBean.get_atom_site_id())); - - // Now the secondary structure - thisDistBeanTot.setSecStructList(cm.integersToSmallBytes(bioBean.getSecStruct())); - // Now set the group num list - thisDistBeanTot.setGroupIdList(cm.integersToBytes(bioBean.get_atom_site_auth_seq_id())); - } - - - /** - * Write a list of integers to 1 byte integers. - * - * @param values the values - * @return the byte[] - * @throws IOException Signals that an I/O exception has occurred. - */ - public byte[] integersToSmallBytes(List values) throws IOException { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - DataOutputStream dos = new DataOutputStream(baos); - for(int i: values) - { - dos.writeByte(i); - } - return baos.toByteArray(); - } - - /** - * Split an array into small (2 byte) and big (4 byte) integers. 
- * - * @param inArr the in arr - * @return the big and little - * @throws IOException Signals that an I/O exception has occurred. - */ - public List getBigAndLittle(List inArr) throws IOException{ - ListoutArr = new ArrayList(); - int counter = 0; - ByteArrayOutputStream littleOS = new ByteArrayOutputStream(); - DataOutputStream littleDOS = new DataOutputStream(littleOS); - ByteArrayOutputStream bigOS = new ByteArrayOutputStream(); - DataOutputStream bigDOS = new DataOutputStream(bigOS); - for(int i=0;i30000){ - // Counter added to the big list - bigDOS.writeInt(counter); - // Big number added to big list - bigDOS.writeInt(inArr.get(i)); - // Counter set to zero - counter = 0; - } - else{ - // Little number added to little list - littleDOS.writeShort(inArr.get(i)); - // Add to the counter - counter+=1; - } - } - // Finally add the counter to the big list - bigDOS.writeInt(counter); - - outArr.add(bigOS.toByteArray()); - outArr.add(littleOS.toByteArray()); - return outArr; - } - - /** - * Utility function to gzip compress a byte[]. - * - * @param inputArray the input array - * @return the byte[] - */ - public byte[] gzipCompress(byte[] inputArray){ - ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); - try{ - GZIPOutputStream gzipOutputStream = new GZIPOutputStream(byteArrayOutputStream); - gzipOutputStream.write(inputArray); - gzipOutputStream.close(); - } catch(IOException e){ - throw new RuntimeException(e); - } - System.out.printf("Compression %f\n", (1.0f * inputArray.length/byteArrayOutputStream.size())); - return byteArrayOutputStream.toByteArray(); - } - - /** - * Function to compress the input biological data. - * - * @param inputBioDataStruct the input data structure - * @return a core single structure - * @throws IllegalAccessException - * @throws InvocationTargetException - * @throws Exception The bean data copying didn't work - weird. 
- */ - public CoreSingleStructure compressHadoopStruct(BioDataStruct inputBioDataStruct) { - - CoreSingleStructure outStruct; - outStruct = doublesToInts.compresStructure(inputBioDataStruct); - // Get the input structure - NoFloatDataStruct inStruct = (NoFloatDataStruct) outStruct; - ArrayList cartnX = (ArrayList) inStruct.get_atom_site_Cartn_xInt(); - ArrayList cartnY = (ArrayList) inStruct.get_atom_site_Cartn_yInt(); - ArrayList cartnZ = (ArrayList) inStruct.get_atom_site_Cartn_zInt(); - - // Get the number of models - inStruct.set_atom_site_Cartn_xInt(deltaComp.compressIntArray(cartnX)); - inStruct.set_atom_site_Cartn_yInt(deltaComp.compressIntArray(cartnY)); - inStruct.set_atom_site_Cartn_zInt(deltaComp.compressIntArray(cartnZ)); - // // Now the occupancy and BFACTOR -> VERY SMALL GAIN - inStruct.set_atom_site_B_iso_or_equivInt(deltaComp.compressIntArray((ArrayList) inStruct.get_atom_site_B_iso_or_equivInt())); - // SMALL GAIN - inStruct.set_atom_site_occupancyInt(runLengthComp.compressIntArray((ArrayList) inStruct.get_atom_site_occupancyInt())); - // Now the sequential numbers - huge gain - new order of good compressors - // Now runlength encode the residue order - inStruct.setResOrder(inStruct.getResOrder()); - // THESE ONES CAN BE RUN LENGTH ON DELTA - - // Check for negative counters - inStruct.set_atom_site_auth_seq_id(runLengthComp.compressIntArray(deltaComp.compressIntArray((ArrayList) inStruct.get_atom_site_auth_seq_id()))); - inStruct.set_atom_site_label_entity_poly_seq_num(runLengthComp.compressIntArray(deltaComp.compressIntArray((ArrayList) inStruct.get_atom_site_label_entity_poly_seq_num()))); - inStruct.set_atom_site_id(runLengthComp.compressIntArray(deltaComp.compressIntArray((ArrayList) inStruct.get_atom_site_id()))); - //// NOW THE STRINGS - small gain - StringArrayCompressor stringRunEncode = new RunLengthEncodeString(); - inStruct.set_atom_site_label_alt_id(stringRunEncode.compressStringArray((ArrayList) 
inStruct.get_atom_site_label_alt_id())); - //inStruct.set_atom_site_label_entity_id(stringRunEncode.compressStringArray((ArrayList) inStruct.get_atom_site_label_entity_id())); - inStruct.set_atom_site_pdbx_PDB_ins_code(stringRunEncode.compressStringArray((ArrayList) inStruct.get_atom_site_pdbx_PDB_ins_code())); - return inStruct; - } - - /** - * Comp c alpha. - * - * @param calphaStruct the calpha struct - * @param inHeader the in header - * @return the calpha dist bean - * @throws IOException Signals that an I/O exception has occurred. - */ - public CalphaDistBean compCAlpha(CalphaBean calphaStruct, HeaderBean inHeader) throws IOException { - EncoderUtils cm = new EncoderUtils(); - // Create the object to leave - CalphaDistBean calphaOut = new CalphaDistBean(); - calphaOut.setMmtfProducer("RCSB-PDB Generator---version: "+grs.getCurrentVersion()); - // The PDBCode - calphaOut.setPdbId(inHeader.getPdbCode()); - // The title of the structure - calphaOut.setTitle(inHeader.getTitle()); - // String for the space group - calphaOut.setSpaceGroup(inHeader.getSpaceGroup()); - // The unit cell information - calphaOut.setUnitCell(inHeader.getUnitCell()); - // A map of Bioassembly -> new class so serializable - calphaOut.setBioAssembly(inHeader.getBioAssembly()); - // Now set the number of bonds - calphaOut.setNumBonds(calphaStruct.getNumBonds()); - calphaOut.setGroupsPerChain(calphaStruct.getGroupsPerChain()); - // Set this header info - calphaOut.setChainsPerModel(inHeader.getChainsPerModel()); - calphaOut.setGroupsPerChain(calphaStruct.getGroupsPerChain()); - calphaOut.setChainIdList(inHeader.getChainList()); - calphaOut.setNumAtoms(calphaStruct.getNumAtoms()); - // Write the secondary stucture out - calphaOut.setSecStructList(cm.integersToSmallBytes(calphaStruct.getSecStruct())); - calphaOut.setGroupMap(calphaStruct.getGroupMap()); - calphaOut.setGroupTypeList(cm.integersToBytes(calphaStruct.getResOrder())); - // Get the input structure - ArrayList cartnX = (ArrayList) 
calphaStruct.getCartn_x(); - ArrayList cartnY = (ArrayList) calphaStruct.getCartn_y(); - ArrayList cartnZ = (ArrayList) calphaStruct.getCartn_z(); - // Now add the X coords - List bigAndLittleX = getBigAndLittle(deltaComp.compressIntArray(cartnX)); - calphaOut.setxCoordBig(bigAndLittleX.get(0)); - calphaOut.setxCoordSmall(bigAndLittleX.get(1)); - // No add they Y coords - List bigAndLittleY = getBigAndLittle(deltaComp.compressIntArray(cartnY)); - calphaOut.setyCoordBig(bigAndLittleY.get(0)); - calphaOut.setyCoordSmall(bigAndLittleY.get(1)); - // Now add the Z coords - List bigAndLittleZ = getBigAndLittle(deltaComp.compressIntArray(cartnZ)); - calphaOut.setzCoordBig(bigAndLittleZ.get(0)); - calphaOut.setzCoordSmall(bigAndLittleZ.get(1)); - // THESE ONES CAN BE RUN LENGTH ON DELTA - calphaOut.setGroupIdList(cm.integersToBytes(runLengthComp.compressIntArray(deltaComp.compressIntArray((ArrayList) calphaStruct.get_atom_site_auth_seq_id())))); - return calphaOut; - } - - /** - * Set up the configuration parameters for BioJava. - */ - public AtomCache setUpBioJava() { - // Set up the atom cache etc - AtomCache cache = new AtomCache(); - cache.setUseMmCif(true); - FileParsingParameters params = cache.getFileParsingParams(); - params.setCreateAtomBonds(true); - params.setAlignSeqRes(true); - params.setParseBioAssembly(true); - params.setUseInternalChainId(true); - CustomChemCompProvider cc = new CustomChemCompProvider(); - ChemCompGroupFactory.setChemCompProvider(cc); - cc.checkDoFirstInstall(); - cache.setFileParsingParams(params); - StructureIO.setAtomCache(cache); - return cache; - } - - /** - * Set up the configuration parameters for BioJava. 
- with an extra URL - */ - public AtomCache setUpBioJava(String extraUrl) { - // Set up the atom cache etc - AtomCache cache = new AtomCache(); - cache.setUseMmCif(true); - FileParsingParameters params = cache.getFileParsingParams(); - params.setCreateAtomBonds(true); - params.setAlignSeqRes(true); - params.setParseBioAssembly(true); - params.setUseInternalChainId(true); - CustomChemCompProvider cc = new CustomChemCompProvider(extraUrl); - ChemCompGroupFactory.setChemCompProvider(cc); - cc.checkDoFirstInstall(); - cache.setFileParsingParams(params); - StructureIO.setAtomCache(cache); - return cache; - } - - - /** - * This sets all microheterogeneous groups (previously alternate location groups) as separate groups. - * @param bioJavaStruct - */ - public void fixMicroheterogenity(Structure bioJavaStruct) { - // Loop through the models - for (int i=0; i chains = bioJavaStruct.getModel(i); - for (Chain c : chains) { - // Build a new list of groups - List outGroups = new ArrayList<>(); - for (Group g : c.getAtomGroups()) { - List removeList = new ArrayList<>(); - for (Group altLoc : g.getAltLocs()) { - // Check if they are not equal -> microheterogenity - if(! altLoc.getPDBName().equals(g.getPDBName())) { - // Now add this group to the main list - removeList.add(altLoc); - } - } - // Add this group - outGroups.add(g); - // Remove any microhet alt locs - g.getAltLocs().removeAll(removeList); - // Add these microhet alt locs - outGroups.addAll(removeList); - } - c.setAtomGroups(outGroups); - } - } - } - - /** - * Function to get all the atoms in the strucutre as a list. 
- * - * @param bioJavaStruct the bio java struct - * @return the all atoms - */ - public List getAllAtoms(Structure bioJavaStruct) { - // Get all the atoms - List theseAtoms = new ArrayList(); - for (int i=0; i chains = bioJavaStruct.getModel(i); - for (Chain c : chains) { - for (Group g : c.getAtomGroups()) { - for(Atom a: getAtomsForGroup(g)){ - theseAtoms.add(a); - } - } - } - } - return theseAtoms; - } - - /** - * Function to get a list of atoms for a group. - * - * @param inputGroup the Biojava Group to consider - * @return the atoms for the input Biojava Group - */ - public List getAtomsForGroup(Group inputGroup) { - Set uniqueAtoms = new HashSet(); - List theseAtoms = new ArrayList(); - for(Atom a: inputGroup.getAtoms()){ - theseAtoms.add(a); - uniqueAtoms.add(a); - } - List altLocs = inputGroup.getAltLocs(); - for(Group thisG: altLocs){ - for(Atom a: thisG.getAtoms()){ - if(uniqueAtoms.contains(a)){ - continue; - } - theseAtoms.add(a); - } - } - return theseAtoms; - } - -} diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/biojavaencoder/ParseFromBiojava.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/biojavaencoder/ParseFromBiojava.java deleted file mode 100644 index 29ea31e..0000000 --- a/mmtf-encoder/src/main/java/org/rcsb/mmtf/biojavaencoder/ParseFromBiojava.java +++ /dev/null @@ -1,947 +0,0 @@ -package org.rcsb.mmtf.biojavaencoder; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import javax.vecmath.Matrix4d; - -import org.biojava.nbio.structure.Atom; -import org.biojava.nbio.structure.Bond; -import org.biojava.nbio.structure.Chain; -import org.biojava.nbio.structure.Element; -import org.biojava.nbio.structure.EntityInfo; -import org.biojava.nbio.structure.ExperimentalTechnique; -import org.biojava.nbio.structure.Group; -import 
org.biojava.nbio.structure.JournalArticle; -import org.biojava.nbio.structure.PDBCrystallographicInfo; -import org.biojava.nbio.structure.PDBHeader; -import org.biojava.nbio.structure.ResidueNumber; -import org.biojava.nbio.structure.Structure; -import org.biojava.nbio.structure.StructureException; -import org.biojava.nbio.structure.StructureIO; -import org.biojava.nbio.structure.quaternary.BioAssemblyInfo; -import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation; -import org.biojava.nbio.structure.secstruc.DSSPParser; -import org.biojava.nbio.structure.secstruc.SecStrucCalc; -import org.biojava.nbio.structure.secstruc.SecStrucState; -import org.biojava.nbio.structure.xtal.CrystalCell; -import org.biojava.nbio.structure.xtal.SpaceGroup; -import org.rcsb.mmtf.dataholders.BioAssemblyData; -import org.rcsb.mmtf.dataholders.BioDataStruct; -import org.rcsb.mmtf.dataholders.BioAssemblyTrans; -import org.rcsb.mmtf.dataholders.CalphaBean; -import org.rcsb.mmtf.dataholders.CodeHolders; -import org.rcsb.mmtf.dataholders.Entity; -import org.rcsb.mmtf.dataholders.HeaderBean; -import org.rcsb.mmtf.dataholders.PDBGroup; - - -/** - * A class to use biojava to parse MMCIF data and produce a data structure that can be fed into the MMTF. - */ -public class ParseFromBiojava { - - /** - * The multiplication factor for coordinate information - */ - private static final int COORD_MULT = 1000; - - /** The bio struct. */ - // Instances availble to the class of the main, calpha and header data structures - private BioDataStruct bioStruct = new BioDataStruct(); - - /** The calpha struct. */ - private CalphaBean calphaStruct = new CalphaBean(); - - /** The header struct. */ - private HeaderBean headerStruct = new HeaderBean(); - - /** The code holder. */ - // A class to store encoding information - private CodeHolders codeHolder = new CodeHolders(); - - /** The bonds for the structure. 
Used to keep track of which bonds have already been considered */ - private List totBonds = new ArrayList(); - - /** The number of groups per calpha chain. */ - private int[] calphaGroupsPerChain; - - /** The hash to calpha res. */ - private Map hashToCalphaRes; - - /** The a map relating hash codes to groups. For calphas */ - private Map calphaHashCodeToGroupMap; - - /** The chain counter. */ - private int chainCounter; - - /** The calpha group / residue counter. */ - private int calphaResCounter; - - /** The Biojava group currently being parsed. */ - private Group currentGroup; - - /** The Constant MY_MAP. Relates the group name to the type of atom id */ - private static final Map MY_MAP; - static { - Map aMap = new HashMap(); - aMap.put("hetatm", "HETATM"); - aMap.put("amino", "ATOM"); - aMap.put("nucleotide", "ATOM"); - MY_MAP = Collections.unmodifiableMap(aMap); - } - - /** - * Gets the bio struct. - * - * @return the bio struct - */ - public BioDataStruct getBioStruct() { - return bioStruct; - } - - - /** - * Sets the bio struct. - * - * @param bioStruct the new bio struct - */ - public void setBioStruct(BioDataStruct bioStruct) { - this.bioStruct = bioStruct; - } - - - /** - * Gets the calpha struct. - * - * @return the calpha struct - */ - public CalphaBean getCalphaStruct() { - return calphaStruct; - } - - - /** - * Sets the calpha struct. - * - * @param calphaStruct the new calpha struct - */ - public void setCalphaStruct(CalphaBean calphaStruct) { - this.calphaStruct = calphaStruct; - } - - - /** - * Gets the header struct. - * - * @return the header struct - */ - public HeaderBean getHeaderStruct() { - return headerStruct; - } - - - /** - * Sets the header struct. - * - * @param headerStruct the new header struct - */ - public void setHeaderStruct(HeaderBean headerStruct) { - this.headerStruct = headerStruct; - } - - - - /** - * Helper function to generate a main, calpha and header data form a PDB id. 
- * - * @param pdbId the pdb id - * @param bioStructMap the bio struct map - */ - public void createFromJavaStruct(String pdbId, Map bioStructMap) { - // Get the structure from here - Structure bioJavaStruct; - try { - bioJavaStruct = StructureIO.getStructure(pdbId); - } catch (IOException e) { - e.printStackTrace(); - System.err.println("Could not find/open data file for input: "+pdbId); - throw new RuntimeException(e); - } catch (StructureException e) { - e.printStackTrace(); - System.err.println("Error in parsing structure for input: "+pdbId); - throw new RuntimeException(e); - } - generateDataStructuresFromBioJavaStructure(bioJavaStruct, bioStructMap); - } - - - /** - * Function to generate a main, calpha and header data form a biojava structure. - * - * @param bioJavaStruct the Biojava structure - * @param bioStructMap the map relating hash codes to PDB groups. - * input so that a consistent map can be held across several structures - */ - public void generateDataStructuresFromBioJavaStructure(Structure bioJavaStruct, Map bioStructMap) { - EncoderUtils encoderUtils = new EncoderUtils(); - // Reset structure to consider altloc groups with the same residue number but different group names as seperate groups - encoderUtils.fixMicroheterogenity(bioJavaStruct); - // Generate the secondary structure - genSecStruct(bioJavaStruct); - // Set the header information - setHeaderInfo(bioJavaStruct); - // Get the number of models - Integer numModels = bioJavaStruct.nrModels(); - bioStruct.setNumModels(numModels); - // Set these maps and lists - List> bioStructList= new ArrayList>(); - Map hashToRes = new HashMap(); - hashToCalphaRes = new HashMap(); - // Set these counters - int atomCounter = 0; - chainCounter = 0; - int resCounter = 0; - int totAsymChains = 0; - // Get the total number of chains - for (int i=0; i(); - // Get these lists to keep track of everthing - and to give a datastrcutrue at the end - // List of chains per model - int[] chainsPerModel = new 
int[numModels]; - int[] internalChainsPerModel = new int[numModels]; - // Set this list - headerStruct.setChainsPerModel(chainsPerModel); - headerStruct.setAsymChainsPerModel(internalChainsPerModel); - byte[] charChainList = new byte[totAsymChains*4]; - byte[] charInternalChainList = new byte[totAsymChains*4]; - headerStruct.setChainList(charChainList); - headerStruct.setAsymChainList(charInternalChainList); - int[] groupsPerChain = new int[totAsymChains]; - int[] groupsPerInternalChain = new int[totAsymChains]; - headerStruct.setAsymGroupsPerChain(groupsPerInternalChain); - headerStruct.setGroupsPerChain(groupsPerChain); - headerStruct.setSequence(new ArrayList()); - // Find the experimental techniques - Set techniqueSet = bioJavaStruct.getPDBHeader().getExperimentalTechniques(); - headerStruct.setExperimentalMethods(new ArrayList()); - for (ExperimentalTechnique currentTechnique : techniqueSet){ - headerStruct.getExperimentalMethods().add(currentTechnique.toString()); - } - headerStruct.setSeqResGroupIds(new ArrayList()); - int bondCounter = 0; - - calphaGroupsPerChain = new int[totAsymChains]; - for(int i=0; i totAtoms = encoderUtils.getAllAtoms(bioJavaStruct); - for (int i=0; i chains = bioJavaStruct.getModel(i); - // Set the PDB Code - bioStruct.setPdbCode(bioJavaStruct.getPDBCode()); - ArrayList chainList = new ArrayList(); - // Set the number of chains in this model - internalChainsPerModel[i] = chains.size(); - // Get the number of unique ones - Set chainIdSet = new HashSet(); - for(Chain c : chains){ - String intChainId = c.getInternalChainID(); - chainIdSet.add(intChainId); - } - chainsPerModel[i] = chainIdSet.size(); - // Take the atomic information and place in a Hashmap - for (Chain biojavaChain: chains) { - // Get the seq res groups for this chain - List seqResGroups = biojavaChain.getSeqResGroups(); - // Set the sequence - if it's the first model... 
- if(i==0){ - headerStruct.getSequence().add(biojavaChain.getSeqResSequence()); - } - // Set the auth chain id - setChainId(biojavaChain.getInternalChainID(), charChainList, chainCounter); - // Set the asym chain id - setChainId(biojavaChain.getChainID(), charInternalChainList, chainCounter); - // Set the number of groups per chain - groupsPerChain[chainCounter] += biojavaChain.getAtomGroups().size(); - // Set the number of groups per internal chain - groupsPerInternalChain[chainCounter] = biojavaChain.getAtomGroups().size(); - // Add this chain to the list - chainList.add(biojavaChain.getChainID()); - // Get the groups - String currentChainId = biojavaChain.getChainID(); - int numBonds = 0; - for (Group loopGroup : biojavaChain.getAtomGroups()) { - currentGroup = loopGroup; - // Set the seq res group id - if(i==0){ - headerStruct.getSeqResGroupIds().add(seqResGroups.indexOf(currentGroup)); - } - // Get the pdb id - String res_id = currentGroup.getPDBName(); - // Get the atoms for this group - List atomsInThisGroup = encoderUtils.getAtomsForGroup(currentGroup); - // Get any bonds between groups - getInterGroupBond(atomsInThisGroup, totAtoms, atomCounter); - // Count the number of bonds - // Now loop through and get the coords - - // Generate the group level data - // Get the - List atomInfo = getAtomInfo(atomsInThisGroup); - // Get the atomic info required - bioStruct is the unique identifier of the group - int hashCode = getHashFromStringList(atomInfo); - // If we need bioStruct new information - if (hashToRes.containsKey(hashCode)==false){ - // Make a new group - PDBGroup outGroup = new PDBGroup(); - // Set the one letter code - outGroup.setSingleLetterCode(currentGroup.getChemComp().getOne_letter_code()); - // Set the group type - outGroup.setChemCompType(currentGroup.getChemComp().getType()); - outGroup.setGroupName(atomInfo.remove(0)); - outGroup.setAtomInfo(atomInfo); - // Now get the bond list (lengths, orders and indices) - createBondList(atomsInThisGroup, 
outGroup); - getCharges(atomsInThisGroup, outGroup); - // - bioStructMap.put(resCounter, outGroup); - hashToRes.put(hashCode, resCounter); - bioStruct.getResOrder().add(resCounter); - resCounter+=1; - numBonds = outGroup.getBondOrders().size(); - } - else{ - // Add this to the residue order - bioStruct.getResOrder().add(hashToRes.get(hashCode)); - numBonds = bioStructMap.get(hashToRes.get(hashCode)).getBondOrders().size(); - } - // Add the number of bonds - bondCounter+=numBonds; - - ResidueNumber residueNum = currentGroup.getResidueNumber(); - - // bioStruct data item corresponds to the PDB insertion code. - Character insertionCode = residueNum.getInsCode(); - if (insertionCode==null){ - bioStruct.get_atom_site_pdbx_PDB_ins_code().add(null); - } - else{ - bioStruct.get_atom_site_pdbx_PDB_ins_code().add(insertionCode.toString()); - } - - SecStrucState props = (SecStrucState) currentGroup.getProperty("secstruc"); - // Only assign secondary structure for the first model - if(i==0){ - if(props==null){ - bioStruct.getSecStruct().add(codeHolder.getDsspMap().get("NA")); - } - else{ - bioStruct.getSecStruct().add(codeHolder.getDsspMap().get(props.getType().name)); - } - } - // Now add the residue sequnece number - bioStruct.get_atom_site_auth_seq_id().add(residueNum.getSeqNum()); - // Set whether or not this is a calpha - List cAlphaGroup = new ArrayList(); - for (Atom currentAtom : atomsInThisGroup) { - // Update the structure - addAtomInfo(currentAtom, currentChainId, res_id, residueNum, biojavaChain); - // Update the calpha - updateCalpha(cAlphaGroup, currentAtom); - // Increment the atom counter - atomCounter+=1; - } - // Now add this group - if there is something to consider - addCalphaGroup(cAlphaGroup, props, residueNum); - } - // Increment again by one - chainCounter+=1; - } - } - // Set this final information in the total datastruct - bioStruct.setGroupList(bioStructList); - bioStruct.setGroupMap(bioStructMap); - 
calphaStruct.setGroupMap(calphaHashCodeToGroupMap); - // Now set this header info - headerStruct.setNumBonds(bondCounter+bioStruct.getInterGroupBondInds().size()); - headerStruct.setNumAtoms(atomCounter); - headerStruct.setNumChains(chainCounter); - headerStruct.setPdbCode(bioJavaStruct.getPDBCode()); - } - - - /** - * Find and store the entity information in the header structure. - * @param bioJavaStruct - */ - private void findEntityInfo(Structure bioJavaStruct) { - List entities = bioJavaStruct.getEntityInfos(); - // Get the list of chains for all the models - List structChains = new ArrayList<>(); - for (int i=0; i < bioJavaStruct.nrModels(); i++) { - structChains.addAll(bioJavaStruct.getChains(i)); - } - Entity[] entityList = new Entity[entities.size()]; - int entityCounter = 0; - for(EntityInfo entityInfo : entities) { - Entity newEntity = new Entity(); - // Get the indices for the chains in this guy - List entChains = entityInfo.getChains(); - int[] indexList = new int[entChains.size()]; - int counter = 0; - for(Chain entChain : entChains) { - int indexChain = structChains.indexOf(entChain); - indexList[counter] = indexChain; - counter++; - } - newEntity.setChainIndexList(indexList); - newEntity.setDescription(entityInfo.getDescription()); - newEntity.setType(entityInfo.getType().toString()); - if (entityInfo.getChains().size()==0){ - newEntity.setSequence(""); - } - else { - newEntity.setSequence(entityInfo.getChains().get(0).getSeqResSequence()); - } - entityList[entityCounter] = newEntity; - entityCounter++; - } - headerStruct.setEntityList(entityList); - } - - - - - /** - * Sets the header info. 
- * - * @param bioJavaStruct the new header info - */ - private void setHeaderInfo(Structure bioJavaStruct) { - headerStruct.setPdbCode(bioJavaStruct.getPDBCode()); - // Now get hte xtalographic info - PDBCrystallographicInfo xtalInfo = bioJavaStruct.getCrystallographicInfo(); - CrystalCell xtalCell = xtalInfo.getCrystalCell(); - SpaceGroup spaceGroup = xtalInfo.getSpaceGroup(); - float[] inputUnitCell = new float[6]; - if(xtalCell==null){ - - }else{ - headerStruct.setUnitCell(inputUnitCell); - inputUnitCell[0] = (float) xtalCell.getA(); - inputUnitCell[1] = (float) xtalCell.getB(); - inputUnitCell[2] = (float) xtalCell.getC(); - inputUnitCell[3] = (float) xtalCell.getAlpha(); - inputUnitCell[4] = (float) xtalCell.getBeta(); - inputUnitCell[5] = (float) xtalCell.getGamma(); - if(spaceGroup==null){ - // This could be the I21 shown here - headerStruct.setSpaceGroup("NA"); - } - else{ - headerStruct.setSpaceGroup(spaceGroup.getShortSymbol()); - } - } - // GET THE HEADER INFORMATION - PDBHeader header = bioJavaStruct.getPDBHeader(); - List outMap = generateSerializableBioAssembly(bioJavaStruct, header); - headerStruct.setBioAssembly(outMap); - headerStruct.setTitle(header.getTitle()); - headerStruct.setDescription(header.getDescription()); - headerStruct.setClassification(header.getClassification()); - headerStruct.setDepDate(header.getDepDate()); - headerStruct.setModDate(header.getModDate()); - headerStruct.setResolution(header.getResolution()); - headerStruct.setrFree(header.getRfree()); - - JournalArticle myJournal = header.getJournalArticle(); - if( myJournal==null){ - - } - else{ - headerStruct.setDoi(myJournal.getDoi()); - } - } - - - /** - * - * Function to generate the secondary structuee for a biojava structure object. 
- * - * @param bioJavaStruct the bio java struct - */ - private void genSecStruct(Structure bioJavaStruct) { - SecStrucCalc ssp = new SecStrucCalc(); - try{ - ssp.calculate(bioJavaStruct, true); - } - - catch(StructureException e) { - try{ - DSSPParser.fetch(bioJavaStruct.getPDBCode(), bioJavaStruct, true); //download from PDB the DSSP result - } - catch(FileNotFoundException enew){ - } - catch(Exception bige){ - System.out.println(bige); - } - } - - } - - - /** - * Adds the calpha group. - * - * @param cAlphaGroup the c alpha group - * @param props the props - * @param residueNum the residue number Biojava objext - * @param singleLetterCode the single letter code - */ - private void addCalphaGroup(List cAlphaGroup,SecStrucState props, ResidueNumber residueNum) { - // Generate a variable of the residue number - int thisResNum; - if(cAlphaGroup.size()>0){ - calphaGroupsPerChain[chainCounter] = calphaGroupsPerChain[chainCounter]+1; - List calphaAtomInfo = getAtomInfo(cAlphaGroup); - /// Now consider the C-Alpha, phosophate and ligand cases - int calphaHashCode = getHashFromStringList(calphaAtomInfo); - // If we need bioStruct new information - if (hashToCalphaRes.containsKey(calphaHashCode)==false){ - // Make a new group - PDBGroup outGroup = new PDBGroup(); - outGroup.setSingleLetterCode(currentGroup.getChemComp().getOne_letter_code()); - // - outGroup.setChemCompType(currentGroup.getChemComp().getType()); - outGroup.setGroupName(calphaAtomInfo.remove(0)); - outGroup.setAtomInfo(calphaAtomInfo); - // Now get the bond list (lengths, orders and indices) and atom charges - List bondIndices = new ArrayList(); - List bondOrders = new ArrayList(); - List atomCharges = new ArrayList(); - for(Atom a : cAlphaGroup){ - atomCharges.add((int) a.getCharge()); - List thisAtomBonds = a.getBonds(); - if(thisAtomBonds!=null){ - for (Bond b: thisAtomBonds) { - // Get the index - int thisInd = cAlphaGroup.indexOf(a); - int otherInd = cAlphaGroup.indexOf(b.getOther(a)); - 
if(otherInd!=-1){ - if(thisInd cAlphaGroup, Atom a) { - // NOW THE CALPHA / PHOSPHATE / LIGAND STUFF - // GET THE CALPHA - if (a.getName().equals("CA") && a.getElement().toString().equals("C")){ - // Now add the calpha - cAlphaGroup.add(a); - } - // GET THE PHOSPHATE - if(a.getName().equals("P")){ - cAlphaGroup.add(a); - } - // GET THE LIGANDS - if(currentGroup.isWater()==false && currentGroup.getType().name().equals("HETATM")){ - cAlphaGroup.add(a); - } - } - - - /** - * Functon to set the chain id. - * - * @param chainId the chain id - * @param charChainList the char chain list - * @param chainCounter the chain counter - */ - private void setChainId(String chainId, byte[] charChainList, int chainCounter) { - // A char array to store the chars - char[] outChar = new char[4]; - // The lenght of this chain id - int chainIdLen = chainId.length(); - chainId.getChars(0, chainIdLen, outChar, 0); - // Set the bytrarray - chain ids can be up to 4 chars - pad with empty bytes - charChainList[chainCounter*4+0] = (byte) outChar[0]; - if(chainIdLen>1){ - charChainList[chainCounter*4+1] = (byte) outChar[1]; - } - else{ - charChainList[chainCounter*4+1] = (byte) 0; - } - if(chainIdLen>2){ - charChainList[chainCounter*4+2] = (byte) outChar[2]; - } - else{ - charChainList[chainCounter*4+2] = (byte) 0; - } - if(chainIdLen>3){ - charChainList[chainCounter*4+3] = (byte) outChar[3]; - } - else{ - charChainList[chainCounter*4+3] = (byte) 0; - } - } - - - /** - * Add a new calpha / phosophate / ligand atom. 
- * - * @param a the a - * @param props the props - * @param residueNumber the residue number (Biojava group) - */ - private void addCalpha(Atom a, SecStrucState props, ResidueNumber residueNumber) { - calphaStruct.setNumAtoms(calphaStruct.getNumAtoms()+1); - calphaStruct.getCartn_x().add((int) Math.round(a.getX()*COORD_MULT)); - calphaStruct.getCartn_y().add((int) Math.round(a.getY()*COORD_MULT)); - calphaStruct.getCartn_z().add((int) Math.round(a.getZ()*COORD_MULT)); - // Get the residue name - calphaStruct.get_atom_site_auth_seq_id().add(residueNumber.getSeqNum()); - calphaStruct.get_atom_site_label_entity_poly_seq_num().add(residueNumber.getSeqNum()); - // Now set the sec structure - // - if(props==null){ - calphaStruct.getSecStruct().add(codeHolder.getDsspMap().get("NA")); - - } - else{ - calphaStruct.getSecStruct().add(codeHolder.getDsspMap().get(props.getType().name)); - } - - } - - - /** - * Find the atomic charge information. - * - * @param inputAtoms the atoms - * @param pdbGroup the PDBGroup being considered - * @return the atomic charges - */ - private void getCharges(List inputAtoms, PDBGroup pdbGroup) { - for(Atom a: inputAtoms){ - pdbGroup.getAtomCharges().add((int) a.getCharge()); - } - - } - - - /** - * Generate a serializable biotransformation for storing - * in the messagepack. 
- * - * @param bioJavaStruct the Biojava structure - * @param header the header - * @return a map of the bioassembly information that is serializable - */ - private List generateSerializableBioAssembly(Structure bioJavaStruct, PDBHeader header) { - // Here we need to iterate through and get the chain ids and the matrices - Map inputBioAss = header.getBioAssemblies(); - List outMap = new ArrayList(); - - - for (Map.Entry entry : inputBioAss.entrySet()) { - Map matSet = new HashMap(); - BioAssemblyInfo value = entry.getValue(); - // Make a new one of these - BioAssemblyData newValue = new BioAssemblyData(); - outMap.add(newValue); - // Copy across this info - List outTrans = new ArrayList(); - for(BiologicalAssemblyTransformation transform: value.getTransforms()){ - - // Get's the chain id -> this is the asym id - String thisChain = transform.getChainId(); - // Get the current matrix 4d - Matrix4d currentTransMat = transform.getTransformationMatrix(); - double[] outList = new double[16]; - // Iterate over the matrix - for(int i=0; i<4; i++){ - for(int j=0; j<4; j++){ - // Now set this element - outList[i*4+j] = currentTransMat.getElement(i,j); - } - } - if(matSet.containsKey(currentTransMat)){ - // Get it - BioAssemblyTrans bioTransNew = matSet.get(currentTransMat); - bioTransNew.getChainIdList().add(thisChain); - } - else{ - // Create a new one - BioAssemblyTrans bioTransNew = new BioAssemblyTrans(); - bioTransNew.setTransformation(outList); - bioTransNew.getChainIdList().add(thisChain); - matSet.put(currentTransMat, bioTransNew); - } - } - for(BioAssemblyTrans thisTrans: matSet.values()){ - outTrans.add(thisTrans); - } - // Set the transform information - newValue.setTransforms(outTrans); - } - - return outMap; - } - - - /** - * Function to find a hash code from a list of strings. 
- * - * @param strings the strings - * @return the hash from string list - */ - private int getHashFromStringList(List strings){ - int prime = 31; - int result = 1; - for( String s : strings ) - { - result = result * prime + s.hashCode(); - } - return result; - } - - - - - /** - * Get the atomic information from a list of Atoms. - * - * @param atomList the atom list - * @return the atom info - */ - private List getAtomInfo(List atomList){ - int numAtoms = atomList.size(); - int arraySize = numAtoms*2+2; - List outString = new ArrayList(arraySize); - // Get the group name - outString.add(atomList.get(0).getGroup().getPDBName()); - for (Atom a: atomList){ - outString.add(a.getElement().toString()); - outString.add(a.getName()); - } - return outString; - } - - - /** - * Find bonds between groups. - * - * @param atoms the atoms - * @param totAtoms the tot atoms - * @param atomCounter the atom counter - * @return the inter group bond - */ - private void getInterGroupBond(List inputAtomsInThisGroup, List totAtoms, int atomCounter){ - // Get the atoms - for (Atom currentAtom : inputAtomsInThisGroup) { - // Get the atom - List currentAtomBonds = currentAtom.getBonds(); - if(currentAtomBonds!=null){ - for (Bond currentBond: currentAtomBonds) { - Atom other = currentBond.getOther(currentAtom); - int index = inputAtomsInThisGroup.indexOf(other); - int order = currentBond.getBondOrder(); - if (index<0 || index >= totAtoms.size()){ - // Get the index of hte atom ins the total list - int newInd = totAtoms.indexOf(other); - if(newInd > -1){ - // Check if it exists in tot bonds - if(totBonds.indexOf(currentBond)!=-1){ - continue; - } - // Otherwise add it to the list - totBonds.add(currentBond); - // Then add this inter group bond - bioStruct.getInterGroupBondInds().add(newInd); - bioStruct.getInterGroupBondInds().add(totAtoms.indexOf(currentAtom)); - bioStruct.getInterGroupBondOrders().add(order); - } - } - } - } - - } - } - - /** - * Generate lists for the bonds in the group. 
- * - * @param atoms the atoms - * @param outGroup the out group - */ - private void createBondList(List atoms, PDBGroup outGroup) { - int n = atoms.size(); - if (n == 0) { - System.out.println("creating empty bond list"); - } - - // Lists to hold bond indices and orders - List bondList = new ArrayList(); - List bondOrder = new ArrayList(); - - List> totalBondList = new ArrayList>(); - - for (int i = 0; i < n; i++) { - // Get the atom - Atom a = atoms.get(i); - List thisAtomBonds = a.getBonds(); - if(thisAtomBonds!=null){ - for (Bond b: thisAtomBonds) { - Atom other = b.getOther(a); - int index = atoms.indexOf(other); - int order = b.getBondOrder(); - // Now build this to check if the indices - List thisArr = new ArrayList(); - thisArr.add(index); - thisArr.add(i); - Collections.sort(thisArr); - // Now check if we've done it - if(totalBondList.contains(thisArr)){ - continue; - } - if (index != -1) { - // Add the information - bondList.add(index); - bondList.add(i); - bondOrder.add(order); - } - totalBondList.add(thisArr); - } - } - } - outGroup.setBondOrders(bondOrder); - outGroup.setBondIndices(bondList); - } - - - /** - * Adds the atom info. - * - * @param inputAtom the input atom - * @param inputChainId the input chain id - * @param inputResidueId the input residue id - * @param residueNumber the residue number - * @param biojavaChain the input chain - */ - private void addAtomInfo(Atom inputAtom, String inputChainId, String inputResidueId, - ResidueNumber residueNumber, Chain biojavaChain) { - - bioStruct.get_atom_site_id().add(inputAtom.getPDBserial()); - // Atom symbol - Element ele = inputAtom.getElement(); - bioStruct.get_atom_site_symbol().add(ele.toString()); - bioStruct.get_atom_site_asym_id().add(inputChainId); - // identify coordinate records (e.g. ATOM or HETATM). 
- bioStruct.get_atom_site_group_PDB().add(MY_MAP.get(inputAtom.getGroup().getType().toString())); - // bioStruct item is a uniquely identifies for each alternative site for - // bioStruct atom position. - if (inputAtom.getAltLoc()==" ".charAt(0)){ - bioStruct.get_atom_site_label_alt_id().add("?"); - } - else{ - bioStruct.get_atom_site_label_alt_id().add(inputAtom.getAltLoc().toString()); - } - // bioStruct data item is reference to item _struct_asym.id defined in - // category STRUCT_ASYM. bioStruct item identifies an instance of - // particular entity in the deposited coordinate set. For a - // structure determined by crystallographic method bioStruct corresponds - // to a unique identifier within the cyrstallographic asymmetric - // unit. - bioStruct.get_atom_site_label_asym_id().add(biojavaChain.getInternalChainID().toString()); - // bioStruct data item is a reference to item _chem_comp_atom.atom_id - // defined in category CHEM_COMP_ATOM which is stored in the - // Chemical Component Dictionary. bioStruct atom identifier uniquely - // identifies each atom within each chemical component. - bioStruct.get_atom_site_label_atom_id().add(inputAtom.getName()); - // bioStruct data item is a reference to item _chem_comp.id defined in - // category CHEM_COMP. bioStruct item is the primary identifier for - // chemical components which may either be mononers in a polymeric - // entity or complete non-polymer entities. - bioStruct.get_atom_site_label_comp_id().add(inputAtom.getGroup().getPDBName()); - // bioStruct data item is a reference to _entity.id defined in the ENTITY - // category. bioStruct item is used to identify chemically distinct - // portions of the molecular structure (e.g. polymer chains, - // ligands, solvent). - bioStruct.get_atom_site_label_entity_id().add(MY_MAP.get(inputAtom.getGroup().getType().toString())); - // bioStruct data item is a reference to _entity_poly_seq.num defined in - // the ENTITY_POLY_SEQ category. 
bioStruct item is used to maintain the - // correspondence between the chemical sequence of a polymeric - // entity and the sequence information in the coordinate list and in - // may other structural categories. bioStruct identifier has no meaning - // for non-polymer entities. - bioStruct.get_atom_site_label_entity_poly_seq_num().add(residueNumber.getSeqNum()); - // Cartesian coordinate components describing the position of bioStruct - // atom site. - bioStruct.get_atom_site_Cartn_x().add(inputAtom.getX()); - bioStruct.get_atom_site_Cartn_y().add(inputAtom.getY()); - bioStruct.get_atom_site_Cartn_z().add(inputAtom.getZ()); - // Isotropic atomic displacement parameter - bioStruct.get_atom_site_B_iso_or_equiv().add(inputAtom.getTempFactor()); - // The fraction of the atom present at bioStruct atom position. - bioStruct.get_atom_site_occupancy().add(inputAtom.getOccupancy()); - // The net integer charge assigned to bioStruct atom. - } - - - - -} diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/biojavaencoder/package-info.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/biojavaencoder/package-info.java deleted file mode 100644 index cad8d86..0000000 --- a/mmtf-encoder/src/main/java/org/rcsb/mmtf/biojavaencoder/package-info.java +++ /dev/null @@ -1,6 +0,0 @@ -/** - * The core package to encode a structure to MMTF - * @author Anthony Bradley - * - */ -package org.rcsb.mmtf.biojavaencoder; \ No newline at end of file diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/BioBean.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/BioBean.java deleted file mode 100644 index 96d078c..0000000 --- a/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/BioBean.java +++ /dev/null @@ -1,10 +0,0 @@ -package org.rcsb.mmtf.dataholders; - -/** - * An interface to the basic data structure for a data structure - * @author Anthony Bradley - * - */ -public interface BioBean { - -} diff --git 
a/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/BioDataStruct.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/BioDataStruct.java deleted file mode 100644 index ee65e52..0000000 --- a/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/BioDataStruct.java +++ /dev/null @@ -1,56 +0,0 @@ -package org.rcsb.mmtf.dataholders; -import java.lang.reflect.InvocationTargetException; - -import org.apache.commons.beanutils.BeanUtils; - -/** - * Class to store the basic biological data from an MMCIF file. - * - * @author Anthony Bradley - */ -public class BioDataStruct extends BioDataStructBean implements CoreSingleStructure { - - /** - * Instantiates a new bio data struct. - */ - public BioDataStruct() { - } - - - /* (non-Javadoc) - * @see org.rcsb.mmtf.dataholders.CoreSingleStructure#findDataAsBean() - */ - public BioDataStructBean findDataAsBean() { - // Cast this to the pure data - BioDataStructBean newData = new BioDataStructBean(); - try { - BeanUtils.copyProperties(newData, this); - } catch (IllegalAccessException e) { - e.printStackTrace(); - throw new RuntimeException(e); - } catch (InvocationTargetException e) { - e.printStackTrace(); - throw new RuntimeException(e); - } - return newData; - } - - - /* (non-Javadoc) - * @see org.rcsb.mmtf.dataholders.CoreSingleStructure#findNumAtoms() - */ - public int findNumAtoms() { - return get_atom_site_Cartn_x().size(); - } - - /* (non-Javadoc) - * @see org.rcsb.mmtf.dataholders.CoreSingleStructure#findStructureCode() - */ - @Override - public String findStructureCode() { - // Get the PDB code - return this.getPdbCode(); - } - - -} diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/BioDataStructBean.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/BioDataStructBean.java deleted file mode 100644 index 6d699f6..0000000 --- a/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/BioDataStructBean.java +++ /dev/null @@ -1,214 +0,0 @@ -package org.rcsb.mmtf.dataholders; - -import 
java.util.ArrayList; -import java.util.List; - -/** - * A bean to store the information about the protein structure. - * - * @author Anthony Bradley - */ -public class BioDataStructBean extends NoCoordDataStruct implements BioBean { - - - // Coordinate infroramtion - /** The _atom_site_cartn_x. */ - protected List _atom_site_Cartn_x = new ArrayList(); - - /** The _atom_site_ cartn_y. */ - protected List _atom_site_Cartn_y = new ArrayList(); - - /** The _atom_site_ cartn_z. */ - protected List _atom_site_Cartn_z = new ArrayList(); - - /** The _atom_site_ b_iso_or_equiv. */ - // Isotropic atomic displacement parameter - protected List _atom_site_B_iso_or_equiv= new ArrayList(); - - /** The _atom_site_occupancy. */ - // The fraction of the atom present at this atom position_ - protected List _atom_site_occupancy= new ArrayList(); - - /** The secondary structure list. */ - // An array to store the secondary structure data - private List secStruct = new ArrayList(); - - /** The residue order list. */ - // An array to store the sequence of residues - private List resOrder = new ArrayList(); - - /** The inter-group bond indicess. */ - // Arrays to store the indices and bond orders of inter residue bonds - private List interGroupBondInds = new ArrayList(); - - /** The inter-group bond orders. */ - private List interGroupBondOrders = new ArrayList(); - - - /** - * Gets the _atom_site_id. - * - * @return the _atom_site_id - */ - public List get_atom_site_id() { - return _atom_site_id; - } - - /** - * Sets the _atom_site_id. - * - * @param _atom_site_id the new _atom_site_id - */ - public void set_atom_site_id(List _atom_site_id) { - this._atom_site_id = _atom_site_id; - } - - /** The _atom_site_id. */ - private List _atom_site_id = new ArrayList(); - - /** - * Gets the _atom_site_ cartn_x. - * - * @return the _atom_site_ cartn_x - */ - public List get_atom_site_Cartn_x() { - return _atom_site_Cartn_x; - } - - /** - * Sets the _atom_site_ cartn_x. 
- * - * @param _atom_site_Cartn_x the new _atom_site_ cartn_x - */ - public void set_atom_site_Cartn_x(ArrayList _atom_site_Cartn_x) { - this._atom_site_Cartn_x = _atom_site_Cartn_x; - } - - /** - * Gets the _atom_site_ cartn_y. - * - * @return the _atom_site_ cartn_y - */ - public List get_atom_site_Cartn_y() { - return _atom_site_Cartn_y; - } - - /** - * Sets the _atom_site_ cartn_y. - * - * @param _atom_site_Cartn_y the new _atom_site_ cartn_y - */ - public void set_atom_site_Cartn_y(ArrayList _atom_site_Cartn_y) { - this._atom_site_Cartn_y = _atom_site_Cartn_y; - } - - /** - * Gets the _atom_site_ cartn_z. - * - * @return the _atom_site_ cartn_z - */ - public List get_atom_site_Cartn_z() { - return _atom_site_Cartn_z; - } - - /** - * Sets the _atom_site_ cartn_z. - * - * @param _atom_site_Cartn_z the new _atom_site_ cartn_z - */ - public void set_atom_site_Cartn_z(ArrayList _atom_site_Cartn_z) { - this._atom_site_Cartn_z = _atom_site_Cartn_z; - } - - /** - * Gets the _atom_site_ b_iso_or_equiv. - * - * @return the _atom_site_ b_iso_or_equiv - */ - public List get_atom_site_B_iso_or_equiv() { - return _atom_site_B_iso_or_equiv; - } - - /** - * Sets the _atom_site_ b_iso_or_equiv. - * - * @param _atom_site_B_iso_or_equiv the new _atom_site_ b_iso_or_equiv - */ - public void set_atom_site_B_iso_or_equiv(ArrayList _atom_site_B_iso_or_equiv) { - this._atom_site_B_iso_or_equiv = _atom_site_B_iso_or_equiv; - } - - /** - * Gets the _atom_site_occupancy. - * - * @return the _atom_site_occupancy - */ - public List get_atom_site_occupancy() { - return _atom_site_occupancy; - } - - /** - * Sets the _atom_site_occupancy. 
- * - * @param _atom_site_occupancy the new _atom_site_occupancy - */ - public void set_atom_site_occupancy(ArrayList _atom_site_occupancy) { - this._atom_site_occupancy = _atom_site_occupancy; - } - - /* (non-Javadoc) - * @see org.rcsb.mmtf.dataholders.NoCoordDataStruct#getResOrder() - */ - public List getResOrder() { - return resOrder; - } - - /* (non-Javadoc) - * @see org.rcsb.mmtf.dataholders.NoCoordDataStruct#setResOrder(java.util.List) - */ - public void setResOrder(List resOrder) { - this.resOrder = resOrder; - } - - /* (non-Javadoc) - * @see org.rcsb.mmtf.dataholders.NoCoordDataStruct#getSecStruct() - */ - public List getSecStruct() { - return secStruct; - } - - /* (non-Javadoc) - * @see org.rcsb.mmtf.dataholders.NoCoordDataStruct#setSecStruct(java.util.List) - */ - public void setSecStruct(List secStruct) { - this.secStruct = secStruct; - } - - /* (non-Javadoc) - * @see org.rcsb.mmtf.dataholders.NoCoordDataStruct#getInterGroupBondOrders() - */ - public List getInterGroupBondOrders() { - return interGroupBondOrders; - } - - /* (non-Javadoc) - * @see org.rcsb.mmtf.dataholders.NoCoordDataStruct#setInterGroupBondOrders(java.util.List) - */ - public void setInterGroupBondOrders(List interGroupBondOrders) { - this.interGroupBondOrders = interGroupBondOrders; - } - - /* (non-Javadoc) - * @see org.rcsb.mmtf.dataholders.NoCoordDataStruct#getInterGroupBondInds() - */ - public List getInterGroupBondInds() { - return interGroupBondInds; - } - - /* (non-Javadoc) - * @see org.rcsb.mmtf.dataholders.NoCoordDataStruct#setInterGroupBondInds(java.util.List) - */ - public void setInterGroupBondInds(List interGroupBondInds) { - this.interGroupBondInds = interGroupBondInds; - } -} diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/CalphaBean.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/CalphaBean.java deleted file mode 100644 index 738f80a..0000000 --- a/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/CalphaBean.java +++ /dev/null @@ -1,255 
+0,0 @@ -package org.rcsb.mmtf.dataholders; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.rcsb.mmtf.dataholders.PDBGroup; - -/** - * A bean to store C-Alpha / DNA backbone and ligand information. - * - * @author Anthony Bradley - */ -public class CalphaBean { - - /** The number of bonds. */ - // Two integers to store the number of bonds and numeb - private int numBonds = 0; - - /** The number of atoms. */ - private int numAtoms = 0; - - /** The map of residue codes to groups. */ - // Convert this information - private Map groupMap = new HashMap(); - - /** The _atom_site_auth_seq_id. */ - // Delta and run length - private List _atom_site_auth_seq_id = new ArrayList(); - - /** The _atom_site_label_entity_poly_seq_num. */ - // Delta and run length encoded - private List _atom_site_label_entity_poly_seq_num = new ArrayList(); - - /** The residue order. */ - private List resOrder = new ArrayList(); - - /** The X coords. */ - // The list of c-alpha coords - private List cartn_x = new ArrayList(); - - /** The Y coords. */ - private List cartn_y = new ArrayList(); - - /** The Z coords. */ - private List cartn_z = new ArrayList(); - - /** The secondary structure list. */ - //secondary structure (on a per reisude basis - private List secStruct = new ArrayList(); - - /** The number of groups per chain. */ - // A list of integers indicating the number of groups in a chain - private int[] groupsPerChain; - - /** - * Gets the cartn_x. - * - * @return the cartn_x - */ - // Now the getters and setters - public List getCartn_x() { - return cartn_x; - } - - /** - * Sets the cartn_x. - * - * @param cartn_x the new cartn_x - */ - public void setCartn_x(List cartn_x) { - this.cartn_x = cartn_x; - } - - /** - * Gets the cartn_y. - * - * @return the cartn_y - */ - public List getCartn_y() { - return cartn_y; - } - - /** - * Sets the cartn_y. 
- * - * @param cartn_y the new cartn_y - */ - public void setCartn_y(List cartn_y) { - this.cartn_y = cartn_y; - } - - /** - * Gets the cartn_z. - * - * @return the cartn_z - */ - public List getCartn_z() { - return cartn_z; - } - - /** - * Sets the cartn_z. - * - * @param cartn_z the new cartn_z - */ - public void setCartn_z(List cartn_z) { - this.cartn_z = cartn_z; - } - - /** - * Gets the sec struct. - * - * @return the sec struct - */ - public List getSecStruct() { - return secStruct; - } - - /** - * Sets the sec struct. - * - * @param secStruct the new sec struct - */ - public void setSecStruct(List secStruct) { - this.secStruct = secStruct; - } - - /** - * Gets the _atom_site_auth_seq_id. - * - * @return the _atom_site_auth_seq_id - */ - public List get_atom_site_auth_seq_id() { - return _atom_site_auth_seq_id; - } - - /** - * Sets the _atom_site_auth_seq_id. - * - * @param _atom_site_auth_seq_id the new _atom_site_auth_seq_id - */ - public void set_atom_site_auth_seq_id(List _atom_site_auth_seq_id) { - this._atom_site_auth_seq_id = _atom_site_auth_seq_id; - } - - /** - * Gets the _atom_site_label_entity_poly_seq_num. - * - * @return the _atom_site_label_entity_poly_seq_num - */ - public List get_atom_site_label_entity_poly_seq_num() { - return _atom_site_label_entity_poly_seq_num; - } - - /** - * Sets the _atom_site_label_entity_poly_seq_num. - * - * @param _atom_site_label_entity_poly_seq_num the new _atom_site_label_entity_poly_seq_num - */ - public void set_atom_site_label_entity_poly_seq_num(List _atom_site_label_entity_poly_seq_num) { - this._atom_site_label_entity_poly_seq_num = _atom_site_label_entity_poly_seq_num; - } - - /** - * Gets the res order. - * - * @return the res order - */ - public List getResOrder() { - return resOrder; - } - - /** - * Sets the res order. - * - * @param resOrder the new res order - */ - public void setResOrder(List resOrder) { - this.resOrder = resOrder; - } - - /** - * Gets the group map. 
- * - * @return the group map - */ - public Map getGroupMap() { - return groupMap; - } - - /** - * Sets the group map. - * - * @param groupMap the group map - */ - public void setGroupMap(Map groupMap) { - this.groupMap = groupMap; - } - - /** - * Gets the groups per chain. - * - * @return the groups per chain - */ - public int[] getGroupsPerChain() { - return groupsPerChain; - } - - /** - * Sets the groups per chain. - * - * @param groupsPerChain the new groups per chain - */ - public void setGroupsPerChain(int[] groupsPerChain) { - this.groupsPerChain = groupsPerChain; - } - - /** - * Gets the num atoms. - * - * @return the num atoms - */ - public int getNumAtoms() { - return numAtoms; - } - - /** - * Sets the num atoms. - * - * @param numAtoms the new num atoms - */ - public void setNumAtoms(int numAtoms) { - this.numAtoms = numAtoms; - } - - /** - * Gets the num bonds. - * - * @return the num bonds - */ - public int getNumBonds() { - return numBonds; - } - - /** - * Sets the num bonds. - * - * @param numBonds the new num bonds - */ - public void setNumBonds(int numBonds) { - this.numBonds = numBonds; - } -} diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/CalphaDistBean.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/CalphaDistBean.java deleted file mode 100644 index 7dc085b..0000000 --- a/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/CalphaDistBean.java +++ /dev/null @@ -1,519 +0,0 @@ -package org.rcsb.mmtf.dataholders; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.rcsb.mmtf.dataholders.BioAssemblyData; -import org.rcsb.mmtf.dataholders.PDBGroup; - -/** - * A bean to store C-Alpha / DNA backbone and ligand information - in a format that can - * then be efficiently sent as messagepack. - * - * @author abradley - */ -public class CalphaDistBean { - - /** The mmtf version. 
*/ - // The version of the format - private String mmtfVersion = "0.1"; - - /** The mmtf producer. */ - // The producer - private String mmtfProducer; - - /** The num bonds. */ - // The number of bonds - private int numBonds; - - /** The pdb id. */ - // The PDBCode - private String pdbId; - - /** The title. */ - // The title of the structure - private String title; - - /** The space group. */ - // String for the space group - private String spaceGroup; - - /** The unit cell. */ - // The unit cell information - private float[] unitCell; - - /** The bio assembly. */ - // A map of Bioassembly -> new class so serializable - private List bioAssembly = new ArrayList(); - - /** The group map. */ - // The list of sequence information - private Map groupMap = new HashMap(); - - /** The group num list. */ - // Delta and run length - private byte[] groupIdList; - - /** The group type list. */ - private byte[] groupTypeList; - - /** The sec struct list. */ - private byte[] secStructList; - - /** The x coord big. */ - // For the big arrays split into two -> one of 32 bit ints, one of 16 - private byte[] xCoordBig; - - /** The y coord big. */ - private byte[] yCoordBig; - - /** The z coord big. */ - private byte[] zCoordBig; - - /** The x coord small. */ - // Now for the small ints -> 16 bit - private byte[] xCoordSmall; - - /** The y coord small. */ - private byte[] yCoordSmall; - - /** The z coord small. */ - private byte[] zCoordSmall; - // Add this header info - /** The num atoms. */ - // Total data for memory allocation - private int numAtoms; - - /** The chains per model. */ - // Add this to store the model information - private int[] chainsPerModel; - - /** The chain list. */ - // List to store the chainids - private byte[] chainIdList; - - /** The groups per chain. */ - // List to store the number of groups per chain - private int[] groupsPerChain; - - /** The one letter amin seq. 
*/ - // Store the one letter amino acid sequence of the protein - private char[] oneLetterAminSeq; - - /** - * Gets the pdb id. - * - * @return the pdb id - */ - public String getPdbId() { - return pdbId; - } - - /** - * Sets the pdb id. - * - * @param pdbId the new pdb id - */ - public void setPdbId(String pdbId) { - this.pdbId = pdbId; - } - - /** - * Gets the title. - * - * @return the title - */ - public String getTitle() { - return title; - } - - /** - * Sets the title. - * - * @param title the new title - */ - public void setTitle(String title) { - this.title = title; - } - - /** - * Gets the space group. - * - * @return the space group - */ - public String getSpaceGroup() { - return spaceGroup; - } - - /** - * Sets the space group. - * - * @param spaceGroup the new space group - */ - public void setSpaceGroup(String spaceGroup) { - this.spaceGroup = spaceGroup; - } - - /** - * Gets the unit cell. - * - * @return the unit cell - */ - public float[] getUnitCell() { - return unitCell; - } - - /** - * Sets the unit cell. - * - * @param unitCell the new unit cell - */ - public void setUnitCell(float[] inputUnitCell) { - this.unitCell = inputUnitCell; - } - - /** - * Gets the bio assembly. - * - * @return the bio assembly - */ - public List getBioAssembly() { - return bioAssembly; - } - - /** - * Sets the bio assembly. - * - * @param bioAssembly the bio assembly - */ - public void setBioAssembly(List bioAssembly) { - this.bioAssembly = bioAssembly; - } - - /** - * Gets the num atoms. - * - * @return the num atoms - */ - public int getNumAtoms() { - return numAtoms; - } - - /** - * Sets the num atoms. - * - * @param numAtoms the new num atoms - */ - public void setNumAtoms(int numAtoms) { - this.numAtoms = numAtoms; - } - - /** - * Gets the chains per model. - * - * @return the chains per model - */ - public int[] getChainsPerModel() { - return chainsPerModel; - } - - /** - * Sets the chains per model. 
- * - * @param chainsPerModel the new chains per model - */ - public void setChainsPerModel(int[] chainsPerModel) { - this.chainsPerModel = chainsPerModel; - } - - /** - * Gets the chain list. - * - * @return the chain list - */ - public byte[] getChainIdList() { - return chainIdList; - } - - /** - * Sets the chain list. - * - * @param chainList the new chain list - */ - public void setChainIdList(byte[] chainList) { - this.chainIdList = chainList; - } - - /** - * Gets the groups per chain. - * - * @return the groups per chain - */ - public int[] getGroupsPerChain() { - return groupsPerChain; - } - - /** - * Sets the groups per chain. - * - * @param groupsPerChain the new groups per chain - */ - public void setGroupsPerChain(int[] groupsPerChain) { - this.groupsPerChain = groupsPerChain; - } - - /** - * Gets the group map. - * - * @return the group map - */ - public Map getGroupMap() { - return groupMap; - } - - /** - * Sets the group map. - * - * @param groupMap the group map - */ - public void setGroupMap(Map groupMap) { - this.groupMap = groupMap; - } - - /** - * Gets the group num list. - * - * @return the group num list - */ - public byte[] getGroupIdList() { - return groupIdList; - } - - /** - * Sets the group num list. - * - * @param _atom_site_auth_seq_id the new group num list - */ - public void setGroupIdList(byte[] _atom_site_auth_seq_id) { - this.groupIdList = _atom_site_auth_seq_id; - } - - /** - * Gets the group type list. - * - * @return the group type list - */ - public byte[] getGroupTypeList() { - return groupTypeList; - } - - /** - * Sets the group type list. - * - * @param resOrder the new group type list - */ - public void setGroupTypeList(byte[] resOrder) { - this.groupTypeList = resOrder; - } - - /** - * Gets the sec struct list. - * - * @return the sec struct list - */ - public byte[] getSecStructList() { - return secStructList; - } - - /** - * Sets the sec struct list. 
- * - * @param secStruct the new sec struct list - */ - public void setSecStructList(byte[] secStruct) { - this.secStructList = secStruct; - } - - /** - * Gets the x coord big. - * - * @return the x coord big - */ - public byte[] getxCoordBig() { - return xCoordBig; - } - - /** - * Sets the x coord big. - * - * @param cartn_x_big the new x coord big - */ - public void setxCoordBig(byte[] cartn_x_big) { - this.xCoordBig = cartn_x_big; - } - - /** - * Gets the y coord big. - * - * @return the y coord big - */ - public byte[] getyCoordBig() { - return yCoordBig; - } - - /** - * Sets the y coord big. - * - * @param cartn_y_big the new y coord big - */ - public void setyCoordBig(byte[] cartn_y_big) { - this.yCoordBig = cartn_y_big; - } - - /** - * Gets the z coord big. - * - * @return the z coord big - */ - public byte[] getzCoordBig() { - return zCoordBig; - } - - /** - * Sets the z coord big. - * - * @param cartn_z_big the new z coord big - */ - public void setzCoordBig(byte[] cartn_z_big) { - this.zCoordBig = cartn_z_big; - } - - /** - * Gets the x coord small. - * - * @return the x coord small - */ - public byte[] getxCoordSmall() { - return xCoordSmall; - } - - /** - * Sets the x coord small. - * - * @param cartn_x_small the new x coord small - */ - public void setxCoordSmall(byte[] cartn_x_small) { - this.xCoordSmall = cartn_x_small; - } - - /** - * Gets the y coord small. - * - * @return the y coord small - */ - public byte[] getyCoordSmall() { - return yCoordSmall; - } - - /** - * Sets the y coord small. - * - * @param cartn_y_small the new y coord small - */ - public void setyCoordSmall(byte[] cartn_y_small) { - this.yCoordSmall = cartn_y_small; - } - - /** - * Gets the z coord small. - * - * @return the z coord small - */ - public byte[] getzCoordSmall() { - return zCoordSmall; - } - - /** - * Sets the z coord small. 
- * - * @param cartn_z_small the new z coord small - */ - public void setzCoordSmall(byte[] cartn_z_small) { - this.zCoordSmall = cartn_z_small; - } - - /** - * Gets the mmtf version. - * - * @return the mmtf version - */ - public String getMmtfVersion() { - return mmtfVersion; - } - - /** - * Sets the mmtf version. - * - * @param mmtfVersion the new mmtf version - */ - public void setMmtfVersion(String mmtfVersion) { - this.mmtfVersion = mmtfVersion; - } - - /** - * Gets the mmtf producer. - * - * @return the mmtf producer - */ - public String getMmtfProducer() { - return mmtfProducer; - } - - /** - * Sets the mmtf producer. - * - * @param mmtfProducer the new mmtf producer - */ - public void setMmtfProducer(String mmtfProducer) { - this.mmtfProducer = mmtfProducer; - } - - /** - * Gets the num bonds. - * - * @return the num bonds - */ - public int getNumBonds() { - return numBonds; - } - - /** - * Sets the num bonds. - * - * @param numBonds the new num bonds - */ - public void setNumBonds(int numBonds) { - this.numBonds = numBonds; - } - - /** - * Gets the one letter amin seq. - * - * @return the one letter amin seq - */ - public char[] getOneLetterAminSeq() { - return oneLetterAminSeq; - } - - /** - * Sets the one letter amin seq. - * - * @param oneLetterAminSeq the new one letter amin seq - */ - public void setOneLetterAminSeq(char[] oneLetterAminSeq) { - this.oneLetterAminSeq = oneLetterAminSeq; - } - -} diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/CoreDataStruct.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/CoreDataStruct.java deleted file mode 100644 index 9aadaf2..0000000 --- a/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/CoreDataStruct.java +++ /dev/null @@ -1,54 +0,0 @@ -package org.rcsb.mmtf.dataholders; - - -/** - * A basic class to hold the basic information about a protein. - * - * @author Anthony Bradley - */ -public class CoreDataStruct { - - /** The pdb code. 
*/ - // The core data in all modules - protected String pdbCode = null; - - /** - * Gets the pdb code. - * - * @return the pdb code - */ - public String getPdbCode() { - return pdbCode; - } - - /** - * Sets the pdb code. - * - * @param pdbCode the new pdb code - */ - public void setPdbCode(String pdbCode) { - this.pdbCode = pdbCode; - } - - /** The number of models. */ - private int numModels = 0; - - /** - * Gets the num models. - * - * @return the num models - */ - public int getNumModels() { - return numModels; - } - - /** - * Sets the num models. - * - * @param numModels the new num models - */ - public void setNumModels(int numModels) { - this.numModels = numModels; - } - -} diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/CoreSingleStructure.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/CoreSingleStructure.java deleted file mode 100644 index 412afb3..0000000 --- a/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/CoreSingleStructure.java +++ /dev/null @@ -1,33 +0,0 @@ -package org.rcsb.mmtf.dataholders; - -import java.lang.reflect.InvocationTargetException; - -/** - * An interface required for core single structure types - * @author abradley - * - */ -public interface CoreSingleStructure { - // Interface to deal with core structural data - // Return the data as a HashMap - - /** - * Function to return the data as a bean - * @return the data as a bean - * @throws IllegalAccessException - * @throws InvocationTargetException - */ - BioBean findDataAsBean(); - - /** - * Function to return the structure code - * @return - */ - String findStructureCode(); - - /** - * Function to return the number of atoms - * @return - */ - int findNumAtoms(); -} diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/HeaderBean.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/HeaderBean.java deleted file mode 100644 index c55e851..0000000 --- a/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/HeaderBean.java +++ /dev/null 
@@ -1,631 +0,0 @@ -package org.rcsb.mmtf.dataholders; - -import java.util.Date; -import java.util.List; - -import org.rcsb.mmtf.dataholders.BioAssemblyData; - -/** - * Class to store header information in a bean that can be converted to a messagepack. - * - * @author abradley - */ -public class HeaderBean { - - /** The number of chains. */ - // The number of chains - private int numChains; - - /** The number of atoms. */ - // The number of atoms - private int numAtoms; - - /** The number of bonds. */ - // The number of bonds - private int numBonds; - - /** The id code. */ - // The PDBCode - private String idCode; - - /** The title. */ - // The title of the structure - private String title; - - /** The description. */ - // PDB HEADER INFORMATION - private String description; - - /** The classification. */ - private String classification; - - /** The depositon date. */ - private Date depDate; - - /** The modified date. */ - private Date modDate; - - /** The resolution.-1.0 if not available */ - private float resolution; - - /** The r free. -1.0 if not available*/ - private float rFree; - - /** The r work. -1.0 if not available */ - private float rWork; - - /** The doi. */ - private String doi; - - /** The authors. */ - private String authors; - - /** The SEQRES sequence, per asym chain. */ - private List sequence; - - /** The SeqRes group ids. */ - private List seqResGroupIds; - - /** The experimental method(s). */ - private List experimentalMethods; - - /** The chains per model. */ - // Add this to store the model information - private int[] chainsPerModel; - - /** The asym chains per model. */ - private int[] asymChainsPerModel; - - /** The chain list. */ - // List to store the chainids - private byte[] chainList; - - /** The asym chain list. */ - // List to store the chainids - private byte[] asymChainList; - - /** The groups per chain. */ - // List to store the number of groups per chain - private int[] groupsPerChain; - - /** The asym groups per chain. 
*/ - private int[] asymGroupsPerChain; - - /** The space group. */ - // LOTS OF OTHER STUFF HERE -> TBD - private String spaceGroup; - - /** The list of entities in this structure. */ - private Entity[] entityList; - - /** The pdbx_description of a given chain (entity) */ - private String[] chainDescriptionList; - - /** The type of a given chain (entity) */ - private String[] chainTypeList; - - /** The unit cell. */ - private float[] unitCell; - - /** The bio assembly. */ - private List bioAssembly; - - public HeaderBean() { - resolution = (float) -1.0; - - rFree = (float) -1.0; - - rWork = (float) -1.0; - } - - // Getters and setters - /** - * Gets the number of chains. - * - * @return the number of chains - */ - public int getNumChains() { - return numChains; - } - - /** - * Sets the number of chains. - * - * @param numChains the new number of chains - */ - public void setNumChains(int numChains) { - this.numChains = numChains; - } - - /** - * Gets the number of atoms. - * - * @return the number of atoms - */ - public int getNumAtoms() { - return numAtoms; - } - - /** - * Sets the number of atoms. - * - * @param numAtoms the new number of atoms - */ - public void setNumAtoms(int numAtoms) { - this.numAtoms = numAtoms; - } - - /** - * Gets the pdb code. - * - * @return the pdb code - */ - public String getPdbCode() { - return idCode; - } - - /** - * Sets the pdb code. - * - * @param pdbCode the new pdb code - */ - public void setPdbCode(String pdbCode) { - this.idCode = pdbCode; - } - - /** - * Gets the space group. - * - * @return the space group - */ - public String getSpaceGroup() { - return spaceGroup; - } - - /** - * Sets the space group. - * - * @param spaceGroup the new space group - */ - public void setSpaceGroup(String spaceGroup) { - this.spaceGroup = spaceGroup; - } - - /** - * Gets the unit cell. - * - * @return the unit cell - */ - public float[] getUnitCell() { - return unitCell; - } - - /** - * Sets the unit cell. 
- * - * @param unitCell the new unit cell - */ - public void setUnitCell(float[] inputUnitCell) { - this.unitCell = inputUnitCell; - } - - /** - * Gets the chains per model. - * - * @return the chains per model - */ - public int[] getChainsPerModel() { - return chainsPerModel; - } - - /** - * Sets the chains per model. - * - * @param chainsPerModel the new chains per model - */ - public void setChainsPerModel(int[] chainsPerModel) { - this.chainsPerModel = chainsPerModel; - } - - /** - * Gets the groups per chain. - * - * @return the groups per chain - */ - public int[] getGroupsPerChain() { - return groupsPerChain; - } - - /** - * Sets the groups per chain. - * - * @param groupsPerChain the new groups per chain - */ - public void setGroupsPerChain(int[] groupsPerChain) { - this.groupsPerChain = groupsPerChain; - } - - /** - * Gets the id code. - * - * @return the id code - */ - public String getIdCode() { - return idCode; - } - - /** - * Sets the id code. - * - * @param idCode the new id code - */ - public void setIdCode(String idCode) { - this.idCode = idCode; - } - - /** - * Gets the description. - * - * @return the description - */ - public String getDescription() { - return description; - } - - /** - * Sets the description. - * - * @param description the new description - */ - public void setDescription(String description) { - this.description = description; - } - - /** - * Gets the classification. - * - * @return the classification - */ - public String getClassification() { - return classification; - } - - /** - * Sets the classification. - * - * @param classification the new classification - */ - public void setClassification(String classification) { - this.classification = classification; - } - - /** - * Gets the dep date. - * - * @return the dep date - */ - public Date getDepDate() { - return depDate; - } - - /** - * Sets the dep date. 
- * - * @param depDate the new dep date - */ - public void setDepDate(Date depDate) { - this.depDate = depDate; - } - - /** - * Gets the mod date. - * - * @return the mod date - */ - public Date getModDate() { - return modDate; - } - - /** - * Sets the mod date. - * - * @param modDate the new mod date - */ - public void setModDate(Date modDate) { - this.modDate = modDate; - } - - /** - * Gets the resolution. - * - * @return the resolution - */ - public float getResolution() { - return resolution; - } - - /** - * Sets the resolution. - * - * @param resolution the new resolution - */ - public void setResolution(float resolution) { - this.resolution = resolution; - } - - /** - * Gets the r free. - * - * @return the r free - */ - public float getrFree() { - return rFree; - } - - /** - * Sets the r free. - * - * @param rFree the new r free - */ - public void setrFree(float rFree) { - this.rFree = rFree; - } - - /** - * Gets the doi. - * - * @return the doi - */ - public String getDoi() { - return doi; - } - - /** - * Sets the doi. - * - * @param doi the new doi - */ - public void setDoi(String doi) { - this.doi = doi; - } - - /** - * Gets the authors. - * - * @return the authors - */ - public String getAuthors() { - return authors; - } - - /** - * Sets the authors. - * - * @param authors the new authors - */ - public void setAuthors(String authors) { - this.authors = authors; - } - - /** - * Gets the bio assembly. - * - * @return the bio assembly - */ - public List getBioAssembly() { - return bioAssembly; - } - - /** - * Sets the bio assembly. - * - * @param bioAssembly the bio assembly - */ - public void setBioAssembly(List inputBioAssembly) { - this.bioAssembly = inputBioAssembly; - } - - /** - * Gets the chain list. - * - * @return the chain list - */ - public byte[] getChainList() { - return chainList; - } - - /** - * Sets the chain list. 
- * - * @param chainList the new chain list - */ - public void setChainList(byte[] chainList) { - this.chainList = chainList; - } - - /** - * Gets the asym chain list. - * - * @return the asym chain list - */ - public byte[] getAsymChainList() { - return asymChainList; - } - - /** - * Sets the asym chain list. - * - * @param asymChainList the new asym chain list - */ - public void setAsymChainList(byte[] asymChainList) { - this.asymChainList = asymChainList; - } - - /** - * Gets the title. - * - * @return the title - */ - public String getTitle() { - return title; - } - - /** - * Sets the title. - * - * @param title the new title - */ - public void setTitle(String title) { - this.title = title; - } - - /** - * Gets the sequence. - * - * @return the sequence - */ - public List getSequence() { - return sequence; - } - - /** - * Sets the sequence. - * - * @param sequence the new sequence - */ - public void setSequence(List sequence) { - this.sequence = sequence; - } - - /** - * Gets the number of bonds. - * - * @return the number of bonds - */ - public int getNumBonds() { - return numBonds; - } - - /** - * Sets the number of bonds. - * - * @param numBonds the new number of bonds - */ - public void setNumBonds(int numBonds) { - this.numBonds = numBonds; - } - - /** - * Gets the asym chains per model. - * - * @return the asym chains per model - */ - public int[] getAsymChainsPerModel() { - return asymChainsPerModel; - } - - /** - * Sets the asym chains per model. - * - * @param asymChainsPerModel the new asym chains per model - */ - public void setAsymChainsPerModel(int[] asymChainsPerModel) { - this.asymChainsPerModel = asymChainsPerModel; - } - - /** - * Gets the asym groups per chain. - * - * @return the asym groups per chain - */ - public int[] getAsymGroupsPerChain() { - return asymGroupsPerChain; - } - - /** - * Sets the asym groups per chain. 
- * - * @param asymGroupsPerChain the new asym groups per chain - */ - public void setAsymGroupsPerChain(int[] asymGroupsPerChain) { - this.asymGroupsPerChain = asymGroupsPerChain; - } - - /** - * @return the seqResGroupIds - */ - public List getSeqResGroupIds() { - return seqResGroupIds; - } - - /** - * @param seqResGroupIds the seqResGroupIds to set - */ - public void setSeqResGroupIds(List seqResGroupIds) { - this.seqResGroupIds = seqResGroupIds; - } - - /** - * @return the experimental methods - */ - public List getExperimentalMethods() { - return experimentalMethods; - } - - /** - * @param experimentalMethods the experimental methods to set - */ - public void setExperimentalMethods(List experimentalMethods) { - this.experimentalMethods = experimentalMethods; - } - - /** - * @return the rWork - */ - public float getrWork() { - return rWork; - } - - /** - * @param rWork the rWork to set - */ - public void setrWork(float rWork) { - this.rWork = rWork; - } - - /** - * @return the chainDescriptionList - */ - public String[] getChainDescriptionList() { - return chainDescriptionList; - } - - /** - * @param chainDescriptionList the chainDescriptionList to set - */ - public void setChainDescriptionList(String[] chainDescriptionList) { - this.chainDescriptionList = chainDescriptionList; - } - - /** - * @return the chainTypeList - */ - public String[] getChainTypeList() { - return chainTypeList; - } - - /** - * @param chainTypeList the chainTypeList to set - */ - public void setChainTypeList(String[] chainTypeList) { - this.chainTypeList = chainTypeList; - } - - public Entity[] getEntityList() { - return entityList; - } - - public void setEntityList(Entity[] entityList) { - this.entityList = entityList; - } -} diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/NoCoordDataStruct.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/NoCoordDataStruct.java deleted file mode 100644 index 001a573..0000000 --- 
a/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/NoCoordDataStruct.java +++ /dev/null @@ -1,408 +0,0 @@ -package org.rcsb.mmtf.dataholders; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.rcsb.mmtf.dataholders.PDBGroup; - -/** - * A bean to store information about non coordinate information - fields are related strongly - * to the mmCif field they come from. - * - * @author anthony - */ -public class NoCoordDataStruct extends CoreDataStruct { - - /** The _atom_site_symbol. */ - // The symbol of thae atom - private List _atom_site_symbol = new ArrayList(); - // This data item is an author defined alternative to the value of - // _atom_site_label_asym_id_ This item holds the PDB chain - /** The _atom_site_asym_id. */ - // identifier_ - private List _atom_site_asym_id = new ArrayList(); - // This data item is an author defined alternative to the value of - // _atom_site_label_atom_id_ This item holds the PDB atom name_ -// private List _atom_site_auth_atom_id = new ArrayList(); - // This data item is an author defined alternative to the value of - // _atom_site_label_comp_id_ This item holds the PDB 3-letter-code - // residue names -// private List _atom_site_auth_comp_id = new ArrayList(); - // This data item is an author defined alternative to the value of - /** The _atom_site_auth_seq_id. */ - // _atom_site_label_seq_id_ This item holds the PDB residue number_ - private List _atom_site_auth_seq_id = new ArrayList(); - - /** The _atom_site_label_entity_poly_seq_num. */ - private List _atom_site_label_entity_poly_seq_num= new ArrayList(); - - /** The _atom_site_pdbx_ pd b_ins_code. 
*/ - // This data item corresponds to the PDB insertion code_ - private List _atom_site_pdbx_PDB_ins_code = new ArrayList(); - // This data item identifies the model number in an ensemble of - // coordinate data_ -// private List _atom_site_pdbx_PDB_model_num = new ArrayList(); - // This data item is a place holder for the tags used by the PDB to - /** The _atom_site_group_ pdb. */ - // identify coordinate records (e_g_ ATOM or HETATM)_ - private List _atom_site_group_PDB = new ArrayList(); - // This item is a uniquely identifies for each alternative site for - /** The _atom_site_label_alt_id. */ - // this atom position_ - private List _atom_site_label_alt_id= new ArrayList(); - - /** The _atom_site_label_asym_id. */ - // This data item is reference to item _struct_asym_id defined in - private List _atom_site_label_asym_id= new ArrayList(); - // This data item is a reference to item _chem_comp_atom_atom_id - // defined in category CHEM_COMP_ATOM which is stored in the - // Chemical Component Dictionary_ This atom identifier uniquely - /** The _atom_site_label_atom_id. */ - // identifies each atom within each chemical component_ - private List _atom_site_label_atom_id= new ArrayList(); - // This data item is a reference to item _chem_comp_id defined in - // category CHEM_COMP_ This item is the primary identifier for - // chemical components which may either be mononers in a polymeric - /** The _atom_site_label_comp_id. 
*/ - // entity or complete non-polymer entities_ - private List _atom_site_label_comp_id= new ArrayList(); - // This data item is a reference to _entity_id defined in the ENTITY - // category_ This item is used to identify chemically distinct - // portions of the molecular structure (e_g_ polymer chains, - // ligands, solvent)_ - // This data item is a reference to _entity_poly_seq_num defined in - // the ENTITY_POLY_SEQ category_ This item is used to maintain the - // correspondence between the chemical sequence of a polymeric - // entity and the sequence information in the coordinate list and in - // may other structural categories_ This identifier has no meaning - /** The _atom_site_label_entity_id. */ - // for non-polymer entities_ - private List _atom_site_label_entity_id= new ArrayList(); - - /** The inter group bond inds. */ - // The indices and orders of bonds between groups - private List interGroupBondInds = new ArrayList(); - - /** The inter group bond orders. */ - private List interGroupBondOrders = new ArrayList(); - - /** The sec struct. */ - private List secStruct = new ArrayList(); - - /** The res order. */ - private List resOrder = new ArrayList(); - - /** The group list. */ - private List> groupList = new ArrayList>(); - - /** - * Gets the _atom_site_symbol. - * - * @return the _atom_site_symbol - */ - public List get_atom_site_symbol() { - return _atom_site_symbol; - } - - /** - * Sets the _atom_site_symbol. - * - * @param _atom_site_symbol the new _atom_site_symbol - */ - public void set_atom_site_symbol(List _atom_site_symbol) { - this._atom_site_symbol = _atom_site_symbol; - } - - /** - * Gets the _atom_site_asym_id. - * - * @return the _atom_site_asym_id - */ - public List get_atom_site_asym_id() { - return _atom_site_asym_id; - } - - /** - * Sets the _atom_site_asym_id. 
- * - * @param _atom_site_asym_id the new _atom_site_asym_id - */ - public void set_atom_site_asym_id(List _atom_site_asym_id) { - this._atom_site_asym_id = _atom_site_asym_id; - } - - /** - * Gets the _atom_site_auth_seq_id. - * - * @return the _atom_site_auth_seq_id - */ - public List get_atom_site_auth_seq_id() { - return _atom_site_auth_seq_id; - } - - /** - * Sets the _atom_site_auth_seq_id. - * - * @param _atom_site_auth_seq_id the new _atom_site_auth_seq_id - */ - public void set_atom_site_auth_seq_id(List _atom_site_auth_seq_id) { - this._atom_site_auth_seq_id = _atom_site_auth_seq_id; - } - - /** - * Gets the _atom_site_pdbx_ pd b_ins_code. - * - * @return the _atom_site_pdbx_ pd b_ins_code - */ - public List get_atom_site_pdbx_PDB_ins_code() { - return _atom_site_pdbx_PDB_ins_code; - } - - /** - * Sets the _atom_site_pdbx_ pd b_ins_code. - * - * @param _atom_site_pdbx_PDB_ins_code the new _atom_site_pdbx_ pd b_ins_code - */ - public void set_atom_site_pdbx_PDB_ins_code(List _atom_site_pdbx_PDB_ins_code) { - this._atom_site_pdbx_PDB_ins_code = _atom_site_pdbx_PDB_ins_code; - } - - /** - * Gets the _atom_site_group_ pdb. - * - * @return the _atom_site_group_ pdb - */ - public List get_atom_site_group_PDB() { - return _atom_site_group_PDB; - } - - /** - * Sets the _atom_site_group_ pdb. - * - * @param _atom_site_group_PDB the new _atom_site_group_ pdb - */ - public void set_atom_site_group_PDB(List _atom_site_group_PDB) { - this._atom_site_group_PDB = _atom_site_group_PDB; - } - - /** - * Gets the _atom_site_label_alt_id. - * - * @return the _atom_site_label_alt_id - */ - public List get_atom_site_label_alt_id() { - return _atom_site_label_alt_id; - } - - /** - * Sets the _atom_site_label_alt_id. - * - * @param _atom_site_label_alt_id the new _atom_site_label_alt_id - */ - public void set_atom_site_label_alt_id(List _atom_site_label_alt_id) { - this._atom_site_label_alt_id = _atom_site_label_alt_id; - } - - /** - * Gets the _atom_site_label_asym_id. 
- * - * @return the _atom_site_label_asym_id - */ - public List get_atom_site_label_asym_id() { - return _atom_site_label_asym_id; - } - - /** - * Sets the _atom_site_label_asym_id. - * - * @param _atom_site_label_asym_id the new _atom_site_label_asym_id - */ - public void set_atom_site_label_asym_id(List _atom_site_label_asym_id) { - this._atom_site_label_asym_id = _atom_site_label_asym_id; - } - - /** - * Gets the _atom_site_label_atom_id. - * - * @return the _atom_site_label_atom_id - */ - public List get_atom_site_label_atom_id() { - return _atom_site_label_atom_id; - } - - /** - * Sets the _atom_site_label_atom_id. - * - * @param _atom_site_label_atom_id the new _atom_site_label_atom_id - */ - public void set_atom_site_label_atom_id(List _atom_site_label_atom_id) { - this._atom_site_label_atom_id = _atom_site_label_atom_id; - } - - /** - * Gets the _atom_site_label_comp_id. - * - * @return the _atom_site_label_comp_id - */ - public List get_atom_site_label_comp_id() { - return _atom_site_label_comp_id; - } - - /** - * Sets the _atom_site_label_comp_id. - * - * @param _atom_site_label_comp_id the new _atom_site_label_comp_id - */ - public void set_atom_site_label_comp_id(List _atom_site_label_comp_id) { - this._atom_site_label_comp_id = _atom_site_label_comp_id; - } - - /** - * Gets the _atom_site_label_entity_id. - * - * @return the _atom_site_label_entity_id - */ - public List get_atom_site_label_entity_id() { - return _atom_site_label_entity_id; - } - - /** - * Sets the _atom_site_label_entity_id. - * - * @param _atom_site_label_entity_id the new _atom_site_label_entity_id - */ - public void set_atom_site_label_entity_id(List _atom_site_label_entity_id) { - this._atom_site_label_entity_id = _atom_site_label_entity_id; - } - - /** - * Gets the _atom_site_label_entity_poly_seq_num. 
- * - * @return the _atom_site_label_entity_poly_seq_num - */ - public List get_atom_site_label_entity_poly_seq_num() { - return _atom_site_label_entity_poly_seq_num; - } - - /** - * Sets the _atom_site_label_entity_poly_seq_num. - * - * @param _atom_site_label_entity_poly_seq_num the new _atom_site_label_entity_poly_seq_num - */ - public void set_atom_site_label_entity_poly_seq_num(List _atom_site_label_entity_poly_seq_num) { - this._atom_site_label_entity_poly_seq_num = _atom_site_label_entity_poly_seq_num; - } - - /** - * Gets the group map. - * - * @return the group map - */ - public Map getGroupMap() { - return groupMap; - } - - /** - * Sets the group map. - * - * @param groupMap the group map - */ - public void setGroupMap(Map groupMap) { - this.groupMap = groupMap; - } - - /** - * Gets the group list. - * - * @return the group list - */ - public List> getGroupList() { - return groupList; - } - - /** - * Sets the group list. - * - * @param groupList the new group list - */ - public void setGroupList(List> groupList) { - this.groupList = groupList; - } - - /** - * Gets the res order. - * - * @return the res order - */ - public List getResOrder() { - return resOrder; - } - - /** - * Sets the res order. - * - * @param resOrder the new res order - */ - public void setResOrder(List resOrder) { - this.resOrder = resOrder; - } - - /** - * Gets the sec struct. - * - * @return the sec struct - */ - public List getSecStruct() { - return secStruct; - } - - /** - * Sets the sec struct. - * - * @param secStruct the new sec struct - */ - public void setSecStruct(List secStruct) { - this.secStruct = secStruct; - } - - /** - * Gets the inter group bond inds. - * - * @return the inter group bond inds - */ - public List getInterGroupBondInds() { - return interGroupBondInds; - } - - /** - * Sets the inter group bond inds. 
- * - * @param interGroupBondInds the new inter group bond inds - */ - public void setInterGroupBondInds(List interGroupBondInds) { - this.interGroupBondInds = interGroupBondInds; - } - - /** - * Gets the inter group bond orders. - * - * @return the inter group bond orders - */ - public List getInterGroupBondOrders() { - return interGroupBondOrders; - } - - /** - * Sets the inter group bond orders. - * - * @param interGroupBondOrders the new inter group bond orders - */ - public void setInterGroupBondOrders(List interGroupBondOrders) { - this.interGroupBondOrders = interGroupBondOrders; - } - - /** The group map. */ - protected Map groupMap = new HashMap(); -} diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/NoFloatDataStruct.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/NoFloatDataStruct.java deleted file mode 100644 index b897355..0000000 --- a/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/NoFloatDataStruct.java +++ /dev/null @@ -1,53 +0,0 @@ -package org.rcsb.mmtf.dataholders; - -import java.lang.reflect.InvocationTargetException; -import org.apache.commons.beanutils.BeanUtils; - - -/** - * Class to extend the NoFloatDataStructBean with functions - * - * @author Anthony Bradley - */ -public class NoFloatDataStruct extends NoFloatDataStructBean implements CoreSingleStructure { - - /* (non-Javadoc) - * @see org.rcsb.mmtf.dataholders.CoreSingleStructure#findDataAsBean() - */ - @SuppressWarnings("static-access") - public BioBean findDataAsBean() { - // Cast this to the pure data - NoFloatDataStructBean newData = new NoFloatDataStructBean(); - BeanUtils bu = new BeanUtils(); - try { - bu.copyProperties(newData, this); - } catch (IllegalAccessException e) { - System.err.println("Unknown bug - copying bean data. Report as bug."); - e.printStackTrace(); - throw new RuntimeException(e); - } catch (InvocationTargetException e) { - System.err.println("Unknown bug - copying bean data. 
Report as bug."); - e.printStackTrace(); - throw new RuntimeException(e); - } - return newData; - } - - /* (non-Javadoc) - * @see org.rcsb.mmtf.dataholders.CoreSingleStructure#findStructureCode() - */ - @Override - public String findStructureCode() { - return this.findStructureCode(); - } - - /* (non-Javadoc) - * @see org.rcsb.mmtf.dataholders.CoreSingleStructure#findNumAtoms() - */ - @Override - public int findNumAtoms() { - return this.findNumAtoms(); - } - - -} diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/NoFloatDataStructBean.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/NoFloatDataStructBean.java deleted file mode 100644 index f85c1f8..0000000 --- a/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/NoFloatDataStructBean.java +++ /dev/null @@ -1,143 +0,0 @@ -package org.rcsb.mmtf.dataholders; - -import java.util.ArrayList; -import java.util.List; - -/** - * Class to store the data after removal of floats. - * - * @author Anthony Bradley - */ -public class NoFloatDataStructBean extends NoCoordDataStruct implements BioBean { - - /** The _atom_site_cartn_x_int. */ - // All this information is stored as Integer arrays - private List _atom_site_Cartn_xInt = new ArrayList(); - - /** The _atom_site_cartn_y_int. */ - private List _atom_site_Cartn_yInt = new ArrayList(); - - /** The _atom_site_cartn_z_int. */ - private List _atom_site_Cartn_zInt = new ArrayList(); - - /** The _atom_site_b_iso_or_equiv_int. */ - // Isotropic atomic displacement parameter - private List _atom_site_B_iso_or_equivInt = new ArrayList(); - - /** The _atom_site_occupancy_int. */ - // The fraction of the atom present at this atom position_ - private List _atom_site_occupancyInt = new ArrayList(); - - /** - * Gets the atom serial ids. - * - * @return the atom serial ids - */ - public List get_atom_site_id() { - return _atom_site_id; - } - - /** - * Sets the atom serial ids. - * - * @param _atom_site_id the new atom serial ids. 
- */ - public void set_atom_site_id(List _atom_site_id) { - this._atom_site_id = _atom_site_id; - } - - /** The the atom serial ids. */ - private List _atom_site_id = new ArrayList(); - - /** - * Gets the _atom_site_cartn_x_int. - * - * @return the _atom_site_cartn_x_int - */ - public List get_atom_site_Cartn_xInt() { - return _atom_site_Cartn_xInt; - } - - /** - * Sets the X coordinates stored as integers. - * - * @param _atom_site_Cartn_xInt the new X coordinates stored as integers. - */ - public void set_atom_site_Cartn_xInt(List _atom_site_Cartn_xInt) { - this._atom_site_Cartn_xInt = _atom_site_Cartn_xInt; - } - - /** - * Gets the Y coordinates stored as integers. - * - * @return the Y coordinates stored as integers. - */ - public List get_atom_site_Cartn_yInt() { - return _atom_site_Cartn_yInt; - } - - /** - * Sets the Y coordinates stored as integers. - * - * @param _atom_site_Cartn_yInt the new Y coordinates stored as integers. - */ - public void set_atom_site_Cartn_yInt(List _atom_site_Cartn_yInt) { - this._atom_site_Cartn_yInt = _atom_site_Cartn_yInt; - } - - /** - * Gets the Z coordinates stored as integers. - * - * @return the Z coordinates stored as integers. - */ - public List get_atom_site_Cartn_zInt() { - return _atom_site_Cartn_zInt; - } - - /** - * Sets the Z coordinates stored as integers. - * - * @param _atom_site_Cartn_zInt the new Z coordinates stored as integers. - */ - public void set_atom_site_Cartn_zInt(List _atom_site_Cartn_zInt) { - this._atom_site_Cartn_zInt = _atom_site_Cartn_zInt; - } - - /** - * Gets the B factor stored as integers. - * - * @return the B factor stored as integers. - */ - public List get_atom_site_B_iso_or_equivInt() { - return _atom_site_B_iso_or_equivInt; - } - - /** - * Sets the B factor data stored as integers. - * - * @param _atom_site_B_iso_or_equivInt the new B factor data stored as integers. 
- */ - public void set_atom_site_B_iso_or_equivInt(List _atom_site_B_iso_or_equivInt) { - this._atom_site_B_iso_or_equivInt = _atom_site_B_iso_or_equivInt; - } - - /** - * Gets the occupancy data stored as integers. - * - * @return the occupancy data stored as integers - */ - public List get_atom_site_occupancyInt() { - return _atom_site_occupancyInt; - } - - /** - * Sets the occupancy data stored as integers. - * - * @param _atom_site_occupancyInt the new the occupancy data stored as integers. - */ - public void set_atom_site_occupancyInt(List _atom_site_occupancyInt) { - this._atom_site_occupancyInt = _atom_site_occupancyInt; - } - - -} diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/package-info.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/package-info.java deleted file mode 100644 index e7333a5..0000000 --- a/mmtf-encoder/src/main/java/org/rcsb/mmtf/dataholders/package-info.java +++ /dev/null @@ -1,8 +0,0 @@ -/** - * - */ -/** - * @author Anthony Bradley - * - */ -package org.rcsb.mmtf.dataholders; \ No newline at end of file diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/encoder/ArrayConverters.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/encoder/ArrayConverters.java new file mode 100644 index 0000000..95dc22f --- /dev/null +++ b/mmtf-encoder/src/main/java/org/rcsb/mmtf/encoder/ArrayConverters.java @@ -0,0 +1,187 @@ +package org.rcsb.mmtf.encoder; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.rcsb.mmtf.utils.CodecUtils; + +/** + * A class of array converters. + * @author Anthony Bradley + * + */ +public class ArrayConverters { + + + /** + * Convert an integer array to byte array, where each integer is encoded by a + * single byte. 
+ * @param intArray the input array of integers + * @return the byte array of the integers + * @throws IOException the byte array cannot be read + */ + public static byte[] convertIntegersToBytes(int[] intArray) throws IOException{ + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(baos); + for(int i=0; i < intArray.length; ++i) + { + dos.writeByte(intArray[i]); + } + + return baos.toByteArray(); + } + + /** + * Convert an integer array to byte array, where each integer is encoded by a + * two bytes. + * @param intArray the input array of integers + * @return the byte array of the integers + * @throws IOException the byte array cannot be read + */ + public static byte[] convertIntegersToTwoBytes(int[] intArray) throws IOException{ + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(baos); + for(int i=0; i < intArray.length; ++i) + { + dos.writeShort(intArray[i]); + } + + return baos.toByteArray(); + } + + /** + * Convert an integer array to byte array, where each integer is encoded by a + * four bytes. + * @param intArray the input array of integers + * @return the byte array of the integers + * @throws IOException the byte array cannot be read + */ + public static byte[] convertIntegersToFourByte(int[] intArray) throws IOException{ + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(baos); + for(int i=0; i < intArray.length; ++i) + { + dos.writeInt(intArray[i]); + } + + return baos.toByteArray(); + } + + /** + * Convert a float array to an integer array by multiplying by a float. + * @param intArray the input integer array to be divided + * @param floatDivider the float divider to divide the integers by. + * @return a float array converted from the input. 
+ */ + public static int[] convertFloatsToInts(float[] floatArray, float floatMultiplier) { + // Assign the output array to write + int[] outArray = new int[floatArray.length]; + for (int i=0; i splitIntegers(int[] inputArray) { + // set the two output arrays + List fourByteInts = new ArrayList<>(); + List twoByteInts = new ArrayList<>(); + // First element goes in the four byte integer array. + fourByteInts.add(inputArray[0]); + // Set the counter + int counter =0; + for(int i=1;iShort.MAX_VALUE || inputArray[i] < Short.MIN_VALUE){ + // Add the counter + fourByteInts.add(counter); + // Add the new four byte integer + fourByteInts.add(inputArray[i]); + // Counter set to zero + counter = 0; + } + else{ + // Little number added to little list + twoByteInts.add(inputArray[i]); + // Add to the counter + counter++; + } + } + // Finally add the counter to the big list + fourByteInts.add(counter); + // Now add these to a list - big first + List outputList = new ArrayList<>(); + outputList.add(CodecUtils.convertToIntArray(fourByteInts)); + outputList.add(CodecUtils.convertToIntArray(twoByteInts)); + return outputList; + } + + /** + * Convert a char array to an integer array using the ASCII code for characters + * @param charArray the input character array + * @return an integer array of ASCII decoded chars + */ + public static int[] convertCharToIntegers(char[] charArray) { + int[] outArray = new int[charArray.length]; + for (int i=0; i1){ + byteArr[chainIndex*4+1] = (byte) outChar[1]; + } + else{ + byteArr[chainIndex*4+1] = (byte) 0; + } + if(chainIdLen>2){ + byteArr[chainIndex*4+2] = (byte) outChar[2]; + } + else{ + byteArr[chainIndex*4+2] = (byte) 0; + } + if(chainIdLen>3){ + byteArr[chainIndex*4+3] = (byte) outChar[3]; + } + else{ + byteArr[chainIndex*4+3] = (byte) 0; + } + } +} diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/encoder/ArrayEncoders.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/encoder/ArrayEncoders.java new file mode 100644 index 
0000000..fbfccbb --- /dev/null +++ b/mmtf-encoder/src/main/java/org/rcsb/mmtf/encoder/ArrayEncoders.java @@ -0,0 +1,63 @@ +package org.rcsb.mmtf.encoder; + +import java.util.ArrayList; +import java.util.List; + +import org.rcsb.mmtf.utils.CodecUtils; + +/** + * A class of methods that can be used to encode arrays. + * @author Anthony Bradley + * + */ +public class ArrayEncoders { + + /** + * Delta encode an array of integers. + * @param intArray the input array + * @return the encoded array + */ + public static int[] deltaEncode(int[] intArray) { + int[] out = new int[intArray.length]; + System.arraycopy(intArray, 0, out, 0, intArray.length); + for (int i = out.length-1; i > 0; i--) { + out[i] = out[i] - out[i-1]; + } + return out; + } + + /** + * Run length encode an array of integers. + * @param intArray the input array + * @return the encoded integer array + */ + public static int[] runlengthEncode(int[] intArray) { + // If it's length zero + if (intArray.length==0){ + return new int[0]; + } + // We don't know the length so use + List outList = new ArrayList<>(); + int lastInt = intArray[0]; + int counter = 1; + for (int i=1; i xCoords = ArrayConverters.splitIntegers( + ArrayEncoders.deltaEncode( + ArrayConverters.convertFloatsToInts( + decodedDataInterface.getxCoords(), + MmtfBean.COORD_DIVIDER))); + mmtfBean.setxCoordBig(ArrayConverters.convertIntegersToFourByte(xCoords.get(0))); + mmtfBean.setxCoordSmall(ArrayConverters.convertIntegersToTwoBytes(xCoords.get(1))); + + List yCoords = ArrayConverters.splitIntegers( + ArrayEncoders.deltaEncode( + ArrayConverters.convertFloatsToInts( + decodedDataInterface.getyCoords(), + MmtfBean.COORD_DIVIDER))); + mmtfBean.setyCoordBig(ArrayConverters.convertIntegersToFourByte(yCoords.get(0))); + mmtfBean.setyCoordSmall(ArrayConverters.convertIntegersToTwoBytes(yCoords.get(1))); + + List zCoords = ArrayConverters.splitIntegers( + ArrayEncoders.deltaEncode( + ArrayConverters.convertFloatsToInts( + 
decodedDataInterface.getzCoords(), + MmtfBean.COORD_DIVIDER))); + mmtfBean.setzCoordBig(ArrayConverters.convertIntegersToFourByte(zCoords.get(0))); + mmtfBean.setzCoordSmall(ArrayConverters.convertIntegersToTwoBytes(zCoords.get(1))); + + + List bFactor = ArrayConverters.splitIntegers( + ArrayEncoders.deltaEncode( + ArrayConverters.convertFloatsToInts( + decodedDataInterface.getbFactors(), + MmtfBean.OCCUPANCY_BFACTOR_DIVIDER))); + mmtfBean.setbFactorBig(ArrayConverters.convertIntegersToFourByte(bFactor.get(0))); + mmtfBean.setbFactorSmall(ArrayConverters.convertIntegersToTwoBytes(bFactor.get(1))); + + + // Run length encode the occupancy array + mmtfBean.setOccupancyList(ArrayConverters.convertIntegersToFourByte( + ArrayEncoders.runlengthEncode( + ArrayConverters.convertFloatsToInts( + decodedDataInterface.getOccupancies(), + MmtfBean.OCCUPANCY_BFACTOR_DIVIDER)))); + + // Run length and delta + mmtfBean.setAtomIdList(ArrayConverters.convertIntegersToFourByte( + ArrayEncoders.runlengthEncode( + ArrayEncoders.deltaEncode(decodedDataInterface.getAtomIds())))); + // Run length encoded + mmtfBean.setAltLocList(ArrayConverters.convertIntegersToFourByte( + ArrayEncoders.runlengthEncode( + ArrayConverters.convertCharToIntegers( + decodedDataInterface.getAltLocIds())))); + mmtfBean.setInsCodeList(ArrayConverters.convertIntegersToFourByte( + ArrayEncoders.runlengthEncode( + ArrayConverters.convertCharToIntegers( + decodedDataInterface.getInsCodes())))); + + // Set the groupNumber + mmtfBean.setGroupIdList(ArrayConverters.convertIntegersToFourByte( + ArrayEncoders.runlengthEncode( + ArrayEncoders.deltaEncode( + decodedDataInterface.getGroupIds())))); + + // Set the group map (all the unique groups in the structure). 
+ mmtfBean.setGroupList(EncoderUtils.generateGroupMap(decodedDataInterface)); + // Set the indices for the groups mapping to the sequence + mmtfBean.setSequenceIndexList(ArrayConverters.convertIntegersToFourByte( + ArrayEncoders.runlengthEncode( + ArrayEncoders.deltaEncode( + decodedDataInterface.getGroupSequenceIndices())))); + // Set the number of chains per model + mmtfBean.setChainsPerModel(decodedDataInterface.getChainsPerModel()); + mmtfBean.setGroupsPerChain(decodedDataInterface.getGroupsPerChain()); + // Set the internal and public facing chain ids + mmtfBean.setChainNameList(ArrayConverters.encodeChainList(decodedDataInterface.getChainNames())); + mmtfBean.setChainIdList(ArrayConverters.encodeChainList(decodedDataInterface.getChainIds())); + // Set the space group information + mmtfBean.setSpaceGroup(decodedDataInterface.getSpaceGroup()); + mmtfBean.setUnitCell(decodedDataInterface.getUnitCell()); + // Set the bioassembly and entity information + mmtfBean.setBioAssemblyList( + EncoderUtils.generateBioassemblies(decodedDataInterface)); + mmtfBean.setEntityList( + EncoderUtils.generateEntityList(decodedDataInterface) + ); + // Set the bond orders and indcices + mmtfBean.setBondOrderList(ArrayConverters.convertIntegersToBytes( + decodedDataInterface.getInterGroupBondOrders())); + mmtfBean.setBondAtomList(ArrayConverters.convertIntegersToFourByte( + decodedDataInterface.getInterGroupBondIndices())); + // Set the version and producer information + mmtfBean.setMmtfProducer("RCSB-PDB Generator---version: "+GetRepoState.getCurrentVersion()); + mmtfBean.setStructureId(decodedDataInterface.getStructureId()); + // Set some header data + mmtfBean.setNumAtoms(decodedDataInterface.getNumAtoms()); + mmtfBean.setNumBonds(decodedDataInterface.getNumBonds()); + mmtfBean.setrFree(decodedDataInterface.getRfree()); + mmtfBean.setrWork(decodedDataInterface.getRwork()); + mmtfBean.setResolution(decodedDataInterface.getResolution()); + 
mmtfBean.setTitle(decodedDataInterface.getTitle()); + mmtfBean.setExperimentalMethods(decodedDataInterface.getExperimentalMethods()); + mmtfBean.setDepositionDate(decodedDataInterface.getDepositionDate()); + mmtfBean.setReleaseDate(decodedDataInterface.getReleaseDate()); + mmtfBean.setSecStructList(ArrayConverters.convertIntegersToBytes(decodedDataInterface.getSecStructList())); + } + + public MmtfBean getMmtfBean() { + return mmtfBean; + } + +} diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/encoder/EncoderUtils.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/encoder/EncoderUtils.java new file mode 100644 index 0000000..10c8943 --- /dev/null +++ b/mmtf-encoder/src/main/java/org/rcsb/mmtf/encoder/EncoderUtils.java @@ -0,0 +1,85 @@ +package org.rcsb.mmtf.encoder; + +import java.util.ArrayList; +import java.util.List; + +import org.rcsb.mmtf.api.DecodedDataInterface; +import org.rcsb.mmtf.dataholders.BioAssemblyData; +import org.rcsb.mmtf.dataholders.BioAssemblyTrans; +import org.rcsb.mmtf.dataholders.Entity; +import org.rcsb.mmtf.dataholders.PDBGroup; +import org.rcsb.mmtf.utils.CodecUtils; + +public class EncoderUtils { + + /** + * Regenerate a group map from the decoded data interface. 
+ * @param mmtfDecodedDataInterface the input interface + * @return a list of all the groups in the molecule + */ + public static PDBGroup[] generateGroupMap(DecodedDataInterface mmtfDecodedDataInterface) { + int[] groupTypeIndices = mmtfDecodedDataInterface.getGroupTypeIndices(); + int maxIndex = CodecUtils.findMaxInIntArray(groupTypeIndices); + PDBGroup[] outGroupList = new PDBGroup[maxIndex+1]; + for (int i=0; i generateBioassemblies(DecodedDataInterface mmtfDecodedDataInterface) { + int numBioassemblies = mmtfDecodedDataInterface.getNumBioassemblies(); + List outList = new ArrayList<>(); + for (int i=0; i transformList = new ArrayList<>(); + bioAssemblyData.setTransformList(transformList); + int numTrans = mmtfDecodedDataInterface.getNumTransInBioassembly(i); + for (int j=0; j groupMap; + + /** The group list. */ + private int[] groupList; + + /** The sequence ids of the groups */ + private int[] seqResGroupList; + + /** The public facing chain ids*/ + private String[] publicChainIds; + + /** The number of chains per model*/ + private int[] chainsPerModel; + + /** The number of groups per (internal) chain*/ + private int[] groupsPerChain; + + /** The space group of the structure*/ + private String spaceGroup; + + /** The unit cell of the structure*/ + private float[] unitCell; + + /** The bioassembly information for the structure*/ + private List bioAssembly; + + /** The bond indices for bonds between groups*/ + private List interGroupBondIndices; + + /** The bond orders for bonds between groups*/ + private List interGroupBondOrders; + + /** The chosen list of chain ids */ + private String[] chainList; + + /** The mmtf version */ + private String mmtfVersion = "UNKNOWN"; + + /** The mmtf prodcuer */ + private String mmtfProducer = "UNKNOWN"; + + /** The list of entities in this structure. */ + private Entity[] entityList; + + /** The PDB id */ + private String pdbId; + + /** The reported resolution of the dataset. 
*/ + private Float resolution; + + /** The reported R Free of the model. */ + private Float rFree; + + /** The reported R Work of the model. */ + private Float rWork; + + /** The title of the model. */ + private String title; + + /** The list of experimental methods. */ + private String[] experimentalMethods; + + /** The deposition date of hte structure */ + private String depositionDate; + + /** The release date of the structure */ + private String releaseDate; + + /** The total number of models */ + private int numModels; + + /** The secondary structure information */ + private int[] secStructInfo; + + /** The atom counter */ + int atomIndex = 0; + /** The atom counter within a group*/ + int groupAtomIndex = 0; + /** The current group bond */ + int groupBondIndex = 0; + /** The group counter */ + int groupIndex = 0; + /** The chain counter */ + int chainIndex = 0; + /** The model counter */ + int modelIndex = 0; + /** The entity counter */ + int entityIndex = 0; + /** Add the atom information for the current group */ + PDBGroup pdbGroup; + /** A List for Entities as the number of entities is not defined*/ + List entities; + int totalNumBonds; + List pdbGroupList; + + + @Override + public float[] getxCoords() { + return cartnX; + } + + @Override + public float[] getyCoords() { + return cartnY; + + } + + @Override + public float[] getzCoords() { + return cartnZ; + } + + @Override + public float[] getbFactors() { + return bFactor; + } + + @Override + public float[] getOccupancies() { + return occupancy; + } + + @Override + public int[] getAtomIds() { + return atomId; + } + + @Override + public char[] getAltLocIds() { + return altId; + } + + @Override + public char[] getInsCodes() { + return insertionCodeList; + } + + @Override + public int[] getGroupIds() { + return groupNum; + } + + @Override + public String getGroupName(int groupInd) { + return getGroup(groupInd).getGroupName(); + } + + @Override + public int getNumAtomsInGroup(int groupInd) { + return 
getGroup(groupInd).getAtomChargeList().length; + } + + @Override + public String[] getGroupAtomNames(int groupInd) { + return getGroup(groupInd).getAtomNameList(); + } + + @Override + public String[] getGroupElementNames(int groupInd) { + return getGroup(groupInd).getElementList(); + + } + + @Override + public int[] getGroupBondOrders(int groupInd) { + return getGroup(groupInd).getBondOrderList(); + + } + + @Override + public int[] getGroupBondIndices(int groupInd) { + return getGroup(groupInd).getBondAtomList(); + } + + @Override + public int[] getGroupAtomCharges(int groupInd) { + return getGroup(groupInd).getAtomChargeList(); + } + + @Override + public char getGroupSingleLetterCode(int groupInd) { + return getGroup(groupInd).getSingleLetterCode(); + } + + @Override + public String getGroupChemCompType(int groupInd) { + return getGroup(groupInd).getChemCompType(); + } + + + @Override + public int[] getGroupTypeIndices() { + return groupList; + } + + @Override + public int[] getGroupSequenceIndices() { + return seqResGroupList; + } + + @Override + public String[] getChainIds() { + return chainList; + } + + @Override + public String[] getChainNames() { + return publicChainIds; + } + + @Override + public int[] getChainsPerModel() { + return chainsPerModel; + } + + @Override + public int[] getGroupsPerChain() { + return groupsPerChain; + } + + @Override + public String getSpaceGroup() { + return spaceGroup; + } + + @Override + public float[] getUnitCell() { + return unitCell; + } + + @Override + public int getNumBioassemblies() { + return bioAssembly.size(); + } + + @Override + public int getNumTransInBioassembly(int bioassemblyIndex) { + return bioAssembly.get(bioassemblyIndex).getTransformList().size(); + } + + @Override + public int[] getChainIndexListForTransform(int bioassemblyIndex, int transformationIndex) { + return bioAssembly.get(bioassemblyIndex).getTransformList().get(transformationIndex).getChainIndexList(); + } + + @Override + public double[] 
getMatrixForTransform(int bioassemblyIndex, int transformationIndex) { + return bioAssembly.get(bioassemblyIndex).getTransformList().get(transformationIndex).getMatrix(); + } + + @Override + public int[] getInterGroupBondIndices() { + return CodecUtils.convertToIntArray(interGroupBondIndices); + } + + @Override + public int[] getInterGroupBondOrders() { + return CodecUtils.convertToIntArray(interGroupBondOrders); + } + + @Override + public String getMmtfVersion() { + return mmtfVersion; + } + + @Override + public String getMmtfProducer() { + return mmtfProducer; + } + + @Override + public int getNumEntities() { + return entityList.length; + } + + @Override + public String getEntityDescription(int entityInd) { + return entityList[entityInd].getDescription(); + } + + @Override + public String getEntityType(int entityInd) { + return entityList[entityInd].getType(); + + } + + @Override + public int[] getEntityChainIndexList(int entityInd) { + return entityList[entityInd].getChainIndexList(); + } + + @Override + public String getEntitySequence(int entityInd) { + return entityList[entityInd].getSequence(); + } + + @Override + public String getStructureId() { + return pdbId; + } + + @Override + public int getNumModels() { + return numModels; + } + + @Override + public int getNumChains() { + int sum = 0; + for (int numChainsInModel : chainsPerModel) { + sum+=numChainsInModel; + } + return sum; + } + + @Override + public int getNumGroups() { + return insertionCodeList.length; + } + + @Override + public int getNumAtoms() { + return cartnX.length; + } + + @Override + public float getRfree() { + return rFree; + } + + @Override + public float getRwork() { + return rWork; + } + + @Override + public float getResolution() { + return resolution; + } + + @Override + public String getTitle() { + return title; + } + + @Override + public String[] getExperimentalMethods() { + return experimentalMethods; + } + + @Override + public String getDepositionDate() { + return depositionDate; + } 
+ + + @Override + public void initStructure(int totalNumBonds, int totalNumAtoms, int totalNumGroups, + int totalNumChains, int totalNumModels, String structureId) { + this.totalNumBonds = totalNumBonds; + // Intitialise the bond level info + interGroupBondIndices = new ArrayList<>(); + interGroupBondOrders = new ArrayList<>(); + // Intitialise the atom level arrays + cartnX = new float[totalNumAtoms]; + cartnY= new float[totalNumAtoms]; + cartnZ = new float[totalNumAtoms]; + occupancy = new float[totalNumAtoms]; + bFactor = new float[totalNumAtoms]; + atomId = new int[totalNumAtoms]; + altId = new char[totalNumAtoms]; + // Initialise the group level data + groupNum = new int[totalNumGroups]; + // List for storing the group level information + pdbGroupList = new ArrayList<>(); + insertionCodeList = new char[totalNumGroups]; + seqResGroupList = new int[totalNumGroups]; + secStructInfo = new int[totalNumGroups]; + groupList = new int[totalNumGroups]; + // Intialise the chain level data + chainList = new String[totalNumChains]; + publicChainIds = new String[totalNumChains]; + groupsPerChain = new int[totalNumChains]; + // Initialise the model level information + numModels = totalNumModels; + // Set the name + pdbId = structureId; + bioAssembly = new ArrayList<>(); + entities = new ArrayList<>(); + chainsPerModel = new int[totalNumModels]; + } + + @Override + public void finalizeStructure() { + // Convert the entities array to a list + entityList = entities.toArray(new Entity[0]); + // Cleanup the group list + groupMap = new ArrayList<>(new HashSet<>(pdbGroupList)); + for(int i=0; i bioAssemblyTranList; + if (bioAssembly.size()>bioAssemblyIndex) { + bioAssemblyTranList = bioAssembly.get(bioAssemblyIndex).getTransformList(); + } + else{ + bioAssemblyData = new BioAssemblyData(); + bioAssemblyTranList = new ArrayList<>(); + bioAssemblyData.setTransformList(bioAssemblyTranList); + bioAssembly.add(bioAssemblyData); + } + BioAssemblyTrans bioAssemblyTrans = new 
BioAssemblyTrans(); + bioAssemblyTrans.setChainIndexList(chainIndices); + bioAssemblyTrans.setMatrix(transform); + bioAssemblyTranList.add(bioAssemblyTrans); + } + + @Override + public void setXtalInfo(String spaceGroup, float[] unitCell) { + this.spaceGroup = spaceGroup; + this.unitCell = unitCell; + } + + @Override + public void setGroupBond(int firstAtomIndex, int secondAtomIndex, int bondOrder) { + // Set the bond indices + pdbGroup.getBondAtomList()[groupBondIndex*2] = firstAtomIndex; + pdbGroup.getBondAtomList()[groupBondIndex*2+1] = secondAtomIndex; + // Set the bond order + pdbGroup.getBondOrderList()[groupBondIndex] = bondOrder; + groupBondIndex++; + } + + @Override + public void setInterGroupBond(int firstAtomIndex, int secondAtomIndex, int bondOrder) { + // Set the bond indices + interGroupBondIndices.add(firstAtomIndex); + interGroupBondIndices.add(secondAtomIndex); + // Set the bond order + interGroupBondOrders.add(bondOrder); + } + + @Override + public void setHeaderInfo(float rFree, float rWork, float resolution, String title, String depositionDate, String releaseDate, + String[] experimnetalMethods) { + this.rFree = rFree; + this.rWork = rWork; + this.resolution = resolution; + this.title = title; + this.depositionDate = depositionDate; + this.releaseDate = releaseDate; + this.experimentalMethods = experimnetalMethods; + + } + + private PDBGroup getGroup(int groupInd) { + return groupMap.get(groupInd); + } + + + @Override + public int getNumBonds() { + return totalNumBonds; + } + + @Override + public int[] getSecStructList() { + return secStructInfo; + } + + + @Override + public String getReleaseDate() { + return releaseDate; + } + +} diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/encoder/WriterUtils.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/encoder/WriterUtils.java new file mode 100644 index 0000000..c3f30f4 --- /dev/null +++ b/mmtf-encoder/src/main/java/org/rcsb/mmtf/encoder/WriterUtils.java @@ -0,0 +1,37 @@ +package 
org.rcsb.mmtf.encoder; + +import java.io.FileOutputStream; +import java.io.IOException; + +import org.rcsb.mmtf.serializers.MessagePackSerializer; + +public class WriterUtils { + + /** + * Function to write data to a file. + * @param writerToEncoder the writer to encoder instance + * @param path the full path to write to + * @throws IOException + */ + public static void writeDataToFile(WriterToEncoder writerToEncoder, String path) throws IOException { + byte[] byteArray = getDataAsByteArr(writerToEncoder); + FileOutputStream fos = new FileOutputStream(path); + fos.write(byteArray); + fos.close(); + } + + + /** + * Function to take data from a writer and return as a byte array (MessagePacked serialized). + * @param writerToEncoder the writer to encoder instance + * @return a byte array of the data + * @throws IOException + */ + public static byte[] getDataAsByteArr(WriterToEncoder writerToEncoder) throws IOException { + MessagePackSerializer messagePackSerializer = new MessagePackSerializer(); + // Get to bean + DefaultEncoder getToBean = new DefaultEncoder(writerToEncoder); + return messagePackSerializer.serialize(getToBean.getMmtfBean()); + } + +} diff --git a/mmtf-encoder/src/main/java/org/rcsb/mmtf/serializers/MessagePackSerializer.java b/mmtf-encoder/src/main/java/org/rcsb/mmtf/serializers/MessagePackSerializer.java new file mode 100644 index 0000000..249c06b --- /dev/null +++ b/mmtf-encoder/src/main/java/org/rcsb/mmtf/serializers/MessagePackSerializer.java @@ -0,0 +1,25 @@ +package org.rcsb.mmtf.serializers; + +import org.msgpack.jackson.dataformat.MessagePackFactory; +import org.rcsb.mmtf.dataholders.MmtfBean; + +import com.fasterxml.jackson.annotation.JsonInclude.Include; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +/** + * A class to serialize an object to a byte array. + * The byte array accords to message pack. 
+ * @author Anthony Bradley + * + */ +public class MessagePackSerializer { + + public byte[] serialize(MmtfBean object) throws JsonProcessingException { + + ObjectMapper objectMapper = new ObjectMapper(new MessagePackFactory()); + objectMapper.setSerializationInclusion(Include.NON_NULL); + byte[] byteArray = objectMapper.writeValueAsBytes(object); + return byteArray; + } +} diff --git a/mmtf-encoder/src/test/java/org/rcsb/mmtf/arraycompressors/TestArrayCompressors.java b/mmtf-encoder/src/test/java/org/rcsb/mmtf/arraycompressors/TestArrayCompressors.java deleted file mode 100644 index c6ff8e6..0000000 --- a/mmtf-encoder/src/test/java/org/rcsb/mmtf/arraycompressors/TestArrayCompressors.java +++ /dev/null @@ -1,80 +0,0 @@ -package org.rcsb.mmtf.arraycompressors; - -import java.util.ArrayList; - -import org.junit.Test; -import org.rcsb.mmtf.arraycompressors.FindDeltas; -import org.rcsb.mmtf.arraycompressors.RunLengthEncode; -import org.rcsb.mmtf.arraycompressors.RunLengthEncodeString; - -import static org.junit.Assert.*; - -public class TestArrayCompressors { - - @Test - public void deltaTest() { - // Generate the array - FindDeltas fd = new FindDeltas(); - // Create the two arrays - ArrayList inArray = new ArrayList(); - ArrayList testArray = new ArrayList(); - for(int i=0; i<1000;i++){ - inArray.add(i); - if(i==0){ - testArray.add(i); - } - else{ - testArray.add(1); - } - } - // Compress it - ArrayList deltadArray = fd.compressIntArray(inArray); - // Now check - assertEquals(deltadArray, testArray); - - } - - @Test - public void runLengthTest() { - // Generate the array - RunLengthEncode rle = new RunLengthEncode(); - // Create the two arrays - ArrayList inArray = new ArrayList(); - ArrayList testArray = new ArrayList(); - int length = 1000; - for(int i=0; i runlenghtdArray = rle.compressIntArray(inArray); - // Now check - assertEquals(runlenghtdArray, testArray); - } - - - - @Test - public void runLengthStringTest() { - // Generate the array - RunLengthEncodeString 
rle = new RunLengthEncodeString(); - // Create the two arrays - ArrayList inArray = new ArrayList(); - ArrayList testArray = new ArrayList(); - int length = 1000; - for(int i=0; i runlenghtdArray = rle.compressStringArray(inArray); - // Now check - assertEquals(runlenghtdArray, testArray); - } -} diff --git a/mmtf-encoder/src/test/java/org/rcsb/mmtf/biocompressors/TestBioCompressor.java b/mmtf-encoder/src/test/java/org/rcsb/mmtf/biocompressors/TestBioCompressor.java deleted file mode 100644 index 36ce7dd..0000000 --- a/mmtf-encoder/src/test/java/org/rcsb/mmtf/biocompressors/TestBioCompressor.java +++ /dev/null @@ -1,45 +0,0 @@ -package org.rcsb.mmtf.biocompressors; - -import java.util.ArrayList; - -import org.junit.Test; -import org.rcsb.mmtf.biocompressors.CompressDoubles; - -import static org.junit.Assert.*; - - -public class TestBioCompressor { - - @Test - public void floatToInt() { - // - CompressDoubles cd = new CompressDoubles(); - ArrayList inArray = new ArrayList(); - ArrayList testArray = new ArrayList(); - for(int i=0; i<1000;i++){ - inArray.add((float) 100.12); - testArray.add(10012); - - } - assertEquals(cd.getIntArrayFromFloat(inArray, (float) 100.0),testArray); - - } - - - @Test - public void doubleToInt() { - - // - CompressDoubles cd = new CompressDoubles(); - ArrayList inArray = new ArrayList(); - ArrayList testArray = new ArrayList(); - for(int i=0; i<1000;i++){ - inArray.add(100.12); - testArray.add(10012); - - } - assertEquals(cd.getIntArrayFromDouble(inArray, 100.0),testArray); - - } - -} diff --git a/mmtf-encoder/src/test/java/org/rcsb/mmtf/biocompressors/TestBondFinding.java b/mmtf-encoder/src/test/java/org/rcsb/mmtf/biocompressors/TestBondFinding.java deleted file mode 100644 index 3d7802b..0000000 --- a/mmtf-encoder/src/test/java/org/rcsb/mmtf/biocompressors/TestBondFinding.java +++ /dev/null @@ -1,75 +0,0 @@ -package org.rcsb.mmtf.biocompressors; - -import org.junit.Test; -import static org.junit.Assert.*; - -import java.io.IOException; 
-import java.util.List; - -import org.biojava.nbio.structure.Atom; -import org.biojava.nbio.structure.Bond; -import org.biojava.nbio.structure.Chain; -import org.biojava.nbio.structure.Group; -import org.biojava.nbio.structure.Structure; -import org.biojava.nbio.structure.StructureException; -import org.biojava.nbio.structure.StructureIO; -import org.biojava.nbio.structure.align.util.AtomCache; -import org.biojava.nbio.structure.io.FileParsingParameters; -import org.biojava.nbio.structure.io.LocalPDBDirectory.FetchBehavior; -import org.biojava.nbio.structure.io.mmcif.ChemCompGroupFactory; -import org.biojava.nbio.structure.io.mmcif.DownloadChemCompProvider; - -public class TestBondFinding { - - - @Test - public void testInterGroupBonds() throws IOException, StructureException { - // Normal - assertEquals(getInterBonds("1QMZ"), 2236); - // Disulphide - assertEquals(getInterBonds("2QWO"), 956); - // Covalent ligand - assertEquals(getInterBonds("4QDV"), 2294); - // DNA - assertEquals(getInterBonds("4XSN"), 22); - - } - - public int getInterBonds(String pdbId) throws IOException, StructureException{ - AtomCache cache = new AtomCache(); - cache.setUseMmCif(true); - cache.setFetchBehavior(FetchBehavior.FETCH_FILES); - FileParsingParameters params = cache.getFileParsingParams(); - params.setCreateAtomBonds(true); - params.setAlignSeqRes(true); - params.setParseBioAssembly(true); - DownloadChemCompProvider dcc = new DownloadChemCompProvider(); - ChemCompGroupFactory.setChemCompProvider(dcc); - dcc.checkDoFirstInstall(); - cache.setFileParsingParams(params); - StructureIO.setAtomCache(cache); - int counter =0; - // Now get the structure - Structure newStruc = StructureIO.getStructure(pdbId); - // Now loop through the atoms - for(Chain c: newStruc.getChains()){ - for(Group g: c.getAtomGroups()){ - List theseAtoms = g.getAtoms(); - for(Atom a: theseAtoms){ - List theseBonds = a.getBonds(); - if(theseBonds != null){ - for(Bond b: a.getBonds()){ - Atom other = b.getOther(a); - 
int indexOther = theseAtoms.indexOf(other); - // Check if the index is within the group - if(indexOther<0 || indexOther >= theseAtoms.size()){ - counter++; - } - } - } - } - } - } - return counter; - } -} diff --git a/mmtf-encoder/src/test/java/org/rcsb/mmtf/biojavaencoder/TestEncoderUtils.java b/mmtf-encoder/src/test/java/org/rcsb/mmtf/biojavaencoder/TestEncoderUtils.java deleted file mode 100644 index dd45d3d..0000000 --- a/mmtf-encoder/src/test/java/org/rcsb/mmtf/biojavaencoder/TestEncoderUtils.java +++ /dev/null @@ -1,106 +0,0 @@ -package org.rcsb.mmtf.biojavaencoder; - - -import org.junit.Test; - -import static org.junit.Assert.*; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -import org.biojava.nbio.structure.Atom; -import org.biojava.nbio.structure.Chain; -import org.biojava.nbio.structure.Group; -import org.biojava.nbio.structure.Structure; -import org.biojava.nbio.structure.StructureException; -import org.biojava.nbio.structure.StructureIO; - -public class TestEncoderUtils { - - - @Test - public void microHeterogenity() throws IOException, StructureException { - EncoderUtils encoderUtils = new EncoderUtils(); - encoderUtils.setUpBioJava(); - Structure inputStructure = StructureIO.getStructure("4ck4"); - // Count the number of groups - Group before = inputStructure.getChains().get(0).getAtomGroup(17); - assertTrue(inputStructure.getChains().get(0).getAtomGroup(17).hasAltLoc()); - List totalAtoms = new ArrayList<>(encoderUtils.getAllAtoms(inputStructure)); - int totGroups = 0; - int totAtomsCounter = 0; - Set totAtoms = new HashSet<>(); - for (Chain c : inputStructure.getChains()) { - totGroups += c.getAtomGroups().size(); - for (Group g: c.getAtomGroups() ){ - totAtomsCounter+=g.getAtoms().size(); - totAtoms.addAll(g.getAtoms()); - for (Group alt : g.getAltLocs()) { - totAtomsCounter+=alt.getAtoms().size(); - totAtoms.addAll(alt.getAtoms()); - } - } - } - // Now "fix" 
the microheterogenity - encoderUtils.fixMicroheterogenity(inputStructure); - - assertEquals(before, inputStructure.getChains().get(0).getAtomGroup(17)); - assertFalse(inputStructure.getChains().get(0).getAtomGroup(17).hasAltLoc()); - assertFalse(inputStructure.getChains().get(0).getAtomGroup(18).hasAltLoc()); - int totGroupsAfter = 0; - int totAtomsCounterAfter = 0; - Set totAtomsAfter = new HashSet<>(); - for (Chain c : inputStructure.getChains()) { - totGroupsAfter += c.getAtomGroups().size(); - for (Group g: c.getAtomGroups() ){ - totAtomsCounterAfter+=g.getAtoms().size(); - totAtomsAfter.addAll(g.getAtoms()); - for (Group alt : g.getAltLocs()) { - totAtomsAfter.addAll(alt.getAtoms()); - totAtomsCounterAfter+=alt.getAtoms().size(); - } - } - } - - List totalAtomsAfter = new ArrayList<>(encoderUtils.getAllAtoms(inputStructure)); - System.out.println("Before: "+ totalAtoms.size()); - System.out.println("After: "+ totalAtomsAfter.size()); - System.out.println("Unique before: "+ totAtoms.size()); - System.out.println("Unique after: "+ totAtomsAfter.size()); - // Get all of the duplicate atoms - Set duplicates = findDuplicates(totalAtomsAfter); - System.out.println(duplicates.size()); - for (Atom a : duplicates) { - System.out.println(a); - } - // There should be no duplicates - assertEquals(duplicates.size(), 0); - assertEquals(totalAtoms.size(), totalAtomsAfter.size()); - // Check there are two more groups afterwards - assertEquals(totGroupsAfter-2, totGroups); - // Check there are no more atoms afterwards - assertEquals(totAtomsAfter.size(), totAtoms.size()); - // Check the counter are the same too - assertEquals(totAtomsCounterAfter, totAtomsCounter); - - } - - private Set findDuplicates(List listContainingDuplicates) - { - final Set setToReturn = new HashSet<>(); - final Set set1 = new HashSet<>(); - - for (Atom yourInt : listContainingDuplicates) - { - if (!set1.add(yourInt)) - { - setToReturn.add(yourInt); - } - } - return setToReturn; - } -} - diff --git 
a/mmtf-encoder/src/test/java/org/rcsb/mmtf/dataholders/FailureBean.java b/mmtf-encoder/src/test/java/org/rcsb/mmtf/dataholders/FailureBean.java deleted file mode 100644 index e826975..0000000 --- a/mmtf-encoder/src/test/java/org/rcsb/mmtf/dataholders/FailureBean.java +++ /dev/null @@ -1,742 +0,0 @@ -package org.rcsb.mmtf.dataholders; - -import java.util.List; -import java.util.Map; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; - -/** - * A class to store the data sent in an MMTF data source. - * - * @author anthony - */ -@JsonIgnoreProperties(ignoreUnknown = true) -public class FailureBean { - - /** The mmtf version. */ - private String mmtfVersion = "0.1"; - - /** The mmtf producer. */ - private String mmtfProducer; - - /** The number of bonds. */ - private int numBonds; - - /** The pdb id. */ - private String pdbId; - - /** The title. */ - private String title; - - /** The number of atoms. */ - private int numAtoms; - - /** The number of chains per model. */ - private int[] chainsPerModel; - - /** The names of the chains. Each chain is allocated four bytes. Chain names can be up to four characters long. 0 bytes indicate the end of the chain name. These are taken from the auth id. */ - private byte[] chainNameList; - - /** The names of the chains. Each chain is allocated four bytes. Chain names can be up to four characters long. 0 bytes indicate the end of the chain name. These are taken from the asym id. */ - private byte[] chainIdList; - - /** The internal groups per chain. */ - private int[] groupsPerChain; - - /** The space group. */ - private String spaceGroup; - - /** The unit cell. */ - private List unitCell; - - /** The bio assembly. */ - private Map bioAssembly; - - /** The bond atom (indices) list. */ - private byte[] bondAtomList; - - /** The bond order list. */ - private byte[] bondOrderList; - - /** The group map. */ - // Map of all the data - private Map groupMap; - - /** The x coord big. 4 byte integers in pairs. 
*/ - private byte[] xCoordBig; - - /** The y coord big. 4 byte integers in pairs. */ - private byte[] yCoordBig; - - /** The z coord big. 4 byte integers in pairs. */ - private byte[] zCoordBig; - - /** The b factor big. 4 byte integers in pairs. */ - private byte[] bFactorBig; - - /** The x coord small. 2 byte integers. */ - private byte[] xCoordSmall; - - /** The y coord small. 2 byte integers.*/ - private byte[] yCoordSmall; - - /** The z coord small. 2 byte integers.*/ - private byte[] zCoordSmall; - - /** The b factor small. 2 byte integers.*/ - private byte[] bFactorSmall; - - /** The secondary structure list. Stored as 1 byte ints. */ - private byte[] secStructList; - - /** The occupancy list. */ - private byte[] occList; - - /** The alt label list. */ - private List altLabelList; - - /** The insertion code list. */ - private List insCodeList; - - /** The group type list. */ - private byte[] groupTypeList; - - /** The group id list. Identifies each group along the chain. */ - private byte[] groupIdList; - - /** The atom id list. */ - private byte[] atomIdList; - - /** The SEQRES sequence, per asym chain. */ - private List chainSeqList; - - /** The SeqRes group ids. */ - private byte[] seqResIdList; - - /** The experimental method(s). */ - private List experimentalMethods; - // ADDED FIELDS TO DELIBERATELY FAIL TESTS - @SuppressWarnings("unused") - private List fieldWithNoGetters; - - private List fieldWithRefactoredGetters; - - - /** - * Gets the space group. - * - * @return the space group - */ - public final String getSpaceGroup() { - return spaceGroup; - } - - /** - * Sets the space group. - * - * @param inputSpaceGroup the new space group - */ - public final void setSpaceGroup(final String inputSpaceGroup) { - this.spaceGroup = inputSpaceGroup; - } - - /** - * Gets the unit cell. - * - * @return the unit cell - */ - public final List getUnitCell() { - return unitCell; - } - - /** - * Sets the unit cell. 
- * - * @param inputUnitCell the new unit cell - */ - public final void setUnitCell(final List inputUnitCell) { - this.unitCell = inputUnitCell; - } - - /** - * Gets the group num list. - * - * @return the group num list - */ - public final byte[] getGroupIdList() { - return groupIdList; - } - - /** - * Sets the group num list. - * - * @param inputGroupNumList the new group num list - */ - public final void setGroupIdList(final byte[] inputGroupNumList) { - this.groupIdList = inputGroupNumList; - } - - /** - * Gets the x coord big. - * - * @return the x coord big - */ - public final byte[] getxCoordBig() { - return xCoordBig; - } - - /** - * Sets the x coord big. - * - * @param inputXCoordBig the new 4 byte integer x coord array - */ - public final void setxCoordBig(final byte[] inputXCoordBig) { - this.xCoordBig = inputXCoordBig; - } - - /** - * Gets the y coord big. - * - * @return the y coord big - */ - public final byte[] getyCoordBig() { - return yCoordBig; - } - - /** - * Sets the y coord big. - * - * @param inputYCoordBig the new 4 byte integer y coord array - */ - public final void setyCoordBig(final byte[] inputYCoordBig) { - this.yCoordBig = inputYCoordBig; - } - - /** - * Gets the z coord big. - * - * @return the z coord big - */ - public final byte[] getzCoordBig() { - return zCoordBig; - } - - /** - * Sets the z coord big. - * - * @param inputZCoordBig the new 4 byte integer z coord array - */ - public final void setzCoordBig(final byte[] inputZCoordBig) { - this.zCoordBig = inputZCoordBig; - } - - /** - * Gets the x coord small. - * - * @return the x coord small - */ - public final byte[] getxCoordSmall() { - return xCoordSmall; - } - - /** - * Sets the x coord small. - * - * @param inputXCoordSmall the new 2 byte integer x coord array - */ - public final void setxCoordSmall(final byte[] inputXCoordSmall) { - this.xCoordSmall = inputXCoordSmall; - } - - /** - * Gets the y coord small. 
- * - * @return the y coord small - */ - public final byte[] getyCoordSmall() { - return yCoordSmall; - } - - /** - * Sets the y coord small. - * - * @param inputYCoordSmall the new 2 byte integer y coord array - */ - public final void setyCoordSmall(final byte[] inputYCoordSmall) { - this.yCoordSmall = inputYCoordSmall; - } - - /** - * Gets the z coord small. - * - * @return the z coord small - */ - public final byte[] getzCoordSmall() { - return zCoordSmall; - } - - /** - * Sets the z coord small. - * - * @param inputZCoordSmall the new 2 byte integer z coord array - */ - public final void setzCoordSmall(final byte[] inputZCoordSmall) { - this.zCoordSmall = inputZCoordSmall; - } - - /** - * Gets the b factor big. - * - * @return the b factor big - */ - public final byte[] getbFactorBig() { - return bFactorBig; - } - - /** - * Sets the b factor big. - * - * @param inputBigBFactor the new b factor big - */ - public final void setbFactorBig(final byte[] inputBigBFactor) { - this.bFactorBig = inputBigBFactor; - } - - /** - * Gets the b factor small. - * - * @return the b factor small - */ - public final byte[] getbFactorSmall() { - return bFactorSmall; - } - - /** - * Sets the b factor small. - * - * @param inputSmallBFactor the new b factor 2 byte array - */ - public final void setbFactorSmall(final byte[] inputSmallBFactor) { - this.bFactorSmall = inputSmallBFactor; - } - - /** - * Gets the alt label list. - * - * @return the alt label list - */ - public final List getAltLabelList() { - return altLabelList; - } - - /** - * Sets the alt label list. - * - * @param inputAltIdList the new alt id label list - */ - public final void setAltLabelList(final List inputAltIdList) { - this.altLabelList = inputAltIdList; - } - - /** - * Gets the bio assembly. - * - * @return the bio assembly - */ - public final Map getBioAssembly() { - return bioAssembly; - } - - /** - * Gets the chain names. The byte array indicating the (up to four characters) name of the chain. 
This is taken from the auth id. - * - * @return the chain list - */ - public final byte[] getChainNameList() { - return chainNameList; - } - - /** - * Sets the chain names. The byte array indicating the (up to four characters) name of the chain. This is taken from the auth id. - * - * @param inputChainList the new chain list - */ - public final void setChainNameList(final byte[] inputChainList) { - this.chainNameList = inputChainList; - } - - /** - * Sets the bioassembly information. - * - * @param inputBioAssembly the bio assembly - */ - public final void setBioAssembly(final Map inputBioAssembly) { - this.bioAssembly = inputBioAssembly; - } - - /** - * Gets the num atoms. - * - * @return the num atoms - */ - public final int getNumAtoms() { - return numAtoms; - } - - /** - * Sets the num atoms. - * - * @param inputNumAtoms the new num atoms - */ - public final void setNumAtoms(final int inputNumAtoms) { - this.numAtoms = inputNumAtoms; - } - - /** - * Gets the occ list. - * - * @return the occ list - */ - public final byte[] getOccList() { - return occList; - } - - /** - * Sets the occ list. - * - * @param occupancy the new occ list - */ - public final void setOccList(final byte[] occupancy) { - this.occList = occupancy; - } - - /** - * Gets the insertion code list. - * - * @return the insertion code list - */ - public final List getInsCodeList() { - return insCodeList; - } - - /** - * Sets the ins code list. - * - * @param inputInsertionCodeList the new insertion code list - */ - public final void setInsCodeList(final List inputInsertionCodeList) { - this.insCodeList = inputInsertionCodeList; - } - - /** - * Gets the group map. - * - * @return the group map - */ - public final Map getGroupMap() { - return groupMap; - } - - /** - * Sets the group map. - * - * @param inputGroupMap the group map - */ - public final void setGroupMap(final Map inputGroupMap) { - this.groupMap = inputGroupMap; - } - - /** - * Gets the sec struct list. 
- * - * @return the sec struct list - */ - public final byte[] getSecStructList() { - return secStructList; - } - - /** - * Sets the sec struct list. - * - * @param secStruct the new sec struct list - */ - public final void setSecStructList(final byte[] secStruct) { - this.secStructList = secStruct; - } - - /** - * Gets the group type list. - * - * @return the group type list - */ - public final byte[] getGroupTypeList() { - return groupTypeList; - } - - /** - * Sets the group type list. - * - * @param resOrder the new group type list - */ - public final void setGroupTypeList(final byte[] resOrder) { - this.groupTypeList = resOrder; - } - - /** - * Gets the atom id list. - * - * @return the atom id list - */ - public final byte[] getAtomIdList() { - return atomIdList; - } - - /** - * Sets the atom id list. - * - * @param inputAtomIdList the new atom id list - */ - public final void setAtomIdList(final byte[] inputAtomIdList) { - this.atomIdList = inputAtomIdList; - } - - /** - * Gets the title. - * - * @return the title - */ - public final String getTitle() { - return title; - } - - /** - * Sets the title. - * - * @param inputTitle the new title - */ - public final void setTitle(final String inputTitle) { - this.title = inputTitle; - } - - /** - * Gets the pdb id. - * - * @return the pdb id - */ - public final String getPdbId() { - return pdbId; - } - - /** - * Sets the pdb id. - * - * @param pdbCode the new pdb id - */ - public final void setPdbId(final String pdbCode) { - this.pdbId = pdbCode; - } - - /** - * Gets the mmtf producer. - * - * @return the mmtf producer - */ - public final String getMmtfProducer() { - return mmtfProducer; - } - - /** - * Sets the mmtf producer. - * - * @param inputMmtfProducer the new mmtf producer - */ - public final void setMmtfProducer(final String inputMmtfProducer) { - this.mmtfProducer = inputMmtfProducer; - } - - /** - * Gets the mmtf version. 
- * - * @return the mmtf version - */ - public final String getMmtfVersion() { - return mmtfVersion; - } - - /** - * Sets the mmtf version. - * - * @param inputMmtfVersion the new mmtf version - */ - public final void setMmtfVersion(final String inputMmtfVersion) { - this.mmtfVersion = inputMmtfVersion; - } - - /** - * Gets the num bonds. - * - * @return the num bonds - */ - public final int getNumBonds() { - return numBonds; - } - - /** - * Sets the number of bonds. - * - * @param inputNumBonds the new num bonds - */ - public final void setNumBonds(final int inputNumBonds) { - this.numBonds = inputNumBonds; - } - - /** - * Gets the bond atom list. - * - * @return the bond atom list - */ - public final byte[] getBondAtomList() { - return bondAtomList; - } - - /** - * Sets the bond atom list. - * - * @param inputBondAtomList the new bond atom list - */ - public final void setBondAtomList(final byte[] inputBondAtomList) { - this.bondAtomList = inputBondAtomList; - } - - /** - * Gets the bond order list. - * - * @return the bond order list - */ - public final byte[] getBondOrderList() { - return bondOrderList; - } - - /** - * Sets the bond order list. - * - * @param inputBondOrderList the new bond order list - */ - public final void setBondOrderList(final byte[] inputBondOrderList) { - this.bondOrderList = inputBondOrderList; - } - - /** - * Gets the number of chains per model. Chains are currently specified by asym (internal) chain ids. - * - * @return the list of chains per model. - */ - public final int[] getChainsPerModel() { - return chainsPerModel; - } - - /** - * Sets the number of chains per model. Currently specified by asy (internal) chain ids. - * - * @param inputInternalChainsPerModel the new list of chains per model. - */ - public final void setChainsPerModel(final int[] - inputInternalChainsPerModel) { - this.chainsPerModel = inputInternalChainsPerModel; - } - - /** - * Gets the number of groups per chain. 
- * - * @return the internal groups per chain - */ - public final int[] getGroupsPerChain() { - return groupsPerChain; - } - - /** - * Sets the number of groups in a chain. - * - * @param inputGroupsPerChain the new internal groups per chain - */ - public final void setGroupsPerChain(final int[] - inputGroupsPerChain) { - this.groupsPerChain = inputGroupsPerChain; - } - - /** - * Gets the internal chain list. - * - * @return the internal chain list - */ - public final byte[] getChainIdList() { - return chainIdList; - } - - /** - * Sets the internal chain list. - * - * @param inputInternalChainList the new internal chain list - */ - public final void setChainIdList(final byte[] inputInternalChainList) { - this.chainIdList = inputInternalChainList; - } - - /** - * @return the experimental methods - */ - public List getExperimentalMethods() { - return experimentalMethods; - } - - /** - * @param experimentalMethods the experimental methods to set - */ - public void setExperimentalMethods(List experimentalMethods) { - this.experimentalMethods = experimentalMethods; - } - - /** - * @return the sequence on a per (asym) chain level. - */ - public List getChainSeqList() { - return chainSeqList; - } - - /** - * @param sequence the list of strings (sequences per asym chain) to set. 
- */ - public void setChainSeqList(List sequence) { - this.chainSeqList = sequence; - } - - /** - * @return the seqResGroupIds - */ - public byte[] getSeqResIdList() { - return seqResIdList; - } - - /** - * @param seqResGroupIds the seqResGroupIds to set - */ - public void setSeqResIdList(byte[] seqResGroupIds) { - this.seqResIdList = seqResGroupIds; - } - - /** - * @return the fieldWithGetters - */ - public List getFieldWithGetters() { - return fieldWithRefactoredGetters; - } - - /** - * @param fieldWithGetters the fieldWithGetters to set - */ - public void setFieldWithGetters(List fieldWithGetters) { - this.fieldWithRefactoredGetters = fieldWithGetters; - } - -} diff --git a/mmtf-encoder/src/test/java/org/rcsb/mmtf/dataholders/TestDataHolders.java b/mmtf-encoder/src/test/java/org/rcsb/mmtf/dataholders/TestDataHolders.java deleted file mode 100644 index bd2572d..0000000 --- a/mmtf-encoder/src/test/java/org/rcsb/mmtf/dataholders/TestDataHolders.java +++ /dev/null @@ -1,171 +0,0 @@ -package org.rcsb.mmtf.dataholders; - -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.assertNotEquals; - -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; - -import org.biojava.nbio.structure.StructureException; -import org.biojava.nbio.structure.align.util.AtomCache; -import org.biojava.nbio.structure.io.FileParsingParameters; -import org.biojava.nbio.structure.io.LocalPDBDirectory.FetchBehavior; -import org.biojava.nbio.structure.io.mmcif.ChemCompGroupFactory; -import org.biojava.nbio.structure.io.mmcif.DownloadChemCompProvider; -import org.biojava.nbio.structure.quaternary.BioAssemblyInfo; -import org.junit.Test; -import org.msgpack.jackson.dataformat.MessagePackFactory; -import org.rcsb.mmtf.biojavaencoder.EncoderUtils; -import org.rcsb.mmtf.biojavaencoder.ParseFromBiojava; -import org.unitils.reflectionassert.ReflectionAssert; - -import com.fasterxml.jackson.core.JsonParseException; 
-import com.fasterxml.jackson.databind.JsonMappingException; -import com.fasterxml.jackson.databind.ObjectMapper; - -import uk.co.jemos.podam.api.PodamFactory; -import uk.co.jemos.podam.api.PodamFactoryImpl; - -public class TestDataHolders { - - private EncoderUtils encoderUtils; - private PodamFactory factory; - private AtomCache cache; - private FileParsingParameters params; - - public TestDataHolders() { - encoderUtils = new EncoderUtils(); - factory = new PodamFactoryImpl(); - cache = new AtomCache(); - cache.setUseMmCif(true); - cache.setFetchBehavior(FetchBehavior.FETCH_FILES); - params = cache.getFileParsingParams(); - params.setCreateAtomBonds(true); - params.setAlignSeqRes(true); - params.setParseBioAssembly(true); - DownloadChemCompProvider dcc = new DownloadChemCompProvider(); - ChemCompGroupFactory.setChemCompProvider(dcc); - dcc.setDownloadAll(true); - dcc.checkDoFirstInstall(); - params.setUseInternalChainId(true); - } - - @Test - public void testSerializable() throws JsonParseException, JsonMappingException, IOException { - // MmtfBean - assertTrue(testClass(MmtfBean.class)); - // This one fails - make sure it still does - assertFalse(testClass(BioAssemblyInfo.class)); - // The bean to store calpha data - assertTrue(testClass(CalphaDistBean.class)); - // Now consider the no float data stucuture class - assertTrue(testClass(NoFloatDataStruct.class)); - // Now consider the no bio data structure bean - assertTrue(testClass(BioDataStructBean.class)); - // And the calpha bean - assertTrue(testClass(CalphaBean.class)); - // Now test round tripping data - testDataRoundTrip(MmtfBean.class); - // Now test if all fields in the mmtf are generated - testDataComplete("4cup"); - // Now check that the failure bean fails this - // Now test round tripping data - assertFalse(testDataRoundTrip(FailureBean.class)); - } - - @SuppressWarnings("unchecked") - private boolean testClass(@SuppressWarnings("rawtypes") Class class1) throws IOException { - - - Object inBean = 
null; - try { - inBean = class1.newInstance(); - } catch (InstantiationException | IllegalAccessException e2) { - // Weirdness - org.junit.Assert.fail("Weirdness in generating instance of generic class"); - } - byte[] outArr = null; - - outArr = encoderUtils.getMessagePack(inBean); - - // - Object outBean = null; - try { - outBean = new ObjectMapper(new MessagePackFactory()).readValue(outArr, class1); - } catch( JsonMappingException jsonE){ - System.out.println("Error reading messagepack - is part of test if test doesn't fail"); - return false; - } - - // Now check they're the same - ReflectionAssert.assertReflectionEquals(inBean, outBean); - return true; - } - - @SuppressWarnings("unchecked") - /** - * Test round tripping dummy data - * @param class1 - */ - private boolean testDataRoundTrip(@SuppressWarnings("rawtypes") Class class1) throws JsonParseException, JsonMappingException, IOException { - Object inBean = factory.manufacturePojo(class1); - byte[] outArr = null; - - outArr = encoderUtils.getMessagePack(inBean); - - - // - Object outBean = null; - outBean = new ObjectMapper(new MessagePackFactory()).readValue(outArr, class1); - - // Make the failure bean fail - try{ - ReflectionAssert.assertPropertyReflectionEquals("fieldWithNoGetters",null, outBean); - ReflectionAssert.assertPropertyReflectionEquals("fieldWithRefactoredGetters",null, outBean); - return false; - } - catch(Exception e){ - - } - // Make sure all fields are re-populated - ReflectionAssert.assertPropertiesNotNull("Some properties are null in re-read object", outBean); - - // Now check they're the same - ReflectionAssert.assertReflectionEquals(inBean, outBean); - return true; - } - - /** - * A specific mmtf test - to make sure none of the fields are empty when the thing is encoded - * @throws StructureException - * @throws IOException - */ - private void testDataComplete(String pdbId) throws IOException { - - // Utility functions for encoding stuff - EncoderUtils eu = new EncoderUtils(); - // Get 
the utility class to get the structures - ParseFromBiojava parsedDataStruct = new ParseFromBiojava(); - Map totMap = new HashMap(); - // Parse the data into the basic data structure - parsedDataStruct.createFromJavaStruct(pdbId, totMap); - MmtfBean mmtfBean = null; - // Compress the data and get it back out - mmtfBean = eu.compressMainData(parsedDataStruct.getBioStruct(), parsedDataStruct.getHeaderStruct()); - // Make sure all fields are re-populated - ReflectionAssert.assertPropertiesNotNull("Some properties are null in mmtf generated from biojava object", mmtfBean); - // Now check the standard ones have been set - assertNotEquals(mmtfBean.getResolution(), (float) -1.0); - assertNotEquals(mmtfBean.getrFree(), (float) -1.0); - // Check that these integer values are set - assertNotEquals(mmtfBean.getNumAtoms(), -1); - assertNotEquals(mmtfBean.getNumBonds(), -1); - // And finally - check this is working - assertNotEquals(mmtfBean.getMmtfProducer(), "NA"); - } -} - - - diff --git a/mmtf-encoder/src/test/java/org/rcsb/mmtf/encoder/DummyApiImpl.java b/mmtf-encoder/src/test/java/org/rcsb/mmtf/encoder/DummyApiImpl.java new file mode 100644 index 0000000..f3d36a3 --- /dev/null +++ b/mmtf-encoder/src/test/java/org/rcsb/mmtf/encoder/DummyApiImpl.java @@ -0,0 +1,288 @@ +package org.rcsb.mmtf.encoder; + +import org.rcsb.mmtf.api.DecodedDataInterface; + +public class DummyApiImpl implements DecodedDataInterface { + + + public final int numAtoms = 10; + public final int numGroups = 2; + public final int atomsPerGroup = 5; + public final int bondsPerGroup = 5; + public final int interGroupBonds = 2; + public final int numChains = 1; + public final int numModels = 1; + + + @Override + public float[] getxCoords() { + return new float[numAtoms]; + } + + @Override + public float[] getyCoords() { + return new float[numAtoms]; + } + + @Override + public float[] getzCoords() { + return new float[numAtoms]; + } + + @Override + public float[] getbFactors() { + return new float[numAtoms]; 
+ } + + @Override + public float[] getOccupancies() { + return new float[numAtoms]; + } + + @Override + public int[] getAtomIds() { + return new int[numAtoms]; + } + + @Override + public char[] getAltLocIds() { + return new char[numAtoms]; + } + + @Override + public char[] getInsCodes() { + return new char[numGroups]; + } + + @Override + public int[] getGroupIds() { + return new int[numGroups]; + } + + @Override + public String getGroupName(int groupInd) { + return "NAME"; + } + + @Override + public int getNumAtomsInGroup(int groupInd) { + return atomsPerGroup; + } + + @Override + public String[] getGroupAtomNames(int groupInd) { + return new String[atomsPerGroup]; + } + + @Override + public String[] getGroupElementNames(int groupInd) { + return new String[atomsPerGroup]; + } + + @Override + public int[] getGroupBondOrders(int groupInd) { + return new int[bondsPerGroup]; + } + + @Override + public int[] getGroupBondIndices(int groupInd) { + return new int[bondsPerGroup*2]; + } + + @Override + public int[] getGroupAtomCharges(int groupInd) { + return new int[atomsPerGroup]; + } + + @Override + public char getGroupSingleLetterCode(int groupInd) { + return 0; + } + + @Override + public String getGroupChemCompType(int groupInd) { + return "CHEM"; + } + + @Override + public int[] getGroupTypeIndices() { + return new int[numGroups]; + } + + @Override + public int[] getGroupSequenceIndices() { + return new int[numGroups]; + } + + @Override + public String[] getChainIds() { + return new String[numChains]; + } + + @Override + public String[] getChainNames() { + return new String[numChains]; + } + + @Override + public int[] getChainsPerModel() { + + return new int[numModels]; + } + + @Override + public int[] getGroupsPerChain() { + return new int[numChains]; + } + + @Override + public String getSpaceGroup() { + return "SPACE"; + } + + @Override + public float[] getUnitCell() { + return new float[6]; + } + + @Override + public int getNumBioassemblies() { + return 1; + } + + 
@Override + public int getNumTransInBioassembly(int bioassemblyIndex) { + return 1; + } + + @Override + public int[] getChainIndexListForTransform(int bioassemblyIndex, int transformationIndex) { + return new int[1]; + } + + @Override + public double[] getMatrixForTransform(int bioassemblyIndex, int transformationIndex) { + return new double[1]; + } + + @Override + public int[] getInterGroupBondIndices() { + return new int[interGroupBonds*2]; + + } + + @Override + public int[] getInterGroupBondOrders() { + return new int[interGroupBonds]; + } + + @Override + public String getMmtfVersion() { + return "VERS"; + } + + @Override + public String getMmtfProducer() { + return "PROD"; + } + + @Override + public int getNumEntities() { + return 1; + } + + @Override + public String getEntityDescription(int entityInd) { + return "DESC"; + } + + @Override + public String getEntityType(int entityInd) { + return "TYPE"; + } + + @Override + public int[] getEntityChainIndexList(int entityInd) { + return new int[] {0}; + } + + @Override + public String getEntitySequence(int entityInd) { + return "SEQ"; + } + + @Override + public String getStructureId() { + return "1EG1"; + } + + @Override + public int getNumModels() { + return 1; + } + + @Override + public int getNumBonds() { + return bondsPerGroup*numGroups+interGroupBonds; + } + + @Override + public int getNumChains() { + return numChains; + } + + @Override + public int getNumGroups() { + return numGroups; + } + + @Override + public int getNumAtoms() { + return numAtoms; + } + + @Override + public float getRfree() { + return 1.0f; + + } + + @Override + public float getRwork() { + return 1.0f; + + } + + @Override + public float getResolution() { + return 1.0f; + } + + @Override + public String getTitle() { + return "NA"; + } + + @Override + public String[] getExperimentalMethods() { + return new String[] {"NA"}; + + } + + @Override + public String getDepositionDate() { + return "NA"; + + } + + @Override + public String 
getReleaseDate() { + return "NA"; + } + + @Override + public int[] getSecStructList() { + return new int[numGroups]; + } + +} diff --git a/mmtf-encoder/src/test/java/org/rcsb/mmtf/encoder/TestArrayConverters.java b/mmtf-encoder/src/test/java/org/rcsb/mmtf/encoder/TestArrayConverters.java new file mode 100644 index 0000000..6d1015d --- /dev/null +++ b/mmtf-encoder/src/test/java/org/rcsb/mmtf/encoder/TestArrayConverters.java @@ -0,0 +1,135 @@ +package org.rcsb.mmtf.encoder; + +import static org.junit.Assert.assertArrayEquals; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.List; + +import org.junit.Test; + +public class TestArrayConverters { + + + /** + * Test the decoding of the chain list + */ + @Test + public final void testSetChain() { + String[] stringList = {"A","ABC","abc","abcd"}; + byte[] testByteArray = {'A','\0','\0','\0', + 'A','B','C','\0', + 'a','b','c','\0', + 'a','b','c','d'}; + byte[] byteArray = ArrayConverters.encodeChainList(stringList); + assertArrayEquals(testByteArray, byteArray); + } + + + /** + * Test the conversion of the integer array to a float + */ + @Test + public final void testConvertFloatToInt() { + float[] floatArray = {10.001f,100.203f,124.542f}; + int[] testIntArray = {10001,100203,124542}; + float multiplier = 1000.0f; + int[] intArray = ArrayConverters.convertFloatsToInts(floatArray, multiplier); + assertArrayEquals(testIntArray, intArray); + } + + + /** + * Test the conversion of byte arrays to one byte integer arrays + * @throws IOException + */ + @Test + public final void integersToOneByteTest() throws IOException { + int[] intArray = {12,123,24}; + byte[] testByteArray = {(byte) 12,(byte) 123, (byte) 24}; + byte[] byteArray = ArrayConverters.convertIntegersToBytes(intArray); + assertArrayEquals(testByteArray, byteArray); + } + + /** + * Test the conversion of byte arrays to two byte integer arrays + * @throws IOException + */ + @Test + public final void integersToTwoByteTest() throws IOException 
{ + int[] intArray = {1000,1002,546}; + byte[] testByteArray = getByteArray(intArray,2); + byte[] byteArray = ArrayConverters.convertIntegersToTwoBytes(intArray); + assertArrayEquals(testByteArray, byteArray); + } + + /** + * Test the conversion of byte arrays to four byte integer arrays + * @throws IOException + */ + @Test + public final void integersToFourByteTest() throws IOException { + int[] intArray = {32403,11200,100090}; + byte[] testByteArray = getByteArray(intArray,4); + byte[] byteArray = ArrayConverters.convertIntegersToFourByte(intArray); + assertArrayEquals(testByteArray, byteArray); + } + + + /** + * Test the conversion of integer arrays to char arrays. + */ + @Test + public final void splitArraysTest() { + // Initialise the two input arrays + int[] testTwoByteIntArray = {1,2,5,4,50,0}; + int[] testFourByteIntArray = {10002,4,1303040,2}; + // The expected output + int[] combinedArray = {10002,1,2,5,4,1303040,50,0}; + List outVal = ArrayConverters.splitIntegers(combinedArray); + assertArrayEquals(testFourByteIntArray, outVal.get(0)); + assertArrayEquals(testTwoByteIntArray, outVal.get(1)); + } + + + /** + * Test the conversion of integer arrays to char arrays. + */ + @Test + public final void charToIntTest() { + int[] testIntArray = {66,63,67}; + char[] charArray= {'B','?','C'}; + int[] intArray = ArrayConverters.convertCharToIntegers(charArray); + assertArrayEquals(testIntArray, intArray); + } + + /** + * Utiliy function to get a byte array. I don't really like this but at least + * it's an orthogonal approach. 
+ * @param inArray the input int array + * @param numBytes the number of bytes per integer + * @return the output byte array + */ + private byte[] getByteArray(int[] inArray, int numBytes) { + byte[] outBytes = new byte[inArray.length*numBytes]; + for(int i=0; i - 4.0.0 - - org.rcsb - mmtf - 0.0.1-alpha4-SNAPSHOT - - mmtf-update - jar - - mmtf-update - http://maven.apache.org - - - UTF-8 - - - - - org.rcsb - mmtf-api - 0.0.1-alpha4-SNAPSHOT - - - org.rcsb - mmtf-encoder - 0.0.1-alpha4-SNAPSHOT - - - org.rcsb - mmtf-common - 0.0.1-alpha4-SNAPSHOT - - - junit - junit - 4.11 - - - org.biojava - biojava-structure - 5.0.0-alpha3 - - - org.apache.spark - spark-core_2.11 - 1.6.0 - - - - - - org.apache.maven.plugins - maven-jar-plugin - - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - true - - - - - org.apache.maven.plugins - maven-shade-plugin - - - package - - shade - - - true - allinone - - - *:* - - - - - reference.conf - - - - akka.Main - - - - - - - - - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - - - - uber-${project.artifactId}-${project.version} - - - - - - - - - maven-javadoc-plugin - 2.10.1 - - true - true - true - 1.8 - false - true - - - - org.codehaus.mojo - findbugs-maven-plugin - 3.0.3 - - - - - - diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/mappers/BiojavaStructureToBiojavaGroups.java b/mmtf-update/src/main/java/org/rcsb/mmtf/mappers/BiojavaStructureToBiojavaGroups.java deleted file mode 100644 index c3c37c7..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/mappers/BiojavaStructureToBiojavaGroups.java +++ /dev/null @@ -1,40 +0,0 @@ -package org.rcsb.mmtf.mappers; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.spark.api.java.function.PairFlatMapFunction; -import org.biojava.nbio.structure.Chain; -import org.biojava.nbio.structure.Group; -import org.biojava.nbio.structure.Structure; - -import scala.Tuple2; - -/** - * Extracts all of the groups from a given Biojava structure - 
* @author Anthony Bradley - * - */ -public class BiojavaStructureToBiojavaGroups implements PairFlatMapFunction, String, Group>{ - - private static final long serialVersionUID = 9211500299985679809L; - - @Override - public Iterable> call(Tuple2 t) throws Exception { - // The list to return - List> outList = new ArrayList>(); - for(int modelNr=0; modelNr myGroups = c.getAtomGroups(); - // Now loop through these groups and add them to the outputlist - for(Group g: myGroups){ - // Now generate the unique id - String uniqId = modelNr+"_"+c.getChainID()+"_"+g.getResidueNumber(); - outList.add(new Tuple2(uniqId, g)); - } - } - } - return outList; - } -} \ No newline at end of file diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/mappers/ByteArrayToBioJavaStructMapper.java b/mmtf-update/src/main/java/org/rcsb/mmtf/mappers/ByteArrayToBioJavaStructMapper.java deleted file mode 100644 index 9eb3393..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/mappers/ByteArrayToBioJavaStructMapper.java +++ /dev/null @@ -1,24 +0,0 @@ -package org.rcsb.mmtf.mappers; - -import org.apache.spark.api.java.function.PairFunction; -import org.biojava.nbio.structure.Structure; - -import scala.Tuple2; - -/** - * Maps a String byte[] of a message pack to a String Structure of the biojava structure - * @author Anthony Bradley - * - */ -public class ByteArrayToBioJavaStructMapper implements PairFunction,String, Structure> { - - private static final long serialVersionUID = -1671280971380509379L; - MapperUtils mapperUtils = new MapperUtils(); - - - @Override - public Tuple2 call(Tuple2 t) throws Exception { - // Now return this - return new Tuple2(t._1, mapperUtils.byteArrToBiojavaStruct(t._1, t._2)); - } -} diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/mappers/ByteWriteToByteArr.java b/mmtf-update/src/main/java/org/rcsb/mmtf/mappers/ByteWriteToByteArr.java deleted file mode 100644 index 0aadfc1..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/mappers/ByteWriteToByteArr.java +++ 
/dev/null @@ -1,25 +0,0 @@ -package org.rcsb.mmtf.mappers; - -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.Text; -import org.apache.spark.api.java.function.PairFunction; - -import scala.Tuple2; - -/** - * Maps the input of a Hadoop sequence file (Text/Bytes Writeable) to a String, byte[] - * @author Anthony Bradley - * - */ -public class ByteWriteToByteArr implements PairFunction,String, byte[]> { - - - private static final long serialVersionUID = 1466772536507675533L; - - @Override - public Tuple2 call(Tuple2 t) throws Exception { - // Simply return the byte array - return new Tuple2(t._1.toString(), t._2.copyBytes()); - } - -} diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/mappers/DataStructToByteArrs.java b/mmtf-update/src/main/java/org/rcsb/mmtf/mappers/DataStructToByteArrs.java deleted file mode 100644 index 62b04c1..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/mappers/DataStructToByteArrs.java +++ /dev/null @@ -1,51 +0,0 @@ -package org.rcsb.mmtf.mappers; - -import java.io.IOException; -import java.lang.reflect.InvocationTargetException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.spark.api.java.function.PairFlatMapFunction; -import org.rcsb.mmtf.biojavaencoder.EncoderUtils; -import org.rcsb.mmtf.biojavaencoder.ParseFromBiojava; -import org.rcsb.mmtf.dataholders.BioDataStruct; -import org.rcsb.mmtf.dataholders.CalphaDistBean; -import org.rcsb.mmtf.dataholders.HeaderBean; - -import scala.Tuple2; - -/** - * A class to generate the three (or more) byte arrays from the data structure parsed by the MMCIF. - * Returns three byte arrays per PDB id. 
- * @author Anthony Bradley - * - */ -public class DataStructToByteArrs implements PairFlatMapFunction, String, byte[]>{ - - private static final long serialVersionUID = 2066093446043635571L; - - @Override - public Iterable> call(Tuple2 t) throws IOException, IllegalAccessException, InvocationTargetException { - // First generate the list to return - List> outList = new ArrayList>(); - EncoderUtils cm = new EncoderUtils(); - ParseFromBiojava cbs = t._2; - String pdbCode = t._1; - // Now get the header too - HeaderBean headerData = cbs.getHeaderStruct(); - BioDataStruct thisBS = cbs.getBioStruct(); - CalphaDistBean calphaDistStruct = cm.compCAlpha(cbs.getCalphaStruct(), cbs.getHeaderStruct()); - // NOW JUST WRITE THE KEY VALUE PAIRS HERE - byte[] totBytes = cm.getMessagePack(cm.compressMainData(thisBS, headerData)); - byte[] headerBytes = cm.getMessagePack(headerData); - byte[] calphaBytes = cm.getMessagePack(calphaDistStruct); - // Add the total data - outList.add(new Tuple2(pdbCode+"_total", totBytes)); - // Add the header - outList.add(new Tuple2(pdbCode+"_header", headerBytes)); - // Add the calpha - outList.add(new Tuple2(pdbCode+"_calpha", calphaBytes)); - return outList; - - } -} \ No newline at end of file diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/mappers/GroupToSDF.java b/mmtf-update/src/main/java/org/rcsb/mmtf/mappers/GroupToSDF.java deleted file mode 100644 index 596f38b..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/mappers/GroupToSDF.java +++ /dev/null @@ -1,24 +0,0 @@ -package org.rcsb.mmtf.mappers; - - -import org.apache.spark.api.java.function.PairFunction; -import org.biojava.nbio.structure.Group; - - -import scala.Tuple2; - -public class GroupToSDF implements PairFunction,String, String> { - - /** - * - */ - private static final long serialVersionUID = 1L; - - @Override - public Tuple2 call(Tuple2 t) throws Exception { - // Now just return the group as the SDF - return new Tuple2(t._1, t._2.toSDF()); - } - -} - diff --git 
a/mmtf-update/src/main/java/org/rcsb/mmtf/mappers/MapperUtils.java b/mmtf-update/src/main/java/org/rcsb/mmtf/mappers/MapperUtils.java deleted file mode 100644 index b280313..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/mappers/MapperUtils.java +++ /dev/null @@ -1,66 +0,0 @@ -package org.rcsb.mmtf.mappers; - -import java.io.Serializable; -import java.util.List; - -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.Text; -import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.biojava.nbio.structure.Structure; -import org.biojava.nbio.structure.StructureImpl; -import org.biojava.nbio.structure.io.mmtf.BioJavaStructureDecoder; -import org.rcsb.mmtf.biojavaencoder.EncoderUtils; -import org.rcsb.mmtf.decoder.DecodeStructure; -import org.rcsb.mmtf.decoder.ParsingParams; - - -/** - * A class to preserve the log if the functions in mappers. - * Mappers should not contain logic - as they are hard to test. - * @author Anthony Bradley - * - */ -public class MapperUtils implements Serializable{ - - private static final long serialVersionUID = -4717807367698811030L; - - /** - * Converts a byte array of the messagepack (mmtf) to a Biojava structure. - * @param pdbCodePlus The pdb code is the first four characters. Additional characters can be used. - * @param inputByteArr The message pack bytre array to be decoded. 
- * @return - */ - public Structure byteArrToBiojavaStruct(String pdbCodePlus, byte[] inputByteArr) { - BioJavaStructureDecoder bjs = new BioJavaStructureDecoder(); - Structure newStruct; - ParsingParams pp = new ParsingParams(); - try{ - DecodeStructure ds = new DecodeStructure(inputByteArr); - ds.getStructFromByteArray(bjs, pp); - newStruct = bjs.getStructure(); - newStruct.setPDBCode(pdbCodePlus.substring(0,4));} - catch(Exception e){ - System.out.println(e); - System.out.println(pdbCodePlus); - Structure thisStruct = new StructureImpl(); - return thisStruct; - } - return newStruct; - } - - /** - * PDB RDD gnerateor. Converts a list of pdb ids to a writeable RDD - * @param sparkContext - * @return - */ - public JavaPairRDD generateRDD(JavaSparkContext sparkContext, List inputList, String inputUrl) { - // Set up Biojava appropriateyl - EncoderUtils encoderUtils = new EncoderUtils(); - encoderUtils.setUpBioJava(inputUrl); - return sparkContext.parallelize(inputList) - .mapToPair(new PdbIdToDataStruct()) - .flatMapToPair(new DataStructToByteArrs()) - .mapToPair(new StringByteToTextByteWriter()); - } -} diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/mappers/PdbIdToDataStruct.java b/mmtf-update/src/main/java/org/rcsb/mmtf/mappers/PdbIdToDataStruct.java deleted file mode 100644 index 44dbf3e..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/mappers/PdbIdToDataStruct.java +++ /dev/null @@ -1,40 +0,0 @@ -package org.rcsb.mmtf.mappers; - - -import java.util.HashMap; -import java.util.Map; -import org.apache.spark.api.java.function.PairFunction; -import org.rcsb.mmtf.biojavaencoder.ParseFromBiojava; -import org.rcsb.mmtf.dataholders.PDBGroup; - -import scala.Tuple2; - -/** - * Generate the internal data structure (using biojava) from a PDB code. 
- * @author Anthony Bradley - * - */ -public class PdbIdToDataStruct implements PairFunction{ - - private static final long serialVersionUID = 786599975302506694L; - - @Override - public Tuple2 call(String t) throws Exception { - ParseFromBiojava cbs = new ParseFromBiojava(); - Map totMap = new HashMap(); - try{ - cbs.createFromJavaStruct(t, totMap); - } - catch(Exception e){ - // Just return the object - System.out.println(e+" :: "+t); - System.out.println(e.getMessage()); - return new Tuple2(t,cbs); - } - // If it doesn't fail also return the object - return new Tuple2(t,cbs); - } - - -} - diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/mappers/StringByteToTextByteWriter.java b/mmtf-update/src/main/java/org/rcsb/mmtf/mappers/StringByteToTextByteWriter.java deleted file mode 100644 index 7482863..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/mappers/StringByteToTextByteWriter.java +++ /dev/null @@ -1,30 +0,0 @@ -package org.rcsb.mmtf.mappers; - -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.Text; -import org.apache.spark.api.java.function.PairFunction; - -import scala.Tuple2; - -/** - * Converts a tuple of string and byte array, to a Text and Bytes writeable. - * This is required for writing hadoop sequence files of data in this format. 
- * @author Anthony Bradley - * - */ -public class StringByteToTextByteWriter implements PairFunction, Text, BytesWritable>{ - - private static final long serialVersionUID = 8149053011560186912L; - - @Override - public Tuple2 call(Tuple2 t) throws Exception { - // TODO Auto-generated method stub - Text outT = new Text(); - outT.set(t._1); - BytesWritable outBytes = new BytesWritable(); - byte[] theseBytes = t._2; - outBytes.set(theseBytes, 0, theseBytes.length); - return new Tuple2(outT,outBytes); - } - -} diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/postupdatetests/CheckHadoopFile.java b/mmtf-update/src/main/java/org/rcsb/mmtf/postupdatetests/CheckHadoopFile.java deleted file mode 100644 index 9e04810..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/postupdatetests/CheckHadoopFile.java +++ /dev/null @@ -1,61 +0,0 @@ -package org.rcsb.mmtf.postupdatetests; - -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.io.Serializable; -import java.util.List; - -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.Text; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.biojava.nbio.structure.Structure; -import org.rcsb.mmtf.mappers.ByteArrayToBioJavaStructMapper; -import org.rcsb.mmtf.mappers.ByteWriteToByteArr; - - -public class CheckHadoopFile implements Serializable { - - - private static final long serialVersionUID = 3037567648753603114L; - - /** - * A function to read a hadoop sequence file to Biojava structures. - * 1) The input path of the available data - * 2) The output path indicating the number of PDB ids and a list of the ids. 
- * @param args - * @throws IOException - */ - public static void main(String[] args ) throws IOException - { - // The input path for the data is the fist - String inPath = args[0]; - String pdbIdList = args[1]; - - // This is the default 2 line structure for Spark applications - SparkConf conf = new SparkConf().setMaster("local[*]") - .setAppName(CheckHadoopFile.class.getSimpleName()); - // Set the config for the spark context - JavaSparkContext sc = new JavaSparkContext(conf); - JavaPairRDD jprdd = sc - .sequenceFile(inPath, Text.class, BytesWritable.class, 24) - .mapToPair(new ByteWriteToByteArr()) - // Now get the structure - .mapToPair(new ByteArrayToBioJavaStructMapper()); - JavaRDD values = jprdd.keys(); - List outValues = values.collect(); - // Write the PDB files to a file - BufferedWriter writer = new BufferedWriter(new FileWriter(new File(pdbIdList))); - writer.write(outValues.size()+"\n"); - for (String pdbId : outValues) { - writer.write(pdbId+","); - } - writer.close(); - // Now move the folder - sc.close(); - } -} \ No newline at end of file diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/postupdatetests/CheckServer.java b/mmtf-update/src/main/java/org/rcsb/mmtf/postupdatetests/CheckServer.java deleted file mode 100644 index 57ffb7f..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/postupdatetests/CheckServer.java +++ /dev/null @@ -1,94 +0,0 @@ -package org.rcsb.mmtf.postupdatetests; - -import java.io.File; -import java.io.IOException; - -import org.biojava.nbio.structure.Structure; -import org.biojava.nbio.structure.StructureException; -import org.biojava.nbio.structure.StructureIO; -import org.biojava.nbio.structure.align.util.AtomCache; -import org.biojava.nbio.structure.io.FileParsingParameters; -import org.biojava.nbio.structure.io.mmtf.ParseUsingBioJava; -import org.rcsb.mmtf.biojavaencoder.EncoderUtils; -import org.rcsb.mmtf.decoder.ParsingParams; -import org.rcsb.mmtf.examples.HandleIO; -import 
org.rcsb.mmtf.testutils.CheckOnBiojava; -import org.rcsb.mmtf.testutils.CheckOnRawApi; -import org.rcsb.mmtf.update.IntegrationTestUtils; - -public class CheckServer { - - private HandleIO handleIo; - private ParseUsingBioJava parseUsingBioJava; - private FileParsingParameters params; - private CheckOnBiojava checkEquiv; - - /** - * Java class to check if data can be parsed from the server. - * 1) Arg one is the server - * @param args - * @throws IOException - */ - public static void main(String[] args) throws IOException { - String baseUrl = args[0]; - String outPutFile = args[1]; - CheckServer checkServer = new CheckServer(); - checkServer.basicParsingTest(baseUrl); - File f = new File(outPutFile); - f.getParentFile().mkdirs(); - f.createNewFile(); - } - - - /** - * Basic test to go through a series of PDBs and make sure they are the same. - * Should be run at the end of the weekly update to ensure data integrity. - * @throws IOException - */ - public void basicParsingTest(String baseUrl) throws IOException { - // Get the class to parse and get data - parseUsingBioJava = new ParseUsingBioJava(); - handleIo = new HandleIO(); - checkEquiv = new CheckOnBiojava(); - EncoderUtils encoderUtils = new EncoderUtils(); - AtomCache cache = encoderUtils.setUpBioJava(); - params = cache.getFileParsingParams(); - // Test it for a series of structures - for (String pdbId : IntegrationTestUtils.TEST_CASES) { - testParsing(pdbId, HandleIO.BASE_URL); - } - } - - /** - * This tests whether the data on the website can be decoded to produce the same - * data as parsing the mmcif data. 
- * @param inputPdb - * @throws IOException - */ - private void testParsing(String inputPdb, String inputUrl) throws IOException { - System.out.println("TESTING: "+inputPdb); - byte[] inputByteArr = handleIo.getFromUrl(inputPdb, inputUrl); - ParsingParams parsingParms = new ParsingParams(); - parsingParms.setParseInternal(params.isUseInternalChainId()); - Structure mmtfStruct = parseUsingBioJava.getBiojavaStruct(inputByteArr, parsingParms); - // Now parse from the MMCIF file - Structure mmcifStruct; - try { - mmcifStruct = StructureIO.getStructure(inputPdb); - } catch (IOException e) { - // Error accessing mmcif - System.err.println("Error accessing MMCIF"); - e.printStackTrace(); - throw new RuntimeException(); - } catch (StructureException e) { - System.err.println("Error parsing/consuming MMCIF"); - e.printStackTrace(); - throw new RuntimeException(); - } - checkEquiv.checkIfStructuresSame(mmtfStruct, mmcifStruct); - // Now do the checks on the Raw data - CheckOnRawApi checkRaw = new CheckOnRawApi(inputByteArr); - checkRaw.checkRawDataConsistency(mmcifStruct, params); - - } -} diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/preupdatetests/DataConsistencyCheck.java b/mmtf-update/src/main/java/org/rcsb/mmtf/preupdatetests/DataConsistencyCheck.java deleted file mode 100644 index 0779a21..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/preupdatetests/DataConsistencyCheck.java +++ /dev/null @@ -1,68 +0,0 @@ -package org.rcsb.mmtf.preupdatetests; - -import java.io.File; -import java.io.IOException; -import java.lang.reflect.InvocationTargetException; -import java.util.ArrayList; -import java.util.List; - -import org.biojava.nbio.structure.StructureException; -import org.biojava.nbio.structure.align.util.AtomCache; -import org.biojava.nbio.structure.io.FileParsingParameters; -import org.rcsb.mmtf.biojavaencoder.EncoderUtils; -import org.rcsb.mmtf.update.ServerUtils; -import org.rcsb.mmtf.update.TestingUtils; -import org.rcsb.mmtf.update.WeeklyUpdateUtils; - 
-/** - * Is the data to be added on the FTP site available, parseable and consistent when roundtripped. - * @author Anthony Bradley - * - */ -public class DataConsistencyCheck { - - /** - * 1) Argument one is the FTP server for the update lists - * 2) Argumnet two is the server for the mmcif.gz files - * 3) Argument three is the file to write at the end - * 4) The URL for the CCD data - * 5+ The pdbs to ignore - * @param args - * @throws IllegalAccessException - * @throws InvocationTargetException - * @throws IOException - * @throws StructureException - */ - public static void main(String[] args) throws IllegalAccessException, InvocationTargetException, IOException, StructureException { - - - // Set up the atom cache etc - EncoderUtils encoderUtils = new EncoderUtils(); - ServerUtils serverUtils = new ServerUtils(); - AtomCache cache = encoderUtils.setUpBioJava(args[3]); - // Now get the list of PDB ids to ignore - List ignoreList = new ArrayList<>(); - for (int i=4; i listToAdd = weeklyUpdate.getAddedList(); - String[] urlList = new String[listToAdd.size()]; - for (int i =0; i< listToAdd.size(); i++) { - urlList[i] = args[1] + serverUtils.generateDataExtension(listToAdd.get(i)); - } - String outPutFile = args[2]; - TestingUtils testingUtils = new TestingUtils(); - testingUtils.testAll(urlList, params, cache); - File f = new File(outPutFile); - f.getParentFile().mkdirs(); - f.createNewFile(); - } - - -} diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/preupdatetests/FtpServerTest.java b/mmtf-update/src/main/java/org/rcsb/mmtf/preupdatetests/FtpServerTest.java deleted file mode 100644 index 7bf2450..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/preupdatetests/FtpServerTest.java +++ /dev/null @@ -1,66 +0,0 @@ -package org.rcsb.mmtf.preupdatetests; - -import static org.junit.Assert.*; - -import java.io.File; -import java.io.IOException; - -import org.apache.commons.lang.StringUtils; -import org.rcsb.mmtf.update.PullFtpData; -import 
org.rcsb.mmtf.update.ServerUtils; - - -public class FtpServerTest { - - /** - * Can we get to the ftp site and download the data - * First argument is the URL - * Second argument is the file to write out - * @throws IOException - */ - public static void main(String[] args) throws IOException { - String url = args[0]; - String outPutFile = args[1]; - ServerUtils serverUtils = new ServerUtils(); - FtpServerTest ftpServerTest = new FtpServerTest(); - PullFtpData pullFtpData = new PullFtpData(url); - // Get the host and port - // First check we can ping the server - assertTrue(serverUtils.pingServer(url)); - // Check this is not empty - String[] currentEntries; - currentEntries = pullFtpData.getAllCurrentEntries(); - ftpServerTest.checkAllPdbIds(currentEntries); - assertNotEquals(currentEntries, null); - assertNotEquals(currentEntries.length, 0); - assertNotEquals(currentEntries.length, 1); - // Now check the others aren't null - currentEntries = pullFtpData.getAllCurrentModels(); - ftpServerTest.checkAllPdbIds(currentEntries); - assertNotEquals(currentEntries, null); - assertNotEquals(currentEntries.length, 0); - assertNotEquals(currentEntries.length, 1); - currentEntries = pullFtpData.getAllObsoleteEntries(); - ftpServerTest.checkAllPdbIds(currentEntries); - assertNotEquals(currentEntries, null); - assertNotEquals(currentEntries.length, 0); - assertNotEquals(currentEntries.length, 1); - // Check there is something to be updated - assertNotEquals(currentEntries, null); - currentEntries = pullFtpData.getAdded(); - assertNotEquals(currentEntries.length, 0); - File f = new File(outPutFile); - f.getParentFile().mkdirs(); - f.createNewFile(); - } - - - private void checkAllPdbIds(String[] inputData) { - // Check they all equal 4 - for (String inputId : inputData) { - assertEquals(inputId.length(), 4); - assertTrue(StringUtils.isAlphanumeric(inputId)); - } - } - -} diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/preupdatetests/SandboxAccessTest.java 
b/mmtf-update/src/main/java/org/rcsb/mmtf/preupdatetests/SandboxAccessTest.java deleted file mode 100644 index 8283f49..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/preupdatetests/SandboxAccessTest.java +++ /dev/null @@ -1,22 +0,0 @@ -package org.rcsb.mmtf.preupdatetests; - -import static org.junit.Assert.*; - -import java.io.File; -import java.io.IOException; - -import org.rcsb.mmtf.update.ServerUtils; - - -public class SandboxAccessTest { - public static void main(String[] args) throws IOException { - String url = args[0]; - String outPutFile = args[1]; - ServerUtils serverUtils = new ServerUtils(); - // Assert that we can reach this server - assertTrue(serverUtils.pingServer(url)); - File f = new File(outPutFile); - f.getParentFile().mkdirs(); - f.createNewFile(); - } -} diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/testutils/CheckBonds.java b/mmtf-update/src/main/java/org/rcsb/mmtf/testutils/CheckBonds.java deleted file mode 100644 index e729b3d..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/testutils/CheckBonds.java +++ /dev/null @@ -1,316 +0,0 @@ -package org.rcsb.mmtf.testutils; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotEquals; -import static org.junit.Assert.assertTrue; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import org.biojava.nbio.structure.Atom; -import org.biojava.nbio.structure.Bond; -import org.biojava.nbio.structure.Calc; -import org.biojava.nbio.structure.Chain; -import org.biojava.nbio.structure.Group; -import org.biojava.nbio.structure.GroupType; -import org.biojava.nbio.structure.Structure; -import org.biojava.nbio.structure.StructureException; -import org.biojava.nbio.structure.StructureIO; -import org.biojava.nbio.structure.io.mmcif.model.ChemComp; -import org.biojava.nbio.structure.io.mmcif.model.ChemCompAtom; -import org.biojava.nbio.structure.io.mmcif.model.ChemCompBond; -import 
org.biojava.nbio.structure.rcsb.GetRepresentatives; -import org.rcsb.mmtf.biojavaencoder.EncoderUtils; - -public class CheckBonds { - - private StringBuilder stringBuilder; - private Structure structure; - - private static final double MAX_NUCLEOTIDE_BOND_LENGTH = 2.2; - private static final double MAX_PEPTIDE_BOND_LENGTH = 1.9; - - public void testAllConsistency() throws IOException, StructureException { - - // Set up biojava - EncoderUtils encoderUtils = new EncoderUtils(); - encoderUtils.setUpBioJava(); - for (String testCase : GetRepresentatives.getAll()) { - Structure structure = StructureIO.getStructure(testCase); - encoderUtils.fixMicroheterogenity(structure); - checkIfBondsExist(structure); - } - } - - /** - * Test to see if bonds exist for all atoms on a chain - * @return - */ - public String checkIfBondsExist(Structure inputStructure) { - // Set the structure as the input to the function - structure = inputStructure; - stringBuilder = new StringBuilder(); - // ONLY DO THE TEST FOR THE FIRST MODEL - for(int i=0;i<1;i++){ - List chainsOne = structure.getChains(i); - // Now loop over - for(int j=0; j groupsOne = chainOne.getAtomGroups(); - for(int k=0; k groupsOne, int k) { - // It has to be a nucleotide - if (!groupOne.getType().equals(GroupType.NUCLEOTIDE)){ - return; - } - // Find the P - Atom groupOneP = groupOne.getAtom("P"); - if (groupOneP==null) { - return; - } - // Find the neighhbouring O3P - // Now find the neighbouring Ns - if(k>0){ - Atom otherO3P = groupsOne.get(k-1).getAtom("O3'"); - if(otherO3P!=null){ - checkNucleotideBond(groupOneP, otherO3P); - } - - } - if(k MAX_NUCLEOTIDE_BOND_LENGTH) { - // It shouldn't have a bond - checkBonded(groupOneP, otherO3P, 1, false); - } - else { - // It should have a bond - checkBonded(groupOneP, otherO3P, 1, true); - } - - } - - /** - * Ensure a group that is part of a polypeptide has a peptide bond to it's next amino acid (if the group and atom exists). 
- * @param groupOne - * @param k - * @param groupsOne - */ - private void checkPeptideBondsAreMade(Group groupOne, List groupsOne, int k) { - // It has to be an amino acid - if (!groupOne.getType().equals(GroupType.AMINOACID)){ - return; - } - // Find the calpha - Atom groupOneCalpha = groupOne.getAtom("CA"); - // An amino acid without a calpha - skip - if (groupOneCalpha==null) { - return; - } - // Now find the neighbouring Ns - if(k>0){ - Atom otherN = groupsOne.get(k-1).getAtom("N"); - if(otherN!=null){ - checkPeptideBond(groupOneCalpha, otherN); - } - - } - if(k MAX_PEPTIDE_BOND_LENGTH) { - // It shouldn't have a bond - checkBonded(groupOneCalpha, otherN, 1, false); - } - else { - // It should have a bond - checkBonded(groupOneCalpha, otherN, 1, true); - } - } - - /** - * Check that all atoms in a given group have bonds. - * @param groupOne - */ - private void checkAllAtomsHaveBonds(Group groupOne) { - List atomsOne = new ArrayList<>(groupOne.getAtoms()); - for(Group altLocOne: groupOne.getAltLocs()){ - for(Atom atomAltLocOne: altLocOne.getAtoms()){ - atomsOne.add(atomAltLocOne); - } - } - // If it's just a single atom - if (groupOne.getAtoms().size()==1){ - return; - } - // If it's a water - if (groupOne.isWater()) { - return; - } - // If the sum of the occupancy is less than or equal to 1.00 - float occ = (float) 0.0; - for (Atom a: groupOne.getAtoms()) { - occ += a.getOccupancy(); - } - if (occ <= 1.0) { - return; - } - - // Now let's check to see if all atoms have bonds... 
- for(int l=0;l atomNameList = new ArrayList<>(); - for(ChemCompAtom thisAtom: groupOne.getChemComp().getAtoms()) { - atomNameList.add(thisAtom.getAtom_id()); - } - if (atomsOne.get(l).getBonds()==null){ - // Check if the ATOM is not in the CCD - if(!atomNameList.contains(atomsOne.get(l).getName())){ - stringBuilder.append(structure.getPDBCode()+": ### ATOM NOT IN CCD AND HAS NO BONDS ->> "+atomsOne.get(l).toPDB()+"\n"); - continue; - } - } - // Check that in all cases the other groups are found in this list... - if(!atomNameList.contains(atomsOne.get(l).getName())){ - stringBuilder.append(structure.getPDBCode()+": ### ATOM NOT IN CCD BUT HAS "+atomsOne.get(l).getBonds().size()+" BONDS ->> "+atomsOne.get(l).toPDB()+"\n"); - } - // Bonds should not be null (but they currently are - have this here to remind us so... - if (atomsOne.get(l).getBonds()==null){ - stringBuilder.append(structure.getPDBCode()+": ATOM IN CCD BUT HAS NULL BONDS ->> "+atomsOne.get(l).toPDB()+"\n"); - } - else{ - // And should not be empty lists - assertNotEquals(atomsOne.get(l).getBonds().size(), 0); - } - } - } - - /** - * Function to ensure that all bonds that can be made are being made. - * @param structure - */ - public void checkIfIntraGroupBondsAreCreated(Group inputGroup) { - - List allGroups = new ArrayList<>(inputGroup.getAltLocs()); - allGroups.add(inputGroup); - - for (Group group : allGroups) { - // Get the CCD - ChemComp inputChemComp = group.getChemComp(); - // Get the bonds - for (ChemCompBond inputBond : inputChemComp.getBonds()) { - String atomNameOne = inputBond.getAtom_id_1(); - String atomNameTwo = inputBond.getAtom_id_2(); - Atom atomOne = group.getAtom(atomNameOne); - Atom atomTwo = group.getAtom(atomNameTwo); - if (atomOne != null && atomTwo != null) { - // Ensure that if both atoms exist - the bond exists. 
- checkBonded(atomOne, atomTwo, inputBond.getNumericalBondOrder(), true); - } - } - } - - } - -/** - * Check that two atoms are bonded - * @param atomOne - * @param atomTwo - */ -private void checkBonded(Atom atomOne, Atom atomTwo, Integer bondOrder, boolean isBonded) { - List otherAtoms = new ArrayList<>(); - List bondInds = new ArrayList<>(); - for (Bond groupBond : atomOne.getBonds()) { - otherAtoms.add(groupBond.getOther(atomOne)); - bondInds.add(groupBond.getBondOrder()); - } - // Assert that it's bonded to the other - assertTrue(otherAtoms.contains(atomTwo)==isBonded); - // And that's it not bonded to itself - assertTrue(!otherAtoms.contains(atomOne)); - if (isBonded) { - // Assert that it's only bonded once to the other - if(otherAtoms.indexOf(atomTwo)!=otherAtoms.lastIndexOf(atomTwo)){ - System.out.println("Bonded more than once: "+atomOne.getGroup().getChain().getStructure().getPDBCode()); - System.out.println(atomOne); - System.out.println(atomTwo); - } - assertEquals(otherAtoms.indexOf(atomTwo), otherAtoms.lastIndexOf(atomTwo)); - // Assert that the bond order is correct - if(bondOrder != bondInds.get(otherAtoms.indexOf(atomTwo))){ - System.out.println("Wrong bond order: "+atomOne.getGroup().getChain().getStructure().getPDBCode()); - System.out.println(atomOne); - System.out.println(atomTwo); - } - assertEquals(bondOrder, bondInds.get(otherAtoms.indexOf(atomTwo))); - } -} -} diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/testutils/CheckOnBiojava.java b/mmtf-update/src/main/java/org/rcsb/mmtf/testutils/CheckOnBiojava.java deleted file mode 100644 index eb79b34..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/testutils/CheckOnBiojava.java +++ /dev/null @@ -1,278 +0,0 @@ -package org.rcsb.mmtf.testutils; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.assertArrayEquals; - -import java.util.ArrayList; -import java.util.Comparator; -import java.util.List; -import 
java.util.Map; -import java.util.Set; -import java.util.TreeSet; -import java.util.Map.Entry; - -import org.biojava.nbio.structure.Atom; -import org.biojava.nbio.structure.Chain; -import org.biojava.nbio.structure.EntityInfo; -import org.biojava.nbio.structure.ExperimentalTechnique; -import org.biojava.nbio.structure.Group; -import org.biojava.nbio.structure.PDBHeader; -import org.biojava.nbio.structure.Structure; -import org.biojava.nbio.structure.quaternary.BioAssemblyInfo; -import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation; - -public class CheckOnBiojava { - - public CheckOnBiojava() { - } - - /** - * Broad test of atom similarity - * @param structOne - * @param structTwo - * @return - */ - private boolean checkIfAtomsSame(Structure structOne, Structure structTwo) { - // First check the bioassemblies - checkIfBioassemblySame(structOne, structTwo); - // Now check the pdb header - checkIfHederSame(structOne, structTwo); - // Now check the entity information - checkIfEntitiesSame(structOne, structTwo); - int numModels = structOne.nrModels(); - if(numModels!=structTwo.nrModels()){ - System.out.println("Error - diff number models: "+structOne.getPDBCode()); - return false; - } - for(int i=0;i chainsOne = structOne.getChains(i); - List chainsTwo = structTwo.getChains(i); - - if(chainsOne.size()!=chainsTwo.size()){ - System.out.println("Error - diff number chains: "+structOne.getPDBCode()); - return false; - } - // Now loop over - for(int j=0; j groupsOne = chainOne.getAtomGroups(); - List groupsTwo = chainTwo.getAtomGroups(); - if(groupsOne.size()!=groupsTwo.size()){ - System.out.println("Error - diff number groups: "+structOne.getPDBCode()); - return false; - } - for(int k=0; k atomsOne = new ArrayList<>(groupOne.getAtoms()); - List atomsTwo = new ArrayList<>(groupTwo.getAtoms()); - if(groupOne.getAltLocs().size()!=0){ - if(groupTwo.getAltLocs().size()!=groupOne.getAltLocs().size()){ - System.out.println("Error - diff number alt locs: 
"+structOne.getPDBCode()); - System.out.println(groupOne.getAltLocs().size()); - System.out.println(groupTwo.getAltLocs().size()); - - } - // Now go over the alt locs - for(Group altLocOne: groupOne.getAltLocs()){ - for(Atom atomAltLocOne: altLocOne.getAtoms()){ - atomsOne.add(atomAltLocOne); - } - } - for(Group altLocTwo: groupTwo.getAltLocs()){ - for(Atom atomAltLocTwo: altLocTwo.getAtoms()){ - atomsTwo.add(atomAltLocTwo); - } - } - } - if(atomsOne.size()!=atomsTwo.size()){ - System.out.println("Error - diff number atoms: "+structOne.getPDBCode()); - System.out.println(groupOne.getPDBName()+" vs "+groupTwo.getPDBName()); - System.out.println(atomsOne.size()+" vs "+atomsTwo.size()); - return false; - } - // Now sort the atoms - atomsOne.sort(new Comparator() { - - @Override - public int compare(Atom o1, Atom o2) { - // - if (o1.getPDBserial()() { - - @Override - public int compare(Atom o1, Atom o2) { - // - if (o1.getPDBserial() "+atomOne.toPDB()); - System.out.println("mmcif -> "+atomTwo.toPDB()); - return false; - } - - } - if(i==0){ - if(atomOne.getBonds()==null){ - if(atomTwo.getBonds()!=null){ - return false; - } - } - else if(atomTwo.getBonds()==null){ - return false; - } - else if(atomOne.getBonds().size()!=atomTwo.getBonds().size()){ - System.out.println("Error different number of bonds: "+structOne.getPDBCode()); - System.out.println(atomOne.getBonds().size()+" vs. "+atomTwo.getBonds().size()); - System.out.println(atomOne); - System.out.println(atomTwo); - - return false; - } - } - } - } - - } - } - return true; - - } - - /** - * Loop through entity info - and check that the fields set by MMTF are the same. 
- * @param structOne - * @param structTwo - */ - private void checkIfEntitiesSame(Structure structOne, Structure structTwo) { - List entityListOne = structOne.getEntityInfos(); - List entityListTwo = structTwo.getEntityInfos(); - assertEquals(entityListOne.size(), entityListTwo.size()); - for (int i=0; i bioassembliesOne = structOne.getPDBHeader().getBioAssemblies(); - Map bioassembliesTwo = structTwo.getPDBHeader().getBioAssemblies(); - assertEquals(bioassembliesOne.keySet(), bioassembliesTwo.keySet()); - for(Entry entry: bioassembliesOne.entrySet()){ - // Get the bioassembly info - BioAssemblyInfo valueOne = entry.getValue(); - BioAssemblyInfo valueTwo = bioassembliesTwo.get(entry.getKey()); - assertEquals(valueOne.getId(), valueTwo.getId()); - // Check there's the same number of transforms - assertEquals(valueOne.getTransforms().size(), valueTwo.getTransforms().size()); - // Build a map of chain id to matrix 4d - Set keySetOne = new TreeSet<>(); - Set keySetTwo = new TreeSet<>(); - Set valSetOne = new TreeSet<>(); - Set valSetTwo = new TreeSet<>(); - for(int i= 0; i< valueOne.getTransforms().size();i++){ - BiologicalAssemblyTransformation transformOne = valueOne.getTransforms().get(i); - BiologicalAssemblyTransformation transformTwo = valueTwo.getTransforms().get(i); - // Check these are the same - keySetOne.add(transformOne.getChainId()); - keySetTwo.add(transformTwo.getChainId()); - valSetOne.add(transformOne.getTransformationMatrix().toString()); - valSetTwo.add(transformTwo.getTransformationMatrix().toString()); - } - assertEquals(keySetOne, keySetTwo); - assertEquals(valSetOne, valSetTwo); - - } - } - - - - /** - * Check if all features between the two structures are the same - * @param biojavaStruct the input biojava structure parsed from the mmcif file - * @param structTwo the BioJava structure parsed from the MMTF file - */ - public void checkIfStructuresSame(Structure biojavaStruct, Structure structTwo){ - assertTrue(checkIfAtomsSame(biojavaStruct, 
structTwo)); - } -} diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/testutils/CheckOnRawApi.java b/mmtf-update/src/main/java/org/rcsb/mmtf/testutils/CheckOnRawApi.java deleted file mode 100644 index 1576c9a..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/testutils/CheckOnRawApi.java +++ /dev/null @@ -1,127 +0,0 @@ -package org.rcsb.mmtf.testutils; - -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotEquals; - -import java.util.ArrayList; -import java.util.List; - -import org.biojava.nbio.structure.Chain; -import org.biojava.nbio.structure.EntityInfo; -import org.biojava.nbio.structure.Group; -import org.biojava.nbio.structure.Structure; -import org.biojava.nbio.structure.io.FileParsingParameters; -import org.rcsb.mmtf.api.DataApiInterface; -import org.rcsb.mmtf.dataholders.Entity; -import org.rcsb.mmtf.decoder.SimpleDataApi; - -/** - * Class to test the raw API - * @author Anthony Bradley - * - */ -public class CheckOnRawApi { - DataApiInterface dataApi; - public CheckOnRawApi(byte[] inputData) { - dataApi = new SimpleDataApi(inputData); - } - - /** - * Check that required data is available the way we would expect. - * @param biojavaStruct The input structure (parsed from MMCIF) that can be used to compare. - * @param params The input file parsing parameters. 
- */ - public void checkRawDataConsistency(Structure biojavaStruct, FileParsingParameters params) { - // Series of tests on expected values from the raw API - assertNotEquals(dataApi.getMmtfProducer(), null); - assertNotEquals(dataApi.getMmtfVersion(), null); - checkIfSeqResInfoSame(biojavaStruct, params); - checkIfEntityInfoSame(biojavaStruct); - // Check other features in the data - } - - - /** - * Test to see if the roundtripped entity data is the same as is found in the MMCIF - */ - public void checkIfEntityInfoSame(Structure biojavaStruct) { - - // Fist check it's not null - assertNotEquals(dataApi.getEntityList(), null); - // Second check it's the same length - assertEquals(dataApi.getEntityList().length, biojavaStruct.getEntityInfos().size()); - List totChains = new ArrayList<>(); - for (int i=0; i < biojavaStruct.nrModels(); i++) { - totChains.addAll(biojavaStruct.getChains(i)); - } - // Now check it has the same information as BioJava - for(int i=0; i bioJavaChains = biojavaEntity.getChains(); - int[] mmtfList = mmtfEntity.getChainIndexList(); - assertEquals(mmtfList.length, bioJavaChains.size()); - int[] testList = new int[bioJavaChains.size()]; - for(int j=0; j thisChainSeqResList = new ArrayList<>(); - for(Group seqResGroup : currentChain.getSeqResGroups()){ - thisChainSeqResList.add(seqResGroup); - } - // Now go through and check the indices line up - for(int i = 0; i < currentChain.getAtomGroups().size(); i++){ - // Get the group - Group testGroup = currentChain.getAtomGroup(i); - int testGroupInd = thisChainSeqResList.indexOf(testGroup); - assertEquals(testGroupInd, decodedSeqResGroupList[groupCounter]); - groupCounter++; - } - chainCounter++; - } - } - // Otherwise we need to parse in a different - else{ - System.out.println("Using public facing chain ids -> seq res not tested"); - } - - } -} diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/update/BuildFirstDataSet.java b/mmtf-update/src/main/java/org/rcsb/mmtf/update/BuildFirstDataSet.java 
deleted file mode 100644 index 036a329..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/update/BuildFirstDataSet.java +++ /dev/null @@ -1,76 +0,0 @@ -package org.rcsb.mmtf.update; - -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; -import java.util.SortedSet; - -import org.apache.hadoop.io.BytesWritable; - -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapred.SequenceFileOutputFormat; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaSparkContext; - -import org.biojava.nbio.structure.StructureIO; -import org.biojava.nbio.structure.align.util.AtomCache; -import org.biojava.nbio.structure.io.FileParsingParameters; -import org.biojava.nbio.structure.io.LocalPDBDirectory.FetchBehavior; -import org.biojava.nbio.structure.io.mmcif.ChemCompGroupFactory; -import org.biojava.nbio.structure.io.mmcif.DownloadChemCompProvider; -import org.biojava.nbio.structure.rcsb.GetRepresentatives; -import org.rcsb.mmtf.mappers.DataStructToByteArrs; -import org.rcsb.mmtf.mappers.PdbIdToDataStruct; -import org.rcsb.mmtf.mappers.StringByteToTextByteWriter; - -public class BuildFirstDataSet { - - public static void main(String[] args ) - { - - // The arguments indicate where the PDB cache is AND what the version is. 
- if (args.length<2) { - System.err.println("NOT ENOUGH ARGUMENTS PROVIDED"); - throw new RuntimeException(); - } - - // This is the default 2 line structure for Spark applications - SparkConf conf = new SparkConf().setMaster("local[*]") - .setAppName(BuildFirstDataSet.class.getSimpleName()); - // Set the config - JavaSparkContext sc = new JavaSparkContext(conf); - - // A hack to make sure we're not downloading the whole pdb - Properties sysProps = System.getProperties(); - sysProps.setProperty("PDB_CACHE_DIR", args[0]); - sysProps.setProperty("PDB_DIR", args[0]); - AtomCache cache = new AtomCache(); - cache.setUseMmCif(true); - cache.setFetchBehavior(FetchBehavior.FETCH_FILES); - FileParsingParameters params = cache.getFileParsingParams(); - params.setCreateAtomBonds(true); - params.setAlignSeqRes(true); - params.setParseBioAssembly(true); - DownloadChemCompProvider dcc = new DownloadChemCompProvider(); - ChemCompGroupFactory.setChemCompProvider(dcc); - dcc.checkDoFirstInstall(); - dcc.setDownloadAll(true); - params.setUseInternalChainId(true); - cache.setFileParsingParams(params); - StructureIO.setAtomCache(cache); - // Get all the PDB IDs - SortedSet thisSet = GetRepresentatives.getAll(); - List pdbCodeList = new ArrayList(thisSet); - // Now read this list in - JavaPairRDD distData = - sc.parallelize(pdbCodeList) - .mapToPair(new PdbIdToDataStruct()) - .flatMapToPair(new DataStructToByteArrs()) - .mapToPair(new StringByteToTextByteWriter()); - // Now save this as a Hadoop sequence file - String uri = args[1]; - distData.saveAsHadoopFile(uri, Text.class, BytesWritable.class, SequenceFileOutputFormat.class, org.apache.hadoop.io.compress.BZip2Codec.class); - sc.close(); - } -} diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/update/BuildSDFFile.java b/mmtf-update/src/main/java/org/rcsb/mmtf/update/BuildSDFFile.java deleted file mode 100644 index 0279b74..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/update/BuildSDFFile.java +++ /dev/null @@ -1,62 +0,0 
@@ -package org.rcsb.mmtf.update; - -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.io.Serializable; -import java.util.List; - -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.Text; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.rcsb.mmtf.mappers.ByteArrayToBioJavaStructMapper; -import org.rcsb.mmtf.mappers.ByteWriteToByteArr; -import org.rcsb.mmtf.mappers.GroupToSDF; -import org.rcsb.mmtf.mappers.BiojavaStructureToBiojavaGroups; - -public class BuildSDFFile implements Serializable{ - - /** - * - */ - private static final long serialVersionUID = -3141464711627304724L; - - // private static final Logger logger = LoggerFactory.getLogger(SparkRead.class); - public static void main(String[] args ) throws IOException - { - String path = "/home/ubuntu/data/Total.hadoop.maindata.tested.bzip2"; - // This is the default 2 line structure for Spark applications - SparkConf conf = new SparkConf().setMaster("local[*]") - .setAppName(BuildSDFFile.class.getSimpleName()); - // Set the config - JavaSparkContext sc = new JavaSparkContext(conf); - // Time the proccess - long start = System.nanoTime(); - JavaPairRDD jprdd = sc - // Read the file - .sequenceFile(path, Text.class, BytesWritable.class, 12) - // Now get the structure - .mapToPair(new ByteWriteToByteArr()) - .mapToPair(new ByteArrayToBioJavaStructMapper()) - .flatMapToPair(new BiojavaStructureToBiojavaGroups()) -// .filter(new IsLigand()) - .mapToPair(new GroupToSDF()); - - File sdfFile = new File("/home/ubuntu/data/Total.sdf"); - BufferedWriter writer = new BufferedWriter(new FileWriter(sdfFile)); - - List output = jprdd.values().collect(); - for(String sdFile: output){ - writer.write(sdFile); - writer.write("$$$$\n"); - writer.flush(); - } - writer.close(); - sc.stop(); - sc.close(); - System.out.println("Time: " + 
(System.nanoTime() - start)/1E9 + " sec."); - } -} diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/update/FileWriters.java b/mmtf-update/src/main/java/org/rcsb/mmtf/update/FileWriters.java deleted file mode 100644 index 3b3bfe4..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/update/FileWriters.java +++ /dev/null @@ -1,63 +0,0 @@ -package org.rcsb.mmtf.update; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.util.Map; -import java.util.Map.Entry; - -public class FileWriters { - - - /** - * Function to write out flat files - * to the file system - * @param collectAsMap - * @throws IOException - */ - public void writeOutFlatFiles(Map dataMap, String basePath) throws IOException { - for(Entry entry:dataMap.entrySet()){ - // Get the key value pairs - String pdbCode = entry.getKey(); - byte[] byteArr = entry.getValue(); - // Make the new dir - File theDir = new File(basePath+"/"+pdbCode.substring(1, 3)); - if(theDir.exists() == false){ - theDir.mkdirs(); - } - // Write the file - FileOutputStream fos = null; - // Try and except - try{ - fos = new FileOutputStream(basePath+"/"+pdbCode.substring(1, 3)+"/"+pdbCode); - fos.write(byteArr); - } - finally{ - fos.close(); - } - } - } - - public void writeOutFlatFilesToPath(String basePath, Map dataMap) throws IOException { - for(Entry entry:dataMap.entrySet()){ - // Get the key value pairs - String pdbCode = entry.getKey(); - byte[] byteArr = entry.getValue(); - // Make the new dir - File theDir = new File(basePath+"/"+pdbCode.substring(1, 3)); - if(theDir.exists() == false){ - theDir.mkdirs(); - } - // Write the file - FileOutputStream fos = null; - // Try and except - try{ - fos = new FileOutputStream(basePath+"/"+pdbCode.substring(1, 3)+"/"+pdbCode); - fos.write(byteArr); - } - finally{ - fos.close(); - } - } - } -} diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/update/IntegrationTestUtils.java 
b/mmtf-update/src/main/java/org/rcsb/mmtf/update/IntegrationTestUtils.java deleted file mode 100644 index be274d9..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/update/IntegrationTestUtils.java +++ /dev/null @@ -1,53 +0,0 @@ -package org.rcsb.mmtf.update; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.UUID; - -public class IntegrationTestUtils { - - public static final String[] TEST_CASES = new String[] { - //Standard structure - "4cup", - // Weird NMR structure - "1o2f", - // Another weird structure (jose's suggestion) - "3zyb", - // B-DNA structure - "1bna", - // DNA structure - "4y60", - // Sugar structure - "1skm", - // Ribosome - "4v5a", - // Biosynthetic protein - "5emg", - // Calpha atom is missing (not marked as calpha) - "1lpv", - // NMR structure with multiple models - one of which has chain missing - "1msh", - // No ATOM records just HETATM records (in PDB). Opposite true for MMCif. It's a D-Peptide. - "1r9v", - // Micro heterogenity - "4ck4", - // Negative residue numbers - "5esw", - // A weird case with incorrect residue numbers. 
- // "3th3" - }; - - public Path returnTempDir() { - Path tmpDir; - String uuid = UUID.randomUUID().toString(); - try { - tmpDir = Files.createTempDirectory(uuid); - } catch (IOException e) { - System.err.println("Error in making temp directory"); - e.printStackTrace(); - throw new RuntimeException(); - } - return tmpDir; - } -} diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/update/OldFilter.java b/mmtf-update/src/main/java/org/rcsb/mmtf/update/OldFilter.java deleted file mode 100644 index 8b13789..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/update/OldFilter.java +++ /dev/null @@ -1 +0,0 @@ - diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/update/ProcessHadoopFile.java b/mmtf-update/src/main/java/org/rcsb/mmtf/update/ProcessHadoopFile.java deleted file mode 100644 index c5c1c6c..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/update/ProcessHadoopFile.java +++ /dev/null @@ -1,128 +0,0 @@ -package org.rcsb.mmtf.update; - -import java.io.File; -import java.io.IOException; -import java.io.Serializable; - -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapred.SequenceFileOutputFormat; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.Function; -import org.rcsb.mmtf.mappers.ByteWriteToByteArr; -import org.rcsb.mmtf.mappers.StringByteToTextByteWriter; - -import scala.Tuple2; - -public class ProcessHadoopFile implements Serializable { - - - private static final long serialVersionUID = 1L; - - /** - * First argument is the path for the hadoop sequence file to read in. - * Second argument is the base path to write files out to. - * Third argument is the path of the file to write to demonstrate the work is done. 
- * @param args - * @throws IOException - */ - public static void main(String[] args) throws IOException{ - - String inputUri = args[0]; - String basePath = args[1]; - String outPutFile = args[2]; - - - // Helper classes for writing files - WriteHashMap sparkHadoopHashMapWriter = new WriteHashMap(); - // The path of the hadoop file - // This is the default 2 line structure for Spark applications - SparkConf conf = new SparkConf().setMaster("local[*]") - .setAppName(ProcessHadoopFile.class.getSimpleName()); - conf.set("spark.driver.maxResultSize", "14g"); - // Set the config - JavaSparkContext sc = new JavaSparkContext(conf); - // Read in with spark - JavaPairRDD totalDataset = sc - .sequenceFile(inputUri, Text.class, BytesWritable.class, 24 * 3) - .mapToPair(new ByteWriteToByteArr()); - // GET THE TOTAL MAP - JavaPairRDD mainMap = totalDataset.filter(t -> t._1.endsWith("_total")) - .mapToPair(new RemoveSuffixAndGzip()); - - // NOW GET THE HEADER MAP - JavaPairRDD headerMap = totalDataset.filter(new Function, Boolean>() { - - /** - * - */ - private static final long serialVersionUID = 7574230201179927345L; - - @Override - public Boolean call(Tuple2 v1) throws Exception { - if(v1._1.endsWith("_header")==true){ - return true; - } - return false; - } - }).mapToPair(new RemoveSuffixAndGzip()); - - /// NOW GET THE CALPHA MAP - JavaPairRDD calphaMap = totalDataset.filter(new Function, Boolean>() { - - /** - * - */ - private static final long serialVersionUID = -8312184119385524L; - - @Override - public Boolean call(Tuple2 v1) throws Exception { - if(v1._1.endsWith("_calpha")==true){ - return true; - } - return false; - } - }).mapToPair(new RemoveSuffixAndGzip()); - - // Now write the hadoop sequence file as the whole pdb - // Now collect these as maps - - sparkHadoopHashMapWriter.writeHashMapToFile(headerMap.collectAsMap(), basePath+"headerMap.map"); - sparkHadoopHashMapWriter.writeHashMapToFile(calphaMap.collectAsMap(), basePath+"calphaMap.map"); - // Now do the main map 
- // Now write this out as a hash map - sparkHadoopHashMapWriter.writeHashMapToFile(mainMap.collectAsMap(), basePath+"mainMap.map"); - - // Now get the total dataset - without gzip and write to a hadoop sequence file - JavaPairRDD mainMapNoGzip = totalDataset.filter(new Function, Boolean>() { - - /** - * - */ - private static final long serialVersionUID = -7172364344277495432L; - - @Override - public Boolean call(Tuple2 v1) throws Exception { - if(v1._1.endsWith("_total")==true){ - return true; - } - return false; - } - }).mapToPair(new RemoveSuffix()); - - JavaPairRDD mainDataset = mainMapNoGzip.mapToPair(new StringByteToTextByteWriter()); - String outURI = basePath+"hadoopFullData"; - mainDataset.saveAsHadoopFile(outURI, Text.class, BytesWritable.class, SequenceFileOutputFormat.class, org.apache.hadoop.io.compress.BZip2Codec.class); - sc.close(); - File f = new File(outPutFile); - f.getParentFile().mkdirs(); - f.createNewFile(); - - } - - -} - - diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/update/PullFtpData.java b/mmtf-update/src/main/java/org/rcsb/mmtf/update/PullFtpData.java deleted file mode 100644 index e0f422b..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/update/PullFtpData.java +++ /dev/null @@ -1,258 +0,0 @@ -package org.rcsb.mmtf.update; - -import java.io.BufferedReader; -import java.io.InputStreamReader; -import java.net.URL; -import java.net.URLConnection; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -/** - * Current status - * all current entries and models - * all obsolete entries and models - - Weekly deltas - * added entries (and models = 0?) - * modified entries (and models=0?) - * reloaded entries (and models=0?) - * obsolete entries (and models=0?) - * - */ - -public class PullFtpData { - - - private String baseUrl; - - /** - * Constructor takes the url. 
- * @param inputUrl - */ - public PullFtpData(String inputUrl) { - this.setBaseUrl(inputUrl); - } - - /** - * - * @return A list of all the current PDB ids of PDB and NMR models (before the update) - */ - public String[] getAllCurrentEntries(){ - /** - all-pdb-list 2016-Jan-29 12:09:35 565.2K application/octet-stream - */ - List totList = new ArrayList(); - - // Get the NMR and the PDB - String[] pdbAll = readFile("all-pdb-list"); - totList.add(pdbAll); - - return joinLists(totList); - } - - /** - * - * @return A list of all the current PDB ids of computational models (before the update) - */ - public String[] getAllCurrentModels(){ - /** - all-model-list 2016-Jan-29 12:09:35 6.7K application/octet-stream - */ - List totList = new ArrayList(); - - // Get all the models - String[] allModels = readFile("all-model-list"); - totList.add(allModels); - - return joinLists(totList); - } - - /** - * - * @return A list of all obsolete computational models - */ - public String[] getAllObsoleteModels(){ - /** - obsolete-model-list 2016-Jan-29 12:09:35 0.1K application/octet-stream - */ - List totList = new ArrayList(); - - // Get the NMR and the PDB models that have ever been obsoleted - String[] modelObsolete = readFile("obsolete-model-list"); - totList.add(modelObsolete); - - return joinLists(totList); - } - - /** - * - * @return A list of all obsolete PDB and NMR models - */ - public String[] getAllObsoleteEntries(){ - /** - obsolete-pdb-list 2016-Jan-29 12:09:35 16.5K application/octet-stream - */ - List totList = new ArrayList(); - - // Get the NMR and the PDB models that have ever been obsoleted - String[] pdbObsolete = readFile("obsolete-pdb-list"); - totList.add(pdbObsolete); - - return joinLists(totList); - - - } - - - /** - * - * @return A list of all PDB and NMR models to be added this weekly update - */ - public String[] getAdded(){ - /** - added-entries 2016-Jan-29 12:09:35 1.0K application/octet-stream - */ - List totList = new ArrayList(); - // Get the NMR and 
the PDB - String[] pdbAdded = readFile("added-entries"); - totList.add(pdbAdded); - return joinLists(totList); - - } - - - /** - * - * @return A list of all PDB and NMR models to be modified this weekly update - */ - public String[] getModifiedUpdate(){ - /** - modified-entries 2016-Jan-29 12:09:35 0.9K application/octet-stream - */ - - List totList = new ArrayList(); - - // Get the NMR and the PDB - String[] pdbModified = readFile("modified-entries"); - totList.add(pdbModified); - - return joinLists(totList); - } - - /** - * - * @return A list of all PDB and NMR models to be reloaded this weekly update - */ - public String[] getReloadedUpdate(){ - /** - reload-entries 2016-Jan-29 12:09:35 0.0K application/octet-stream - */ - List totList = new ArrayList(); - - // Get the NMR and the PDB - String[] reloadEntries = readFile("reload-entries"); - totList.add(reloadEntries); - return joinLists(totList); - } - - - /** - * - * @return A list of all PDB and NMR models to be obsoloted this weekly update - */ - public String[] getObsoleteUpdate(){ - /** - obsolete-entries 2016-Jan-29 12:09:35 0.1K application/octet-stream - */ - - List totList = new ArrayList(); - - // Get the NMR and the PDB - String[] pdbObsolete = readFile("obsolete-entries"); - totList.add(pdbObsolete); - return joinLists(totList); - - } - - - /** - * Now a series of helper functions - */ - - - /** - * Takes a list of string arrays and joins them into one. 
- * @param totList - * @return - */ - private String[] joinLists(List totList) { - // Now join the lists together - // If it's empty then jusr return an empty list - if(totList.size()==0){ - return new String[0]; - } - // - List strings = new ArrayList(Arrays.asList(totList.get(0))); - if(totList.size()>1){ - for(int i=1;i outList = new ArrayList(); - // Check it's been set - String urlIn = baseUrl+fileIn; - try - { - // create a url object - URL url = new URL(urlIn); - - // create a urlconnection object - URLConnection urlConnection = url.openConnection(); - - // wrap the urlconnection in a bufferedreader - BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(urlConnection.getInputStream())); - - String line; - - // read from the urlconnection via the bufferedreader - while ((line = bufferedReader.readLine()) != null) - { - outList.add(line); - } - bufferedReader.close(); - } - catch(Exception e) - { - e.printStackTrace(); - } - - return outList.toArray(new String[0]); - } - - /** - * @return the baseUrl - */ - public String getBaseUrl() { - return baseUrl; - } - - /** - * @param baseUrl the baseUrl to set - */ - public void setBaseUrl(String baseUrl) { - this.baseUrl = baseUrl; - } - -} diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/update/RemoveSuffix.java b/mmtf-update/src/main/java/org/rcsb/mmtf/update/RemoveSuffix.java deleted file mode 100644 index 7eac0f6..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/update/RemoveSuffix.java +++ /dev/null @@ -1,23 +0,0 @@ -package org.rcsb.mmtf.update; - - -import org.apache.spark.api.java.function.PairFunction; - -import scala.Tuple2; - -public class RemoveSuffix implements PairFunction, String,byte[]> { - - /** - * - */ - private static final long serialVersionUID = -8038296891190383974L; - - @Override - public Tuple2 call(Tuple2 t) throws Exception { - // Now return the array with just the PDB code - // And gzip compress the byte arr - return new Tuple2(t._1.substring(0,4),t._2); - } - 
- -} diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/update/RemoveSuffixAndGzip.java b/mmtf-update/src/main/java/org/rcsb/mmtf/update/RemoveSuffixAndGzip.java deleted file mode 100644 index 10a01ba..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/update/RemoveSuffixAndGzip.java +++ /dev/null @@ -1,52 +0,0 @@ -package org.rcsb.mmtf.update; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.util.zip.GZIPOutputStream; - -import org.apache.spark.api.java.function.PairFunction; - -import scala.Tuple2; - -public class RemoveSuffixAndGzip implements PairFunction, String,byte[]> { - - /** - * - */ - private static final long serialVersionUID = -8038296891190383974L; - - @Override - public Tuple2 call(Tuple2 t) throws Exception { - // Now return the array with just the PDB code - // And gzip compress the byte arr - return new Tuple2(t._1.substring(0,4),gzipCompress(t._2)); - } - - private byte[] gzipCompress(byte[] dataToCompress) throws IOException { - // Function to gzip compress the data for the hashmaps - ByteArrayOutputStream byteStream = - new ByteArrayOutputStream(dataToCompress.length); - try - { - GZIPOutputStream zipStream = - new GZIPOutputStream(byteStream); - try - { - zipStream.write(dataToCompress); - } - finally - { - zipStream.close(); - } - } - finally - { - byteStream.close(); - } - - byte[] compressedData = byteStream.toByteArray(); - return compressedData; - - } - -} diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/update/ServerUtils.java b/mmtf-update/src/main/java/org/rcsb/mmtf/update/ServerUtils.java deleted file mode 100644 index cad3865..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/update/ServerUtils.java +++ /dev/null @@ -1,41 +0,0 @@ -package org.rcsb.mmtf.update; - -import java.io.IOException; -import java.net.InetSocketAddress; -import java.net.MalformedURLException; -import java.net.Socket; -import java.net.URL; - -public class ServerUtils { - - - /** - * General function to a ping a 
generic server. - * @param host - * @param port - * @return - * @throws MalformedURLException - */ - public boolean pingServer(String urlInput) throws MalformedURLException { - URL thisUrl = new URL(urlInput); - String host = thisUrl.getHost(); - int port = thisUrl.getPort(); - System.out.println("Pinging: "+host+" on port: "+port); - try (Socket socket = new Socket()) { - socket.connect(new InetSocketAddress(host, port), 100); - return true; - } catch (IOException e) { - return false; // Either timeout or unreachable or failed DNS lookup. - } - - } - - /** - * Build the extension to the url for accessing data. - * @param inputCode - * @return - */ - public String generateDataExtension(String inputCode) { - return inputCode.substring(1,3)+"/"+inputCode+"/"+inputCode+".cif.gz"; - } -} diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/update/TestingUtils.java b/mmtf-update/src/main/java/org/rcsb/mmtf/update/TestingUtils.java deleted file mode 100644 index f0fb0f7..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/update/TestingUtils.java +++ /dev/null @@ -1,89 +0,0 @@ -package org.rcsb.mmtf.update; - -import java.io.File; -import java.io.IOException; -import java.lang.reflect.InvocationTargetException; - -import org.apache.commons.io.FileUtils; -import org.biojava.nbio.structure.Structure; -import org.biojava.nbio.structure.StructureException; -import org.biojava.nbio.structure.StructureIO; -import org.biojava.nbio.structure.align.util.AtomCache; -import org.biojava.nbio.structure.io.FileParsingParameters; -import org.biojava.nbio.structure.io.mmtf.BioJavaStructureDecoder; -import org.rcsb.mmtf.biojavaencoder.EncodeStructure; -import org.rcsb.mmtf.decoder.DecodeStructure; -import org.rcsb.mmtf.decoder.ParsingParams; -import org.rcsb.mmtf.testutils.CheckOnBiojava; -import org.rcsb.mmtf.testutils.CheckOnRawApi; - -public class TestingUtils { - /** - * Function to round trip everything based on the current params - * @throws IllegalAccessException - * @throws 
InvocationTargetException - * @throws IOException - * @throws StructureException - */ - public void testAll(String[] inputList, FileParsingParameters params, AtomCache cache) throws IllegalAccessException, InvocationTargetException, IOException, StructureException{ - ParsingParams mmtfParams = new ParsingParams(); - mmtfParams.setParseInternal(params.isUseInternalChainId()); - StructureIO.setAtomCache(cache); - for (String pdbId : inputList){ - testOne(pdbId, params, cache, mmtfParams); - } - - } - - /** - * Function just to test one structure. - * @param pdbId - * @param params - * @param cache - * @param mmtfParams - * @throws IllegalAccessException - * @throws InvocationTargetException - * @throws IOException - * @throws StructureException - */ - public void testOne(String pdbId, FileParsingParameters params, AtomCache cache, ParsingParams mmtfParams) throws IllegalAccessException, InvocationTargetException, IOException, StructureException { - CheckOnBiojava checkEquiv = new CheckOnBiojava(); - Structure structure = StructureIO.getStructure(pdbId); - checkEquiv.checkIfStructuresSame(structure,roundTripStruct(pdbId, mmtfParams, params, cache)); - } - - /** - * - * @return - * @throws IOException - * @throws StructureException - * @throws InvocationTargetException - * @throws IllegalAccessException - */ - public Structure roundTripStruct(String pdbId, ParsingParams pp, FileParsingParameters params, AtomCache cache) throws IOException, IllegalAccessException, InvocationTargetException, StructureException{ - // We need to set the parsing params to this - boolean oldValue = params.isUseInternalChainId(); - params.setUseInternalChainId(true); - cache.setFileParsingParams(params); - StructureIO.setAtomCache(cache); - EncodeStructure es = new EncodeStructure(); - Structure mmcifStruct = StructureIO.getStructure(pdbId); - FileUtils.writeByteArrayToFile(new File("pathname"), es.encodeFromBiojava(mmcifStruct)); - byte[] inArr = FileUtils.readFileToByteArray(new 
File("pathname")); - // Now do the checks on the Raw data - CheckOnRawApi checkRaw = new CheckOnRawApi(inArr); - checkRaw.checkRawDataConsistency(mmcifStruct, params); - // Now decode the data and return this new structure - BioJavaStructureDecoder bjsi = new BioJavaStructureDecoder(); - DecodeStructure decodeStructure = new DecodeStructure(inArr); - decodeStructure.getStructFromByteArray(bjsi, pp); - Structure struct = bjsi.getStructure(); - // Revert back - params.setUseInternalChainId(oldValue); - cache.setFileParsingParams(params); - StructureIO.setAtomCache(cache); - return struct; - } - - -} diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/update/WeeklyUpdateRun.java b/mmtf-update/src/main/java/org/rcsb/mmtf/update/WeeklyUpdateRun.java deleted file mode 100644 index ddfe166..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/update/WeeklyUpdateRun.java +++ /dev/null @@ -1,59 +0,0 @@ -package org.rcsb.mmtf.update; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.Text; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.rcsb.mmtf.mappers.MapperUtils; - -public class WeeklyUpdateRun { - - /** - * First argument is the path in the file system - * Second argument is the URL to the FTP site - * Third argument is the URL for the MMCifdata - * Fourth argument the output file - * Fifth+ pdb ids to ignore - * @param args - */ - public static void main(String args[]) { - - WeeklyUpdateUtils weeklyUpdate = new WeeklyUpdateUtils(); - MapperUtils mapperUtils = new MapperUtils(); - - String inputUri = args[0]; - String ftpSiteUrl = args[1]; - String mmcifDataUrl = args[2]; - String outputUri = args[3]; - String inputCcdUrl = args[4]; - - // Now get the list of PDB ids to ignore - List ignoreList = new ArrayList<>(); - for (int i=5; i totalDataset = weeklyUpdate.filterElements(sparkContext, 
inputUri); - List urlPdbList = new ArrayList<>(); - for (String pdbId : weeklyUpdate.getAddedList()) { - urlPdbList.add(mmcifDataUrl+"/"+pdbId+".cif.gz"); - } - JavaPairRDD distData = mapperUtils.generateRDD(sparkContext, urlPdbList, inputCcdUrl); - // Now join them - weeklyUpdate.joinDataSet(outputUri, totalDataset, distData); - sparkContext.close(); - } - -} diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/update/WeeklyUpdateUtils.java b/mmtf-update/src/main/java/org/rcsb/mmtf/update/WeeklyUpdateUtils.java deleted file mode 100644 index d4fd1e6..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/update/WeeklyUpdateUtils.java +++ /dev/null @@ -1,116 +0,0 @@ -package org.rcsb.mmtf.update; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapred.SequenceFileOutputFormat; -import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.rcsb.mmtf.mappers.ByteWriteToByteArr; -import org.rcsb.mmtf.mappers.StringByteToTextByteWriter; - -public class WeeklyUpdateUtils implements Serializable { - - - private static final long serialVersionUID = 4909752674491325131L; - private List removeList; - private List addedList; - - /** - * Function to filter out elements from a hadoop seqeunce file - * @param sparkContext The spark context for reading the file - * @param fileUri The full path for the hadoop seqeunce file - * @return The RDD of the data after filtering - */ - public JavaPairRDD filterElements(JavaSparkContext sparkContext, String fileUri) { - return sparkContext.sequenceFile(fileUri, Text.class, BytesWritable.class, 12) - .mapToPair(new ByteWriteToByteArr()) - .filter(t -> removeList.contains(t._1)==false) - .mapToPair(new StringByteToTextByteWriter()); - } - - /** - * Join to RDDs together into one larger one - * @param outUri - * @param datasetOne 
- * @param datasetTwo - */ - public void joinDataSet(String outUri, JavaPairRDD datasetOne, JavaPairRDD datasetTwo) { - // Now join them together - datasetOne.join(datasetTwo); - // Now save this as a Hadoop sequence file - datasetOne.saveAsHadoopFile(outUri, Text.class, BytesWritable.class, SequenceFileOutputFormat.class, org.apache.hadoop.io.compress.BZip2Codec.class); - } - - /** - * Get the update lists from the FTP server. Join them into remove lists and added lists. - * Updated items are in both lists. - * @param ignoreList - */ - public void getUpdateLists(String[] obsolete, String[] modified, String[] reloaded, String[] added, List ignoreList) { - // Now make the added and removed list - removeList = joinLists(reloaded,modified,obsolete); - List inputAddedList = joinLists(added,reloaded,modified); - for (String ignoreId : ignoreList) { - inputAddedList.remove(ignoreId.toLowerCase()); - inputAddedList.remove(ignoreId.toUpperCase()); - } - setAddedList(inputAddedList); - } - - - /** - * Retrieve the data from the FTP site and populate the added and remove lists. - */ - public void getDataFromFtpSite(String inputUrl, List ignoreList){ - // Get the class of functions here - PullFtpData pullFtpData = new PullFtpData(inputUrl); - // **** DO THIS - String[] obsolete = pullFtpData.getObsoleteUpdate(); - String[] modified = pullFtpData.getModifiedUpdate(); - String[] reloaded = pullFtpData.getReloadedUpdate(); - String[] added = pullFtpData.getAdded(); - // Now get these lists from the other lists - getUpdateLists(obsolete, modified, reloaded, added, ignoreList); - } - - /** - * Joins a series of three lists into a single lists. 
- * @param listOne - * @param listTwo - * @param listThree - * @return - */ - public List joinLists(String[] listOne, String[] listTwo, String[] listThree) { - List strings = new ArrayList(Arrays.asList(listOne)); - for(String item: listTwo){ - strings.add(item); - } - for(String item: listThree){ - strings.add(item); - } - return strings; - } - - /** - * The getters and setters - */ - - - public List getAddedList() { - return addedList; - } - - public void setAddedList(List addedList) { - this.addedList = addedList; - } - - - public List getRemoveList() { - return removeList; - } -} diff --git a/mmtf-update/src/main/java/org/rcsb/mmtf/update/WriteHashMap.java b/mmtf-update/src/main/java/org/rcsb/mmtf/update/WriteHashMap.java deleted file mode 100644 index 3bb2274..0000000 --- a/mmtf-update/src/main/java/org/rcsb/mmtf/update/WriteHashMap.java +++ /dev/null @@ -1,112 +0,0 @@ -package org.rcsb.mmtf.update; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.ObjectOutputStream; -import java.util.Map; - -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.Text; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.Function; -import org.rcsb.mmtf.mappers.ByteWriteToByteArr; - -import scala.Tuple2; - - - -/* - * Class to write files that can be read in by the Java server - */ -public class WriteHashMap { - - public static void main(String[] args) throws Exception { - WriteHashMap sdhw = new WriteHashMap(); - // The path of the hadoop file - String uri = "/home/anthony/src/codec-devel/Total.hadoop.TRIAL.bzip2"; - // This is the default 2 line structure for Spark applications - SparkConf conf = new SparkConf().setMaster("local[*]") - .setAppName(WriteHashMap.class.getSimpleName()); - conf.set("spark.driver.maxResultSize", "14g"); - // Set the config - JavaSparkContext sc = new 
JavaSparkContext(conf); - // Read in with spark - JavaPairRDD totalDataset = sc - .sequenceFile(uri, Text.class, BytesWritable.class, 4 * 3) - .mapToPair(new ByteWriteToByteArr()); - - // - JavaPairRDD mainMap = totalDataset.filter(new Function, Boolean>() { - - /** - * - */ - private static final long serialVersionUID = -7172364344277495432L; - - @Override - public Boolean call(Tuple2 v1) throws Exception { - if(v1._1.endsWith("_total")==true){ - return true; - } - return false; - } - }); - JavaPairRDD headerMap = totalDataset.filter(new Function, Boolean>() { - - /** - * - */ - private static final long serialVersionUID = 7574230201179927345L; - - @Override - public Boolean call(Tuple2 v1) throws Exception { - if(v1._1.endsWith("_header")==true){ - return true; - } - return false; - } - }); - JavaPairRDD calphaMap = totalDataset.filter(new Function, Boolean>() { - - /** - * - */ - private static final long serialVersionUID = -8312184119385524L; - - @Override - public Boolean call(Tuple2 v1) throws Exception { - if(v1._1.endsWith("_calpha")==true){ - return true; - } - return false; - } - }); - // Now collect these as maps - sdhw.writeHashMapToFile(headerMap.collectAsMap(), "headerMap.map"); - sdhw.writeHashMapToFile(calphaMap.collectAsMap(), "calphaMap.map"); - sdhw.writeHashMapToFile(mainMap.collectAsMap(), "mainMap.map"); - // Close the spark context - sc.close(); - } - - /** - * - * @param mapToWrite - * @param fileName - * @throws IOException - */ - public void writeHashMapToFile(Map mapToWrite, String fileName) throws IOException{ - - File file = new File(fileName); - FileOutputStream f = new FileOutputStream(file); - ObjectOutputStream s = new ObjectOutputStream(f); - s.writeObject(mapToWrite); - s.close(); - - } - - -} diff --git a/mmtf-update/src/test/java/org/rcsb/mmtf/examples/TestHandleIO.java b/mmtf-update/src/test/java/org/rcsb/mmtf/examples/TestHandleIO.java deleted file mode 100644 index fe59508..0000000 --- 
a/mmtf-update/src/test/java/org/rcsb/mmtf/examples/TestHandleIO.java +++ /dev/null @@ -1,76 +0,0 @@ -package org.rcsb.mmtf.examples; - -import org.junit.Test; -import org.rcsb.mmtf.update.IntegrationTestUtils; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotEquals; - -import java.io.IOException; -import java.nio.file.Path; - -import static org.junit.Assert.assertArrayEquals; - -public class TestHandleIO { - - private HandleIO handleIo; - - public TestHandleIO() { - // Generate this utility class - handleIo = new HandleIO(); - } - - - /** - * Basic test of being able to reach the server and get a byte array back - * @throws IOException - */ - @Test - public void testGetFileFromServer() throws IOException { - - // Get the base url - byte[] inputArr = handleIo.getFromUrl("4cup"); - assertNotEquals(inputArr, null); - assertNotEquals(inputArr.length, 1); - assertNotEquals(inputArr.length, 0); - // Get the base url - byte[] inputArrTwo = handleIo.getFromUrl("4cup"); - assertArrayEquals(inputArr, inputArrTwo); - // What if we give an nonsense PDB code - byte[] inputArrThree = handleIo.getFromUrl("12345"); - assertEquals(inputArrThree, null); - } - - /** - * Test whether we can read write the files where we want - */ - @Test - public void testReadWriteFiles() { - - IntegrationTestUtils integrationTestUtils = new IntegrationTestUtils(); - Path tmpDir = integrationTestUtils.returnTempDir(); - // If we set it as the tmp directory - System.setProperty("PDB_CACHE_DIR", tmpDir.toAbsolutePath().toString()); - // The input code - String inputCode = "4cup"; - // If we don't set where the cache is this should be empty - assertEquals(handleIo.getFromFile(inputCode), null); - byte[] urlData = handleIo.getByteArrFromUrlOrFile(inputCode); - byte[] fileData = handleIo.getFromFile(inputCode); - // First check neither are null - assertNotEquals(urlData, null); - assertNotEquals(fileData, null); - // Now check they are the same - 
assertArrayEquals(urlData, fileData); - } - - /** - * Test whether we can update files from the server and the updated file is created. - * Not implemented yet. As functionality isn't there. - */ - @Test - public void testUpdateFiles() { - - } - -} diff --git a/mmtf-update/src/test/java/org/rcsb/mmtf/integrationtest/TestBonds.java b/mmtf-update/src/test/java/org/rcsb/mmtf/integrationtest/TestBonds.java deleted file mode 100644 index fbd75d7..0000000 --- a/mmtf-update/src/test/java/org/rcsb/mmtf/integrationtest/TestBonds.java +++ /dev/null @@ -1,34 +0,0 @@ -package org.rcsb.mmtf.integrationtest; - -import java.io.IOException; - -import org.biojava.nbio.structure.Structure; -import org.biojava.nbio.structure.StructureException; -import org.biojava.nbio.structure.StructureIO; -import org.junit.Test; -import org.rcsb.mmtf.biojavaencoder.EncoderUtils; -import org.rcsb.mmtf.testutils.CheckBonds; -import org.rcsb.mmtf.update.IntegrationTestUtils; - -public class TestBonds { - - - /** - * Test whether all atoms (other than waters) have at least one bond. - * They should. 
- * @throws StructureException - * @throws IOException - */ - @Test - public void testBondConsistency() throws IOException, StructureException { - CheckBonds testUtils = new CheckBonds(); - // Set up biojava - EncoderUtils encoderUtils = new EncoderUtils(); - encoderUtils.setUpBioJava(); - for (String testCase : IntegrationTestUtils.TEST_CASES) { - Structure structure = StructureIO.getStructure(testCase); - testUtils.checkIfBondsExist(structure); - } - } - -} diff --git a/mmtf-update/src/test/java/org/rcsb/mmtf/integrationtest/TestParseMMCif.java b/mmtf-update/src/test/java/org/rcsb/mmtf/integrationtest/TestParseMMCif.java deleted file mode 100644 index 87af822..0000000 --- a/mmtf-update/src/test/java/org/rcsb/mmtf/integrationtest/TestParseMMCif.java +++ /dev/null @@ -1,47 +0,0 @@ -package org.rcsb.mmtf.integrationtest; - -import java.io.IOException; -import java.lang.reflect.InvocationTargetException; - -import org.biojava.nbio.structure.StructureException; -import org.biojava.nbio.structure.StructureIO; -import org.biojava.nbio.structure.align.util.AtomCache; -import org.biojava.nbio.structure.io.FileParsingParameters; -import org.junit.Test; -import org.rcsb.mmtf.biojavaencoder.EncoderUtils; -import org.rcsb.mmtf.update.IntegrationTestUtils; -import org.rcsb.mmtf.update.TestingUtils; - -public class TestParseMMCif { - - private AtomCache cache; - private FileParsingParameters params; - private TestingUtils testingUtils = new TestingUtils(); - - public TestParseMMCif(){ - // Set up the atom cache etc - EncoderUtils encoderUtils = new EncoderUtils(); - cache = encoderUtils.setUpBioJava(); - params = cache.getFileParsingParams(); - } - - - @Test - public void testAsymChainIds() throws IOException, StructureException, IllegalAccessException, InvocationTargetException { - // Set the params - params.setUseInternalChainId(true); - cache.setFileParsingParams(params); - StructureIO.setAtomCache(cache); - testingUtils.testAll(IntegrationTestUtils.TEST_CASES, params, 
cache); - } - - @Test - public void testAuthChainIds() throws IOException, StructureException, IllegalAccessException, InvocationTargetException { - // Set the param - params.setUseInternalChainId(false); - cache.setFileParsingParams(params); - StructureIO.setAtomCache(cache); - testingUtils.testAll(IntegrationTestUtils.TEST_CASES, params, cache); - } - -} diff --git a/mmtf-update/src/test/java/org/rcsb/mmtf/update/TestReadWrite.java b/mmtf-update/src/test/java/org/rcsb/mmtf/update/TestReadWrite.java deleted file mode 100644 index 5ec4671..0000000 --- a/mmtf-update/src/test/java/org/rcsb/mmtf/update/TestReadWrite.java +++ /dev/null @@ -1,278 +0,0 @@ -package org.rcsb.mmtf.update; - -import static org.junit.Assert.*; - -import java.io.FileInputStream; -import java.io.IOException; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Random; -import java.util.concurrent.ThreadLocalRandom; - -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapred.SequenceFileOutputFormat; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaSparkContext; - -import org.junit.Test; -import org.rcsb.mmtf.mappers.ByteWriteToByteArr; -import org.rcsb.mmtf.mappers.StringByteToTextByteWriter; - -import scala.Tuple2; -/** - * Functions to test the weekly update read in and read out. - * From very basic can we update the data. - * Some integration testing on reading in and out structures. - * Some consistency testing between updates. - * To be run before a run to check we're not going to mess stuff up. 
- * @author Anthony Bradley - * - */ -public class TestReadWrite { - - // Set some data to remove - private static final String[] OBSOLETE = new String[] {"1abc"}; - // Set some data to update - private static final String[] MODIFIED = new String[] {"2abc"}; - private static final String[] RELOADED = new String[] {"3abc"}; - // Set some data to add - private static final String[] ADDED = new String[] {"4abc"}; - - - IntegrationTestUtils integrationTestUtils; - // App name - private static final String APP_NAME = "thisAppName"; - - public TestReadWrite() { - integrationTestUtils = new IntegrationTestUtils(); - } - - @Test - /** - * Can we write out then read back in a Hadoop sequence file of data. - * @throws IOException - */ - public void writeReadFlatFile() throws IOException { - FileWriters fileWriters = new FileWriters(); - Map dataMap = getDummyData("abc"); - // First write this out - Path tmpDir = integrationTestUtils.returnTempDir(); - String tmpPathString = tmpDir.toAbsolutePath().toString(); - // Write out these flat files - fileWriters.writeOutFlatFiles(dataMap, tmpPathString); - // Check that they all exist - for ( Entry currentSet : dataMap.entrySet()) { - // Now read in the byte array - FileInputStream fileInput = new FileInputStream(tmpPathString+"/ab/"+currentSet.getKey()); - byte[] inputArr = IOUtils.toByteArray(fileInput); - assertNotNull(inputArr); - assertArrayEquals(currentSet.getValue(), inputArr); - } - } - - @Test - /** - * Can we write out then read back in a Hadoop sequence file of data. - */ - public void writeReadHadoopFile() { - - Map inputData = getDummyData("abc"); - // Get the spark context - JavaSparkContext sparkContext = getSparkContext(); - JavaPairRDD mainDataset = JavaPairRDD.fromJavaRDD(sparkContext.parallelize(convertToTuple(inputData))). 
- mapToPair(new StringByteToTextByteWriter()); - Path tmpDir = integrationTestUtils.returnTempDir(); - String tmpPathString = tmpDir.toAbsolutePath().toString(); - String outURI = tmpPathString+"/outData.bzip2"; - mainDataset.saveAsHadoopFile(outURI, Text.class, BytesWritable.class, SequenceFileOutputFormat.class, org.apache.hadoop.io.compress.BZip2Codec.class); - // Now read it in - List> outPutData = sparkContext.sequenceFile(outURI, Text.class, BytesWritable.class, 24 * 3) - .mapToPair(new ByteWriteToByteArr()).collect(); - // Now check we're the same - for (Tuple2 thisEntry : outPutData) { - // Check the byte arrs are the same - assertArrayEquals(inputData.get(thisEntry._1), thisEntry._2); - } - sparkContext.close(); - } - - - - @Test - /** - * Can we update the Hadoop file reasonably - */ - public void updateHadoopFile() { - // Should add three files to the output, read it back in and have the set number - JavaSparkContext sparkContext = getSparkContext(); - // - Map totalDataMap = new HashMap<>(); - - Map inputData = getDummyData("abc"); - Path tmpDir = integrationTestUtils.returnTempDir(); - String tmpPathString = tmpDir.toAbsolutePath().toString(); - String outURI = tmpPathString+"/outData.bzip2"; - JavaPairRDD mainDataset = JavaPairRDD.fromJavaRDD(sparkContext.parallelize(convertToTuple(inputData))). 
- mapToPair(new StringByteToTextByteWriter()); - mainDataset.saveAsHadoopFile(outURI, Text.class, BytesWritable.class, SequenceFileOutputFormat.class, org.apache.hadoop.io.compress.BZip2Codec.class); - // Now read this hadoop file and filter an element out - List dataToRemove = new ArrayList<>(); - dataToRemove.addAll(inputData.keySet()); - Collections.shuffle(dataToRemove); - dataToRemove = dataToRemove.subList(0, 4); - final List thisDataToRemove = new ArrayList<>(dataToRemove); - JavaPairRDD filteredDataset = sparkContext - .sequenceFile(outURI, Text.class, BytesWritable.class, 4 * 3) - .mapToPair(new ByteWriteToByteArr()) - .filter(t -> thisDataToRemove.contains(t._1)==false) - .mapToPair(new StringByteToTextByteWriter()); - - Map updateData = getDummyData("def"); - // Now generate the update data and convert - JavaPairRDD updateDataset = JavaPairRDD.fromJavaRDD(sparkContext.parallelize(convertToTuple(updateData))). - mapToPair(new StringByteToTextByteWriter()); - // Now join them together - filteredDataset.join(updateDataset); - String updateURI = tmpPathString+"/updateData.bzip2"; - filteredDataset.saveAsHadoopFile(updateURI, Text.class, BytesWritable.class, SequenceFileOutputFormat.class, org.apache.hadoop.io.compress.BZip2Codec.class); - - // Read it back in again - List> totalData = sparkContext - .sequenceFile(updateURI, Text.class, BytesWritable.class, 4 * 3) - .mapToPair(new ByteWriteToByteArr()).collect(); - // GENEREATE THE TEST DATA - for (Entry thisEntry : inputData.entrySet()) { - if (thisDataToRemove.contains(thisEntry.getKey()) ){ - System.out.println(thisEntry.getKey()); - continue; - } - totalDataMap.put(thisEntry.getKey(),thisEntry.getValue()); - } - for (Entry thisEntry : updateData.entrySet()) { - if (thisDataToRemove.contains(thisEntry.getKey()) ){ - continue; - } - totalDataMap.put(thisEntry.getKey(),thisEntry.getValue()); - } - // - for ( Tuple2 dataEntry : totalData) { - System.out.println(dataEntry._1); - 
assertArrayEquals(totalDataMap.get(dataEntry._1), dataEntry._2); - - } - - - - // Close spark down - sparkContext.close(); - - } - - - @Test - /** - * Test that the function to join lists actually works - */ - public void testJoinLists() { - WeeklyUpdateUtils weeklyUpdate = new WeeklyUpdateUtils(); - List testedList = weeklyUpdate.joinLists(new String[] {"A"}, new String[] {"B"}, new String[] {"C"}); - List testList = new ArrayList(); - testList.add("A"); - testList.add("B"); - testList.add("C"); - assertEquals(testList.size(), testedList.size()); - assertArrayEquals(testedList.toArray(new String[3]), testList.toArray(new String[3])); - } - - @Test - /** - * Test the generation of added and removed lists in the weekly update. - * Possibly add some integration tests to check - */ - public void testGenLists() { - WeeklyUpdateUtils weeklyUpdate = getLists(); - // Now get these and check they're the same as the should be - List addedList = weeklyUpdate.getAddedList(); - Collections.sort(addedList); - assertArrayEquals(addedList.toArray( new String[addedList.size()]), new String[] {"2abc", "3abc", "4abc"}); - List removedList = weeklyUpdate.getRemoveList(); - Collections.sort(removedList); - assertArrayEquals(removedList.toArray( new String[removedList.size()]), new String[] {"1abc","2abc", "3abc"}); - } - - @Test - /** - * Can we write and read the hashmaps - * @throws IOException - */ - public void writeReadHashMaps() throws IOException { - Map dataMap = getDummyData("abc"); - Path tmpDir = integrationTestUtils.returnTempDir(); - String tmpPathString = tmpDir.toAbsolutePath().toString(); - WriteHashMap sparkSDSCHashMapWriter = new WriteHashMap(); - // Write it out - sparkSDSCHashMapWriter.writeHashMapToFile(dataMap, tmpPathString+"mainMap.map"); - } - - /** - * Find the dummy data of the PDB - as would be produced from Encoding - * @return - */ - private Map getDummyData(String suffix) { - - Map dataMap = new HashMap<>(); - // First build the datamap -> random strings 
and random bytes - for (int i=0; i < 10; i++) { - byte[] b = new byte[ThreadLocalRandom.current().nextInt(1024, 4048)]; - new Random().nextBytes(b); - String dummyPdbCode = i+suffix; - dataMap.put(dummyPdbCode, b); - } - return dataMap; - - } - - /** - * Convert a dictionary to a tuple for spark. - * Input is a string/bytearr and output is text byteswriteable - * @param dummyData The input map of dummy string value pairs - * @return The output list of Tuples - */ - private List> convertToTuple(Map dummyData) { - // Set the out list - List> outList = new ArrayList<>(); - for ( Entry thisEntry : dummyData.entrySet() ) { - outList.add(new Tuple2(thisEntry.getKey(), thisEntry.getValue())); - } - return outList; - } - - private JavaSparkContext getSparkContext() { - SparkConf conf = new SparkConf().setMaster("local[*]") - .setAppName(APP_NAME); - // Set the config for the spark context - JavaSparkContext sparkContext = new JavaSparkContext(conf); - return sparkContext; - } - - /** - * Get the lists indicate which items should be removed or added - * for the weekly update. 
- * @return - */ - private WeeklyUpdateUtils getLists() { - WeeklyUpdateUtils weeklyUpdate = new WeeklyUpdateUtils(); - // Now get these lists - weeklyUpdate.getUpdateLists(OBSOLETE, MODIFIED, RELOADED, ADDED, new ArrayList()); - return weeklyUpdate; - } - -} diff --git a/mmtf-update/src/test/java/org/rcsb/mmtf/update/TestSpark.java b/mmtf-update/src/test/java/org/rcsb/mmtf/update/TestSpark.java deleted file mode 100644 index 6aa010f..0000000 --- a/mmtf-update/src/test/java/org/rcsb/mmtf/update/TestSpark.java +++ /dev/null @@ -1,30 +0,0 @@ -package org.rcsb.mmtf.update; - -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; -import static org.junit.Assert.*; -import org.junit.Test; - -public class TestSpark { - - JavaSparkContext sc; - // App name - private static final String APP_NAME = "thisAppName"; - - public TestSpark() { - SparkConf conf = new SparkConf().setMaster("local[*]") - .setAppName(APP_NAME); - // Set the config for the spark context - sc = new JavaSparkContext(conf); - } - - @Test - /** - * Basic test that we can even get spark running - */ - public void testSparkContextSetup() { - assertEquals(sc.appName(), APP_NAME); - sc.close(); - sc.stop(); - } -} diff --git a/pom.xml b/pom.xml index ad7dd2f..ea6a12e 100644 --- a/pom.xml +++ b/pom.xml @@ -1,4 +1,5 @@ - + @@ -35,7 +36,7 @@ UTF-8 UTF-8 512M - 1.7.14 + 1.7.20 2.5 @@ -135,10 +136,6 @@ -Xdoclint:none 256m -
<script src="http://www.google-analytics.com/urchin.js" - type="text/javascript"></script><script - type="text/javascript">_uacct = - "UA-1326640-1";urchinTracker();</script>
@@ -218,21 +215,21 @@ - org.codehaus.mojo - cobertura-maven-plugin - 2.7 - - xml - 256m - - true - + org.codehaus.mojo + cobertura-maven-plugin + 2.7 + + xml + 256m + + true + - org.eluder.coveralls - coveralls-maven-plugin - 4.1.0 - + org.eluder.coveralls + coveralls-maven-plugin + 4.1.0 + org.sonatype.plugins @@ -327,9 +324,63 @@ 2.6 -
- + + + + uk.co.jemos.podam + podam + 2.3.5.RELEASE + test + + + org.unitils + unitils-core + 3.4.2 + test + + + junit + junit + 4.12 + test + + + commons-beanutils + commons-beanutils + 1.9.2 + test + + + org.slf4j + slf4j-api + ${slf4j.version} + + + + org.apache.logging.log4j + log4j-slf4j-impl + ${log4j.version} + + runtime + + + + org.apache.logging.log4j + log4j-api + ${log4j.version} + + runtime + + + org.apache.logging.log4j + log4j-core + ${log4j.version} + + runtime + + + @@ -434,7 +485,6 @@ mmtf-api mmtf-decoder mmtf-encoder - mmtf-update 2015 @@ -442,8 +492,7 @@ https://github.com/rcsb/mmtf-java/issues - CruiseControl - http://ccpublic.rcsb.org/ + Travis + https://travis-ci.org/ -