diff --git a/mmtf-codec/src/main/java/org/rcsb/mmtf/encoder/ReducedEncoder.java b/mmtf-codec/src/main/java/org/rcsb/mmtf/encoder/ReducedEncoder.java index 264c099..d748d93 100644 --- a/mmtf-codec/src/main/java/org/rcsb/mmtf/encoder/ReducedEncoder.java +++ b/mmtf-codec/src/main/java/org/rcsb/mmtf/encoder/ReducedEncoder.java @@ -1,173 +1,208 @@ package org.rcsb.mmtf.encoder; -import java.util.ArrayList; -import java.util.List; +import java.io.Serializable; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; -import org.apache.commons.lang.ArrayUtils; import org.rcsb.mmtf.api.StructureDataInterface; import org.rcsb.mmtf.decoder.DecoderUtils; +import org.rcsb.mmtf.encoder.AdapterToStructureData; +import org.rcsb.mmtf.encoder.EncoderUtils; /** - * Convert a full format of the file to a reduced format. + * Converts a full (all-atom) MMTF structure data representation to a reduced version. + * The reduced version contains only the C-alpha atoms of polypeptide and the + * P atom of polynucleotide chains. Alternative locations of the C-alpha and P atoms + * are excluded, as well as any water molecules. + * * @author Anthony Bradley + * @author Peter Rose * */ -public class ReducedEncoder { - +public class ReducedEncoder implements Serializable { + private static final long serialVersionUID = -528732924956087110L; + private static final String CALPHA_NAME = "CA"; private static final String CARBON_ELEMENT = "C"; private static final String PHOSPHATE_NAME = "P"; private static final String PHOSPHATE_ELEMENT = "P"; /** - * Get the reduced form of the input {@link StructureDataInterface}. - * @param structureDataInterface the input {@link StructureDataInterface} + * Gets the reduced form of the input {@link StructureDataInterface}. + * @param full the input {@link StructureDataInterface} * @return the reduced form of the {@link StructureDataInterface} as another {@link StructureDataInterface} */ - public static StructureDataInterface getReduced(StructureDataInterface structureDataInterface) { - // The transmission of the data goes through this - AdapterToStructureData adapterToStructureData = new AdapterToStructureData(); - SummaryData dataSummary = getDataSummaryData(structureDataInterface); - adapterToStructureData.initStructure(dataSummary.numBonds, dataSummary.numAtoms, dataSummary.numGroups, - dataSummary.numChains, structureDataInterface.getNumModels(), structureDataInterface.getStructureId()); - DecoderUtils.addXtalographicInfo(structureDataInterface, adapterToStructureData); - DecoderUtils.addHeaderInfo(structureDataInterface, adapterToStructureData); - DecoderUtils.generateBioAssembly(structureDataInterface, adapterToStructureData); - DecoderUtils.addEntityInfo(structureDataInterface, adapterToStructureData); - // Loop through the Structure data interface this with the appropriate data - int atomCounter= - 1; - int redAtomCounter = -1; - int groupCounter= - 1; - int chainCounter= - 1; - List interGroupBondsToAdd = new ArrayList<>(); - List interGroupRedIndsToAdd = new ArrayList<>(); - for (int i=0; i atomIndicesToAdd = getIndicesToAdd(structureDataInterface, groupType, chainType); - int bondsToAdd = findBondsToAdd(atomIndicesToAdd, structureDataInterface, groupType,atomCounter+1); - // If there's an atom to add in this group - add it - if(atomIndicesToAdd.size()>0){ - adapterToStructureData.setGroupInfo(structureDataInterface.getGroupName(groupType), structureDataInterface.getGroupIds()[groupCounter], - structureDataInterface.getInsCodes()[groupCounter], structureDataInterface.getGroupChemCompType(groupType), atomIndicesToAdd.size(), - bondsToAdd, structureDataInterface.getGroupSingleLetterCode(groupType), structureDataInterface.getGroupSequenceIndices()[groupCounter], - structureDataInterface.getSecStructList()[groupCounter]); - numGroups++; + public static StructureDataInterface getReduced(StructureDataInterface full) { + // maps atom indices in full structure to reduced structure + Map atomMap = new HashMap<>(); + + // get indices to C-alpha and P atoms for polypeptides and polynucleotides, respectively + Integer[] centerAtomIndices = getCenterAtomGroupIndices(full); + + // Set header and metadata + AdapterToStructureData reduced = new AdapterToStructureData(); + + reduced.setMmtfProducer(full.getMmtfProducer()); + SummaryData dataSummary = getDataSummaryData(full, centerAtomIndices); + reduced.initStructure(dataSummary.numBonds, dataSummary.numAtoms, dataSummary.numGroups, + dataSummary.numChains, full.getNumModels(), full.getStructureId()); + + DecoderUtils.addXtalographicInfo(full, reduced); + DecoderUtils.addHeaderInfo(full, reduced); + DecoderUtils.generateBioAssembly(full, reduced); + DecoderUtils.addEntityInfo(full, reduced); + + // traverse data structure and copy data to reduced representation. + // Note, atomCount, groupCount, and chainCount keep track of the total number of atoms, groups, and chains. + // They are required to index the data structure. + + for (int i = 0, atomCount = 0, groupCount = 0, chainCount = 0, reducedAtomCount = 0; i atomIndicesToAdd = getIndicesToAdd(full, groupType, chainType, centerAtomIndices); + int bondsToAdd = getNumIntraGroupBonds(atomIndicesToAdd, full, groupType, centerAtomIndices); + + if (atomIndicesToAdd.size() > 0) { + + // Set Group information + reduced.setGroupInfo(full.getGroupName(groupType), full.getGroupIds()[groupCount], + full.getInsCodes()[groupCount], full.getGroupChemCompType(groupType), atomIndicesToAdd.size(), + bondsToAdd, full.getGroupSingleLetterCode(groupType), full.getGroupSequenceIndices()[groupCount], + full.getSecStructList()[groupCount]); + + reducedGroupsPerChain ++; } - for(int l=0; l0){ - for(int l=0; l 0){ + + // Set bond information + for(int l=0; l atomMap) { + + for (int i = 0; i < full.getInterGroupBondOrders().length; i++) { + int bondIndOne = full.getInterGroupBondIndices()[i*2]; + int bondIndTwo = full.getInterGroupBondIndices()[i*2+1]; + int bondOrder = full.getInterGroupBondOrders()[i]; + + // some atoms may not exist in the reduced structure. + // check the atom map to see if both atoms of a bond still exist. + Integer indexOne = atomMap.get(bondIndOne); + + if (indexOne != null) { + Integer indexTwo = atomMap.get(bondIndTwo); + if (indexTwo != null) { + reduced.setInterGroupBond(indexOne, indexTwo, bondOrder); + } } } - adapterToStructureData.finalizeStructure(); - // Return the AdapterToStructureData - return adapterToStructureData; } /** - * Find if bonds need adding - to be used in later processing. + * Gets the number of intramolecular bonds for a specified group type. * @param indicesToAdd the indices of the atoms to add * @param structureDataInterface the {@link StructureDataInterface} of the total structure * @param groupType the index of the groupType - * @param atomCounter the current atom counter position * @return the integer number of bonds to add */ - private static int findBondsToAdd(List indicesToAdd, StructureDataInterface structureDataInterface, int groupType, int atomCounter) { - // Add the bonds if we've copied all the elements - int interGroupBonds = 0; - if(indicesToAdd.size()>1){ - if (structureDataInterface.getGroupChemCompType(groupType).toUpperCase().contains("SACCHARIDE")){ - for(int i=0; i indicesToAdd, StructureDataInterface structureDataInterface, int groupType, Integer[] centerAtomIndices) { + + if (indicesToAdd.size() == 1 && centerAtomIndices[groupType] != null) { + // in case there is only 1 atom (c-Alpha or P) and it's in a polymer, there cannot be any bonds + return 0; + } else if (indicesToAdd.size() == 0) { + return 0; + } else { + return structureDataInterface.getGroupBondOrders(groupType).length; } - return 0; } - - - /** - * Get the number of bonds, atoms and groups as a map. + * Gets the number of chains, groups, atoms, and bonds in the reduced structure * @param structureDataInterface the input {@link StructureDataInterface} * @return the {@link SummaryData} object describing the data */ - private static SummaryData getDataSummaryData(StructureDataInterface structureDataInterface) { + private static SummaryData getDataSummaryData(StructureDataInterface structureDataInterface, Integer[] centerAtomIndices) { SummaryData summaryData = new SummaryData(); summaryData.numChains = 0; summaryData.numGroups = 0; summaryData.numAtoms = 0; summaryData.numBonds = 0; - int groupCounter = -1; - int chainCounter=-1; - int atomCounter = 0; - for (int i=0; i indicesToAdd = getIndicesToAdd(structureDataInterface, groupType, chainType); - // If there's an atom to add in this group - add it - if(indicesToAdd.size()>0){ + String chainType = EncoderUtils.getTypeFromChainId(structureDataInterface, chainCount); + + for (int k = 0; k < structureDataInterface.getGroupsPerChain()[chainCount]; k++, groupCount++){ + int groupType = structureDataInterface.getGroupTypeIndices()[groupCount]; + Set indicesToAdd = getIndicesToAdd(structureDataInterface, groupType, chainType, centerAtomIndices); + + if (indicesToAdd.size() > 0) { summaryData.numGroups++; } - for(int l=0; l getIndicesToAdd(StructureDataInterface structureDataInterface, int groupType, - String chainType) { - // The list to return - List outList = new ArrayList<>(); + private static Set getIndicesToAdd(StructureDataInterface structure, int groupIndex, + String chainType, Integer[] centerAtomIndices) { + + Set atomIndices = Collections.emptySet(); + + Integer atomIndex = centerAtomIndices[groupIndex]; + // Get chain type - if(chainType.equals("polymer")){ - for(int i=0; i(structure.getNumAtomsInGroup(groupIndex)); + for(int i = 0; i < structure.getNumAtomsInGroup(groupIndex); i++) { + atomIndices.add(i); } } - } - // Check if it's a non-polymer - else if (chainType.equals("non-polymer")){ - for(int i=0; i(structure.getNumAtomsInGroup(groupIndex)); + for (int i = 0; i < structure.getNumAtomsInGroup(groupIndex); i++){ + atomIndices.add(i); } } - else if(chainType.equals("water")){ - // We skip water - } - else{ - System.err.println("Unrecoginised entity type: "+chainType); - } - return outList; + + return atomIndices; } + /** + * Returns an index to the position to the C-alpha atom in a group with the specified group index. + * @param structureDataInterface + * @param groupIndex index of group + * @return index of C-alpha atom if present, otherwise null + */ + private static Integer indexOfcAlpha(StructureDataInterface structureDataInterface, int groupIndex) { + for(int i = 0; i traceNames = Arrays.asList("CA","P"); + + // this checks only first model, since the data are interleaved for multiple models, they cannot be compared directly + int n = reduced.getNumGroups()/reduced.getNumModels(); + assertArrayEquals(structureId + ":GroupIds", Arrays.copyOf(full.getGroupIds(),n), Arrays.copyOf(reduced.getGroupIds(),n)); + assertTrue(structureId + ":GroupSequenceIndices", full.getGroupSequenceIndices().length >= reduced.getGroupSequenceIndices().length); + assertArrayEquals(structureId + ":GroupSequenceIndices", Arrays.copyOf(full.getGroupSequenceIndices(),n), Arrays.copyOf(reduced.getGroupSequenceIndices(),n)); + assertEquals(structureId + ":NumGroups", full.getNumGroups(), full.getGroupSequenceIndices().length); + assertEquals(structureId + ":NumGroups", reduced.getNumGroups(), reduced.getGroupSequenceIndices().length); + assertEquals(structureId + ":NumGroups", full.getNumGroups(), full.getGroupTypeIndices().length); + assertEquals(structureId + ":NumGroups", reduced.getNumGroups(), reduced.getGroupTypeIndices().length); + + for (int i = 0; i < reduced.getNumGroups()/reduced.getNumModels(); i++) { + int fId = full.getGroupTypeIndices()[i]; + int rId = reduced.getGroupTypeIndices()[i]; + + assertEquals(structureId + ":GroupChemCompType", full.getGroupChemCompType(fId), reduced.getGroupChemCompType(rId)); + assertEquals(structureId + ":GroupName", full.getGroupName(fId), reduced.getGroupName(rId)); + assertEquals(structureId + ":GroupSingleLetterCode", full.getGroupSingleLetterCode(fId), reduced.getGroupSingleLetterCode(rId)); + + // there will be fewer atoms and bonds per group for peptide and nucleotide groups + assertTrue(structureId + ":NumAtomsInGroup", full.getNumAtomsInGroup(fId) >= reduced.getNumAtomsInGroup(rId)); + assertTrue(structureId + ":GroupAtomCharges", full.getGroupAtomCharges(fId).length >= reduced.getGroupAtomCharges(rId).length); + assertTrue(structureId + ":GroupAtomNames", full.getGroupAtomNames(fId).length >= reduced.getGroupAtomNames(rId).length); + assertTrue(structureId + ":GroupElementNames", full.getGroupElementNames(fId).length >= reduced.getGroupElementNames(rId).length); + + // if number of atoms per group are the same, then these data must be identical + if (full.getNumAtomsInGroup(fId) == reduced.getNumAtomsInGroup(rId)) { + assertArrayEquals(structureId + ":GroupAtomCharges", full.getGroupAtomCharges(fId), reduced.getGroupAtomCharges(rId)); + assertArrayEquals(structureId + ":GroupAtomNames", full.getGroupAtomNames(fId), reduced.getGroupAtomNames(rId)); + } else { + assertTrue(structureId + ":GroupAtomNames", traceNames.containsAll(Arrays.asList(reduced.getGroupAtomNames(rId)))); + } + + assertTrue(structureId + ":GroupBondIndices", full.getGroupBondIndices(fId).length >= reduced.getGroupBondIndices(rId).length); + assertTrue(structureId + ":GroupBondOrders", full.getGroupBondOrders(fId).length >= reduced.getGroupBondOrders(rId).length); + + // for all other groups, the bond info should be identical + if (full.getGroupBondIndices(fId).length == reduced.getGroupBondIndices(rId).length) { + assertArrayEquals(structureId + ":GroupBondIndices", full.getGroupBondIndices(fId), reduced.getGroupBondIndices(rId)); + for (int bo: reduced.getGroupBondOrders(rId)) { + assertTrue(structureId + ":GroupBondOrders", bo >0 && bo < 5); + } + assertArrayEquals(structureId + ":GroupBondOrders", full.getGroupBondOrders(fId), reduced.getGroupBondOrders(rId)); + } + } + } + + private static void compareInterBondData(String structureId, StructureDataInterface full, StructureDataInterface reduced) { + assertTrue(structureId + ":InterGroupBondIndices", full.getInterGroupBondIndices().length >= reduced.getInterGroupBondIndices().length); + assertTrue(structureId + ":InterGroupBondOrder", full.getInterGroupBondOrders().length >= reduced.getInterGroupBondOrders().length); + for (int bo: reduced.getInterGroupBondOrders()) { + assertTrue(structureId + ":InterGroupBondOrders", bo >0 && bo < 5); + } + } + + /** + * Get the default data for the full format. + * @return a {@link StructureDataInterface} for the full data. + * @throws IOException + */ + private StructureDataInterface getDefaultFullData() throws IOException, ParseException { + Path p = Utils.getResource("/mmtf/4cup.mmtf"); + return new GenericDecoder(ReaderUtils.getDataFromFile(p)); + } +}