Skip to content

Commit

Permalink
Hack to fix AF3 mmCIF _citation.title breaking gemmi
Browse files Browse the repository at this point in the history
  • Loading branch information
milot-mirdita committed May 9, 2024
1 parent 9ef6d18 commit f5b9588
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 2 deletions.
38 changes: 36 additions & 2 deletions src/strucclustutils/GemmiWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#include "foldcomp.h"
#include "cif.hpp"

#include <algorithm>
#include <cstdlib>

GemmiWrapper::GemmiWrapper(){
threeAA2oneAA = {{"ALA",'A'}, {"ARG",'R'}, {"ASN",'N'}, {"ASP",'D'},
{"CYS",'C'}, {"GLN",'Q'}, {"GLU",'E'}, {"GLY",'G'},
Expand All @@ -33,6 +35,7 @@ GemmiWrapper::GemmiWrapper(){
{"CSD",'C'}, {"SEC",'C'},
// unknown
{"UNK",'X'}};
fixupBuffer = NULL;
}

std::unordered_map<std::string, int> getEntityTaxIDMapping(gemmi::cif::Document& doc) {
Expand Down Expand Up @@ -99,7 +102,20 @@ bool GemmiWrapper::load(const std::string& filename, Format format) {
std::unordered_map<std::string, int> entity_to_tax_id;
switch (format) {
case Format::Mmcif: {
gemmi::cif::Document doc = gemmi::cif::read(infile);
gemmi::CharArray mem = read_into_buffer(infile);
char* data = mem.data();
size_t dataSize = mem.size();

// hack to fix broken _citation.title in AF3
const char* target = "_citation.title";
size_t target_len = strlen(target);
char* it = std::search(data, data + dataSize, target, target + target_len);
if (it != data + dataSize) {
it[0] = '#';
it[1] = ' ';
}

gemmi::cif::Document doc = gemmi::cif::read_memory(mem.data(), mem.size(), infile.path().c_str());
entity_to_tax_id = getEntityTaxIDMapping(doc);
st = gemmi::make_structure(doc);
break;
Expand Down Expand Up @@ -161,7 +177,25 @@ bool GemmiWrapper::loadFromBuffer(const char * buffer, size_t bufferSize, const
st = gemmi::pdb_impl::read_pdb_from_stream(gemmi::MemoryStream(buffer, bufferSize), name, gemmi::PdbReadOptions());
break;
case Format::Mmcif: {
gemmi::cif::Document doc = gemmi::cif::read_memory(buffer, bufferSize, name.c_str());
const char* targetBuffer = buffer;
// hack to fix broken _citation.title in AF3
const char* target = "_citation.title";
size_t target_len = strlen(target);
const char* it = std::search(targetBuffer, targetBuffer + bufferSize, target, target + target_len);
if (it != targetBuffer + bufferSize) {
if (fixupBuffer == NULL) {
fixupBufferSize = bufferSize;
fixupBuffer = (char*)malloc(fixupBufferSize);
} else if (bufferSize > fixupBufferSize) {
fixupBufferSize = bufferSize * 1.5;
fixupBuffer = (char*)realloc(fixupBuffer, fixupBufferSize);
}
memcpy(fixupBuffer, targetBuffer, bufferSize);
*(fixupBuffer + (it - targetBuffer)) = '#';
*(fixupBuffer + (it - targetBuffer) + 1) = ' ';
targetBuffer = fixupBuffer;
}
gemmi::cif::Document doc = gemmi::cif::read_memory(targetBuffer, bufferSize, name.c_str());
entity_to_tax_id = getEntityTaxIDMapping(doc);
st = gemmi::make_structure(doc);
break;
Expand Down
9 changes: 9 additions & 0 deletions src/strucclustutils/GemmiWrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ class GemmiWrapper {
};

GemmiWrapper();
// Releases the scratch buffer used by the mmCIF `_citation.title` fixup.
// fixupBuffer is obtained from malloc()/realloc() in loadFromBuffer(), so it
// must be released with free(); using `delete` on it is undefined behaviour.
// free() accepts a null pointer, so no explicit null check is required.
~GemmiWrapper() {
    free(fixupBuffer);
}

bool loadFromBuffer(const char * buffer, size_t bufferSize, const std::string& name, Format format = Format::Detect);

Expand All @@ -42,6 +47,10 @@ class GemmiWrapper {
std::vector<std::pair<size_t ,size_t>> chain;
std::vector<int> taxIds;
std::string title;

char* fixupBuffer;
size_t fixupBufferSize;

private:
std::unordered_map<std::string,char> threeAA2oneAA;
int modelIt;
Expand Down

0 comments on commit f5b9588

Please sign in to comment.