From 9d8d51524dd4cc1d28a48ea45918ee0de291a9dc Mon Sep 17 00:00:00 2001 From: Danny Diaz Date: Wed, 30 Jun 2021 13:30:37 -0500 Subject: [PATCH 01/11] fixed bug--unable to read cif that deviates from the pdb_id.cif format. --- src/cif.cc | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/cif.cc b/src/cif.cc index 5d66387..2ced714 100644 --- a/src/cif.cc +++ b/src/cif.cc @@ -400,7 +400,9 @@ find_doc_idx(std::string filename) std::transform(filename.begin(), filename.end(), filename.begin(), tolower); for (int i = 0; i != docs.size(); ++i) { - if (filename.find(docs[i].source) != std::string::npos) { + // only compare the first 4 characters of the doc.source value + // this allows different file suffix and extensions + if (filename.find(docs[i].source.substr(0,4)) != std::string::npos) { return i; } } @@ -616,7 +618,8 @@ populate_freesasa_result_vectors(gemmi::cif::Table &table, freesasa_node *result atom = freesasa_node_children(residue); while (atom) { auto cName = std::string(1, freesasa_node_atom_chain(atom)); - auto rNum = freesasa_node_atom_residue_number(atom); + // TODO figure out why this returns decimal string sometimes + auto rNum = std::to_string(std::atoi(freesasa_node_atom_residue_number(atom))); auto rName = freesasa_node_atom_residue_name(atom); auto aName = freesasa_node_name(atom); auto area = freesasa_node_area(atom); @@ -626,7 +629,7 @@ populate_freesasa_result_vectors(gemmi::cif::Table &table, freesasa_node *result if (rowNum == FREESASA_FAIL) freesasa_fail( "In %s(), unable to find freesasa_node atom (%d, %s, %s, %s, %s) in cif %s", - __func__, model, cName.c_str(), rNum, rName, aName, table.bloc.name.c_str()); + __func__, model, cName.c_str(), rNum.c_str(), rName, aName, table.bloc.name.c_str()); sasa_vals[rowNum] = std::to_string(area->total); sasa_radii[rowNum] = std::to_string(radius); From 1410a3880cb3826f296c3fc97c6bf27f877406bc Mon Sep 17 00:00:00 2001 From: Danny Diaz Date: Sun, 25 Jul 2021 19:53:29 -0500 Subject: [PATCH 02/11] patched floating point exception bug in gemmi --- src/cif.cc | 83 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 60 insertions(+), 23 deletions(-) diff --git a/src/cif.cc b/src/cif.cc index 2ced714..0de18e4 100644 --- a/src/cif.cc +++ b/src/cif.cc @@ -402,7 +402,7 @@ find_doc_idx(std::string filename) for (int i = 0; i != docs.size(); ++i) { // only compare the first 4 characters of the doc.source value // this allows different file suffix and extensions - if (filename.find(docs[i].source.substr(0,4)) != std::string::npos) { + if (filename.find(docs[i].source.substr(0, 4)) != std::string::npos) { return i; } } @@ -644,10 +644,29 @@ populate_freesasa_result_vectors(gemmi::cif::Table &table, freesasa_node *result } } -static void -write_cif_block(std::ostream &out, gemmi::cif::Table &table, - std::vector &sasa_vals, - std::vector &sasa_radii) +static int +replace_sasa_columns(gemmi::cif::Table &table, + const std::vector &sasa_vals, + const std::vector &sasa_radii, + const std::vector &sasa_tags) +{ + std::vector> sasa_columns = {sasa_vals, sasa_radii}; + + assert(sasa_columns.size() == sasa_tags.size()); + + for (unsigned i = 0; i != sasa_tags.size(); ++i) { + auto column = table.bloc.find_loop(sasa_tags[i]); + std::copy(sasa_columns[i].begin(), sasa_columns[i].end(), column.begin()); + } + + return FREESASA_SUCCESS; +} + +static int +append_sasa_columns(gemmi::cif::Table &table, + const std::vector &sasa_vals, + const std::vector &sasa_radii, + const std::vector &sasa_tags) { auto &loop = *table.get_loop(); @@ -656,27 +675,42 @@ write_cif_block(std::ostream &out, gemmi::cif::Table &table, // Creates a new table full of empty strings with the correct number of dimensions // Outside vector size is the # of columns, inside vector size is the # of rows. - std::vector> newCols(new_tag_size, {loop.length(), {"Empty"}}); + std::vector> new_columns(new_tag_size, {loop.length(), {"Empty"}}); // Copies data from original columns to their respecitve column in the new table filled with empty strings. // Leaving only the new appended columns as empty strings - for (unsigned int i = 0; i != orig_tag_size; ++i) { - auto iCol = table.bloc.find_loop(loop.tags[i]); - std::copy(iCol.begin(), iCol.end(), newCols[i].begin()); + for (unsigned i = 0; i != orig_tag_size; ++i) { + auto column = table.bloc.find_loop(loop.tags[i]); + std::copy(column.begin(), column.end(), new_columns[i].begin()); } - newCols[new_tag_size - 2] = std::move(sasa_vals); - newCols[new_tag_size - 1] = std::move(sasa_radii); + new_columns[new_tag_size - 2] = std::move(sasa_vals); + new_columns[new_tag_size - 1] = std::move(sasa_radii); + + for (const auto &tag : sasa_tags) + loop.tags.push_back(tag); + + loop.set_all_values(new_columns); + + return FREESASA_SUCCESS; +} - std::vector new_tags{ +static int +rewrite_atom_site(gemmi::cif::Table &table, + std::vector &sasa_vals, + std::vector &sasa_radii) +{ + std::vector sasa_tags{ "_atom_site.FreeSASA_value", "_atom_site.FreeSASA_radius"}; - for (auto tag : new_tags) - loop.tags.push_back(tag); - loop.set_all_values(newCols); + auto &loop = *table.get_loop(); - gemmi::cif::write_cif_block_to_stream(out, table.bloc); + if (loop.has_tag(sasa_tags[0]) && loop.has_tag(sasa_tags[1])) { + return replace_sasa_columns(table, sasa_vals, sasa_radii, sasa_tags); + } else { + return append_sasa_columns(table, sasa_vals, sasa_radii, sasa_tags); + } } static int @@ -699,12 +733,6 @@ write_result(std::ostream &out, freesasa_node *root) auto &block = docs[doc_idx].sole_block(); - append_freesasa_params_to_block(block, result); - - if (append_freesasa_result_summary_to_block(block, result) == FREESASA_FAIL) { - return freesasa_fail("Unable to build CIF output"); - } - auto table = block.find("_atom_site.", atom_site_columns); if (prev_doc_idx != doc_idx) { sasa_vals = std::vector{table.length(), "?"}; @@ -713,6 +741,15 @@ write_result(std::ostream &out, freesasa_node *root) populate_freesasa_result_vectors(table, result, sasa_vals, sasa_radii); + int added_data = rewrite_atom_site(table, sasa_vals, sasa_radii); + int added_summary = append_freesasa_result_summary_to_block(block, result); + + if (added_data == FREESASA_FAIL || added_summary == FREESASA_FAIL) { + return freesasa_fail("Unable to build CIF output"); + } + + append_freesasa_params_to_block(block, result); + prev_doc_idx = doc_idx; result = freesasa_node_next(result); @@ -727,7 +764,7 @@ write_result(std::ostream &out, freesasa_node *root) write = false; } - if (write) write_cif_block(out, table, sasa_vals, sasa_radii); + if (write) gemmi::cif::write_cif_block_to_stream(out, block); } return FREESASA_SUCCESS; } From 01612092f046071e73dce2da7a1d485504c923cf Mon Sep 17 00:00:00 2001 From: Danny Diaz Date: Fri, 30 Jul 2021 12:23:55 -0500 Subject: [PATCH 03/11] refactored how the freesasa tree is mapped to the appropriate gemmi document for writing. --- src/cif.cc | 90 ++++++++++++++----------------------------------- src/cif.hh | 2 ++ src/freesasa.h | 16 +++++++++ src/main.cc | 7 ++-- src/node.c | 10 ++++++ src/structure.c | 14 ++++++++ 6 files changed, 71 insertions(+), 68 deletions(-) diff --git a/src/cif.cc b/src/cif.cc index 0de18e4..0947dbc 100644 --- a/src/cif.cc +++ b/src/cif.cc @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -17,7 +18,7 @@ #include "cif.hh" #include "freesasa.h" -static std::vector docs; +static std::map docs; struct ModelDiscriminator { ModelDiscriminator(const std::string &model_name, @@ -149,11 +150,13 @@ freesasa_atom_from_site(const gemmi::cif::Table::Row &site) template static freesasa_structure * structure_from_pred(const gemmi::cif::Document &doc, + const std::string cif_name, const T &discriminator, const freesasa_classifier *classifier, int structure_options) { freesasa_structure *structure = freesasa_structure_new(); + freesasa_structure_set_cif_filename(structure, cif_name.c_str()); std::string auth_atom_id; char prevAltId = '.'; @@ -186,29 +189,19 @@ structure_from_pred(const gemmi::cif::Document &doc, } static gemmi::cif::Document & -generate_gemmi_doc(std::FILE *input) +generate_gemmi_doc(std::FILE *input, const std::string name) { - docs.emplace_back(gemmi::cif::read_cstream(input, 8192, "cif-input")); - auto &doc = docs.back(); - - gemmi::Structure gemmi_struct = gemmi::make_structure_from_block(doc.blocks[0]); - - if (gemmi_struct.name.find(".cif") != std::string::npos) { - doc.source = gemmi_struct.name; - } else { - doc.source = gemmi_struct.name + ".cif"; - } - transform(doc.source.begin(), doc.source.end(), doc.source.begin(), tolower); - - return doc; + docs.emplace(name, gemmi::cif::read_cstream(input, 8192, "cif-input")); + return docs[name]; } freesasa_structure * freesasa_structure_from_cif(std::FILE *input, + const std::string &cif_name, const freesasa_classifier *classifier, int structure_options) { - auto &doc = generate_gemmi_doc(input); + auto &doc = generate_gemmi_doc(input, cif_name); const auto models = get_models(doc); @@ -220,33 +213,35 @@ freesasa_structure_from_cif(std::FILE *input, auto singleModel = std::set{*firstModel}; discriminator = std::make_unique(singleModel); } - return structure_from_pred(doc, *discriminator, classifier, structure_options); + return structure_from_pred(doc, cif_name, *discriminator, classifier, structure_options); } static freesasa_structure * structure_from_model(const gemmi::cif::Document &doc, + const std::string &cif_name, const std::string &model_name, const freesasa_classifier *classifier, int structure_options) { const ModelDiscriminator discriminator(model_name); - return structure_from_pred(doc, discriminator, classifier, structure_options); - freesasa_structure *structure = freesasa_structure_new(); + return structure_from_pred(doc, cif_name, discriminator, classifier, structure_options); } static freesasa_structure * structure_from_chain(const gemmi::cif::Document doc, + const std::string &cif_name, const std::string &model_name, const std::string &chain_name, const freesasa_classifier *classifier, int structure_options) { const ChainDiscriminator discriminator(model_name, chain_name); - return structure_from_pred(doc, discriminator, classifier, structure_options); + return structure_from_pred(doc, cif_name, discriminator, classifier, structure_options); } std::vector freesasa_cif_structure_array(std::FILE *input, + const std::string &cif_name, int *n, const freesasa_classifier *classifier, int options) @@ -255,7 +250,7 @@ freesasa_cif_structure_array(std::FILE *input, std::vector ss; - auto &doc = generate_gemmi_doc(input); + auto &doc = generate_gemmi_doc(input, cif_name); gemmi::Structure gemmi_struct = gemmi::make_structure_from_block(doc.blocks[0]); @@ -281,7 +276,7 @@ freesasa_cif_structure_array(std::FILE *input, ss.reserve(n_new_chains); for (auto &chain_name : *chain_names) { - freesasa_structure *structure = structure_from_chain(doc, models[i].name, chain_name, classifier, options); + freesasa_structure *structure = structure_from_chain(doc, cif_name, models[i].name, chain_name, classifier, options); if (freesasa_structure_n(structure) == 0) { --n_chains; free(structure); @@ -301,7 +296,7 @@ freesasa_cif_structure_array(std::FILE *input, ss.reserve(n_models); for (int i = 0; i < n_models; ++i) { ss.emplace_back( - structure_from_model(doc, models[i].name, classifier, options)); + structure_from_model(doc, cif_name, models[i].name, classifier, options)); freesasa_structure_set_model(ss.back(), i + 1); } *n = n_models; @@ -374,41 +369,6 @@ struct freesasa_MCRA { const std::string &_chain, &_residue, &_res_num, &_atom; }; -static std::string -get_cif_filename(std::string filename) -{ - // Remove : if present in the cif filename - if (filename.find(".cif") != std::string::npos) - return filename.substr(0, filename.find(".cif") + 4); - return filename; -} - -static int -find_doc_idx(std::string filename) -{ - filename = get_cif_filename(filename); - - if (filename.find("stdin") != std::string::npos) { - if (docs.size() == 1) - return 0; - else - freesasa_fail( - "In %s(), CIF input is from stdin but there are more than 1 gemmi documents to choose from. " - "Unable to select correct doc. exiting...", - __func__); - } - - std::transform(filename.begin(), filename.end(), filename.begin(), tolower); - for (int i = 0; i != docs.size(); ++i) { - // only compare the first 4 characters of the doc.source value - // this allows different file suffix and extensions - if (filename.find(docs[i].source.substr(0, 4)) != std::string::npos) { - return i; - } - } - return FREESASA_WARN; -} - static void append_freesasa_params_to_block(gemmi::cif::Block &block, freesasa_node *result) { @@ -718,23 +678,23 @@ write_result(std::ostream &out, freesasa_node *root) { freesasa_node *result{freesasa_node_children(root)}; - int prev_doc_idx = -1, doc_idx = 0; + std::string cif_name, prev_cif_name = ""; bool write = false; std::vector sasa_vals, sasa_radii; while (result) { - doc_idx = find_doc_idx(freesasa_node_name(result)); - if (doc_idx == FREESASA_WARN) { + cif_name = freesasa_node_structure_cif_filename(freesasa_node_children(result)); + if (docs.find(cif_name) == docs.end()) { freesasa_warn("In %s(), unable to find gemmi doc for result node: %s. Skipping...", __func__, freesasa_node_name(result)); result = freesasa_node_next(result); continue; } - auto &block = docs[doc_idx].sole_block(); + auto &block = docs[cif_name].sole_block(); auto table = block.find("_atom_site.", atom_site_columns); - if (prev_doc_idx != doc_idx) { + if (prev_cif_name != cif_name) { sasa_vals = std::vector{table.length(), "?"}; sasa_radii = std::vector{table.length(), "?"}; } @@ -750,13 +710,13 @@ write_result(std::ostream &out, freesasa_node *root) append_freesasa_params_to_block(block, result); - prev_doc_idx = doc_idx; + prev_cif_name = cif_name; result = freesasa_node_next(result); if (!result) { // There is no next result so write out current file. write = true; - } else if (find_doc_idx(freesasa_node_name(result)) != prev_doc_idx) { + } else if (freesasa_node_structure_cif_filename(freesasa_node_children(result)) != prev_cif_name) { // Next result node is from a new file so write out current file. write = true; } else { diff --git a/src/cif.hh b/src/cif.hh index 237e9b4..2b33217 100644 --- a/src/cif.hh +++ b/src/cif.hh @@ -13,11 +13,13 @@ freesasa_structure * freesasa_structure_from_cif(std::FILE *input, + const std::string &cif_name, const freesasa_classifier *classifier, int structure_options); std::vector freesasa_cif_structure_array(std::FILE *input, + const std::string &cif_name, int *n, const freesasa_classifier *classifier, int options); diff --git a/src/freesasa.h b/src/freesasa.h index 102d7e6..52ac954 100644 --- a/src/freesasa.h +++ b/src/freesasa.h @@ -1563,6 +1563,17 @@ int freesasa_node_structure_n_atoms(const freesasa_node *node); const char * freesasa_node_structure_chain_labels(const freesasa_node *node); +/** + CIF file read to create the structure. + + @param node A node of type ::FREESASA_NODE_STRUCTURE. + @return cif filename as null-terminated string. + + @ingroup node + */ +const char * +freesasa_node_structure_cif_filename(const freesasa_node *node); + /** Model number of a structure (from input PDB) @@ -1654,6 +1665,11 @@ int freesasa_select_area(const char *command, void freesasa_structure_set_model(freesasa_structure *structure, int model); +void freesasa_structure_set_cif_filename(freesasa_structure *structure, + const char *filename); + +const char *freesasa_structure_cif_filename(freesasa_structure *structure); + #ifdef __cplusplus } #endif diff --git a/src/main.cc b/src/main.cc index 2cc2aec..7e25a47 100644 --- a/src/main.cc +++ b/src/main.cc @@ -260,6 +260,7 @@ exit_with_help(void) static std::vector get_structures(std::FILE *input, + const char *name, int *n, const struct cli_state *state) { @@ -271,7 +272,7 @@ get_structures(std::FILE *input, if ((state->structure_options & FREESASA_SEPARATE_CHAINS) || (state->structure_options & FREESASA_SEPARATE_MODELS)) { if (state->cif) { - structures = freesasa_cif_structure_array(input, n, state->classifier, state->structure_options); + structures = freesasa_cif_structure_array(input, name, n, state->classifier, state->structure_options); } else { // TODO this hack needed since PDB implementation is in C freesasa_structure **db_ptr_structs = freesasa_structure_array(input, n, state->classifier, state->structure_options); @@ -283,7 +284,7 @@ get_structures(std::FILE *input, } else { *n = 1; if (state->cif) { - structures.emplace_back(freesasa_structure_from_cif(input, state->classifier, state->structure_options)); + structures.emplace_back(freesasa_structure_from_cif(input, name, state->classifier, state->structure_options)); } else { structures.emplace_back(freesasa_structure_from_pdb(input, state->classifier, state->structure_options)); } @@ -331,7 +332,7 @@ run_analysis(FILE *input, if (name_i == NULL) abort_msg("memory failure"); /* read PDB file */ - structures = get_structures(input, &n, state); + structures = get_structures(input, name, &n, state); if (n == 0) abort_msg("invalid input"); /* perform calculation on each structure */ diff --git a/src/node.c b/src/node.c index b4e88bf..ab29648 100644 --- a/src/node.c +++ b/src/node.c @@ -34,6 +34,7 @@ struct structure_properties { int n_atoms; int model; char *chain_labels; + char *cif_filename; freesasa_result *result; freesasa_selection **selection; // NULL terminated array }; @@ -120,6 +121,7 @@ node_free(freesasa_node *node) break; case FREESASA_NODE_STRUCTURE: free(node->properties.structure.chain_labels); + free(node->properties.structure.cif_filename); freesasa_result_free(node->properties.structure.result); sel = node->properties.structure.selection; if (sel) { @@ -377,6 +379,7 @@ node_structure(const freesasa_structure *structure, node->properties.structure.selection = NULL; node->properties.structure.chain_labels = strdup(freesasa_structure_chain_labels(structure)); node->properties.structure.model = freesasa_structure_model(structure); + node->properties.structure.cif_filename = strdup(freesasa_structure_cif_filename(structure)); if (node->properties.structure.chain_labels == NULL) { mem_fail(); @@ -655,6 +658,13 @@ freesasa_node_structure_result(const freesasa_node *node) return node->properties.structure.result; } +const char * +freesasa_node_structure_cif_filename(const freesasa_node *node) +{ + assert(node->type == FREESASA_NODE_STRUCTURE); + return node->properties.structure.cif_filename; +} + int freesasa_node_structure_add_selection(freesasa_node *node, const freesasa_selection *selection) { diff --git a/src/structure.c b/src/structure.c index 7d13108..3053c10 100644 --- a/src/structure.c +++ b/src/structure.c @@ -78,6 +78,7 @@ struct freesasa_structure { char *classifier_name; coord_t *xyz; int model; /* model number */ + char *cif_filename; }; static int @@ -1213,3 +1214,16 @@ void freesasa_structure_set_model(freesasa_structure *structure, { structure->model = model; } + +void freesasa_structure_set_cif_filename(freesasa_structure *structure, + const char *filename) +{ + assert(structure); + structure->cif_filename = strdup(filename); +} + +const char *freesasa_structure_cif_filename(freesasa_structure *structure) +{ + assert(structure); + return structure->cif_filename; +} From dfcd2482bf0f8f76f8e738e940f9a3d1365713ed Mon Sep 17 00:00:00 2001 From: Danny Diaz Date: Sat, 14 Aug 2021 12:06:08 -0500 Subject: [PATCH 04/11] forgot to free freesasa_structure->cif_filename --- src/structure.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/structure.c b/src/structure.c index 3053c10..82b0a41 100644 --- a/src/structure.c +++ b/src/structure.c @@ -386,6 +386,7 @@ void freesasa_structure_free(freesasa_structure *s) chains_dealloc(&s->chains); if (s->xyz != NULL) freesasa_coord_free(s->xyz); free(s->classifier_name); + free(s->cif_filename); free(s); } } From 165009fce23c3742b795f6a518c6bde6a574e005 Mon Sep 17 00:00:00 2001 From: Simon Mitternacht Date: Sun, 5 Sep 2021 12:46:20 +0200 Subject: [PATCH 05/11] Use a unique integer to identify gemmi docs, instead of filename. Also move all declarations of code related to cif document references to freesa_internal.h. --- src/cif.cc | 63 ++++++++++++++++++++++++----------------- src/cif.hh | 31 ++++++++++++++++++-- src/freesasa.h | 16 ----------- src/freesasa_internal.h | 34 +++++++++++++++++++++- src/main.cc | 7 +++-- src/node.c | 11 ++++--- src/structure.c | 14 ++++----- 7 files changed, 114 insertions(+), 62 deletions(-) diff --git a/src/cif.cc b/src/cif.cc index 0947dbc..3478c87 100644 --- a/src/cif.cc +++ b/src/cif.cc @@ -17,8 +17,9 @@ #include "cif.hh" #include "freesasa.h" +#include "freesasa_internal.h" -static std::map docs; +static std::map docs; struct ModelDiscriminator { ModelDiscriminator(const std::string &model_name, @@ -150,13 +151,11 @@ freesasa_atom_from_site(const gemmi::cif::Table::Row &site) template static freesasa_structure * structure_from_pred(const gemmi::cif::Document &doc, - const std::string cif_name, const T &discriminator, const freesasa_classifier *classifier, int structure_options) { freesasa_structure *structure = freesasa_structure_new(); - freesasa_structure_set_cif_filename(structure, cif_name.c_str()); std::string auth_atom_id; char prevAltId = '.'; @@ -188,22 +187,26 @@ structure_from_pred(const gemmi::cif::Document &doc, return structure; } -static gemmi::cif::Document & -generate_gemmi_doc(std::FILE *input, const std::string name) +static std::pair +generate_gemmi_doc(std::FILE *input) { - docs.emplace(name, gemmi::cif::read_cstream(input, 8192, "cif-input")); - return docs[name]; + static size_t index = 1; + + // theoretically there could be thread safety issues here, not sure they're relevant. + size_t my_idx = index++; + + docs.emplace(my_idx, gemmi::cif::read_cstream(input, 8192, "cif-input")); + return std::make_pair(std::ref(docs[my_idx]), my_idx); } freesasa_structure * freesasa_structure_from_cif(std::FILE *input, - const std::string &cif_name, const freesasa_classifier *classifier, int structure_options) { - auto &doc = generate_gemmi_doc(input, cif_name); + auto doc_idx_pair = generate_gemmi_doc(input); - const auto models = get_models(doc); + const auto models = get_models(doc_idx_pair.first); std::unique_ptr discriminator; if (structure_options & FREESASA_JOIN_MODELS) { @@ -213,35 +216,36 @@ freesasa_structure_from_cif(std::FILE *input, auto singleModel = std::set{*firstModel}; discriminator = std::make_unique(singleModel); } - return structure_from_pred(doc, cif_name, *discriminator, classifier, structure_options); + + auto structure = structure_from_pred(doc_idx_pair.first, *discriminator, classifier, structure_options); + freesasa_structure_set_cif_ref(structure, doc_idx_pair.second); + + return structure; } static freesasa_structure * structure_from_model(const gemmi::cif::Document &doc, - const std::string &cif_name, const std::string &model_name, const freesasa_classifier *classifier, int structure_options) { const ModelDiscriminator discriminator(model_name); - return structure_from_pred(doc, cif_name, discriminator, classifier, structure_options); + return structure_from_pred(doc, discriminator, classifier, structure_options); } static freesasa_structure * structure_from_chain(const gemmi::cif::Document doc, - const std::string &cif_name, const std::string &model_name, const std::string &chain_name, const freesasa_classifier *classifier, int structure_options) { const ChainDiscriminator discriminator(model_name, chain_name); - return structure_from_pred(doc, cif_name, discriminator, classifier, structure_options); + return structure_from_pred(doc, discriminator, classifier, structure_options); } std::vector freesasa_cif_structure_array(std::FILE *input, - const std::string &cif_name, int *n, const freesasa_classifier *classifier, int options) @@ -250,7 +254,9 @@ freesasa_cif_structure_array(std::FILE *input, std::vector ss; - auto &doc = generate_gemmi_doc(input, cif_name); + // We don't care about the index here, for now, since exporting structure + // arrays to CIF output is not supported. + auto doc = generate_gemmi_doc(input).first; gemmi::Structure gemmi_struct = gemmi::make_structure_from_block(doc.blocks[0]); @@ -276,7 +282,7 @@ freesasa_cif_structure_array(std::FILE *input, ss.reserve(n_new_chains); for (auto &chain_name : *chain_names) { - freesasa_structure *structure = structure_from_chain(doc, cif_name, models[i].name, chain_name, classifier, options); + freesasa_structure *structure = structure_from_chain(doc, models[i].name, chain_name, classifier, options); if (freesasa_structure_n(structure) == 0) { --n_chains; free(structure); @@ -296,7 +302,7 @@ freesasa_cif_structure_array(std::FILE *input, ss.reserve(n_models); for (int i = 0; i < n_models; ++i) { ss.emplace_back( - structure_from_model(doc, cif_name, models[i].name, classifier, options)); + structure_from_model(doc, models[i].name, classifier, options)); freesasa_structure_set_model(ss.back(), i + 1); } *n = n_models; @@ -678,23 +684,23 @@ write_result(std::ostream &out, freesasa_node *root) { freesasa_node *result{freesasa_node_children(root)}; - std::string cif_name, prev_cif_name = ""; + size_t cif_ref, prev_cif_ref = 0; bool write = false; std::vector sasa_vals, sasa_radii; while (result) { - cif_name = freesasa_node_structure_cif_filename(freesasa_node_children(result)); - if (docs.find(cif_name) == docs.end()) { + cif_ref = freesasa_node_structure_cif_ref(freesasa_node_children(result)); + if (docs.find(cif_ref) == docs.end()) { freesasa_warn("In %s(), unable to find gemmi doc for result node: %s. Skipping...", __func__, freesasa_node_name(result)); result = freesasa_node_next(result); continue; } - auto &block = docs[cif_name].sole_block(); + auto &block = docs[cif_ref].sole_block(); auto table = block.find("_atom_site.", atom_site_columns); - if (prev_cif_name != cif_name) { + if (prev_cif_ref != cif_ref) { sasa_vals = std::vector{table.length(), "?"}; sasa_radii = std::vector{table.length(), "?"}; } @@ -710,13 +716,13 @@ write_result(std::ostream &out, freesasa_node *root) append_freesasa_params_to_block(block, result); - prev_cif_name = cif_name; + prev_cif_ref = cif_ref; result = freesasa_node_next(result); if (!result) { // There is no next result so write out current file. write = true; - } else if (freesasa_node_structure_cif_filename(freesasa_node_children(result)) != prev_cif_name) { + } else if (freesasa_node_structure_cif_ref(freesasa_node_children(result)) != prev_cif_ref) { // Next result node is from a new file so write out current file. write = true; } else { @@ -764,3 +770,8 @@ int freesasa_export_tree_to_cif(const char *filename, return ret; } + +void freesasa_cif_clear_docs() +{ + docs.clear(); +} diff --git a/src/cif.hh b/src/cif.hh index 2b33217..d0ffe2b 100644 --- a/src/cif.hh +++ b/src/cif.hh @@ -11,21 +11,46 @@ #include "freesasa.h" #include "freesasa_internal.h" +/** + Generate ::freesasa_structure from CIF document. + A copy of the document is stored in memory to allow reusing it later in + ::freesasa_export_tree_to_cif. + To free that memory call ::freesasa_cif_clear_docs(); + + @param input Input file + @param classifier Classifier to use + @param structure_options Options (see ::freesasa_structure_add_atom) +*/ freesasa_structure * freesasa_structure_from_cif(std::FILE *input, - const std::string &cif_name, const freesasa_classifier *classifier, int structure_options); +/** + Generate a set of structures from one file. See ::freesasa_structure_array. + + @param input Input file + @param n Output variable for size + @param classifier Classifier to use + @param options Options (see ::freesasa_structure_array) + */ std::vector freesasa_cif_structure_array(std::FILE *input, - const std::string &cif_name, int *n, const freesasa_classifier *classifier, int options); -/// If filename is NULL output will be written to stdout +/** + Output calculation results as part of a CIF document. + Takes the original CIF document, and adds the data where appropriate. + + @param filename Output filename. If NULL output will be written to stdout. + @param root Result tree to generate output from +*/ int freesasa_export_tree_to_cif(const char *filename, freesasa_node *root); +/// Clear map of stored cif docs. After it is cleared +void freesasa_cif_clear_docs(); + #endif /* CIF_HH */ diff --git a/src/freesasa.h b/src/freesasa.h index 52ac954..102d7e6 100644 --- a/src/freesasa.h +++ b/src/freesasa.h @@ -1563,17 +1563,6 @@ int freesasa_node_structure_n_atoms(const freesasa_node *node); const char * freesasa_node_structure_chain_labels(const freesasa_node *node); -/** - CIF file read to create the structure. - - @param node A node of type ::FREESASA_NODE_STRUCTURE. - @return cif filename as null-terminated string. - - @ingroup node - */ -const char * -freesasa_node_structure_cif_filename(const freesasa_node *node); - /** Model number of a structure (from input PDB) @@ -1665,11 +1654,6 @@ int freesasa_select_area(const char *command, void freesasa_structure_set_model(freesasa_structure *structure, int model); -void freesasa_structure_set_cif_filename(freesasa_structure *structure, - const char *filename); - -const char *freesasa_structure_cif_filename(freesasa_structure *structure); - #ifdef __cplusplus } #endif diff --git a/src/freesasa_internal.h b/src/freesasa_internal.h index b149367..a4ffcbf 100644 --- a/src/freesasa_internal.h +++ b/src/freesasa_internal.h @@ -261,6 +261,39 @@ const char * freesasa_structure_atom_pdb_line(const freesasa_structure *structure, int i); +/** + Add a reference number to the document used to generate the structure from a CIF file, + used when exporting to CIF file. + + @param structure A structure. + @param ref Reference number for document. >= 1. + @return ::FREESASA_SUCCESS if success. + ::FREESASA_FAIL if memory allocation error. +*/ +void freesasa_structure_set_cif_ref(freesasa_structure *structure, + size_t ref); + +/** + Retrieve the reference number for the CIF document used to generate the structure + from a CIF file. + + @param structure A structure. + @return The reference number. 0 if structure is not from CIF file. +*/ +size_t freesasa_structure_cif_ref(const freesasa_structure *structure); + +/** + Retrieve the reference number for the CIF document used to generate the structure + from a CIF file, in a structure node. + + @param node A node of type ::FREESASA_NODE_STRUCTURE. + @return Reference to CIF document. + + @ingroup node + */ +size_t +freesasa_node_structure_cif_ref(const freesasa_node *node); + const freesasa_nodearea * freesasa_structure_residue_reference(const freesasa_structure *structure, int r_i); @@ -417,7 +450,6 @@ int freesasa_fail_wloc(const char *file, const char *format, ...); - #ifdef __cplusplus } #endif diff --git a/src/main.cc b/src/main.cc index 7e25a47..45f25f3 100644 --- a/src/main.cc +++ b/src/main.cc @@ -260,7 +260,7 @@ exit_with_help(void) static std::vector get_structures(std::FILE *input, - const char *name, + const char *name, int *n, const struct cli_state *state) { @@ -272,7 +272,7 @@ get_structures(std::FILE *input, if ((state->structure_options & FREESASA_SEPARATE_CHAINS) || (state->structure_options & FREESASA_SEPARATE_MODELS)) { if (state->cif) { - structures = freesasa_cif_structure_array(input, name, n, state->classifier, state->structure_options); + structures = freesasa_cif_structure_array(input, n, state->classifier, state->structure_options); } else { // TODO this hack needed since PDB implementation is in C freesasa_structure **db_ptr_structs = freesasa_structure_array(input, n, state->classifier, state->structure_options); @@ -284,7 +284,7 @@ get_structures(std::FILE *input, } else { *n = 1; if (state->cif) { - structures.emplace_back(freesasa_structure_from_cif(input, name, state->classifier, state->structure_options)); + structures.emplace_back(freesasa_structure_from_cif(input, state->classifier, state->structure_options)); } else { structures.emplace_back(freesasa_structure_from_pdb(input, state->classifier, state->structure_options)); } @@ -759,6 +759,7 @@ int main(int argc, if (state.output_format & FREESASA_CIF) { ret = freesasa_export_tree_to_cif(state.output_filename, tree); + freesasa_cif_clear_docs(); } else { ret = freesasa_tree_export(state.output, tree, state.output_format | state.output_depth | (state.no_rel ? FREESASA_OUTPUT_SKIP_REL : 0)); } diff --git a/src/node.c b/src/node.c index ab29648..7c855f4 100644 --- a/src/node.c +++ b/src/node.c @@ -34,7 +34,7 @@ struct structure_properties { int n_atoms; int model; char *chain_labels; - char *cif_filename; + size_t cif_ref; freesasa_result *result; freesasa_selection **selection; // NULL terminated array }; @@ -121,7 +121,6 @@ node_free(freesasa_node *node) break; case FREESASA_NODE_STRUCTURE: free(node->properties.structure.chain_labels); - free(node->properties.structure.cif_filename); freesasa_result_free(node->properties.structure.result); sel = node->properties.structure.selection; if (sel) { @@ -379,7 +378,7 @@ node_structure(const freesasa_structure *structure, node->properties.structure.selection = NULL; node->properties.structure.chain_labels = strdup(freesasa_structure_chain_labels(structure)); node->properties.structure.model = freesasa_structure_model(structure); - node->properties.structure.cif_filename = strdup(freesasa_structure_cif_filename(structure)); + node->properties.structure.cif_ref = freesasa_structure_cif_ref(structure); if (node->properties.structure.chain_labels == NULL) { mem_fail(); @@ -658,11 +657,11 @@ freesasa_node_structure_result(const freesasa_node *node) return node->properties.structure.result; } -const char * -freesasa_node_structure_cif_filename(const freesasa_node *node) +size_t +freesasa_node_structure_cif_ref(const freesasa_node *node) { assert(node->type == FREESASA_NODE_STRUCTURE); - return node->properties.structure.cif_filename; + return node->properties.structure.cif_ref; } int freesasa_node_structure_add_selection(freesasa_node *node, diff --git a/src/structure.c b/src/structure.c index 82b0a41..16d0cef 100644 --- a/src/structure.c +++ b/src/structure.c @@ -78,7 +78,7 @@ struct freesasa_structure { char *classifier_name; coord_t *xyz; int model; /* model number */ - char *cif_filename; + size_t cif_ref; }; static int @@ -368,6 +368,7 @@ freesasa_structure_new(void) s->xyz = freesasa_coord_new(); s->model = 1; s->classifier_name = NULL; + s->cif_ref = 0; if (s->xyz == NULL) goto memerr; @@ -386,7 +387,6 @@ void freesasa_structure_free(freesasa_structure *s) chains_dealloc(&s->chains); if (s->xyz != NULL) freesasa_coord_free(s->xyz); free(s->classifier_name); - free(s->cif_filename); free(s); } } @@ -1216,15 +1216,15 @@ void freesasa_structure_set_model(freesasa_structure *structure, structure->model = model; } -void freesasa_structure_set_cif_filename(freesasa_structure *structure, - const char *filename) +void freesasa_structure_set_cif_ref(freesasa_structure *structure, + size_t ref) { assert(structure); - structure->cif_filename = strdup(filename); + structure->cif_ref = ref; } -const char *freesasa_structure_cif_filename(freesasa_structure *structure) +size_t freesasa_structure_cif_ref(const freesasa_structure *structure) { assert(structure); - return structure->cif_filename; + return structure->cif_ref; } From b6eb189743b2791f0109c5774eecb96fb77df169 Mon Sep 17 00:00:00 2001 From: Simon Mitternacht Date: Sun, 5 Sep 2021 13:03:31 +0200 Subject: [PATCH 06/11] Remove unused code. --- src/main.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main.cc b/src/main.cc index 45f25f3..590b621 100644 --- a/src/main.cc +++ b/src/main.cc @@ -260,7 +260,6 @@ exit_with_help(void) static std::vector get_structures(std::FILE *input, - const char *name, int *n, const struct cli_state *state) { @@ -332,7 +331,7 @@ run_analysis(FILE *input, if (name_i == NULL) abort_msg("memory failure"); /* read PDB file */ - structures = get_structures(input, name, &n, state); + structures = get_structures(input, &n, state); if (n == 0) abort_msg("invalid input"); /* perform calculation on each structure */ From 355a2458c258c2fd4ae2e76e83f6e3b1f79bf8fa Mon Sep 17 00:00:00 2001 From: Simon Mitternacht Date: Sat, 11 Sep 2021 11:13:03 +0200 Subject: [PATCH 07/11] Allow freesasa_structure_free to release CIF doc via function pointer. --- src/cif.cc | 12 +++---- src/cif.hh | 3 -- src/freesasa_internal.h | 6 +++- src/main.cc | 71 +++++++++++++++++++++-------------------- src/structure.c | 17 ++++++++-- 5 files changed, 62 insertions(+), 47 deletions(-) diff --git a/src/cif.cc b/src/cif.cc index 3478c87..0b37bca 100644 --- a/src/cif.cc +++ b/src/cif.cc @@ -199,6 +199,11 @@ generate_gemmi_doc(std::FILE *input) return std::make_pair(std::ref(docs[my_idx]), my_idx); } +static void release_gemmi_doc(size_t doc_ref) +{ + docs.erase(doc_ref); +} + freesasa_structure * freesasa_structure_from_cif(std::FILE *input, const freesasa_classifier *classifier, @@ -218,7 +223,7 @@ freesasa_structure_from_cif(std::FILE *input, } auto structure = structure_from_pred(doc_idx_pair.first, *discriminator, classifier, structure_options); - freesasa_structure_set_cif_ref(structure, doc_idx_pair.second); + freesasa_structure_set_cif_ref(structure, doc_idx_pair.second, &release_gemmi_doc); return structure; } @@ -770,8 +775,3 @@ int freesasa_export_tree_to_cif(const char *filename, return ret; } - -void freesasa_cif_clear_docs() -{ - docs.clear(); -} diff --git a/src/cif.hh b/src/cif.hh index d0ffe2b..03f9052 100644 --- a/src/cif.hh +++ b/src/cif.hh @@ -50,7 +50,4 @@ freesasa_cif_structure_array(std::FILE *input, int freesasa_export_tree_to_cif(const char *filename, freesasa_node *root); -/// Clear map of stored cif docs. After it is cleared -void freesasa_cif_clear_docs(); - #endif /* CIF_HH */ diff --git a/src/freesasa_internal.h b/src/freesasa_internal.h index a4ffcbf..2db7069 100644 --- a/src/freesasa_internal.h +++ b/src/freesasa_internal.h @@ -267,11 +267,15 @@ freesasa_structure_atom_pdb_line(const freesasa_structure *structure, @param structure A structure. @param ref Reference number for document. >= 1. + @param release_func A function that can be called when freeing the structure, to trigger + the destructor for the CIF document. This can be NULL if one wants another mode + of control over the CIF document. @return ::FREESASA_SUCCESS if success. ::FREESASA_FAIL if memory allocation error. */ void freesasa_structure_set_cif_ref(freesasa_structure *structure, - size_t ref); + size_t ref, + void (*release_func)(size_t)); /** Retrieve the reference number for the CIF document used to generate the structure diff --git a/src/main.cc b/src/main.cc index 590b621..5b6f7cc 100644 --- a/src/main.cc +++ b/src/main.cc @@ -260,28 +260,26 @@ exit_with_help(void) static std::vector get_structures(std::FILE *input, - int *n, const struct cli_state *state) { - int i, j, n2; + int n = 0, n2 = 0; std::vector structures; freesasa_structure *tmp; - *n = 0; if ((state->structure_options & FREESASA_SEPARATE_CHAINS) || (state->structure_options & FREESASA_SEPARATE_MODELS)) { if (state->cif) { - structures = freesasa_cif_structure_array(input, n, state->classifier, state->structure_options); + structures = freesasa_cif_structure_array(input, &n, state->classifier, state->structure_options); } else { // TODO this hack needed since PDB implementation is in C - freesasa_structure **db_ptr_structs = freesasa_structure_array(input, n, state->classifier, state->structure_options); - structures.reserve(*n); - for (i = 0; i < *n; ++i) { + freesasa_structure **db_ptr_structs = freesasa_structure_array(input, &n, state->classifier, state->structure_options); + structures.reserve(n); + for (int i = 0; i < n; ++i) { structures.push_back(std::move(db_ptr_structs[i])); } } } else { - *n = 1; + n = 1; if (state->cif) { structures.emplace_back(freesasa_structure_from_cif(input, state->classifier, state->structure_options)); } else { @@ -294,9 +292,9 @@ get_structures(std::FILE *input, /* get chain-groups (if requested) */ if (state->n_chain_groups > 0) { - n2 = *n; - for (i = 0; i < state->n_chain_groups; ++i) { - for (j = 0; j < *n; ++j) { + n2 = n; + for (int i = 0; i < state->n_chain_groups; ++i) { + for (int j = 0; j < n; ++j) { // TODO make this function pdb and cif compatible. tmp = freesasa_structure_get_chains(structures[j], state->chain_groups[i], state->classifier, state->structure_options); @@ -309,38 +307,37 @@ get_structures(std::FILE *input, } } } - *n = n2; + n = n2; + } + + if (n == 0) { + abort_msg("invalid input"); } + return structures; } static freesasa_node * -run_analysis(FILE *input, +run_analysis(std::vector structures, const char *name, const struct cli_state *state) { int name_len = strlen(name); - std::vector structures; freesasa_node *tree = freesasa_tree_new(), *tmp_tree, *structure_node; const freesasa_result *result; freesasa_selection *sel; - int n = 0, i, c; char *name_i = (char *)malloc(name_len + 10); if (tree == NULL) abort_msg("failed to initialize result-tree"); if (name_i == NULL) abort_msg("memory failure"); - /* read PDB file */ - structures = get_structures(input, &n, state); - if (n == 0) abort_msg("invalid input"); - /* perform calculation on each structure */ - for (i = 0; i < n; ++i) { + for (auto structure : structures) { strcpy(name_i, name); - if (n > 1 && (state->structure_options & FREESASA_SEPARATE_MODELS)) - sprintf(name_i + strlen(name_i), ":%d", freesasa_structure_model(structures[i])); + if (structures.size() > 1 && (state->structure_options & FREESASA_SEPARATE_MODELS)) + sprintf(name_i + strlen(name_i), ":%d", freesasa_structure_model(structure)); - tmp_tree = freesasa_calc_tree(structures[i], &state->parameters, name_i); + tmp_tree = freesasa_calc_tree(structure, &state->parameters, name_i); if (tmp_tree == NULL) abort_msg("can't calculate SASA"); structure_node = @@ -349,8 +346,8 @@ run_analysis(FILE *input, /* Calculate selections for each structure */ if (state->n_select > 0) { - for (c = 0; c < state->n_select; ++c) { - sel = freesasa_selection_new(state->select_cmd[c], structures[i], result); + for (int c = 0; c < state->n_select; ++c) { + sel = freesasa_selection_new(state->select_cmd[c], structure, result); if (sel != NULL) { freesasa_node_structure_add_selection(structure_node, sel); } else { @@ -363,8 +360,6 @@ run_analysis(FILE *input, if (freesasa_tree_join(tree, &tmp_tree) != FREESASA_SUCCESS) { abort_msg("failed joining result-trees"); } - - freesasa_structure_free(structures[i]); } return tree; @@ -389,11 +384,10 @@ state_add_chain_groups(const char *cmd, struct cli_state *state) int err = 0; char *str; const char *token; - size_t i; char a; /* check that string is valid */ - for (i = 0; i < strlen(cmd); ++i) { + for (size_t i = 0; i < strlen(cmd); ++i) { a = cmd[i]; if (a != '+' && !(a >= 'a' && a <= 'z') && !(a >= 'A' && a <= 'Z') && @@ -732,7 +726,7 @@ int main(int argc, { struct cli_state state; FILE *input = NULL; - int optind = 0, i, ret; + int optind = 0, ret; freesasa_node *tree = freesasa_tree_new(), *tmp; if (tree == NULL) abort_msg("error initializing calculation"); @@ -740,30 +734,37 @@ int main(int argc, init_state(&state); optind = parse_arg(argc, argv, &state); + std::vector structures; if (argc > optind) { - for (i = optind; i < argc; ++i) { + for (int i = optind; i < argc; ++i) { input = fopen_werr(argv[i], "r"); - tmp = run_analysis(input, argv[i], &state); + structures = get_structures(input, &state); + tmp = run_analysis(structures, argv[i], &state); freesasa_tree_join(tree, &tmp); fclose(input); } } else { if (!isatty(STDIN_FILENO)) { - tmp = run_analysis(stdin, "stdin", &state); + structures = get_structures(stdin, &state); + tmp = run_analysis(structures, "stdin", &state); freesasa_tree_join(tree, &tmp); - } else + } else { abort_msg("no input", program_name); + } } if (state.output_format & FREESASA_CIF) { ret = freesasa_export_tree_to_cif(state.output_filename, tree); - freesasa_cif_clear_docs(); } else { ret = freesasa_tree_export(state.output, tree, state.output_format | state.output_depth | (state.no_rel ? FREESASA_OUTPUT_SKIP_REL : 0)); } freesasa_node_free(tree); + for (auto structure : structures) { + freesasa_structure_free(structure); + } + release_state(&state); if (ret == FREESASA_FAIL) { diff --git a/src/structure.c b/src/structure.c index 16d0cef..d300bfd 100644 --- a/src/structure.c +++ b/src/structure.c @@ -79,6 +79,7 @@ struct freesasa_structure { coord_t *xyz; int model; /* model number */ size_t cif_ref; + void (*release_cif_ref)(size_t); }; static int @@ -369,6 +370,7 @@ freesasa_structure_new(void) s->model = 1; s->classifier_name = NULL; s->cif_ref = 0; + s->release_cif_ref = 0; if (s->xyz == NULL) goto memerr; @@ -385,8 +387,17 @@ void freesasa_structure_free(freesasa_structure *s) atoms_dealloc(&s->atoms); residues_dealloc(&s->residues); chains_dealloc(&s->chains); - if (s->xyz != NULL) freesasa_coord_free(s->xyz); + + if (s->xyz != NULL) { + freesasa_coord_free(s->xyz); + } + free(s->classifier_name); + + if (s->cif_ref > 0 && s->release_cif_ref != NULL) { + s->release_cif_ref(s->cif_ref); + } + free(s); } } @@ -1217,10 +1228,12 @@ void freesasa_structure_set_model(freesasa_structure *structure, } void freesasa_structure_set_cif_ref(freesasa_structure *structure, - size_t ref) + size_t ref, + void (*release_func)(size_t)) { assert(structure); structure->cif_ref = ref; + structure->release_cif_ref = release_func; } size_t freesasa_structure_cif_ref(const freesasa_structure *structure) From 89020b21b4ceef38069ab0d7c82ba3265d6e3aed Mon Sep 17 00:00:00 2001 From: Simon Mitternacht Date: Sat, 11 Sep 2021 11:42:37 +0200 Subject: [PATCH 08/11] Add test that checks for CIF output idempotency. (Broken) --- tests/test-cli.in | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test-cli.in b/tests/test-cli.in index 313d2cb..d167915 100644 --- a/tests/test-cli.in +++ b/tests/test-cli.in @@ -325,6 +325,10 @@ assert_pass "test '$rel' = 'N/A N/A N/A N/A N/A'" # deprecated assert_pass "$cli --rsa $datadir/1ubq.pdb > $dump" +echo "== Verify idempotency of CIF format ==" +# does not work yet +# assert_equal_opt "$cli --cif --format=cif $datadir/1ubq.cif" "" " | $cli --cif --format=cif" + # XML // very basic testing, just to make sure XML is valid and that the right tags are present if [[ use_xml -eq 1 ]] ; then echo From d290ce9119a3b554c42f644cf620792b2fe13dbd Mon Sep 17 00:00:00 2001 From: Danny Diaz Date: Wed, 20 Oct 2021 17:02:52 -0500 Subject: [PATCH 09/11] Implemented idempotency for the freesasa tables --- src/cif.cc | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/cif.cc b/src/cif.cc index 0b37bca..33d07b6 100644 --- a/src/cif.cc +++ b/src/cif.cc @@ -684,6 +684,28 @@ rewrite_atom_site(gemmi::cif::Table &table, } } +static int +reset_freesasa_tables(gemmi::cif::Block &block) +{ + std::vector result_tables{ + block.find_mmcif_category("_freeSASA_results."), + block.find_mmcif_category("_freeSASA_rsa."), + block.find_mmcif_category("_freeSASA_parameters.")}; + + for (auto &table : result_tables) { + if (table.ok()) { + // Table exist. Making sure its a loop. + if (!table.loop_item) { + // Table is a pair so turning it into a loop + table.convert_pair_to_loop(); + } + table.get_loop()->clear(); + } + } + + return FREESASA_SUCCESS; +} + static int write_result(std::ostream &out, freesasa_node *root) { @@ -704,6 +726,10 @@ write_result(std::ostream &out, freesasa_node *root) auto &block = docs[cif_ref].sole_block(); + if (cif_ref != prev_cif_ref) { + reset_freesasa_tables(block); + } + auto table = block.find("_atom_site.", atom_site_columns); if (prev_cif_ref != cif_ref) { sasa_vals = std::vector{table.length(), "?"}; From ad59c039b9df1768e2ae88a09094719e26b30ae4 Mon Sep 17 00:00:00 2001 From: Simon Mitternacht Date: Sat, 23 Oct 2021 12:25:50 +0200 Subject: [PATCH 10/11] Make --separate-models work again for CIF output. Add test for CIF idempotency. --- src/cif.cc | 17 ++++++++++------- src/main.cc | 2 ++ tests/test-cli.in | 9 ++++----- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/cif.cc b/src/cif.cc index 33d07b6..d700588 100644 --- a/src/cif.cc +++ b/src/cif.cc @@ -259,9 +259,8 @@ freesasa_cif_structure_array(std::FILE *input, std::vector ss; - // We don't care about the index here, for now, since exporting structure - // arrays to CIF output is not supported. - auto doc = generate_gemmi_doc(input).first; + auto doc_idx_pair = generate_gemmi_doc(input); + auto doc = doc_idx_pair.first; gemmi::Structure gemmi_struct = gemmi::make_structure_from_block(doc.blocks[0]); @@ -297,6 +296,7 @@ freesasa_cif_structure_array(std::FILE *input, ss.push_back(std::move(structure)); freesasa_structure_set_model(ss.back(), i + 1); + // we don't set cif_ref here because output is not defined for --separate-chains } } if (n_chains == 0) @@ -309,9 +309,13 @@ freesasa_cif_structure_array(std::FILE *input, ss.emplace_back( structure_from_model(doc, models[i].name, classifier, options)); freesasa_structure_set_model(ss.back(), i + 1); + freesasa_structure_set_cif_ref(ss.back(), doc_idx_pair.second, NULL); } *n = n_models; } + + // this is a hack, we only want to release docs once per input + freesasa_structure_set_cif_ref(ss.back(), doc_idx_pair.second, &release_gemmi_doc); return ss; } @@ -718,10 +722,9 @@ write_result(std::ostream &out, freesasa_node *root) while (result) { cif_ref = freesasa_node_structure_cif_ref(freesasa_node_children(result)); if (docs.find(cif_ref) == docs.end()) { - freesasa_warn("In %s(), unable to find gemmi doc for result node: %s. Skipping...", - __func__, freesasa_node_name(result)); - result = freesasa_node_next(result); - continue; + return freesasa_fail("In %s(), unable to find gemmi doc for result node: %s." + "This can happen when using the --separate-chains option", + __func__, freesasa_node_name(result)); } auto &block = docs[cif_ref].sole_block(); diff --git a/src/main.cc b/src/main.cc index 5b6f7cc..46d2b1c 100644 --- a/src/main.cc +++ b/src/main.cc @@ -713,6 +713,8 @@ parse_arg(int argc, char **argv, struct cli_state *state) if (state->output_format == FREESASA_CIF && state->cif != 1) abort_msg("CIF output can not be generated from .pdb input"); if (state->output_format == FREESASA_PDB && state->cif == 1) abort_msg("PDB output can not be generated from .cif input."); + if (state->output_format == FREESASA_CIF && state->structure_options & FREESASA_SEPARATE_CHAINS) + abort_msg("Cannon output a cif file with --separate-chains"); if ((state->output_format == FREESASA_CIF || state->output_format == FREESASA_PDB) && state->structure_options & FREESASA_SEPARATE_CHAINS && state->structure_options & FREESASA_SEPARATE_MODELS) diff --git a/tests/test-cli.in b/tests/test-cli.in index d167915..435bdb3 100644 --- a/tests/test-cli.in +++ b/tests/test-cli.in @@ -62,7 +62,7 @@ function assert_equal_opt tmp2=tmp/tmp2 eval $1 $2 > $tmp1 2>/dev/null eval $1 $3 > $tmp2 2>/dev/null - diff $tmp1 $tmp2 + diff -B $tmp1 $tmp2 if [[ $? -ne 0 ]]; then echo Error: \"$1\" gives different results with arguments \"$2\" and \"$3\" let errors=errors+1 @@ -234,12 +234,12 @@ assert_equal_total "$cli" "$datadir/3bkr.pdb" "$datadir/3bkr.cif --cif" assert_equal_total "$cli -w" "$datadir/3gnn.pdb" "$datadir/3gnn.cif --cif" assert_equal_total "$cli --join-models" "$datadir/2jo4.pdb" "$datadir/2jo4.cif --cif" assert_equal_total "$cli --chain-groups AB+CD -S -n 10" "$datadir/2jo4.pdb" "$datadir/2jo4.cif --cif" -assert_pass "$cli --cif --separate-chain --format=cif $datadir/1ubq.cif > $dump" assert_pass "$cli --cif --separate-models --format=cif $datadir/1ubq.cif > $dump" assert_pass "$cli --cif --format=cif $datadir/1ubq.cif > $dump" +assert_fail "$cli --cif --format=cif --separate-chains $datadir/1ubq.cif > $dump" assert_fail "$cli --cif --format=pdb $datadir/1ubq.cif > $dump" assert_fail "$cli --format=cif $datadir/1ubq.pdb > $dump" -assert_fail "$cli --cif --format=cif --separate-chains --separate-models > $dump" +assert_fail "$cli --cif --format=cif --separate-chains --separate-models $datadir/1ubq.cif > $dump" echo echo "== Testing --separate-chains and --separate-models output are equal between cif and pdb" @@ -326,8 +326,7 @@ assert_pass "test '$rel' = 'N/A N/A N/A N/A N/A'" assert_pass "$cli --rsa $datadir/1ubq.pdb > $dump" echo "== Verify idempotency of CIF format ==" -# does not work yet -# assert_equal_opt "$cli --cif --format=cif $datadir/1ubq.cif" "" " | $cli --cif --format=cif" +assert_equal_opt "$cli --cif --format=cif $datadir/1ubq.cif" "" " | $cli --cif --format=cif" # XML // very basic testing, just to make sure XML is valid and that the right tags are present if [[ use_xml -eq 1 ]] ; then From cb6f6d5c8467cda4046a1cbc09c92bb0bd378dfb Mon Sep 17 00:00:00 2001 From: Simon Mitternacht Date: Sat, 23 Oct 2021 14:01:25 +0200 Subject: [PATCH 11/11] Allow --separate-chains and --separate-models, separately and combined, for CIF output. --- src/cif.cc | 4 ++-- src/main.cc | 4 +--- tests/test-cli.in | 8 ++++++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/cif.cc b/src/cif.cc index d700588..91a912e 100644 --- a/src/cif.cc +++ b/src/cif.cc @@ -260,7 +260,7 @@ freesasa_cif_structure_array(std::FILE *input, std::vector ss; auto doc_idx_pair = generate_gemmi_doc(input); - auto doc = doc_idx_pair.first; + auto &doc = doc_idx_pair.first; gemmi::Structure gemmi_struct = gemmi::make_structure_from_block(doc.blocks[0]); @@ -296,7 +296,7 @@ freesasa_cif_structure_array(std::FILE *input, ss.push_back(std::move(structure)); freesasa_structure_set_model(ss.back(), i + 1); - // we don't set cif_ref here because output is not defined for --separate-chains + freesasa_structure_set_cif_ref(ss.back(), doc_idx_pair.second, NULL); } } if (n_chains == 0) diff --git a/src/main.cc b/src/main.cc index 46d2b1c..8b09305 100644 --- a/src/main.cc +++ b/src/main.cc @@ -713,9 +713,7 @@ parse_arg(int argc, char **argv, struct cli_state *state) if (state->output_format == FREESASA_CIF && state->cif != 1) abort_msg("CIF output can not be generated from .pdb input"); if (state->output_format == FREESASA_PDB && state->cif == 1) abort_msg("PDB output can not be generated from .cif input."); - if (state->output_format == FREESASA_CIF && state->structure_options & FREESASA_SEPARATE_CHAINS) - abort_msg("Cannon output a cif file with --separate-chains"); - if ((state->output_format == FREESASA_CIF || state->output_format == FREESASA_PDB) && + if (state->output_format == FREESASA_PDB && state->structure_options & FREESASA_SEPARATE_CHAINS && state->structure_options & FREESASA_SEPARATE_MODELS) abort_msg("Cannot output a cif/pdb file with both --separate-chains and --separate-models set. Pick one."); diff --git a/tests/test-cli.in b/tests/test-cli.in index 435bdb3..1c67af9 100644 --- a/tests/test-cli.in +++ b/tests/test-cli.in @@ -236,10 +236,10 @@ assert_equal_total "$cli --join-models" "$datadir/2jo4.pdb" "$datadir/2jo4.cif - assert_equal_total "$cli --chain-groups AB+CD -S -n 10" "$datadir/2jo4.pdb" "$datadir/2jo4.cif --cif" assert_pass "$cli --cif --separate-models --format=cif $datadir/1ubq.cif > $dump" assert_pass "$cli --cif --format=cif $datadir/1ubq.cif > $dump" -assert_fail "$cli --cif --format=cif --separate-chains $datadir/1ubq.cif > $dump" +assert_pass "$cli --cif --format=cif --separate-chains $datadir/1ubq.cif > $dump" +assert_pass "$cli --cif --format=cif --separate-chains --separate-models $datadir/1ubq.cif > $dump" assert_fail "$cli --cif --format=pdb $datadir/1ubq.cif > $dump" assert_fail "$cli --format=cif $datadir/1ubq.pdb > $dump" -assert_fail "$cli --cif --format=cif --separate-chains --separate-models $datadir/1ubq.cif > $dump" echo echo "== Testing --separate-chains and --separate-models output are equal between cif and pdb" @@ -327,6 +327,10 @@ assert_pass "$cli --rsa $datadir/1ubq.pdb > $dump" echo "== Verify idempotency of CIF format ==" assert_equal_opt "$cli --cif --format=cif $datadir/1ubq.cif" "" " | $cli --cif --format=cif" +assert_equal_opt "$cli --cif --format=cif --separate-chains $datadir/2jo4.cif" "" " | $cli --separate-chains --cif --format=cif" +assert_equal_opt "$cli --cif --format=cif --separate-models $datadir/2jo4.cif" "" " | $cli --separate-models --cif --format=cif" +assert_equal_opt "$cli --cif --format=cif --separate-models --separate-chains $datadir/2jo4.cif" "" \ + " | $cli --separate-models --separate-chains --cif --format=cif" # XML // very basic testing, just to make sure XML is valid and that the right tags are present if [[ use_xml -eq 1 ]] ; then