Skip to content

Commit

Permalink
Merge pull request #1139 from dsenalik/20201217sbo__database_cross_re…
Browse files Browse the repository at this point in the history
…ference

Tripal field sbo__database_cross_reference timeout fix
  • Loading branch information
laceysanderson committed Jan 31, 2021
2 parents 286d823 + c9bff32 commit bdb0233
Show file tree
Hide file tree
Showing 3 changed files with 173 additions and 61 deletions.
81 changes: 61 additions & 20 deletions tripal_chado/api/modules/tripal_chado.pub.api.inc
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ function chado_get_publication($identifiers, $options = []) {
}
else {
tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
"chado_get_publication: The dbxref identifier is not correctly formatted.",
"chado_get_publication: The dbxref identifier is not correctly formatted. Identifiers passed: %identifier.",
['%identifier' => print_r($identifiers, TRUE)]
);
}
Expand Down Expand Up @@ -1169,6 +1169,47 @@ function chado_pub_create_citation($pub) {
return $citation;
}

/**
 * Retrieves an array with all database cross references of a publication.
 *
 * Implemented as a single SQL query for performance reasons, because
 * chado_expand_var() can take too long as it loads more information than
 * needed.
 *
 * @param $pub_id
 *   A pub_id from the 'chado.pub' table.
 *
 * @return
 *   A sequentially indexed array of records. Each record is an associative
 *   array with the following keys: 'accession', 'version', 'description',
 *   'name', 'url', 'urlprefix'. These are the column names from the 'dbxref'
 *   and 'db' tables. The array is empty if the query yields no rows.
 *
 * @ingroup tripal_pub_api
 */
function chado_get_pub_dbxrefs($pub_id) {
  // Join the linker table to dbxref and db so that a single query returns
  // every column needed by callers.
  $sql = "SELECT REF.accession, REF.version, REF.description, DB.name, DB.url, DB.urlprefix "
    . "FROM {pub_dbxref} LINK "
    . "INNER JOIN {dbxref} REF on LINK.dbxref_id = REF.dbxref_id "
    . "INNER JOIN {db} DB on REF.db_id = DB.db_id "
    . "WHERE LINK.pub_id = :pub_id";
  $records = chado_query($sql, [':pub_id' => $pub_id]);

  // Copy only the documented columns into plain arrays, in a fixed key order.
  $results = [];
  while ($record = $records->fetchObject()) {
    $results[] = [
      'accession' => $record->accession,
      'version' => $record->version,
      'description' => $record->description,
      'name' => $record->name,
      'url' => $record->url,
      'urlprefix' => $record->urlprefix,
    ];
  }
  return $results;
}

/**
* Retrieves the minimal information to uniquely describe any publication.
*
Expand Down Expand Up @@ -1238,17 +1279,24 @@ function chado_get_minimal_pub_info($pub) {
}
}

// Load all database cross references.
$pub_dbxrefs = chado_get_pub_dbxrefs($pub->pub_id);

// Get the first database cross-reference with a url.
$options = ['return_array' => 1];
$pub = chado_expand_var($pub, 'table', 'pub_dbxref', $options);
$dbxref = NULL;
if ($pub->pub_dbxref) {
foreach ($pub->pub_dbxref as $index => $pub_dbxref) {
if ($pub_dbxref->dbxref_id->db_id->urlprefix) {
$dbxref = $pub_dbxref->dbxref_id;
}
}
}
// It is not clear what the code below was meant to do. It retrieved the last,
// not the first, dbxref with a url, but the variable $dbxref is not referenced
// later. It could have added information to $pub, but that does not appear to
// be referenced later either. chado_expand_var() can sometimes take a long time
// to execute, so the code is disabled here; consider removing it entirely.
// $options = ['return_array' => 1];
// $pub = chado_expand_var($pub, 'table', 'pub_dbxref', $options);
// $dbxref = NULL;
// if ($pub->pub_dbxref) {
// foreach ($pub->pub_dbxref as $index => $pub_dbxref) {
// if ($pub_dbxref->dbxref_id->db_id->urlprefix) {
// $dbxref = $pub_dbxref->dbxref_id;
// }
// }
// }

// Get the URL.
$values = [
Expand All @@ -1270,17 +1318,10 @@ function chado_get_minimal_pub_info($pub) {
}
}

// Get the list of database cross references.
$values = [
'pub_id' => $pub->pub_id,
];
$options = [
'return_array' => 1,
];
$pub_dbxrefs = chado_generate_var('pub_dbxref', $values, $options);
// Generate a list of database cross references formatted as "DB:accession".
$dbxrefs = [];
foreach ($pub_dbxrefs as $pub_dbxref) {
$dbxrefs[] = $pub_dbxref->dbxref_id->db_id->name . ':' . $pub_dbxref->dbxref_id->accession;
$dbxrefs[] = $pub_dbxref['name'] . ':' . $pub_dbxref['accession'];
}

// Get the citation.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ class sbo__database_cross_reference extends ChadoField {
// type. This will create form elements when editing the field instance
// to allow the site admin to change the term settings above.
'term_fixed' => FALSE,
// The number of items to show on a page.
'items_per_page' => 10,
// Upper limit on the number of items to show when there is a large number of cross references.
'max_items' => 10000,
];

// The default widget for this field.
Expand Down Expand Up @@ -138,33 +142,39 @@ class sbo__database_cross_reference extends ChadoField {

$linker_table = $base_table . '_dbxref';
$options = ['return_array' => 1];
$record = chado_expand_var($record, 'table', $linker_table, $options);
if (property_exists($record, $linker_table) and is_array($record->$linker_table) and count($record->$linker_table) > 0) {
$i = 0;
foreach ($record->$linker_table as $index => $linker) {
$dbxref = $linker->dbxref_id;

// Ignore the GFF_source database. This is a weird thing required by
// GBrowse and is added by the GFF loader. We don't want to show it.
if ($dbxref->db_id->name == 'GFF_source') {
continue;
}

$URL = chado_get_dbxref_url($dbxref);
$entity->{$field_name}['und'][$i] = [
'value' => [
$dbname_term => $dbxref->db_id->name,
$accession_term => $dbxref->accession,
$dburl_term => $URL,
],
'chado-' . $field_table . '__' . $pkey => $linker->$pkey,
'chado-' . $field_table . '__' . $fkey_lcolumn => $linker->$fkey_lcolumn->$fkey_lcolumn,
'chado-' . $field_table . '__dbxref_id' => $dbxref->dbxref_id,
'db_id' => $dbxref->db_id->db_id,
'accession' => $dbxref->accession,
];
$i++;

// Build the SQL to find records associated with this publication.
$max_items = array_key_exists('max_items', $this->instance['settings']) ? $this->instance['settings']['max_items'] : 10000;
$sql = "SELECT REF.accession, DB.name, DB.urlprefix "
. "FROM {".$linker_table."} LINK "
. "INNER JOIN {dbxref} REF on LINK.dbxref_id = REF.dbxref_id "
. "INNER JOIN {db} DB on REF.db_id = DB.db_id "
. "WHERE LINK.$fkey_lcolumn = :id "
// Ignore the GFF_source database. This is a weird thing required by
// GBrowse and is added by the GFF loader. We don't want to show it.
. "AND NOT DB.name = 'GFF_source' "
. "ORDER BY REF.accession " // if we hit the limit, the subset should be consistent
. "LIMIT :limit";
$args = [':id' => $entity->{'chado_record_id'},
':limit' => $max_items + 1];
$records = chado_query($sql, $args);

// Store the query results
$delta = 0;
while($record = $records->fetchObject()) {
// Need this check to detect the case where the limit exactly equals the number of records
if ($delta < $max_items) {
$entity->{$field_name}['und'][$delta] = [];
$entity->{$field_name}['und'][$delta]['value'][$dbname_term] = $record->name;
$entity->{$field_name}['und'][$delta]['value'][$accession_term] = $record->accession;
$entity->{$field_name}['und'][$delta]['value'][$dburl_term] = $record->urlprefix;
}
$delta++;
}
// Display a warning if we have exceeded the maximum number of cross references
if ( $delta > $max_items ) {
$entity->{$field_name}['und'][$delta]['value'][$dbname_term] = 'Note';
$entity->{$field_name}['und'][$delta]['value'][$accession_term] = "Only the first $max_items cross references are shown";
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ class sbo__database_cross_reference_formatter extends ChadoFieldFormatter {
public function view(&$element, $entity_type, $entity, $langcode, $items, $display) {
$content = '';

// Do we have an empty list? If so, just return.
if (!$items[0]['value']) {
return;
}

$field_name = $this->field['field_name'];
$field_type = $this->field['type'];
$field_table = $this->instance['settings']['chado_table'];
Expand All @@ -26,27 +31,83 @@ class sbo__database_cross_reference_formatter extends ChadoFieldFormatter {
$accession_term = chado_get_semweb_term('dbxref', 'accession');
$dburl_term = chado_get_semweb_term('db', 'url');

// First, organize the values by their databases.
$ordered_items = [];
foreach ($items as $delta => $item) {
if (!$item['value']) {
continue;
$db = $item['value'][$dbname_term];
$accession = $item['value'][$accession_term];
// It is possible that a database does not have a url, in which case no link can be generated.
if (array_key_exists($dburl_term, $item['value']) and $item['value'][$dburl_term]) {
$url = $item['value'][$dburl_term];

// This emulates chado_get_dbxref_url() but implemented inline here for better performance.
$db_count = 0;
$acc_count = 0;
$url = preg_replace('/\{db\}/', $db, $url, -1, $acc_count);
$url = preg_replace('/\{accession\}/', $accession, $url, -1, $acc_count);

$content = l($accession, $url, ['attributes' => ['target' => '_blank']]);
}
$content = $item['value'][$dbname_term] . ':' . $item['value'][$accession_term];
if ($item['value'][$dburl_term]) {
$dbxref = chado_get_dbxref(['dbxref_id' => $item['chado-' . $linker_table . '__dbxref_id']]);
$url = chado_get_dbxref_url($dbxref);
$content = l($content, $url, ['attributes' => ['target' => '_blank']]);
else {
$content = $accession;
}
$element[$delta] = [
'#type' => 'markup',
'#markup' => $content,
];
$ordered_items[ucfirst($db)][] = $content;
}

if (count($element) == 0) {
$element[0] = [
'#type' => 'markup',
'#markup' => 'There are no cross references.',
// Reorder the list so it's compatible with theming a list.
ksort($ordered_items);

// Generate the pagers for each type.
$list_items = [];
$headers = [];
$rows = [];
foreach ($ordered_items as $type => $children) {
$items_per_page = array_key_exists('items_per_page', $this->instance['settings']) ? $this->instance['settings']['items_per_page'] : 10;
$total_records = count($children);
$total_pages = (int) ($total_records / $items_per_page) + 1;
$pelement = 0;
$current_page = pager_default_initialize($total_records, $items_per_page, $pelement);
$pager = theme('pager', [
'tags' => [],
'element' => $pelement,
'parameters' => [],
'quantity' => 5,
]);
$pager = $this->ajaxifyPager($pager, $entity);
$page_items = array_chunk($children, $items_per_page);

$rows[] = [
[
'data' => ucfirst($type),
'header' => TRUE,
'width' => '20%',
],
theme_item_list([
'items' => $page_items[$current_page],
'title' => '',
'type' => 'ul',
'attributes' => [],
]) . $pager,
];
}

$table = [
'header' => [],
'rows' => $rows,
'attributes' => [
'id' => 'sbo__database_cross_reference',
'class' => 'tripal-data-table',
],
'sticky' => FALSE,
'caption' => "",
'colgroups' => [],
'empty' => 'There are no cross references.',
];
$content = theme_table($table);
$element[0] = [
'#type' => 'markup',
'#markup' => $content,
];

}
}

0 comments on commit bdb0233

Please sign in to comment.