Skip to content

Commit

Permalink
Merge pull request #1228 from srobb1/7.x-3.x
Browse files Browse the repository at this point in the history
Bugfix: OBOImporter.inc: Search before retrieving
  • Loading branch information
spficklin committed Sep 27, 2021
2 parents fe1f70d + 4980bd6 commit 83f11e4
Show file tree
Hide file tree
Showing 3 changed files with 208 additions and 65 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,21 @@ class data__sequence_record extends ChadoField {
];
$seqs = chado_get_feature_sequences(['feature_id' => $feature->feature_id], $options);
$featurelocs = [];

foreach ($seqs as $seq) {
$featureloc = $this->getFeatureLoc($seq['featureloc_id']);
$coords = $this->getSequenceCoords($featureloc);

// SOFIA: If the feature is a srcfeature don't derive sequence from itself
if ($feature->feature_id != $featureloc->srcfeature_id){
// I am not quite sure what the $option is doing, but I am going to set it
// to 0 since there is no seq derived from parent
$options = [
'derive_from_parent' => 0,
];
continue;
}

$entity->{$field_name}['und'][]['value'] = [
$sequence_term => $seq['residues'],
$label_term => 'Derived ' . ucfirst(preg_replace('/_/', ' ', $feature->type_id->name)) . ' Sequence (' . number_format(strlen($seq['residues'])) . 'bp)',
Expand Down
143 changes: 104 additions & 39 deletions tripal_chado/includes/TripalImporter/GFF3Importer.inc
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,13 @@ class GFF3Importer extends TripalImporter {
*/
private $update = TRUE;


/**
 * A list of existing features whose names need to be updated.
 *
 * Keyed by feature_id; each value is the new name to store for that
 * feature. Entries are added when an existing feature's stored name differs
 * from the name of the matching feature being imported.
 */
private $update_names = [];


/**
* If the GFF file contains a 'Target' attribute then the feature and the
* target will have an alignment created, but to find the proper target
Expand Down Expand Up @@ -580,98 +587,101 @@ class GFF3Importer extends TripalImporter {
$this->openCacheFile();
// Load the GFF3.
try {
$this->logMessage("Step 1 of 26: Caching GFF3 file... ");
$this->logMessage("Step 1 of 27: Caching GFF3 file... ");
$this->parseGFF3();

// Prep the database for necessary records.
$this->prepSynonms();
$this->prepNullPub();
$this->prepDBs();

$this->logMessage("Step 2 of 26: Find existing landmarks... ");
$this->logMessage("Step 2 of 27: Find existing landmarks... ");
$this->findLandmarks();

$this->logMessage("Step 3 of 26: Insert new landmarks (if needed)... ");
$this->logMessage("Step 3 of 27: Insert new landmarks (if needed)... ");
$this->insertLandmarks();

if (!$this->skip_protein) {
$this->logMessage("Step 4 of 26: Find missing proteins... ");
$this->logMessage("Step 4 of 27: Find missing proteins... ");
$this->findMissingProteins();

$this->logMessage("Step 5 of 26: Add missing proteins to list of features... ");
$this->logMessage("Step 5 of 27: Add missing proteins to list of features... ");
$this->addMissingProteins();
}
else {
$this->logMessage("Step 4 of 26: Find missing proteins (Skipped)... ");
$this->logMessage("Step 5 of 26: Add missing proteins to list of features (Skipped)...");
$this->logMessage("Step 4 of 27: Find missing proteins (Skipped)... ");
$this->logMessage("Step 5 of 27: Add missing proteins to list of features (Skipped)...");
}

$this->logMessage("Step 6 of 26: Find existing features... ");
$this->logMessage("Step 6 of 27: Find existing features... ");
$this->findFeatures();

$this->logMessage("Step 7 of 26: Clear attributes of existing features... ");
$this->logMessage("Step 7 of 27: Clear attributes of existing features... ");
$this->deleteFeatureData();

$this->logMessage("Step 8 of 26: Processing !num_features features... ",
$this->logMessage("Step 8 of 27: Processing !num_features features... ",
['!num_features' => number_format(count(array_keys($this->features)))]);
$this->insertFeatures();
$this->logMessage("Step 9 of 27: Processing !num_features feature Names to update... ",
['!num_features' => number_format(count(array_keys($this->update_names)))]);
$this->updateFeatureNames();

$this->logMessage("Step 9 of 26: Get new feature IDs... ");
$this->logMessage("Step 10 of 27: Get new feature IDs... ");
$this->findFeatures();

$this->logMessage("Step 10 of 26: Insert locations... ");
$this->logMessage("Step 11 of 27: Insert locations... ");
$this->insertFeatureLocs();

$this->logMessage("Step 11 of 26: Associate parents and children... ");
$this->logMessage("Step 12 of 27: Associate parents and children... ");
$this->associateChildren();

$this->logMessage("Step 12 of 26: Calculate child ranks... ");
$this->logMessage("Step 13 of 27: Calculate child ranks... ");
$this->calculateChildRanks();

$this->logMessage("Step 13 of 26: Add child-parent relationships... ");
$this->logMessage("Step 14 of 27: Add child-parent relationships... ");
$this->insertFeatureParents();

$this->logMessage("Step 14 of 26: Insert properties... ");
$this->logMessage("Step 15 of 27: Insert properties... ");
$this->insertFeatureProps();

$this->logMessage("Step 15 of 26: Find synonyms (aliases)... ");
$this->logMessage("Step 16 of 27: Find synonyms (aliases)... ");
$this->findSynonyms();

$this->logMessage("Step 16 of 26: Insert new synonyms (aliases)... ");
$this->logMessage("Step 17 of 27: Insert new synonyms (aliases)... ");
$this->insertSynonyms();

$this->logMessage("Step 17 of 26: Insert feature synonyms (aliases)... ");
$this->logMessage("Step 18 of 27: Insert feature synonyms (aliases)... ");
$this->insertFeatureSynonyms();

$this->logMessage("Step 18 of 26: Find cross references... ");
$this->logMessage("Step 19 of 27: Find cross references... ");
$this->findDbxrefs();

$this->logMessage("Step 19 of 26: Insert new cross references... ");
$this->logMessage("Step 20 of 27: Insert new cross references... ");
$this->insertDbxrefs();

$this->logMessage("Step 20 of 26: Get new cross references IDs... ");
$this->logMessage("Step 21 of 27: Get new cross references IDs... ");
$this->findDbxrefs();

$this->logMessage("Step 21 of 26: Insert feature cross references... ");
$this->logMessage("Step 22 of 27: Insert feature cross references... ");
$this->insertFeatureDbxrefs();

$this->logMessage("Step 22 of 26: Insert feature ontology terms... ");
$this->logMessage("Step 23 of 27: Insert feature ontology terms... ");
$this->insertFeatureCVterms();

$this->logMessage("Step 23 of 26: Insert 'derives_from' relationships... ");
$this->logMessage("Step 24 of 27: Insert 'derives_from' relationships... ");
$this->insertFeatureDerivesFrom();

$this->logMessage("Step 24 of 26: Insert Targets... ");
$this->logMessage("Step 25 of 27: Insert Targets... ");
$this->insertFeatureTargets();

$this->logMessage("Step 25 of 26: Associate features with analysis.... ");
$this->logMessage("Step 26 of 27: Associate features with analysis.... ");
$this->insertFeatureAnalysis();

if (!empty($this->residue_index)) {
$this->logMessage("Step 26 of 26: Adding sequences data... ");
$this->logMessage("Step 27 of 27: Adding sequences data... ");
$this->insertFeatureSeqs();
}
$this->logMessage("Step 26 of 26: Adding sequences data (Skipped: none available)...");
$this->logMessage("Step 27 of 27: Adding sequences data (Skipped: none available)...");
}
// On exception, catch the error, clean up the cache file and rethrow
catch (Exception $e) {
Expand Down Expand Up @@ -1048,7 +1058,7 @@ class GFF3Importer extends TripalImporter {
throw new Exception(t('Each feature can only have one "Target" attribute. The feature %uniquename has more than one.',
['%uniquename' => $ret['uniquename']]));
}
# Get the elements of the target.
// Get the elements of the target.
$matches = [];
if (preg_match('/^(.*?)\s+(\d+)\s+(\d+)(\s+[\+|\-])*$/', trim($tags[$tag_name][0]), $matches)) {
$attr_target['name'] = $matches[1];
Expand Down Expand Up @@ -1230,7 +1240,6 @@ class GFF3Importer extends TripalImporter {
// Add the properties and parent.
$ret['properties'] = $attr_others;
$ret['parent'] = $attr_parent;

return $ret;
}

Expand Down Expand Up @@ -1352,6 +1361,7 @@ class GFF3Importer extends TripalImporter {

// The landmark was found, remember it
$this->landmarks[$landmark_name] = $landmark->getID();

return $landmark;
}
/**
Expand All @@ -1361,7 +1371,6 @@ class GFF3Importer extends TripalImporter {
* The line from the GFF file that is the ##sequence-region comment.
*/
private function insertHeaderLandmark($line) {

$region_matches = [];
if (preg_match('/^##sequence-region\s+(\w*?)\s+(\d+)\s+(\d+)$/i', $line, $region_matches)) {
$rid = $region_matches[1];
Expand Down Expand Up @@ -1531,9 +1540,7 @@ class GFF3Importer extends TripalImporter {
}

// Cache the GFF feature details for later lookup.
if (strcmp($gff_feature['uniquename'], $gff_feature['landmark']) != 0) {
$this->cacheFeature($gff_feature);
}
$this->cacheFeature($gff_feature);

// If this feature has a target then we need to add the target as
// new feature for insertion.
Expand Down Expand Up @@ -1840,6 +1847,57 @@ class GFF3Importer extends TripalImporter {
}
}


/**
 * Updates the name of existing feature records in Chado.
 *
 * Walks $this->update_names (feature_id => new name) and issues one
 * multi-row "UPDATE ... FROM (VALUES ...)" statement per batch of up to
 * 1000 features, reporting progress after each batch. Nothing is sent to
 * the database when there are no names to update.
 */
private function updateFeatureNames() {
  $batch_size = 1000;
  $num_features = count($this->update_names);
  // Same batch-count estimate used by the other batched steps of this
  // importer; it is only used for progress reporting.
  $num_batches = (int) ($num_features / $batch_size) + 1;

  $this->setItemsHandled(0);
  $this->setTotalItems($num_batches);

  // Batch update: https://www.alibabacloud.com/blog/how-does-postgresql-implement-batch-update-deletion-and-insertion_596030
  $init_sql = "UPDATE {feature}\n" .
    "SET name=tmp.name from (values\n";

  // Cast the VALUES side to an integer instead of casting
  // {feature}.feature_id to text: the join is then done on the column's own
  // type, so its index can be used rather than converting every row.
  // NOTE(review): assumes feature_id is an integer column, as in the Chado
  // schema -- confirm.
  $fin_sql = ") as tmp (name,feature_id) where {feature}.feature_id=tmp.feature_id::bigint\n";

  $i = 0;
  $total = 0;
  $batch_num = 1;
  $sql = '';
  $args = [];
  foreach ($this->update_names as $feature_id => $new_name) {
    $total++;
    $i++;

    // TODO: make is_obsolete updatable. Make sure to add is_obsolete
    // collection to the cached feature.
    $sql .= "(:name_$i, :feature_id_$i),\n";
    $args[":name_$i"] = $new_name;
    $args[":feature_id_$i"] = $feature_id;

    // If we've filled a batch, or reached the last feature, run the update.
    if ($i === $batch_size || $total === $num_features) {
      // Drop the trailing ",\n" left after the last value row.
      $sql = $init_sql . rtrim($sql, ",\n") . $fin_sql;
      chado_query($sql, $args);

      $this->setItemsHandled($batch_num);
      $batch_num++;

      // Now reset all of the variables for the next batch.
      $sql = '';
      $i = 0;
      $args = [];
    }
  }
}


/**
* Check if the features exist in the database.
*/
Expand All @@ -1851,7 +1909,7 @@ class GFF3Importer extends TripalImporter {
$this->setItemsHandled(0);
$this->setTotalItems($num_batches);

$sql = "SELECT uniquename, type_id, organism_id, feature_id FROM {feature} WHERE uniquename in (:uniquenames)";
$sql = "SELECT uniquename, name, type_id, organism_id, feature_id FROM {feature} WHERE uniquename in (:uniquenames)";
$i = 0;
$total = 0;
$batch_num = 1;
Expand Down Expand Up @@ -1881,6 +1939,13 @@ class GFF3Importer extends TripalImporter {
}
if ($matched_type_id == $f->type_id and $matched_organism_id == $f->organism_id) {
$this->features[$f->uniquename]['feature_id'] = $f->feature_id;
$this->features[$f->uniquename]['name'] = $f->name;
// Checking to see if the name has changed and therefore needs updating
if ($f->name != $matched_feature['name']) {
// Yes. we need to update name of this feature.
// Adding flag to cached feature that indicates updated needed.
$this->update_names[$f->feature_id] = $matched_feature['name'];
}
}
}
}
Expand Down Expand Up @@ -2082,6 +2147,7 @@ class GFF3Importer extends TripalImporter {
*
*/
private function findDbxrefs() {

$batch_size = 1000;
$num_dbxrefs = count(array_keys($this->dbxref_lookup));
$num_batches = (int) ($num_dbxrefs / $batch_size) + 1;
Expand Down Expand Up @@ -2240,6 +2306,7 @@ class GFF3Importer extends TripalImporter {
*
*/
private function insertDbxrefs() {

$batch_size = 1000;
$num_dbxrefs = count(array_keys($this->dbxref_lookup));
$num_batches = (int) ($num_dbxrefs / $batch_size) + 1;
Expand Down Expand Up @@ -2820,8 +2887,7 @@ class GFF3Importer extends TripalImporter {
$uniquename = '';
$name = '';

// If there is no ID or name then try to create a name and ID.
if (!array_key_exists('ID', $attrs) and !array_key_exists('name', $attrs)) {
if (!array_key_exists('ID', $attrs) and !array_key_exists('Name', $attrs)) {

// Check if an alternate ID field is suggested, if so, then use
// that for the name.
Expand Down Expand Up @@ -2895,7 +2961,6 @@ class GFF3Importer extends TripalImporter {
throw new Exception(t("A feature with the same ID exists multiple times: !uname", ['!uname' => $uniquename]));
}
}

return [
'name' => $name,
'uniquename' => $uniquename,
Expand Down

0 comments on commit 83f11e4

Please sign in to comment.