Skip to content

Commit

Permalink
Merge pull request #1117 from tripal/1116-tv3-edam_import
Browse files Browse the repository at this point in the history
Fixes to load EDAM ontology.
  • Loading branch information
laceysanderson committed Nov 7, 2020
2 parents 29ac9f7 + f1cfe3a commit e2b5386
Showing 1 changed file with 54 additions and 8 deletions.
62 changes: 54 additions & 8 deletions tripal_chado/includes/TripalImporter/OBOImporter.inc
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,15 @@ class OBOImporter extends TripalImporter {
*/
private $default_namespace = '';


/**
* Holds the idspace elements from the header. These will correspond
* to the accession prefixes, or short names (e.g. GO) for the terms. For
* example, the EDAM vocabulary has several id spaces:
* format, data, operation and topic.
*/
private $idspaces = [];

/**
* The default database prefix for this ontology.
*
Expand Down Expand Up @@ -841,6 +850,7 @@ class OBOImporter extends TripalImporter {
private function setDefaults($header) {
$short_name = '';
$namespace = '';
$idspaces = [];

// Get the 'ontology' and 'default-namespace' headers. Unfortunately,
// not all OBO files contain these.
Expand All @@ -850,7 +860,19 @@ class OBOImporter extends TripalImporter {
if (array_key_exists('default-namespace', $header)) {
$namespace = $header['default-namespace'][0];
}

if (array_key_exists('idspace', $header)) {
$matches = [];
foreach ($header['idspace'] as $idspace) {
if (preg_match('/^(.+?)\s+(.+?)\s+"(.+)"$/', $idspace, $matches)) {
$idspaces[$matches[1]]['url'] = $matches[2];
$idspaces[$matches[1]]['description'] = $matches[3];
}
elseif (preg_match('/^(.+?)\s+(.+?)$/', $idspace, $matches)) {
$idspaces[$matches[1]]['url'] = $matches[2];
$idspaces[$matches[1]]['description'] = '';
}
}
}
// The OBO specification allows the 'ontology' header tag to be nested for
// subsets (e.g. go/subsets/goslim_plant). We need to simplify that down
// to the top-level item.
Expand Down Expand Up @@ -901,7 +923,7 @@ class OBOImporter extends TripalImporter {

// If we can't find the namespace or the short_name then bust.
if (!$namespace and !$short_name) {
throw new ErrorException('Cannot determine the namespace or ontology prefix from this OBO file. It is missing both the "default-namespace" and "ontology" headers.');
throw new ErrorException('Cannot determine the namespace or ontology prefix from this OBO file. It is missing both the "default-namespace" or a compatible "ontology" header.');
}

// Set the defaults.
Expand All @@ -910,7 +932,12 @@ class OBOImporter extends TripalImporter {
$this->addDB($this->default_db);
$cv = $this->addCV($this->default_namespace);
$this->obo_namespaces[$namespace] = $cv->cv_id;
$this->idspaces = $idspaces;

// Add a new database for each idspace.
foreach ($idspaces as $shortname => $idspace) {
$this->addDB($shortname, $idspace['url'], $idspace['description']);
}
}

/**
Expand Down Expand Up @@ -1026,15 +1053,15 @@ class OBOImporter extends TripalImporter {
$ontology_results = drupal_json_decode($response->data);
if ($ontology_results['error']) {

$this->logMessage('Cannot find the ontology via an EBI OLS lookup: !short_name. \n' .
$this->logMessage(t('Cannot find the ontology via an EBI OLS lookup: !short_name. \n' .
'We tried to access: !url' .
'EBI Reported: !message. ' .
'Consider finding the OBO file for this ontology and manually loading it first.',
[
'!message' => $ontology_results['message'],
'!short_name' => $short_name,
'!url' => $full_url,
], TRIPAL_WARNING);
]), TRIPAL_WARNING);
}
//What should happen with this stuff?
$base_iri = $ontology_results['config']['baseUris'][0];
Expand Down Expand Up @@ -1784,7 +1811,8 @@ class OBOImporter extends TripalImporter {
$accession = $matches[2];

// If the term is borrowed then let's try to deal with it.
if ($short_name != $this->default_db) {
$idspaces = array_keys($this->idspaces);
if ($short_name != $this->default_db and !in_array($short_name, $idspaces)) {

// First try to lookup the term and replace the stanza with the updated
// details.
Expand Down Expand Up @@ -2079,6 +2107,11 @@ class OBOImporter extends TripalImporter {

// If this term has a namespace then we want to keep track of it.
if (array_key_exists('namespace', $stanza)) {
// Fix the namespace for EDAM terms so they all use the same
// namespace (i.e. cv record).
if ($this->default_namespace == 'EDAM') {
$stanza['namespace'][0] = 'EDAM';
}
$namespace = $stanza['namespace'][0];
$cv = $this->all_cvs[$namespace];
$this->obo_namespaces[$namespace] = $cv->cv_id;
Expand All @@ -2104,6 +2137,11 @@ class OBOImporter extends TripalImporter {
continue;
}

// For EDAM, we unfortunately have to hard-code a fix: strip the "EDAM_"
// prefix from the short names of terms (e.g. EDAM_format becomes format).
$line = preg_replace('/EDAM_(\w+)/', '\1', $line);


// break apart the line into the tag and value but ignore any escaped colons
preg_replace("/\\:/", "|-|-|", $line); // temporarily replace escaped colons
$pair = explode(":", $line, 2);
Expand Down Expand Up @@ -2257,7 +2295,7 @@ class OBOImporter extends TripalImporter {
* @return
* A Chado database object.
*/
private function addDB($dbname) {
private function addDB($dbname, $url = '', $description = '') {
// Add the database if it doesn't exist.
$db = NULL;
if (array_key_exists($dbname, $this->all_dbs)) {
Expand All @@ -2266,7 +2304,14 @@ class OBOImporter extends TripalImporter {
else {
// If it's not in the cache we can assume it doesn't exist and insert.
$db = new ChadoRecord('db');
$db->setValues(['name' => $dbname]);
$values = ['name' => $dbname];
if ($url) {
$values['url'] = $url;
}
if ($description) {
$values['description'] = $description;
}
$db->setValues($values);
$db->insert();
$db = (object) $db->getValues();
$this->all_dbs[$dbname] = $db;
Expand Down Expand Up @@ -2341,7 +2386,8 @@ class OBOImporter extends TripalImporter {
}

if (!$accession) {
throw new Exception("Cannot add an Alt ID without an accession: '$alt_id'");
$this->logMessage("Cannot add an Alt ID without an accession: '!alt_id'", ['!alt_id' => $alt_id]);
return;
}

// Add the database if it doesn't exist.
Expand Down

0 comments on commit e2b5386

Please sign in to comment.