Merge pull request #1136 from tripal/1135-tv3-sequence_fields

Fix for bugs with displaying sequences
tripal · Dec 20, 2020 · 861164d · 861164d
2 parents 48f098f + 51cdd14
commit 861164d
Show file tree

Hide file tree

Showing 16 changed files with 678 additions and 177 deletions.
diff --git a/tripal/includes/tripal.fields.inc b/tripal/includes/tripal.fields.inc
@@ -911,11 +911,13 @@ function tripal_form_field_ui_display_overview_form_alter(&$form, &$form_state,
   $fields_names = element_children($form['fields']);
   foreach ($fields_names as $field_name) {
     $field_info = field_info_field($field_name);
-    if ($field_info['type'] == 'kvproperty_adder') {
-      unset($form['fields'][$field_name]);
-    }
-    if ($field_info['type'] == 'cvterm_class_adder') {
-      unset($form['fields'][$field_name]);
+    if ($field_info) {
+      if ($field_info['type'] == 'kvproperty_adder') {
+        unset($form['fields'][$field_name]);
+      }
+      if ($field_info['type'] == 'cvterm_class_adder') {
+        unset($form['fields'][$field_name]);
+      }
     }
   }
 }

diff --git a/tripal_chado/includes/TripalFields/data__protein_sequence/data__protein_sequence.inc b/tripal_chado/includes/TripalFields/data__protein_sequence/data__protein_sequence.inc
@@ -93,18 +93,12 @@ class data__protein_sequence extends ChadoField {
       WHERE
         FR.object_id = :feature_id and
         CVT.name = 'polypeptide' and
-        RCVT.name  IN ('derives_from', 'part_of')
+        RCVT.name IN ('derives_from', 'part_of')
       ORDER BY FR.rank ASC
     ";
     $proteins = chado_query($sql, [':feature_id' => $feature->feature_id]);
     while ($protein = $proteins->fetchObject()) {
       $entity->{$field_name}['und'][$num_seqs]['value'] = $protein->residues;
-      // Because we'll be saving a feature we need to maintain all of it's
-      // columns in the feature table. The following will add them all.
-      $columns = get_object_vars($protein);
-      foreach ($columns as $colname => $value) {
-        $entity->{$field_name}['und'][$num_seqs]['chado-feature__' . $colname] = $value;
-      }
       $num_seqs++;
     }
   }

diff --git a/...l_chado/includes/TripalFields/data__protein_sequence/data__protein_sequence_formatter.inc b/...l_chado/includes/TripalFields/data__protein_sequence/data__protein_sequence_formatter.inc
@@ -12,7 +12,8 @@ class data__protein_sequence_formatter extends ChadoFieldFormatter {
    * @see TripalFieldFormatter::view()
    */
   public function view(&$element, $entity_type, $entity, $langcode, $items, $display) {
-    $content = 'There is no protein sequence.';
+
+    $content = 'There is no protein sequence available.';
     if (count($items) > 0 and $items[0]['value']) {
       $num_bases = 50;
       $content = '<pre class="protein-residues-formatter">';

diff --git a/tripal_chado/includes/TripalFields/data__sequence/data__sequence.inc b/tripal_chado/includes/TripalFields/data__sequence/data__sequence.inc
@@ -15,7 +15,7 @@ class data__sequence extends ChadoField {
   public static $default_label = 'Sequence';
 
   // The default description for this field.
-  public static $description = 'A field for managing nucleotide and protein residues.';
+  public static $description = 'A field for managing the primary sequence for a feature.';
 
   // Provide a list of instance specific settings. These can be accessed within
   // the instanceSettingsForm.  When the instanceSettingsForm is submitted
@@ -77,103 +77,6 @@ class data__sequence extends ChadoField {
 
     $feature = chado_expand_var($feature, 'field', 'feature.residues');
     $entity->{$field_name}['und'][0]['value'] = $feature->residues;
-
-    /* // Add in sequences from alignments.
-       $options = array(
-         'return_array' => 1,
-         'include_fk' => array(
-           'srcfeature_id' => array(
-             'type_id' => 1
-           ),
-           'feature_id' => array(
-             'type_id' => 1
-           ),
-         ),
-       );
-       $feature = chado_expand_var($feature, 'table', 'featureloc', $options);
-       $featureloc_sequences = $this->get_featureloc_sequences($feature->feature_id, $feature->featureloc->feature_id);
-   
-       // Add in the coding sequences. It's faster to provide the SQL rather than
-       // to use chado_generate_var based on the type.
-       $sql = "
-         SELECT F.*
-         FROM {feature_relationship} FR
-           INNER JOIN {feature} F on FR.subject_id = F.feature_id
-           INNER JOIN {cvterm} CVT on CVT.cvterm_id = F.type_id
-           INNER JOIN {cvterm} RCVT on RCVT.cvterm_id = FR.type_id
-           INNER JOIN {featureloc} FL on FL.feature_id = F.feature_id
-         WHERE
-           FR.object_id = :feature_id and
-           CVT.name = 'CDS' and
-           RCVT.name = 'part_of'
-         ORDER BY FR.rank ASC
-       ";
-       $results = chado_query($sql, array(':feature_id' => $feature->feature_id));
-       $coding_seq = '';
-       while ($CDS = $results->fetchObject()) {
-         if ($CDS->residues) {
-           $coding_seq .= $CDS->residues;
-         }
-       }
-       if ($coding_seq) {
-         $entity->{$field_name}['und'][$num_seqs++]['value'] = array(
-           '@type' => 'SO:0000316',
-           'type' => 'coding_sequence',
-           'label' => 'Coding sequence (CDS)',
-           'defline' => chado_get_fasta_defline($feature, 'CDS', NULL, '', strlen($coding_seq)),
-           'residues' => $coding_seq,
-         );
-       }
-   
-       foreach($featureloc_sequences as $src => $attrs){
-         // the $attrs array has the following keys
-         //   * id:  a unique identifier combining the feature id with the cvterm id
-         //   * type: the type of sequence (e.g. mRNA, etc)
-         //   * location:  the alignment location
-         //   * defline: the definition line
-         //   * formatted_seq: the formatted sequences
-         //   * featureloc:  the feature object aligned to
-         $entity->{$field_name}['und'][$num_seqs++]['value'] = array(
-           'residues' => $attrs['residues'],
-           '@type' => 'SO:0000110',
-           'type' => 'sequence_feature',
-           'defline' => chado_get_fasta_defline($feature, '', $attrs['featureloc'], 'CDS', strlen($attrs['residues'])),
-           'label' => 'Sequence from alignment at ' . $attrs['location'],
-         );
-   
-   
-         // check to see if this alignment has any CDS. If so, generate a CDS sequence
-         $cds_sequence = chado_get_feature_sequences(
-             array(
-               'feature_id' => $feature->feature_id,
-               'parent_id' => $attrs['featureloc']->srcfeature_id->feature_id,
-               'name' => $feature->name,
-               'featureloc_id' => $attrs['featureloc']->featureloc_id,
-             ),
-             array(
-               'derive_from_parent' => 1, // CDS are in parent-child relationships so we want to use the sequence from the parent
-               'aggregate' => 1, // we want to combine all CDS for this feature into a single sequence
-               'sub_feature_types' => array('CDS'), // we're looking for CDS features
-               'is_html' => 0
-             )
-             );
-   
-         if (count($cds_sequence) > 0) {
-           // the chado_get_feature_sequences() function can return multiple sequences
-           // if a feature is aligned to multiple places. In the case of CDSs we expect
-           // that one mRNA is only aligned to a single location on the assembly so we
-           // can access the CDS sequence with index 0.
-           if ($cds_sequence[0]['residues']) {
-             $entity->{$field_name}['und'][$num_seqs++]['value'] = array(
-               'residues' => $cds_sequence[0]['residues'],
-               '@type' => 'SO:0000316',
-               'type' => 'coding_sequence',
-               'defline' => chado_get_fasta_defline($feature, '', $attrs['featureloc'], 'CDS', $cds_sequence[0]['length']),
-               'label' => 'Coding sequence (CDS) from alignment at  ' . $attrs['location'],
-             );
-           }
-         }
-       } */
   }
 }
 
diff --git a/tripal_chado/includes/TripalFields/data__sequence_coordinates/data__sequence_coordinates.inc b/tripal_chado/includes/TripalFields/data__sequence_coordinates/data__sequence_coordinates.inc
@@ -224,49 +224,54 @@ class data__sequence_coordinates extends ChadoField {
     $strand_term = chado_get_semweb_term('featureloc', 'strand');
     $phase_term = chado_get_semweb_term('featureloc', 'phase');
 
-    $options = [
-      'return_array' => TRUE,
-      'order_by' => ['rank' => 'ASC'],
-    ];
-    $feature = chado_expand_var($feature, 'table', 'featureloc', $options);
-
     // Set some defauls for the empty record
     $entity->{$field_name}['und'][0] = [
       'value' => '',
     ];
 
-    // Get the featureloc records that this feature is aligned to.
-    $aligned = $feature->featureloc->feature_id;
-    if ($aligned) {
-      foreach ($aligned as $index => $featureloc) {
-        $srcfeature = $featureloc->srcfeature_id->name;
+    // Get the featureloc records that this feature is aligned to. We use
+    // this SQL rather than the chado_expand_var function because we don't
+    // want the residues included from the srcfeature_id which may be huge
+    // and overrun memory.
+    $featurelocs_sql = "
+      SELECT SRCF.name, FL.srcfeature_id, FL.strand, FL.fmin, FL.fmax, FL.phase
+      FROM {featureloc} FL
+        INNER JOIN {feature} SRCF on SRCF.feature_id = FL.srcfeature_id
+      WHERE FL.feature_id = :feature_id
+      ORDER BY FL.rank ASC
+    ";
+    $aligned = chado_query($featurelocs_sql, [':feature_id' => $feature->feature_id]);
+    $index = 0;
+    while ($featureloc = $aligned->fetchObject()) {
+      $srcfeature = $featureloc->name;
+      $strand = '';
+      if ($featureloc->strand == 1) {
+        $strand = '+';
+      }
+      elseif ($featureloc->strand == -1) {
+        $strand = '-';
+      }
+      else {
         $strand = '';
-        if ($featureloc->strand == 1) {
-          $strand = '+';
-        }
-        elseif ($featureloc->strand == -1) {
-          $strand = '-';
-        }
-        else {
-          $strand = '';
-        }
-        $fmin = $featureloc->fmin + 1;
-        $fmax = $featureloc->fmax;
-        $entity->{$field_name}['und'][$index] = [
-          'value' => [
-            $description => $srcfeature . ':' . $fmin . '-' . $fmax . $strand,
-            $reference_term => $srcfeature,
-            $fmin_term => $fmin,
-            $fmax_term => $fmax,
-            $strand_term => $strand,
-            $phase_term => $featureloc->phase,
-          ],
-        ];
-        $sentity_id = chado_get_record_entity_by_table('feature_id', $featureloc->srcfeature_id->feature_id);
-        if ($sentity_id) {
-          $entity->{$field_name}['und'][0]['value']['entity'] = 'TripalEntity:' . $sentity_id;
-        }
       }
+      $fmin = $featureloc->fmin + 1;
+      $fmax = $featureloc->fmax;
+      $entity->{$field_name}['und'][$index] = [
+        'value' => [
+          $description => $srcfeature . ':' . $fmin . '-' . $fmax . $strand,
+          $reference_term => $srcfeature,
+          $fmin_term => $fmin,
+          $fmax_term => $fmax,
+          $strand_term => $strand,
+          $phase_term => $featureloc->phase,
+        ],
+      ];
+      // Attach the source feature's entity to this alignment's own item.
+      $sentity_id = chado_get_record_entity_by_table('feature_id', $featureloc->srcfeature_id);
+      if ($sentity_id) {
+        $entity->{$field_name}['und'][$index]['value']['entity'] = 'TripalEntity:' . $sentity_id;
+      }
+      $index++;
     }
   }
 }