Skip to content

Commit

Permalink
Merge branch 'main' into add-docs-badge
Browse files Browse the repository at this point in the history
  • Loading branch information
glass-ships committed Aug 16, 2023
2 parents d1cfead + efde28c commit f3f1f56
Show file tree
Hide file tree
Showing 8 changed files with 102 additions and 67 deletions.
47 changes: 47 additions & 0 deletions scripts/add_association_copyfields.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/bin/sh
#
# Configure dynamic fields and copy fields on the Solr "association" core.
#
# Each step below is one POST to the core's Schema API:
#   1. Delete the "*_t" dynamic field, then re-add it with the "text" type.
#   2. Add a "*_ac" dynamic field with the "autocomplete" type.
#   3. For every association field in the list at the bottom, add a copy
#      field from <field> to <field>_t.
#
# Environment:
#   SOLR_ASSOCIATION_SCHEMA_URL  Schema API endpoint to post to. Defaults to
#                                http://localhost:8983/solr/association/schema
#
# The script never aborts early: every request is attempted, and the exit
# status is that of the last request. When curl itself fails (for example
# because the server cannot be reached) a message is written to stderr.
# Response bodies returned by Solr are printed by curl exactly as before.

SCHEMA_URL=${SOLR_ASSOCIATION_SCHEMA_URL:-http://localhost:8983/solr/association/schema}

# POST one Schema API command to the association core.
# Arguments: $1 - the JSON document to send as the request body
# Outputs:   whatever curl prints; a diagnostic on stderr if curl fails
# Returns:   curl's exit status
post_schema() {
  curl -X POST -H 'Content-type:application/json' --data-binary "$1" "$SCHEMA_URL"
  # No "local" in POSIX sh, so use a name unlikely to clash with anything.
  post_schema_rc=$?
  if [ "$post_schema_rc" -ne 0 ]; then
    printf 'add_association_copyfields: curl exited with status %s posting to %s\n' \
      "$post_schema_rc" "$SCHEMA_URL" >&2
  fi
  return "$post_schema_rc"
}

# Add two dynamic field declarations to the schema.

# One for text fields: "*_t" comes built in, so it has to be deleted first
# and then re-created with the desired type.
post_schema '{
"delete-dynamic-field": {
"name": "*_t"
}
}'

post_schema '{
"add-dynamic-field": {
"name": "*_t",
"type": "text",
"indexed": true,
"stored": false,
"multiValued": true
}
}'

# One for autocomplete fields.
post_schema '{
"add-dynamic-field": {
"name": "*_ac",
"type": "autocomplete",
"indexed": true,
"stored": false,
"multiValued": true
}
}'

# Now add a copy field declaration (<field> -> <field>_t) for each of the
# association fields listed here.
for field in \
  subject_label \
  subject_closure_label \
  subject_taxon \
  subject_taxon_label \
  predicate \
  object_label \
  object_closure_label \
  object_taxon \
  object_taxon_label \
  primary_knowledge_source \
  in_taxon_label \
  qualifier_label \
  onset_qualifier_label \
  frequency_qualifier_label \
  sex_qualifier_label
do
  post_schema "{
\"add-copy-field\": {
\"source\":\"$field\",
\"dest\": \"${field}_t\"
}
}"
done
7 changes: 7 additions & 0 deletions scripts/add_fieldtypes.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
#
# Register the custom "text" and "autocomplete" field types on the Solr
# "entity" and "association" cores by posting text-fieldtype.json and
# autocomplete-fieldtype.json to each core's Schema API.
#
# The JSON payloads are looked up next to this script, so the script works
# from any working directory (previously it only worked when invoked from
# the repository root). If the script's directory cannot be determined, the
# old cwd-relative "scripts" location is used.

script_dir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd) || script_dir=scripts

for core in entity association; do
  # Same order as before for each core: text first, then autocomplete.
  for fieldtype in text autocomplete; do
    # --data-binary sends the file verbatim; plain -d strips newlines.
    curl -X POST -H 'Content-type:application/json' \
      --data-binary "@${script_dir}/${fieldtype}-fieldtype.json" \
      "http://localhost:8983/solr/${core}/schema"
  done
done
26 changes: 26 additions & 0 deletions scripts/autocomplete-fieldtype.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"add-field-type": {
"name": "autocomplete",
"class": "solr.TextField",
"positionIncrementGap": "100",
"indexAnalyzer": {
"tokenizer": { "class": "solr.StandardTokenizerFactory" },
"filters": [
{ "class": "solr.ASCIIFoldingFilterFactory" },
{ "class": "solr.LowerCaseFilterFactory" },
{
"class": "solr.EdgeNGramFilterFactory",
"minGramSize": "1",
"maxGramSize": "25"
}
]
},
"queryAnalyzer": {
"tokenizer": { "class": "solr.StandardTokenizerFactory" },
"filters": [
{ "class": "solr.ASCIIFoldingFilterFactory" },
{ "class": "solr.LowerCaseFilterFactory" }
]
}
}
}
3 changes: 2 additions & 1 deletion scripts/load_solr.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ sleep 10

# todo: ideally, this will live in linkml-solr
echo "Adding additional fieldtypes"
scripts/add_entity_fieldtypes.sh
scripts/add_fieldtypes.sh
sleep 5

echo "Adding entity schema"
Expand All @@ -38,6 +38,7 @@ sleep 5
# todo: this also should live in linkml-solr, and copy-fields should be based on the schema
echo "Add dynamic fields and copy fields declarations"
scripts/add_entity_copyfields.sh
scripts/add_association_copyfields.sh
sleep 5

echo "Loading entities"
Expand Down
32 changes: 2 additions & 30 deletions scripts/add_entity_fieldtypes.sh → scripts/text-fieldtype.json
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#!/bin/sh
curl -X POST -H 'Content-type:application/json' --data-binary '{
{
"add-field-type": {
"name": "text",
"class": "solr.TextField",
Expand Down Expand Up @@ -52,31 +51,4 @@ curl -X POST -H 'Content-type:application/json' --data-binary '{
]
}
}
}' http://localhost:8983/solr/entity/schema

curl -X POST -H 'Content-type:application/json' --data-binary '{
"add-field-type": {
"name": "autocomplete",
"class": "solr.TextField",
"positionIncrementGap": "100",
"indexAnalyzer": {
"tokenizer": { "class": "solr.StandardTokenizerFactory" },
"filters": [
{ "class": "solr.ASCIIFoldingFilterFactory" },
{ "class": "solr.LowerCaseFilterFactory" },
{
"class": "solr.EdgeNGramFilterFactory",
"minGramSize": "1",
"maxGramSize": "25"
}
]
},
"queryAnalyzer": {
"tokenizer": { "class": "solr.StandardTokenizerFactory" },
"filters": [
{ "class": "solr.ASCIIFoldingFilterFactory" },
{ "class": "solr.LowerCaseFilterFactory" }
]
}
}
}' http://localhost:8983/solr/entity/schema
}
18 changes: 8 additions & 10 deletions src/monarch_ingest/ingests/alliance/gene_to_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,14 @@
# but may have an UBERON term that we can use
# stage_term_id = get_data(row, "whenExpressed.stageUberonSlimTerm.uberonTerm")

evidence = list()
assay = get_data(row, "assay") # e.g. "MMO:0000658"
if assay:
evidence.append(assay)


publication_ids = [get_data(row, "evidence.publicationId")]

xref = get_data(row, "crossReference.id")
if xref:
evidence.append(xref)
publication_ids.append(xref)

publication_ids = get_data(row, "evidence.publicationId")

# Our current ingest policy is to first use a reported Anatomical structure term...
if anatomical_entity_id:
Expand All @@ -53,8 +51,8 @@
predicate='biolink:expressed_in',
object=anatomical_entity_id,
stage_qualifier=stage_term_id,
has_evidence=evidence,
publications=[publication_ids],
qualifiers=([get_data(row, "assay")] if get_data(row, "assay") else None),
publications=publication_ids,
aggregator_knowledge_source=["infores:monarchinitiative", "infores:alliancegenome"],
primary_knowledge_source=source
)
Expand All @@ -70,8 +68,8 @@
predicate='biolink:expressed_in',
object=cellular_component_id,
stage_qualifier=stage_term_id,
has_evidence=evidence,
publications=[publication_ids],
qualifiers=([get_data(row, "assay")] if get_data(row, "assay") else None),
publications=publication_ids,
aggregator_knowledge_source=["infores:monarchinitiative", "infores:alliancegenome"],
primary_knowledge_source=source
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ edge_properties:
- 'object'
- 'stage_qualifier'
- 'category'
- 'has_evidence'
- 'qualifiers'
- 'publications'
- 'aggregator_knowledge_source'
- 'primary_knowledge_source'
Expand Down
34 changes: 9 additions & 25 deletions tests/unit/alliance/test_alliance_gene_to_expression.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,6 @@
import pytest
from biolink.pydanticmodel import GeneToExpressionSiteAssociation

#
# test of utility function - proven to work, unless modified in the future?
#
# def test_get_data():
# entry = {
# "testing": {
# "one": {
# "two": {
# "three": "Success!"
# }
# }
# }
# }
# assert get_data(entry, "testing.one.two.three") == "Success!"


@pytest.fixture
def source_name():
return "alliance_gene_to_expression"
Expand Down Expand Up @@ -75,7 +59,7 @@ def test_rattus_association(rattus):
assert association.object == "GO:0030141"
assert not association.stage_qualifier
assert "PMID:12615975" in association.publications
assert "MMO:0000640" in association.has_evidence
assert "MMO:0000640" in association.qualifiers
assert association.primary_knowledge_source == "infores:rgd"
assert aggregator_knowledge_sources(association)

Expand Down Expand Up @@ -126,8 +110,8 @@ def test_mouse_association(mouse):
assert association.object == "EMAPA:16039"
assert association.stage_qualifier is None
assert "MGI:1199209" in association.publications
assert "MMO:0000655" in association.has_evidence
assert "MGI:1203979" in association.has_evidence
assert "MGI:1203979" in association.publications
assert "MMO:0000655" in association.qualifiers
assert association.primary_knowledge_source == "infores:mgi"
assert aggregator_knowledge_sources(association)

Expand Down Expand Up @@ -183,8 +167,8 @@ def test_zebrafish_association(zebrafish):
assert association.object == "ZFA:0001094"
assert association.stage_qualifier == "ZFS:0000035"
assert "PMID:18544660" in association.publications
assert "MMO:0000655" in association.has_evidence
assert "ZFIN:ZDB-FIG-080908-4" in association.has_evidence
assert "ZFIN:ZDB-FIG-080908-4" in association.publications
assert "MMO:0000655" in association.qualifiers
assert association.primary_knowledge_source == "infores:zfin"
assert aggregator_knowledge_sources(association)

Expand Down Expand Up @@ -231,7 +215,7 @@ def test_drosophila_association(drosophila):
assert association.object == "FBbt:00003007"
assert association.stage_qualifier == "FBdv:00005369"
assert "FB:FBrf0231198" in association.publications
assert "MMO:0000534" in association.has_evidence
assert "MMO:0000534" in association.qualifiers
assert association.primary_knowledge_source == "infores:flybase"
assert aggregator_knowledge_sources(association)

Expand Down Expand Up @@ -282,8 +266,8 @@ def test_worm_association(worm):
assert association.object == "WBbt:0000100"
assert association.stage_qualifier == "WBls:0000057"
assert association.publications[0] == "PMID:1782857"
assert "MMO:0000670" in association.has_evidence
assert "WB:Expr1" in association.has_evidence
assert "WB:Expr1" in association.publications
assert "MMO:0000670" in association.qualifiers
assert association.primary_knowledge_source == "infores:wormbase"
assert aggregator_knowledge_sources(association)

Expand Down Expand Up @@ -327,6 +311,6 @@ def test_yeast_association(yeast):
assert association.object == "GO:1990316"
assert not association.stage_qualifier
assert association.publications[0] == "PMID:26753620"
assert "MMO:0000642" in association.has_evidence
assert "MMO:0000642" in association.qualifiers
assert association.primary_knowledge_source == "infores:sgd"
assert aggregator_knowledge_sources(association)

0 comments on commit f3f1f56

Please sign in to comment.