Skip to content

Commit

Permalink
merged main
Browse files Browse the repository at this point in the history
  • Loading branch information
kevinschaper committed Aug 25, 2023
2 parents c2fdc8f + 265e23a commit 99cabad
Show file tree
Hide file tree
Showing 7 changed files with 143 additions and 116 deletions.
6 changes: 6 additions & 0 deletions docs/Sources/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Data Sources

This section contains detailed information on all datasets and ontologies
ingested to create the Monarch knowledge graph.

To learn more about a specific dataset/ontology, click on the source name in the list to the left.
1 change: 1 addition & 0 deletions mkdocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ nav:
- KG Build Process: 'KG-Build-Process/kg-build-process.md'
- Principles: 'Principles/modeling-principles.md'
- Sources:
- Overview: 'Sources/index.md'
- Alliance: 'Sources/alliance.md'
- BGee: 'Sources/bgee.md'
- CTD: 'Sources/ctd.md'
Expand Down
223 changes: 112 additions & 111 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions src/monarch_ingest/cli_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,10 @@ def load_jsonl():
edges_df = pandas.read_csv(edge_file, sep="\t", dtype="string", lineterminator="\n", quoting=csv.QUOTE_NONE,
comment='#')
edges_df["category"] = edges_df["category"].map(class_ancestor_dict)
# Translator needs these two fields (and only these two) to carry the biolink:
# prefix, so they are duplicated here under the prefixed column names
edges_df["biolink:primary_knowledge_source"] = edges_df["primary_knowledge_source"]
edges_df["biolink:aggregator_knowledge_source"] = edges_df["aggregator_knowledge_source"]
edges_df.to_json("output/monarch-kg_edges.jsonl", orient="records", lines=True)
del edges_df
gc.collect()
Expand Down
4 changes: 2 additions & 2 deletions src/monarch_ingest/ingests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ goa_go_annotation:
config: 'ingests/goa/go_annotation.yaml'
hgnc_gene:
config: 'ingests/hgnc/gene.yaml'
hpoa_disease_phenotype:
config: 'ingests/hpoa/disease_phenotype.yaml'
hpoa_disease_to_phenotype:
config: 'ingests/hpoa/disease_to_phenotype.yaml'
hpoa_gene_to_disease:
config: 'ingests/hpoa/gene_to_disease.yaml'
hpoa_disease_mode_of_inheritance:
Expand Down
6 changes: 4 additions & 2 deletions src/monarch_ingest/ingests/dictybase/gene.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@
koza_app = get_koza_app("dictybase_gene")
taxon_labels = koza_app.get_map("taxon-labels")

in_taxon = "NCBITaxon:44689"
in_taxon_label = taxon_labels[in_taxon]['label'] if in_taxon in taxon_labels else "Dictyostelium discoideum"

while (row := koza_app.get_row()) is not None:

synonyms = []
if row['Synonyms'] is not None:
synonyms = row['Synonyms'].split(", ")

in_taxon = "NCBITaxon:44689"

gene = Gene(
id='dictyBase:' + row['GENE ID'],
Expand All @@ -20,7 +22,7 @@
full_name=row['Gene Name'],
synonym=synonyms,
in_taxon=[in_taxon],
in_taxon_label=taxon_labels[in_taxon]['label'],
in_taxon_label=in_taxon_label,
provided_by=["infores:dictybase"]
)

Expand Down
15 changes: 14 additions & 1 deletion src/monarch_ingest/ingests/ncbi/gene.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,23 @@
koza_app = get_koza_app("ncbi_gene")
taxon_labels = koza_app.get_map("taxon-labels")

# Fallback labels for taxa that are missing from phenio's NCBITaxon subset
# (the taxon-labels map); add an entry here when a needed label is absent.
# NCBITaxon:227321 is Aspergillus nidulans FGSC A4 in the NCBI Taxonomy;
# Dictyostelium discoideum is NCBITaxon:44689, so it must not be used here.
extra_taxon_labels = {
    'NCBITaxon:227321': 'Aspergillus nidulans FGSC A4',
}

while (row := koza_app.get_row()) is not None:

in_taxon = 'NCBITaxon:' + row["tax_id"]
in_taxon_label = taxon_labels[in_taxon]["label"]


if in_taxon in taxon_labels:
in_taxon_label = taxon_labels[in_taxon]['label']
elif in_taxon in extra_taxon_labels:
in_taxon_label = extra_taxon_labels[in_taxon]
else:
raise ValueError(f"Taxon {in_taxon} not found in taxon-labels")

gene = Gene(
id='NCBIGene:' + row["GeneID"],
symbol=row["Symbol"],
Expand Down

0 comments on commit 99cabad

Please sign in to comment.