Skip to content

Commit

Permalink
Adding generate-extract command, 158. Add cell type templates monarch…
Browse files Browse the repository at this point in the history
…-initiative#159

This PR does two things:

- Add a combined generate-extract command, fixes monarch-initiative#158
- Adds cell type templates, fixes monarch-initiative#159
  • Loading branch information
cmungall authored and Ruchira S Datta rsd11 committed Aug 10, 2023
1 parent f60b75a commit 3183c48
Show file tree
Hide file tree
Showing 6 changed files with 387 additions and 11 deletions.
203 changes: 203 additions & 0 deletions src/ontogpt/templates/cell_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
from __future__ import annotations
from datetime import datetime, date
from enum import Enum
from typing import List, Dict, Optional, Any, Union, Literal
from pydantic import BaseModel as BaseModel, Field
from linkml_runtime.linkml_model import Decimal

# Version metadata for this module. Both values are the literal string
# "None", not the ``None`` singleton.
# NOTE(review): presumably written by the code generator when the schema
# declares no version -- confirm.
metamodel_version: str = "None"
version: str = "None"

class WeakRefShimBaseModel(BaseModel):
    # Adds a ``__weakref__`` slot on top of pydantic's BaseModel.
    # NOTE(review): the class name suggests this is a shim that keeps model
    # instances weak-referenceable -- confirm against the generator.
    __slots__ = "__weakref__"

class ConfiguredBaseModel(
    WeakRefShimBaseModel,
    validate_assignment=True,
    validate_all=True,
    underscore_attrs_are_private=True,
    extra="forbid",
    arbitrary_types_allowed=True,
):
    # Common base class for every model in this module. The pydantic
    # configuration is supplied as class keyword arguments in the header
    # above, so the body itself is intentionally empty.
    pass


class BrainRegionIdentifier(str, Enum):
    # String-valued enum with a single placeholder member.
    # NOTE(review): cell_type.yaml declares this enum via ``reachable_from``
    # (terms under UBERON:0001016), so presumably there are no static values
    # to list here -- confirm.
    dummy = "dummy"


class NullDataOptions(str, Enum):
    # String-valued enum; each member's value is identical to its name.
    UNSPECIFIED_METHOD_OF_ADMINISTRATION = "UNSPECIFIED_METHOD_OF_ADMINISTRATION"
    NOT_APPLICABLE = "NOT_APPLICABLE"
    NOT_MENTIONED = "NOT_MENTIONED"



class CellTypeDocument(ConfiguredBaseModel):
    # Fields describing one cell type. Every field is optional: scalar
    # fields default to None and list fields default to a new empty list.
    cell_type: Optional[str] = Field(
        default=None,
        description="the name of the cell type described",
    )
    # NOTE(review): this field has no description, and in cell_type.yaml a
    # ``range: CellType`` entry directly follows the ``cell_type`` attribute.
    # It may be a schema artifact rather than intended data -- confirm
    # before relying on it.
    range: Optional[str] = Field(default=None)
    parents: Optional[List[str]] = Field(
        default_factory=list,
        description="categorization",
    )
    subtypes: Optional[List[str]] = Field(default_factory=list)
    localizations: Optional[List[str]] = Field(default_factory=list)
    genes: Optional[List[str]] = Field(default_factory=list)
    diseases: Optional[List[str]] = Field(default_factory=list)



class InterneuronDocument(CellTypeDocument):
    # Adds ``projects_to_or_from`` to CellTypeDocument. The remaining fields
    # repeat the parent's declarations with the same types and defaults.
    projects_to_or_from: Optional[List[str]] = Field(
        default_factory=list,
        description=(
            "Brain structures from which this cell type projects into "
            "or receives projections from"
        ),
    )
    cell_type: Optional[str] = Field(
        default=None,
        description="the name of the cell type described",
    )
    range: Optional[str] = Field(default=None)
    parents: Optional[List[str]] = Field(
        default_factory=list,
        description="categorization",
    )
    subtypes: Optional[List[str]] = Field(default_factory=list)
    localizations: Optional[List[str]] = Field(default_factory=list)
    genes: Optional[List[str]] = Field(default_factory=list)
    diseases: Optional[List[str]] = Field(default_factory=list)



class ExtractionResult(ConfiguredBaseModel):
    """
    A result of extracting knowledge on text
    """

    # Input-side fields; all optional strings defaulting to None.
    input_id: Optional[str] = Field(default=None)
    input_title: Optional[str] = Field(default=None)
    input_text: Optional[str] = Field(default=None)
    raw_completion_output: Optional[str] = Field(default=None)
    prompt: Optional[str] = Field(default=None)
    # Output-side fields; typed as Any, so no structure is enforced here.
    extracted_object: Optional[Any] = Field(
        default=None,
        description="The complex objects extracted from the text",
    )
    named_entities: Optional[List[Any]] = Field(
        default_factory=list,
        description="Named entities extracted from the text",
    )



class NamedEntity(ConfiguredBaseModel):
    # Base class for the identifier-plus-label entities in this module.
    # ``id`` is annotated ``str`` but its default is None.
    id: str = Field(
        default=None,
        description="A unique identifier for the named entity",
    )
    label: Optional[str] = Field(
        default=None,
        description="The label (name) of the named thing",
    )



class Gene(NamedEntity):
    # Gene entity; repeats NamedEntity's ``id`` and ``label`` declarations
    # without adding further fields.
    id: str = Field(
        default=None,
        description="A unique identifier for the named entity",
    )
    label: Optional[str] = Field(
        default=None,
        description="The label (name) of the named thing",
    )



class Pathway(NamedEntity):
    # Pathway entity; repeats NamedEntity's ``id`` and ``label``
    # declarations without adding further fields.
    id: str = Field(
        default=None,
        description="A unique identifier for the named entity",
    )
    label: Optional[str] = Field(
        default=None,
        description="The label (name) of the named thing",
    )



class AnatomicalStructure(NamedEntity):
    # Anatomical-structure entity; repeats NamedEntity's ``id`` and
    # ``label`` declarations without adding further fields.
    id: str = Field(
        default=None,
        description="A unique identifier for the named entity",
    )
    label: Optional[str] = Field(
        default=None,
        description="The label (name) of the named thing",
    )



class BrainRegion(AnatomicalStructure):
    # Brain-region entity, a subclass of AnatomicalStructure. ``id`` is a
    # plain ``str`` here; this class does not reference
    # BrainRegionIdentifier.
    id: str = Field(
        default=None,
        description="A unique identifier for the named entity",
    )
    label: Optional[str] = Field(
        default=None,
        description="The label (name) of the named thing",
    )



class CellType(NamedEntity):
    # Cell-type entity; repeats NamedEntity's ``id`` and ``label``
    # declarations without adding further fields.
    id: str = Field(
        default=None,
        description="A unique identifier for the named entity",
    )
    label: Optional[str] = Field(
        default=None,
        description="The label (name) of the named thing",
    )



class Disease(NamedEntity):
    # Disease entity; repeats NamedEntity's ``id`` and ``label``
    # declarations without adding further fields.
    id: str = Field(
        default=None,
        description="A unique identifier for the named entity",
    )
    label: Optional[str] = Field(
        default=None,
        description="The label (name) of the named thing",
    )



class Drug(NamedEntity):
    # Drug entity; repeats NamedEntity's ``id`` and ``label`` declarations
    # without adding further fields.
    id: str = Field(
        default=None,
        description="A unique identifier for the named entity",
    )
    label: Optional[str] = Field(
        default=None,
        description="The label (name) of the named thing",
    )



class CompoundExpression(ConfiguredBaseModel):
    # Declares no fields of its own; Triple derives from it.
    pass



class Triple(CompoundExpression):
    """
    Abstract parent for Relation Extraction tasks
    """

    # Core subject / predicate / object statement; all optional strings.
    subject: Optional[str] = Field(default=None)
    predicate: Optional[str] = Field(default=None)
    object: Optional[str] = Field(default=None)
    # Optional qualifiers on the statement as a whole, its subject, and its
    # object respectively.
    qualifier: Optional[str] = Field(
        default=None,
        description='A qualifier for the statements, e.g. "NOT" for negation',
    )
    subject_qualifier: Optional[str] = Field(
        default=None,
        description=(
            "An optional qualifier or modifier for the subject of the "
            'statement, e.g. "high dose" or "intravenously administered"'
        ),
    )
    object_qualifier: Optional[str] = Field(
        default=None,
        description=(
            "An optional qualifier or modifier for the object of the "
            'statement, e.g. "severe" or "with additional complications"'
        ),
    )



class TextWithTriples(ConfiguredBaseModel):
    # Pairs an optional Publication with a list of Triple objects.
    # ``Publication`` is defined later in this module, so the annotation is
    # a forward reference.
    publication: Optional[Publication] = Field(default=None)
    triples: Optional[List[Triple]] = Field(default_factory=list)



class RelationshipType(NamedEntity):
    # Relationship-type entity; repeats NamedEntity's ``id`` and ``label``
    # declarations without adding further fields.
    id: str = Field(
        default=None,
        description="A unique identifier for the named entity",
    )
    label: Optional[str] = Field(
        default=None,
        description="The label (name) of the named thing",
    )



class Publication(ConfiguredBaseModel):
    # Publication metadata and text; every field is an optional string
    # defaulting to None.
    id: Optional[str] = Field(
        default=None,
        description="The publication identifier",
    )
    title: Optional[str] = Field(
        default=None,
        description="The title of the publication",
    )
    abstract: Optional[str] = Field(
        default=None,
        description="The abstract of the publication",
    )
    combined_text: Optional[str] = Field(default=None)
    full_text: Optional[str] = Field(
        default=None,
        description="The full text of the publication",
    )



class AnnotatorResult(ConfiguredBaseModel):
    # Three optional string fields, each defaulting to None.
    subject_text: Optional[str] = Field(default=None)
    object_id: Optional[str] = Field(default=None)
    object_text: Optional[str] = Field(default=None)




# Update forward refs on every model defined above, in definition order, so
# annotations that name later classes (e.g. Publication inside
# TextWithTriples) are resolved.
# see https://pydantic-docs.helpmanual.io/usage/postponed_annotations/
for _model in (
    CellTypeDocument,
    InterneuronDocument,
    ExtractionResult,
    NamedEntity,
    Gene,
    Pathway,
    AnatomicalStructure,
    BrainRegion,
    CellType,
    Disease,
    Drug,
    CompoundExpression,
    Triple,
    TextWithTriples,
    RelationshipType,
    Publication,
    AnnotatorResult,
):
    _model.update_forward_refs()
# Do not leave the loop variable behind as a module attribute.
del _model

143 changes: 143 additions & 0 deletions src/ontogpt/templates/cell_type.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
# LinkML schema for extracting structured knowledge about cell types.
# CellTypeDocument is the tree root; InterneuronDocument extends it with
# projection information. The NamedEntity subclasses below declare the
# identifier prefixes and annotators used for each kind of entity.
id: http://w3id.org/ontogpt/cell_type
name: cell_type
title: Cell Type
description: >-
  A template for representing cell types
license: https://creativecommons.org/publicdomain/zero/1.0/
prefixes:
  linkml: https://w3id.org/linkml/
  cell_type: http://w3id.org/ontogpt/cell_type/
# NOTE(review): these keywords look copied from a disease/drug template --
# confirm whether they should describe cell types instead.
keywords:
  - disease
  - indication

default_prefix: cell_type
default_range: string

imports:
  - linkml:types
  - core

classes:
  CellTypeDocument:
    tree_root: true
    attributes:
      cell_type:
        description: the name of the cell type described
        # `range` is a property of the cell_type attribute, so it is nested
        # here rather than declared as a sibling attribute.
        range: CellType

      parents:
        description: categorization
        annotations:
          prompt: semicolon-separated list of parent (broader) cell types
        multivalued: true
        range: CellType

      subtypes:
        annotations:
          prompt: semicolon-separated list of the main subtypes
        multivalued: true
        range: CellType

      localizations:
        annotations:
          prompt: semicolon-separated list of anatomical structures in which this cell type is localized
        multivalued: true
        range: AnatomicalStructure

      genes:
        annotations:
          prompt: semicolon-separated list of genes expressed in cells of this type
        multivalued: true
        range: Gene

      diseases:
        annotations:
          prompt: semicolon-separated list of diseases in which this cell type is implicated
        multivalued: true
        range: Disease

  InterneuronDocument:
    is_a: CellTypeDocument

    attributes:
      projects_to_or_from:
        description: Brain structures from which this cell type projects into or receives projections from
        annotations:
          prompt: semicolon-separated list of anatomical structures from which this cell type projects from or into
        multivalued: true
        range: BrainRegion

  Gene:
    is_a: NamedEntity
    id_prefixes:
      - HGNC
      - MGI
      - PR
      - UniProtKB
    annotations:
      annotators: sqlite:obo:hgnc, bioportal:hgnc-nr

  Pathway:
    is_a: NamedEntity
    id_prefixes:
      - GO
      - PW
    annotations:
      annotators: sqlite:obo:go, sqlite:obo:pw

  AnatomicalStructure:
    is_a: NamedEntity
    id_prefixes:
      - UBERON
      - FBbt
      - WBbt
    annotations:
      annotators: sqlite:obo:uberon, sqlite:obo:fbbt, sqlite:obo:wbbt

  BrainRegion:
    is_a: AnatomicalStructure
    id_prefixes:
      - UBERON
      - FBbt
      - WBbt
    annotations:
      annotators: sqlite:obo:uberon, sqlite:obo:fbbt, sqlite:obo:wbbt
    # Restrict identifiers to the dynamic enum defined under `enums`.
    slot_usage:
      id:
        values_from:
          - BrainRegionIdentifier

  CellType:
    is_a: NamedEntity
    id_prefixes:
      - CL
      - FBbt
      - WBbt
    annotations:
      annotators: sqlite:obo:cl, sqlite:obo:fbbt, sqlite:obo:wbbt

  Disease:
    is_a: NamedEntity
    id_prefixes:
      - MONDO
      - HP
    annotations:
      annotators: sqlite:obo:mondo, sqlite:obo:hp

  Drug:
    is_a: NamedEntity
    annotations:
      annotators: sqlite:obo:chebi, sqlite:obo:drugbank

enums:

  BrainRegionIdentifier:
    description: Brain region (or for now, any nervous system part)
    reachable_from:
      source_ontology: obo:uberon
      relationship_types:
        - rdfs:subClassOf
        - BFO:0000050  ## part of
      source_nodes:
        - UBERON:0001016 ## nervous system
Loading

0 comments on commit 3183c48

Please sign in to comment.