[MAINT] annotations/tests (#188)
* updates

* move analysis relationship with analysis_annotations to analysis table

* test that annotation_analyses are deleted when the analysis is deleted

* update openapi

* add patch functionality

* Revert "update openapi"

This reverts commit 1b0d5e3.

* add openapi changes

* Revert "add patch functionality"
remove patch functionality
This reverts commit 66a8aa0.

* check when annotations are updated that they all contain the same keys

* test proper/improper additions

* abject mess

* Revert "abject mess"

This reverts commit 287fa66.

* stable models

* delete annotationanalyses when a study is removed from dataset

* refactor ingestion to account for annotations

* fix wonkiness with deleting studies

* add datasetStudies schema

* refactor how annotationanalyses are handled

* handle tests for new annotation situations

* add new changes to openapi

* merge openapi spec with master

* fix type

* fix conflicts

* change annotation to annotation_csv when exporting
jdkent authored Feb 1, 2022
1 parent dcf3374 commit b82b3e0
Showing 9 changed files with 469 additions and 184 deletions.
146 changes: 85 additions & 61 deletions neurostore/ingest/__init__.py
@@ -22,6 +22,7 @@
    Study,
    Dataset,
)
from neurostore.models.data import DatasetStudy


def ingest_neurovault(verbose=False, limit=20):
@@ -145,78 +146,101 @@ def ingest_neurosynth(max_rows=None):
    if max_rows is not None:
        metadata = metadata.iloc[:max_rows]
        annotations = annotations.iloc[:max_rows]
    # collect notes (single annotations) for each analysis
    notes = []
    for (metadata_row, annotation_row) in zip(
        metadata.itertuples(), annotations.itertuples(index=False)
    ):
        id_ = metadata_row.Index
        study_coord_data = coord_data.loc[[id_]]
        md = {
            "year": int(metadata_row.year),
        }
        s = Study(
            name=metadata_row.title,
            authors=metadata_row.authors,
            publication=metadata_row.journal,
            metadata=md,
            pmid=id_,
            doi=metadata_row.doi,
            source="neurosynth",
            source_id=id_,
        )
        analyses = []
        points = []

        for t_id, df in study_coord_data.groupby("table_id"):
            a = Analysis(name=str(t_id), study=s)
            analyses.append(a)
            for _, p in df.iterrows():
                point = Point(
                    x=p["x"],
                    y=p["y"],
                    z=p["z"],
                    space=metadata_row.space,
                    kind="unknown",
                    analysis=a,
                )
                points.append(point)
            # add annotation
            notes.append(
                AnnotationAnalysis(
                    note=annotation_row._asdict(),
                    study=s,
                    analysis=a,
                )
            )

        db.session.add_all([s] + analyses + points)
        db.session.commit()

    # make a neurosynth dataset
    # create dataset object
    d = Dataset(
        name="neurosynth",
        description="TODO",
        publication="Nature Methods",
        pmid="21706013",
        doi="10.1038/nmeth.1635",
        authors="Yarkoni T, Poldrack RA, Nichols TE, Van Essen DC, Wager TD",
        public=True,
        studies=Study.query.filter_by(source="neurosynth").all(),
        public=True
    )

    # create annotation
    annot = Annotation(
        name="neurosynth",
        source="neurostore",
        source_id=None,
        description="TODO",
        dataset=d,
        annotation_analyses=notes,
    )
    studies = []
    to_commit = []
    with db.session.no_autoflush:
        for (metadata_row, annotation_row) in zip(
            metadata.itertuples(), annotations.itertuples(index=False)
        ):
            id_ = metadata_row.Index
            study_coord_data = coord_data.loc[[id_]]
            md = {
                "year": int(metadata_row.year),
            }
            s = Study(
                name=metadata_row.title,
                authors=metadata_row.authors,
                publication=metadata_row.journal,
                metadata=md,
                pmid=id_,
                doi=metadata_row.doi,
                source="neurosynth",
                source_id=id_,
            )
            analyses = []
            points = []

            for t_id, df in study_coord_data.groupby("table_id"):
                a = Analysis(name=str(t_id), study=s)
                analyses.append(a)
                for _, p in df.iterrows():
                    point = Point(
                        x=p["x"],
                        y=p["y"],
                        z=p["z"],
                        space=metadata_row.space,
                        kind="unknown",
                        analysis=a,
                    )
                    points.append(point)
            to_commit.extend(points)
            to_commit.extend(analyses)
            studies.append(s)

    # add studies to dataset
    d.studies = studies
    db.session.add(d)
    db.session.commit()

    db.session.add_all([d, annot])
    db.session.commit()
    # create annotation object
    annot = Annotation(
        name="neurosynth",
        source="neurostore",
        source_id=None,
        description="TODO",
        dataset=d,
    )

    # collect notes (single annotations) for each analysis
    notes = []
    for (metadata_row, annotation_row) in zip(
        metadata.itertuples(), annotations.itertuples(index=False)
    ):
        id_ = metadata_row.Index
        study_coord_data = coord_data.loc[[id_]]
        study = Study.query.filter_by(pmid=id_).one()
        dataset_study = DatasetStudy.query.filter_by(
            study_id=study.id, dataset_id=d.id
        ).one()

        for analysis in study.analyses:
            # add annotation
            notes.append(
                AnnotationAnalysis(
                    note=annotation_row._asdict(),
                    analysis=analysis,
                    annotation=annot,
                    dataset_study=dataset_study,
                )
            )

    # add notes to annotation
    annot.annotation_analyses = notes

    db.session.add(annot)
    db.session.commit()


def ingest_neuroquery(max_rows=None):
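To see the new wiring end to end, here is a minimal sketch of creating an annotation against a DatasetStudy row and of the cascade this commit adds when a study is removed from a dataset. It assumes an initialized Flask-SQLAlchemy app; the db import path and all literal values are hypothetical, while the constructors and the DatasetStudy lookup mirror the ingestion code above.

from neurostore.database import db  # assumed location of the shared db object
from neurostore.models.data import (
    Analysis,
    Annotation,
    AnnotationAnalysis,
    Dataset,
    DatasetStudy,
    Study,
)

# Hypothetical objects standing in for one ingested study.
study = Study(name="Example study", source="neurosynth")
analysis = Analysis(name="1", study=study)
dataset = Dataset(name="example", description="sketch", public=True)
dataset.studies = [study]
db.session.add(dataset)
db.session.commit()  # populates the dataset_studies association table

# Mirror the ingestion code: look up the DatasetStudy row the association
# created, then hang one note per analysis off it.
dataset_study = DatasetStudy.query.filter_by(
    study_id=study.id, dataset_id=dataset.id
).one()
annot = Annotation(name="example", source="neurostore", dataset=dataset)
annot.annotation_analyses = [
    AnnotationAnalysis(
        note={"included": True},
        analysis=analysis,
        annotation=annot,
        dataset_study=dataset_study,
    )
]
db.session.add(annot)
db.session.commit()

# The headline behavioral change: removing the study from the dataset
# deletes its dataset_studies row, and the composite ON DELETE CASCADE on
# annotation_analyses should sweep away the matching notes with it.
dataset.studies.remove(study)
db.session.commit()
assert AnnotationAnalysis.query.filter_by(analysis_id=analysis.id).count() == 0

The final assertion exercises the "delete annotationanalyses when a study is removed from dataset" bullet; it depends on the database honoring the composite ForeignKeyConstraint with ondelete="CASCADE" declared on AnnotationAnalysis in neurostore/models/data.py below.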
71 changes: 57 additions & 14 deletions neurostore/models/data.py
@@ -1,3 +1,4 @@
from sqlalchemy import event, ForeignKeyConstraint
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.ext.mutable import MutableDict
from sqlalchemy.orm import relationship, backref
@@ -50,8 +51,9 @@ class Dataset(BaseMixin, db.Model):
user = relationship("User", backref=backref("datasets"))
studies = relationship(
"Study",
cascade="all",
secondary="dataset_studies",
backref="datasets",
backref=backref("datasets"),
)
annotations = relationship("Annotation", cascade="all, delete", backref="dataset")

@@ -67,23 +69,29 @@ class Annotation(BaseMixin, db.Model):
    user = relationship('User', backref=backref('annotations'))
    dataset_id = db.Column(db.Text, db.ForeignKey('datasets.id'))
    metadata_ = db.Column(db.JSON)
    public = db.Column(db.Boolean, default=True)
    annotation_analyses = relationship(
        'AnnotationAnalysis',
        backref=backref("annotation"),
        cascade='all, delete-orphan'
    )


class AnnotationAnalysis(BaseMixin, db.Model):
class AnnotationAnalysis(db.Model):
    __tablename__ = "annotation_analyses"
    __table_args__ = (
        ForeignKeyConstraint(
            ('study_id', 'dataset_id'),
            ('dataset_studies.study_id', 'dataset_studies.dataset_id'),
            ondelete="CASCADE"),
    )

    annotation_id = db.Column(db.Text, db.ForeignKey("annotations.id"))
    analysis_id = db.Column(db.Text, db.ForeignKey("analyses.id"))
    study_id = db.Column(db.Text, db.ForeignKey("studies.id"))
    study_id = db.Column(db.Text, nullable=False)
    dataset_id = db.Column(db.Text, nullable=False)
    annotation_id = db.Column(db.Text, db.ForeignKey("annotations.id"), primary_key=True)
    analysis_id = db.Column(db.Text, db.ForeignKey("analyses.id"), primary_key=True)
    note = db.Column(MutableDict.as_mutable(db.JSON))

    study = relationship("Study", backref=backref("annotation_analyses"))
    analysis = relationship("Analysis", backref=backref("annotation_analyses"))
    annotation = relationship("Annotation", backref=backref("annotation_analyses"))

    user_id = db.Column(db.Text, db.ForeignKey('users.external_id'))
    user = relationship('User', backref=backref('annotation_analyses'))


class Study(BaseMixin, db.Model):
    __tablename__ = "studies"
@@ -108,10 +116,17 @@ class Study(BaseMixin, db.Model):
    )


class DatasetStudy(BaseMixin, db.Model):
class DatasetStudy(db.Model):
    __tablename__ = "dataset_studies"
    study_id = db.Column(db.ForeignKey('studies.id', ondelete='CASCADE'), primary_key=True)
    dataset_id = db.Column(db.ForeignKey('datasets.id', ondelete='CASCADE'), primary_key=True)
    study = relationship("Study", backref=backref("dataset_study", cascade="all, delete-orphan"))
    dataset = relationship("Dataset", backref=backref("dataset_study"))
    annotation_analyses = relationship(
        "AnnotationAnalysis",
        cascade='all, delete-orphan',
        backref=backref("dataset_study")
    )


class Analysis(BaseMixin, db.Model):
@@ -140,7 +155,10 @@ class Analysis(BaseMixin, db.Model):
    user_id = db.Column(db.Text, db.ForeignKey("users.external_id"))
    user = relationship("User", backref=backref("analyses"))
    analysis_conditions = relationship(
        "AnalysisConditions", backref=backref("analysis"), cascade="all, delete"
        "AnalysisConditions", backref=backref("analysis"), cascade="all, delete, delete-orphan"
    )
    annotation_analyses = relationship(
        "AnnotationAnalysis", backref=backref("analysis"), cascade="all, delete, delete-orphan"
    )


@@ -248,3 +266,28 @@ class PointValue(BaseMixin, db.Model):
    point = relationship("Point", backref=backref("values"))
    user_id = db.Column(db.Text, db.ForeignKey("users.external_id"))
    user = relationship("User", backref=backref("point_values"))


def check_note_columns(annotation, annotation_analyses, collection_adapter):
    """Listen for the 'bulk_replace' event on Annotation.annotation_analyses."""

    def _combine_compare_keys(aa1, aa2):
        """Check that every note carries the same key set across both collections."""
        aa1_dict = {aa.analysis.id: set(aa.note.keys()) for aa in aa1}
        aa2_dict = {aa.analysis.id: set(aa.note.keys()) for aa in aa2}
        aa_dict = {}
        for key in aa1_dict.keys():
            if key in aa2_dict:
                aa_dict[key] = aa2_dict.pop(key)
            else:
                aa_dict[key] = aa1_dict[key]

        aa_list = [*aa_dict.values(), *aa2_dict.values()]
        return all([aa_list[0] == note for note in aa_list[1:]])

    all_equal = _combine_compare_keys(annotation.annotation_analyses, annotation_analyses)
    if not all_equal:
        raise ValueError("All analyses must have the same annotations")


event.listen(Annotation.annotation_analyses, 'bulk_replace', check_note_columns)
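Because this guard listens for 'bulk_replace', it only runs when the collection is assigned wholesale (annot.annotation_analyses = [...]), which is exactly how the ingestion code attaches notes. Below is a hypothetical sketch of what it accepts and rejects; annot, dataset_study, and two persisted Analysis objects (analysis_one, analysis_two) are assumed to be set up along the lines of the ingestion sketch earlier.

# Accepted: every note in the replacement collection has the same key set.
annot.annotation_analyses = [
    AnnotationAnalysis(
        note={"included": True, "confidence": 0.9},
        analysis=analysis_one,
        dataset_study=dataset_study,
    ),
    AnnotationAnalysis(
        note={"included": False, "confidence": 0.2},
        analysis=analysis_two,
        dataset_study=dataset_study,
    ),
]

# Rejected: the second note introduces a different key set, so
# check_note_columns raises
# ValueError("All analyses must have the same annotations").
annot.annotation_analyses = [
    AnnotationAnalysis(
        note={"included": True},
        analysis=analysis_one,
        dataset_study=dataset_study,
    ),
    AnnotationAnalysis(
        note={"quality": 3},
        analysis=analysis_two,
        dataset_study=dataset_study,
    ),
]

Note that _combine_compare_keys is keyed by analysis.id, so each AnnotationAnalysis should point at an already-persisted analysis when the assignment happens.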
2 changes: 1 addition & 1 deletion neurostore/openapi
Submodule openapi updated 1 file
+239 −218 neurostore-openapi.yml