[MAINT] annotations/tests #188

Merged · 24 commits · Feb 1, 2022
146 changes: 85 additions & 61 deletions neurostore/ingest/__init__.py
@@ -22,6 +22,7 @@
Study,
Dataset,
)
from neurostore.models.data import DatasetStudy


def ingest_neurovault(verbose=False, limit=20):
@@ -145,78 +146,101 @@ def ingest_neurosynth(max_rows=None):
if max_rows is not None:
metadata = metadata.iloc[:max_rows]
annotations = annotations.iloc[:max_rows]
# collect notes (single annotations) for each analysis
notes = []
for (metadata_row, annotation_row) in zip(
metadata.itertuples(), annotations.itertuples(index=False)
):
id_ = metadata_row.Index
study_coord_data = coord_data.loc[[id_]]
md = {
"year": int(metadata_row.year),
}
s = Study(
name=metadata_row.title,
authors=metadata_row.authors,
publication=metadata_row.journal,
metadata=md,
pmid=id_,
doi=metadata_row.doi,
source="neurosynth",
source_id=id_,
)
analyses = []
points = []

for t_id, df in study_coord_data.groupby("table_id"):
a = Analysis(name=str(t_id), study=s)
analyses.append(a)
for _, p in df.iterrows():
point = Point(
x=p["x"],
y=p["y"],
z=p["z"],
space=metadata_row.space,
kind="unknown",
analysis=a,
)
points.append(point)
# add annotation
notes.append(
AnnotationAnalysis(
note=annotation_row._asdict(),
study=s,
analysis=a,
)
)

db.session.add_all([s] + analyses + points)
db.session.commit()

# make a neurosynth dataset
# create dataset object
d = Dataset(
name="neurosynth",
description="TODO",
publication="Nature Methods",
pmid="21706013",
doi="10.1038/nmeth.1635",
authors="Yarkoni T, Poldrack RA, Nichols TE, Van Essen DC, Wager TD",
public=True,
studies=Study.query.filter_by(source="neurosynth").all(),
public=True
)

# create annotation
annot = Annotation(
name="neurosynth",
source="neurostore",
source_id=None,
description="TODO",
dataset=d,
annotation_analyses=notes,
)
studies = []
to_commit = []
with db.session.no_autoflush:
for (metadata_row, annotation_row) in zip(
metadata.itertuples(), annotations.itertuples(index=False)
):
id_ = metadata_row.Index
study_coord_data = coord_data.loc[[id_]]
md = {
"year": int(metadata_row.year),
}
s = Study(
name=metadata_row.title,
authors=metadata_row.authors,
publication=metadata_row.journal,
metadata=md,
pmid=id_,
doi=metadata_row.doi,
source="neurosynth",
source_id=id_,
)
analyses = []
points = []

for t_id, df in study_coord_data.groupby("table_id"):
a = Analysis(name=str(t_id), study=s)
analyses.append(a)
for _, p in df.iterrows():
point = Point(
x=p["x"],
y=p["y"],
z=p["z"],
space=metadata_row.space,
kind="unknown",
analysis=a,
)
points.append(point)
to_commit.extend(points)
to_commit.extend(analyses)
studies.append(s)

# add studies to dataset
d.studies = studies
db.session.add(d)
db.session.commit()

db.session.add_all([d, annot])
db.session.commit()
# create annotation object
annot = Annotation(
name="neurosynth",
source="neurostore",
source_id=None,
description="TODO",
dataset=d,
)

# collect notes (single annotations) for each analysis
notes = []
for (metadata_row, annotation_row) in zip(
metadata.itertuples(), annotations.itertuples(index=False)
):
id_ = metadata_row.Index
study_coord_data = coord_data.loc[[id_]]
study = Study.query.filter_by(pmid=id_).one()
dataset_study = DatasetStudy.query.filter_by(
study_id=study.id, dataset_id=d.id
).one()

for analysis in study.analyses:
# add annotation
notes.append(
AnnotationAnalysis(
note=annotation_row._asdict(),
analysis=analysis,
annotation=annot,
dataset_study=dataset_study,
)
)

# add notes to annotation
annot.annotation_analyses = notes

db.session.add(annot)
db.session.commit()


def ingest_neuroquery(max_rows=None):
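The new ingest_neurosynth flow above is a two-phase pattern: first build the studies, analyses, and points inside a db.session.no_autoflush block and commit them together with the Dataset (which populates the dataset_studies association table); only then create the Annotation and its AnnotationAnalysis notes, because each note's composite foreign key needs a committed DatasetStudy row to point at. A minimal sketch of that pattern follows — import paths and the rows input are assumptions, and fields are simplified; this is illustrative, not the project's actual code:

    from neurostore.models import Dataset, Study, Annotation, AnnotationAnalysis
    from neurostore.models.data import DatasetStudy

    def two_phase_ingest(db, rows):
        # Phase 1: build the object graph without premature autoflushes.
        studies = []
        with db.session.no_autoflush:
            for row in rows:
                studies.append(
                    Study(name=row["title"], pmid=row["pmid"], source="neurosynth")
                )
        d = Dataset(name="neurosynth", studies=studies, public=True)
        db.session.add(d)
        db.session.commit()  # dataset_studies association rows now exist

        # Phase 2: notes carry a composite FK onto dataset_studies, so they
        # can only be created once the membership rows are committed.
        annot = Annotation(name="neurosynth", source="neurostore", dataset=d)
        notes = []
        for row in rows:
            study = Study.query.filter_by(pmid=row["pmid"]).one()
            ds = DatasetStudy.query.filter_by(
                study_id=study.id, dataset_id=d.id
            ).one()
            for analysis in study.analyses:
                notes.append(
                    AnnotationAnalysis(
                        note=row["note"],
                        analysis=analysis,
                        annotation=annot,
                        dataset_study=ds,
                    )
                )
        annot.annotation_analyses = notes
        db.session.add(annot)
        db.session.commit()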
71 changes: 57 additions & 14 deletions neurostore/models/data.py
@@ -1,3 +1,4 @@
from sqlalchemy import event, ForeignKeyConstraint
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.ext.mutable import MutableDict
from sqlalchemy.orm import relationship, backref
@@ -50,8 +51,9 @@ class Dataset(BaseMixin, db.Model):
user = relationship("User", backref=backref("datasets"))
studies = relationship(
"Study",
cascade="all",
secondary="dataset_studies",
backref="datasets",
backref=backref("datasets"),
)
annotations = relationship("Annotation", cascade="all, delete", backref="dataset")

@@ -67,23 +69,29 @@ class Annotation(BaseMixin, db.Model):
user = relationship('User', backref=backref('annotations'))
dataset_id = db.Column(db.Text, db.ForeignKey('datasets.id'))
metadata_ = db.Column(db.JSON)
public = db.Column(db.Boolean, default=True)
annotation_analyses = relationship(
'AnnotationAnalysis',
backref=backref("annotation"),
cascade='all, delete-orphan'
)


class AnnotationAnalysis(BaseMixin, db.Model):
class AnnotationAnalysis(db.Model):
__tablename__ = "annotation_analyses"
__table_args__ = (
ForeignKeyConstraint(
('study_id', 'dataset_id'),
('dataset_studies.study_id', 'dataset_studies.dataset_id'),
ondelete="CASCADE"),
)

annotation_id = db.Column(db.Text, db.ForeignKey("annotations.id"))
analysis_id = db.Column(db.Text, db.ForeignKey("analyses.id"))
study_id = db.Column(db.Text, db.ForeignKey("studies.id"))
study_id = db.Column(db.Text, nullable=False)
dataset_id = db.Column(db.Text, nullable=False)
annotation_id = db.Column(db.Text, db.ForeignKey("annotations.id"), primary_key=True)
analysis_id = db.Column(db.Text, db.ForeignKey("analyses.id"), primary_key=True)
note = db.Column(MutableDict.as_mutable(db.JSON))

study = relationship("Study", backref=backref("annotation_analyses"))
analysis = relationship("Analysis", backref=backref("annotation_analyses"))
annotation = relationship("Annotation", backref=backref("annotation_analyses"))

user_id = db.Column(db.Text, db.ForeignKey('users.external_id'))
user = relationship('User', backref=backref('annotation_analyses'))


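AnnotationAnalysis drops BaseMixin (and its surrogate id) in favor of a composite primary key (annotation_id, analysis_id), and gains a composite foreign key (study_id, dataset_id) onto dataset_studies with ON DELETE CASCADE, so a note can only exist for a study that is actually a member of the annotation's dataset. A standalone sketch of that constraint pattern in generic SQLAlchemy — class and table names mirror the diff, but this is not the project's module, and the annotations/analyses tables are omitted:

    from sqlalchemy import Column, ForeignKeyConstraint, JSON, Text
    from sqlalchemy.orm import declarative_base, relationship

    Base = declarative_base()

    class DatasetStudy(Base):
        __tablename__ = "dataset_studies"
        study_id = Column(Text, primary_key=True)
        dataset_id = Column(Text, primary_key=True)

    class AnnotationAnalysis(Base):
        __tablename__ = "annotation_analyses"
        # (study_id, dataset_id) must match an existing dataset_studies row,
        # and the note row is deleted along with it.
        __table_args__ = (
            ForeignKeyConstraint(
                ("study_id", "dataset_id"),
                ("dataset_studies.study_id", "dataset_studies.dataset_id"),
                ondelete="CASCADE",
            ),
        )
        annotation_id = Column(Text, primary_key=True)
        analysis_id = Column(Text, primary_key=True)
        study_id = Column(Text, nullable=False)
        dataset_id = Column(Text, nullable=False)
        note = Column(JSON)
        # SQLAlchemy infers the join from the composite FK above.
        dataset_study = relationship("DatasetStudy", backref="annotation_analyses")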
class Study(BaseMixin, db.Model):
__tablename__ = "studies"
@@ -108,10 +116,17 @@ class Study(BaseMixin, db.Model):
)


class DatasetStudy(BaseMixin, db.Model):
class DatasetStudy(db.Model):
__tablename__ = "dataset_studies"
study_id = db.Column(db.ForeignKey('studies.id', ondelete='CASCADE'), primary_key=True)
dataset_id = db.Column(db.ForeignKey('datasets.id', ondelete='CASCADE'), primary_key=True)
study = relationship("Study", backref=backref("dataset_study", cascade="all, delete-orphan"))
dataset = relationship("Dataset", backref=backref("dataset_study"))
annotation_analyses = relationship(
"AnnotationAnalysis",
cascade='all, delete-orphan',
backref=backref("dataset_study")
)

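With DatasetStudy as a first-class association object, delete cascades now chain: removing a Study removes its dataset_studies rows (delete-orphan on Study.dataset_study), which in turn removes the dependent annotation_analyses rows. A hypothetical session interaction, assuming the sketch models above:

    # Deleting a study cascades through its dataset memberships.
    db.session.delete(study)
    db.session.commit()
    # -> the study's dataset_studies rows are deleted,
    # -> which deletes their annotation_analyses rows
    #    (ORM delete-orphan plus FK ON DELETE CASCADE).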

class Analysis(BaseMixin, db.Model):
@@ -140,7 +155,10 @@ class Analysis(BaseMixin, db.Model):
user_id = db.Column(db.Text, db.ForeignKey("users.external_id"))
user = relationship("User", backref=backref("analyses"))
analysis_conditions = relationship(
"AnalysisConditions", backref=backref("analysis"), cascade="all, delete"
"AnalysisConditions", backref=backref("analysis"), cascade="all, delete, delete-orphan"
)
annotation_analyses = relationship(
"AnnotationAnalysis", backref=backref("analysis"), cascade="all, delete, delete-orphan"
)


@@ -248,3 +266,28 @@ class PointValue(BaseMixin, db.Model):
point = relationship("Point", backref=backref("values"))
user_id = db.Column(db.Text, db.ForeignKey("users.external_id"))
user = relationship("User", backref=backref("point_values"))


def check_note_columns(annotation, annotation_analyses, collection_adapter):
"listen for the 'bulk_replace' event"

def _combine_compare_keys(aa1, aa2):
"""compare keys """
aa1_dict = {aa.analysis.id: set(aa.note.keys()) for aa in aa1}
aa2_dict = {aa.analysis.id: set(aa.note.keys()) for aa in aa2}
aa_dict = {}
for key in aa1_dict.keys():
if key in aa2_dict:
aa_dict[key] = aa2_dict.pop(key)
else:
aa_dict[key] = aa1_dict[key]

aa_list = [*aa_dict.values(), *aa2_dict.values()]
return all([aa_list[0] == note for note in aa_list[1:]])

all_equal = _combine_compare_keys(annotation.annotation_analyses, annotation_analyses)
if not all_equal:
raise ValueError("All analyses must have the same annotations")


event.listen(Annotation.annotation_analyses, 'bulk_replace', check_note_columns)
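The bulk_replace listener runs whenever the annotation_analyses collection is assigned wholesale (as the ingest code does with annot.annotation_analyses = notes) and rejects the assignment unless every analysis's note dict has the same key set, keeping the annotation rectangular — one column per note key. A hypothetical illustration, with object construction simplified:

    # Same key sets across analyses: accepted.
    annot.annotation_analyses = [
        AnnotationAnalysis(analysis=a1, note={"confidence": 0.9, "label": "pain"}),
        AnnotationAnalysis(analysis=a2, note={"confidence": 0.7, "label": "motor"}),
    ]

    # Mismatched key sets: the listener raises
    # ValueError("All analyses must have the same annotations").
    annot.annotation_analyses = [
        AnnotationAnalysis(analysis=a1, note={"confidence": 0.9}),
        AnnotationAnalysis(analysis=a2, note={"label": "motor"}),
    ]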
2 changes: 1 addition & 1 deletion neurostore/openapi
Submodule openapi updated 1 file
+239 −218 neurostore-openapi.yml