Make GNN plots available as an SPT CLI command and API endpoint #320

Merged: 19 commits, May 15, 2024. (Changes shown are from 12 commits.)
9 changes: 0 additions & 9 deletions analysis_replication/README.md
@@ -38,12 +38,3 @@ To run the figure generation script, alter the command below to reference your o
```bash
python retrieve_example_plot.py dataset_directory/ ~/.spt_db.config
```

# GNN importance fractions figure generation

Another figure is generated programmatically from extractions from Graph Neural Network models, provided by the API.

```bash
cd gnn_figure/
python graph_plugin_plots.py
```
1 change: 1 addition & 0 deletions build/apiserver/Dockerfile
@@ -16,6 +16,7 @@ RUN python -m pip install scikit-learn==1.2.2
RUN python -m pip install Pillow==9.5.0
RUN python -m pip install pydantic==2.0.2
RUN python -m pip install secure==0.3.0
RUN python -m pip install matplotlib==3.7.1
ARG version
ARG service_name
ARG WHEEL_FILENAME
2 changes: 1 addition & 1 deletion build/apiserver/Makefile
@@ -81,4 +81,4 @@ ${TESTS}: setup-testing
clean:
>@rm -f ${WHEEL_FILENAME}
>@rm -f status_code
>@for f in dlogs.db.txt dlogs.api.txt dlogs.od.txt ../../${TEST_LOCATION}\/${MODULE_NAME}/_proximity.json ../../${TEST_LOCATION}\/${MODULE_NAME}/_squidpy.json; do rm -f $$f; done;
>@for f in dlogs.db.txt dlogs.api.txt dlogs.od.txt ../../${TEST_LOCATION}\/${MODULE_NAME}/_proximity.json ../../${TEST_LOCATION}\/${MODULE_NAME}/_squidpy.json ../../${TEST_LOCATION}\/${MODULE_NAME}/_gnn.svg ; do rm -f $$f; done;
7 changes: 7 additions & 0 deletions build/build_scripts/.graph_transformer.config
@@ -0,0 +1,7 @@
[general]
db_config_file_path = build/db/.spt_db.config.local
study_name = Melanoma intralesional IL2

[upload-importances]
plugin_used = graph-transformer
datetime_of_run = 2023-10-02 10:46 AM
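As a quick sketch, this new config fragment is in standard INI form and can be read with Python's `configparser` (the text below is copied from the fragment above; how SPT itself parses it is not shown in this diff):

```python
from configparser import ConfigParser

# The .graph_transformer.config fragment added in this PR.
CONFIG_TEXT = """
[general]
db_config_file_path = build/db/.spt_db.config.local
study_name = Melanoma intralesional IL2

[upload-importances]
plugin_used = graph-transformer
datetime_of_run = 2023-10-02 10:46 AM
"""

parser = ConfigParser()
parser.read_string(CONFIG_TEXT)
plugin = parser.get('upload-importances', 'plugin_used')
study_name = parser.get('general', 'study_name')
print(plugin)  # graph-transformer
```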
6 changes: 3 additions & 3 deletions build/build_scripts/expected_table_counts.txt
@@ -9,15 +9,15 @@
diagnosis 2
diagnostic_selection_criterion 4
expression_quantification 18200
feature_specification 1
feature_specifier 4
feature_specification 2
feature_specifier 8
histological_structure 700
histological_structure_identification 700
histology_assessment_process 7
intervention 2
plane_coordinates_reference_system 0
publication 2
quantitative_feature_value 700
quantitative_feature_value 1400
research_professional 32
shape_file 700
specimen_collection_process 7
4 changes: 3 additions & 1 deletion build/build_scripts/import_test_dataset1.sh
@@ -10,8 +10,10 @@ nextflow run .
rm -f .nextflow.log*; rm -rf .nextflow/; rm -f configure.sh; rm -f run.sh; rm -f main.nf; rm -f nextflow.config; rm -rf work/; rm -rf results/

spt graphs upload-importances --config_path=build/build_scripts/.graph.config --importances_csv_path=test/test_data/gnn_importances/1.csv
spt graphs upload-importances --config_path=build/build_scripts/.graph_transformer.config --importances_csv_path=test/test_data/gnn_importances/1.csv

spt db upload-sync-findings --database-config-file=build/db/.spt_db.config.local test/test_data/findings.json
spt db upload-sync-small --database-config-file=build/db/.spt_db.config.local findings test/test_data/findings.json
spt db upload-sync-small --database-config-file=build/db/.spt_db.config.local gnn_plot_configurations test/test_data/gnn_plot.json
Review comment on lines +13 to +16 (Collaborator, Author): Both old and new versions of this build script reach into the test directory for data.
spt db status --database-config-file build/db/.spt_db.config.local > table_counts.txt
diff build/build_scripts/expected_table_counts.txt table_counts.txt
4 changes: 3 additions & 1 deletion docs/maintenance.md
@@ -19,7 +19,9 @@ The modules in this repository are built, tested, and deployed using `make` and
| [Docker Engine](https://docs.docker.com/engine/install/) | 20.10.17 |
| [Docker Compose](https://docs.docker.com/compose/install/) | 2.10.2 |
| [bash](https://www.gnu.org/software/bash/) | >= 4 |
| [python](https://www.python.org/downloads/) | >=3.7 |
| [python](https://www.python.org/downloads/) | >=3.7 <3.12 |
| [postgresql](https://www.postgresql.org/download/) | 13.4 |
| [toml](https://pypi.org/project/toml/) | 0.10.2 |

A typical development workflow looks like:

9 changes: 9 additions & 0 deletions environment.yml
@@ -0,0 +1,9 @@
name: spt
channels:
- conda-forge
dependencies:
- python=3.11
- toml
- make
- bash
- postgresql
4 changes: 3 additions & 1 deletion pyproject.toml.unversioned
@@ -31,6 +31,7 @@ repository = "https://github.com/nadeemlab/SPT"

[project.optional-dependencies]
apiserver = [
"matplotlib==3.7.1",
"fastapi==0.100.0",
"uvicorn>=0.15.0,<0.16.0",
"pandas==2.0.2",
@@ -190,7 +191,7 @@ packages = [
"drop.py",
"drop_ondemand_computations.py",
"delete_feature.py",
"upload_sync_findings.py",
"upload_sync_small.py",
"collection.py",
]
"spatialprofilingtoolbox.db.data_model" = [
@@ -211,6 +212,7 @@ packages = [
"extract.py",
"finalize_graphs.py",
"generate_graphs.py",
"plot_importance_fractions.py",
"plot_interactives.py",
"prepare_graph_creation.py",
"upload_importances.py",
66 changes: 66 additions & 0 deletions spatialprofilingtoolbox/apiserver/app/main.py
@@ -2,6 +2,7 @@

from typing import cast
from typing import Annotated
from typing import Literal
import json
from io import BytesIO
from base64 import b64decode
@@ -12,6 +13,7 @@
from fastapi.responses import StreamingResponse
from fastapi import Query
from fastapi import HTTPException
import matplotlib.pyplot as plt

import secure

@@ -42,6 +44,9 @@
ValidChannelListNegatives2,
ValidFeatureClass,
)
from spatialprofilingtoolbox.graphs.config_reader import read_plot_importance_fractions_config
from spatialprofilingtoolbox.graphs.importance_fractions import PlotGenerator

VERSION = '0.23.0'

TITLE = 'Single cell studies data API'
@@ -68,6 +73,7 @@

CELL_DATA_CELL_LIMIT = 100001


def custom_openapi():
if app.openapi_schema:
return app.openapi_schema
@@ -113,6 +119,7 @@ async def get_study_names(
"""The names of studies/datasets, with display names."""
specifiers = query().retrieve_study_specifiers()
handles = [query().retrieve_study_handle(study) for study in specifiers]

def is_public(study_handle: StudyHandle) -> bool:
if StudyCollectionNaming.is_untagged(study_handle):
return True
@@ -128,6 +135,7 @@ def is_public(study_handle: StudyHandle) -> bool:
status_code=404,
detail=f'Collection "{collection}" is not a valid collection string.',
)

def tagged(study_handle: StudyHandle) -> bool:
return StudyCollectionNaming.tagged_with(study_handle, collection)
handles = list(filter(tagged, map(query().retrieve_study_handle, specifiers)))
@@ -341,6 +349,7 @@ async def get_cell_data(
if not sample in query().get_sample_names(study):
raise HTTPException(status_code=404, detail=f'Sample "{sample}" does not exist.')
number_cells = cast(int, query().get_number_cells(study))

def match(c: PhenotypeCount) -> bool:
return c.specimen == sample
count = tuple(filter(match, get_phenotype_counts([], [], study, number_cells).counts))[0].count
@@ -375,3 +384,60 @@ async def get_plot_high_resolution(
def streaming_iteration():
yield from input_buffer
return StreamingResponse(streaming_iteration(), media_type="image/png")


@app.get("/importance-fraction-plot/")
async def importance_fraction_plot(
study: ValidStudy,
img_format: Literal['svg', 'png'] = 'svg',
) -> StreamingResponse:
"""Return a plot of the fraction of important cells expressing a given phenotype."""
settings: str = cast(list[str], query().get_study_gnn_plot_configurations(study))[0]
(
_,
_,
phenotypes,
cohorts,
plugins,
figure_size,
orientation,
) = read_plot_importance_fractions_config(None, settings)

plot = PlotGenerator(
(
get_anonymous_phenotype_counts_fast,
get_study_summary,
get_phenotype_criteria,
importance_composition,
),
study,
phenotypes,
cohorts,
plugins,
figure_size,
orientation,
).generate_plot()
plt.figure(plot.number)
buf = BytesIO()
plt.savefig(buf, format=img_format)
buf.seek(0)
return StreamingResponse(buf, media_type=f"image/{img_format}")
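The endpoint above never touches the filesystem: it makes the target figure current, saves it into an in-memory buffer, and streams the buffer. A standalone sketch of that savefig-to-`BytesIO` step (the `Agg` backend is forced so the snippet runs headless; the plotted data is a placeholder):

```python
from io import BytesIO

import matplotlib
matplotlib.use('Agg')  # headless backend, no display required
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(4, 3))
plt.plot([0, 1, 2], [0, 1, 4])

plt.figure(fig.number)   # make the target figure current, as the endpoint does
buf = BytesIO()
plt.savefig(buf, format='svg')
buf.seek(0)
payload = buf.read()     # bytes suitable for a StreamingResponse body
```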


def parse_gnn_plot_settings(settings: list[str]) -> tuple[
list[str],
list[tuple[int, str]],
list[str],
tuple[int, int],
str,
]:
phenotypes = settings[0].split(', ')
plugins = settings[1].split(', ')
figure_size = tuple(map(int, settings[2].split(', ')))
assert len(figure_size) == 2
orientation = settings[3]
cohorts: list[tuple[int, str]] = []
for cohort in settings[4:]:
count, name = cohort.split(', ')
cohorts.append((int(count), name))
return phenotypes, cohorts, plugins, figure_size, orientation
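To illustrate the flat settings format `parse_gnn_plot_settings` expects — phenotypes, plugins, figure size, and orientation rows, followed by one "count, name" row per cohort — here is a worked example with made-up values (the phenotype, plugin, and cohort names are illustrative, not taken from the dataset):

```python
# Hypothetical settings rows in the format parsed above.
settings = [
    'Tumor, Cytotoxic T cell',    # phenotypes, comma-space separated
    'cg-gnn, graph-transformer',  # plugins
    '12, 8',                      # figure size: width, height
    'horizontal',                 # orientation
    '7, Responder',               # cohort: sample count, cohort name
    '9, Non-responder',
]

phenotypes = settings[0].split(', ')
plugins = settings[1].split(', ')
figure_size = tuple(map(int, settings[2].split(', ')))
orientation = settings[3]
cohorts: list[tuple[int, str]] = []
for row in settings[4:]:
    count, name = row.split(', ')
    cohorts.append((int(count), name))
print(cohorts)  # [(7, 'Responder'), (9, 'Non-responder')]
```

Note that the `', '` delimiter means a cohort name containing a comma-space would break the unpacking, which is one reason to keep these strings machine-generated.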
8 changes: 7 additions & 1 deletion spatialprofilingtoolbox/db/accessors/study.py
@@ -83,7 +83,13 @@ def get_available_gnn(self, study: str) -> AvailableGNN:
return AvailableGNN(plugins=tuple(specifier for (specifier, ) in rows))

def get_study_findings(self) -> list[str]:
self.cursor.execute('SELECT txt FROM findings ORDER BY id;')
return self._get_study_small_artifacts('findings')

def get_study_gnn_plot_configurations(self) -> list[str]:
return self._get_study_small_artifacts('gnn_plot_configurations')

def _get_study_small_artifacts(self, name: str) -> list[str]:
self.cursor.execute(f'SELECT txt FROM {name} ORDER BY id;')
return [row[0] for row in self.cursor.fetchall()]

@staticmethod
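The refactor above folds `get_study_findings` and the new `get_study_gnn_plot_configurations` into one helper that interpolates the table name into the query. A minimal sketch of the pattern against an in-memory sqlite3 database (table rows are illustrative; since the table name is an f-string interpolation, it must only ever come from trusted code, never user input):

```python
import sqlite3

conn = sqlite3.connect(':memory:')
cursor = conn.cursor()
for table in ('findings', 'gnn_plot_configurations'):
    cursor.execute(f'CREATE TABLE {table} (id INTEGER PRIMARY KEY, txt TEXT)')
cursor.execute("INSERT INTO findings (txt) VALUES ('finding one'), ('finding two')")
cursor.execute("INSERT INTO gnn_plot_configurations (txt) VALUES ('plot config')")

def get_study_small_artifacts(name: str) -> list[str]:
    # Mirrors _get_study_small_artifacts: table name interpolated, rows ordered by id.
    cursor.execute(f'SELECT txt FROM {name} ORDER BY id;')
    return [row[0] for row in cursor.fetchall()]

print(get_study_small_artifacts('findings'))  # ['finding one', 'finding two']
```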
1 change: 1 addition & 0 deletions spatialprofilingtoolbox/db/database_connection.py
@@ -266,6 +266,7 @@ class (QueryCursor) newly provides on each invocation.
get_sample_names: Callable
get_available_gnn: Callable
get_study_findings: Callable
get_study_gnn_plot_configurations: Callable
is_public_collection: Callable

def __init__(self, query_handler: Type):
4 changes: 4 additions & 0 deletions spatialprofilingtoolbox/db/querying.py
@@ -62,6 +62,10 @@ def get_available_gnn(cls, cursor, study: str) -> AvailableGNN:
def get_study_findings(cls, cursor, study: str) -> list[str]:
return StudyAccess(cursor).get_study_findings()

@classmethod
def get_study_gnn_plot_configurations(cls, cursor, study: str) -> list[str]:
return StudyAccess(cursor).get_study_gnn_plot_configurations()

@classmethod
def get_composite_phenotype_identifiers(cls, cursor) -> tuple[str, ...]:
return sort(PhenotypesAccess(cursor).get_composite_phenotype_identifiers())
71 changes: 0 additions & 71 deletions spatialprofilingtoolbox/db/scripts/upload_sync_findings.py

This file was deleted.
