Skip to content

Commit

Permalink
rewrite epiv2 exports (#870)
Browse files Browse the repository at this point in the history
* added complete_df to each qs in managers.py

* reformatted complete_df to data extraction model

* added study mapping to complete_df

* added helper methods to querysets

* removed duplicative datasets and exports

* transformed age-profile_display

* added todos

* Implement new pattern for exports, fix test

* Remove old manager function

* cast ids to int

* coerce empty string to null

* Cleaned up docs, made requested changes

* updates

* add special comment

---------

Co-authored-by: Daniel Rabstejnek <rabstejnek@gmail.com>
Co-authored-by: Andy Shapiro <shapiromatron@gmail.com>
  • Loading branch information
3 people committed Sep 27, 2023
1 parent a131ead commit 1f9ad6c
Show file tree
Hide file tree
Showing 7 changed files with 556 additions and 298 deletions.
220 changes: 220 additions & 0 deletions hawc/apps/common/exports.py
@@ -0,0 +1,220 @@
import pandas as pd
from django.db.models import QuerySet

from .helper import FlatExport


class ModelExport:
"""Model level export module for use in Exporter class."""

def __init__(
self,
key_prefix: str = "",
query_prefix: str = "",
include: tuple[str, ...] | None = None,
exclude: tuple[str, ...] | None = None,
):
"""Instantiate an exporter instance for a given django model.
Args:
key_prefix (str, optional): The model name to prepend to data frame columns.
query_prefix (str, optional): The model prefix in the ORM.
include (tuple | None, optional): If included, only these items are added.
exclude (tuple | None, optional): If specified, items are removed from base.
"""
self.key_prefix = key_prefix + "-" if key_prefix else key_prefix
self.query_prefix = query_prefix + "__" if query_prefix else query_prefix
self.include = (key_prefix + field for field in include) if include else tuple()
self.exclude = (key_prefix + field for field in exclude) if exclude else tuple()

@property
def value_map(self) -> dict:
"""Value map of column names to ORM field names.
This caches the result from get_value_map and applies any prefixes
to the column names and ORM field names. It is also filtered down
in compliance with any include/exclude parameters.
Returns:
dict: Value map
"""
if hasattr(self, "_value_map"):
return self._value_map

value_map = self.get_value_map()
# add key prefix
if self.key_prefix:
value_map = {self.key_prefix + k: v for k, v in value_map.items()}
# add query prefix
if self.query_prefix:
value_map = {k: self.query_prefix + v for k, v in value_map.items()}
# handle any includes
if self.include:
value_map = {k: v for k, v in value_map.items() if k in self.include}
# handle any excludes
if self.exclude:
value_map = {k: v for k, v in value_map.items() if k not in self.exclude}

self._value_map = value_map
return self._value_map

@property
def annotation_map(self) -> dict:
"""Annotation map of annotated names to ORM expressions.
This caches the result from get_annotation_map and applies any
query_prefix to the annotated names. It is also filtered down
in compliance with any include/exclude parameters.
Returns:
dict: Annotation map
"""
if hasattr(self, "_annotation_map"):
return self._annotation_map

annotation_map = self.get_annotation_map(self.query_prefix)
# add query prefix
if self.query_prefix:
annotation_map = {self.query_prefix + k: v for k, v in annotation_map.items()}
# handle any includes/excludes
if self.include or self.exclude:
annotation_map = {
k: v for k, v in annotation_map.items() if k in self.value_map.values()
}

self._annotation_map = annotation_map
return self._annotation_map

def get_value_map(self) -> dict:
"""Value map of column names to ORM field names.
This should be overridden by any subclass where applicable.
Prefixes and include/exclude should not be handled in this method;
they are handled by the value_map property.
Returns:
dict: Value map
"""
return {}

def get_annotation_map(self, query_prefix: str) -> dict:
"""Annotation map of annotated names to ORM expressions.
This should be overridden by any subclass where applicable.
query_prefix for the annotated names and any include/exclude parameters
are handled by the annotation_map property.
query_prefix should still be used in the custom ORM expression
values though, since there is no way to apply that through the
annotation_map property.
Returns:
dict: Annotation map
"""
return {}

def get_column_name(self, name: str) -> str:
"""Get column name with key_prefix applied.
Args:
name (str): Column name
Returns:
str: Column name with prefix
"""
return f"{self.key_prefix}{name}"

def prepare_qs(self, qs: QuerySet) -> QuerySet:
"""Prepare the queryset for export.
This includes applying any annotations if they exist.
Args:
qs (QuerySet): Queryset to prepare
Returns:
QuerySet: Prepared queryset
"""
if self.annotation_map:
return qs.annotate(**self.annotation_map)
return qs

def prepare_df(self, df: pd.DataFrame) -> pd.DataFrame:
"""Prepare the dataframe for export.
This should be overridden by any subclass where applicable.
Any data manipulations that couldn't be done by the ORM
should be done in this method.
Args:
df (pd.DataFrame): Dataframe to manipulate
Returns:
pd.DataFrame: Manipulated dataframe
"""
return df

def get_df(self, qs: QuerySet) -> pd.DataFrame:
"""Get dataframe export from queryset.
Args:
qs (QuerySet): Queryset
Returns:
pd.DataFrame: Dataframe
"""
qs = self.prepare_qs(qs)
df = pd.DataFrame(
data=qs.values_list(*self.value_map.values()), columns=list(self.value_map.keys())
)
return self.prepare_df(df)


class Exporter:
    """Queryset-level data export.

    Combines several ModelExport modules over a single queryset and
    returns the merged result as a dataframe via ``get_df``.
    """

    def build_modules(self) -> list[ModelExport]:
        """Return the ModelExport instances that make up this export.

        Subclasses must override this. Give each ModelExport its own
        key_prefix and query_prefix so that column names stay unique and
        the ORM lookups follow the right relationships.

        Returns:
            list[ModelExport]: Modules used to build the export
        """
        raise NotImplementedError()

    def get_df(self, qs: QuerySet) -> pd.DataFrame:
        """Build the export dataframe for a queryset.

        Args:
            qs (QuerySet): Queryset to export

        Returns:
            pd.DataFrame: One column per value_map entry across all modules
        """
        modules = self._modules = self.build_modules()

        # let every module annotate the queryset first
        for exporter in modules:
            qs = exporter.prepare_qs(qs)

        # gather ORM fields and their column labels, in module order
        fields: list = []
        columns: list = []
        for exporter in modules:
            mapping = exporter.value_map
            fields.extend(mapping.values())
            columns.extend(mapping.keys())

        frame = pd.DataFrame(data=qs.values_list(*fields), columns=columns)

        # then let every module post-process the combined frame
        for exporter in modules:
            frame = exporter.prepare_df(frame)
        return frame

    @classmethod
    def flat_export(cls, qs: QuerySet, filename: str) -> FlatExport:
        """Run the export and wrap the result in a FlatExport.

        Args:
            qs (QuerySet): the initial QuerySet
            filename (str): the filename for the export
        """
        return FlatExport(df=cls().get_df(qs), filename=filename)
28 changes: 26 additions & 2 deletions hawc/apps/common/models.py
Expand Up @@ -10,8 +10,8 @@
from django.core.exceptions import ObjectDoesNotExist, SuspiciousOperation
from django.core.files.storage import FileSystemStorage
from django.db import IntegrityError, connection, models, router, transaction
from django.db.models import Case, CharField, Choices, Q, QuerySet, URLField, Value, When
from django.db.models.functions import Coalesce
from django.db.models import Case, CharField, Choices, Q, QuerySet, TextField, URLField, Value, When
from django.db.models.functions import Coalesce, Concat
from django.template.defaultfilters import slugify as default_slugify
from django.utils.html import strip_tags
from treebeard.mp_tree import MP_Node
Expand Down Expand Up @@ -534,6 +534,30 @@ def sql_display(name: str, Choice: type[Choices]) -> Case:
)


def sql_format(format_str: str, *field_params) -> Concat:
    """Create an ORM expression to simulate a format string.

    Args:
        format_str (str): Format string. Any {} present in the string
            will be replaced by field_params.
        *field_params: One field name or ORM expression per {} placeholder,
            in order of appearance.

    Returns:
        Concat: An expression that generates a string

    Raises:
        ValueError: If the number of {} placeholders differs from the
            number of field_params.
    """
    # N placeholders always split the string into N + 1 literal pieces
    literals = format_str.split("{}")
    if len(literals) - 1 != len(field_params):
        raise ValueError("field params must be equal to value params.")

    concat_args = []
    # interleave each non-empty literal piece with the field that follows it
    for literal, field in zip(literals, field_params):
        if literal:
            concat_args.append(Value(literal))
        concat_args.append(field)
    # text after the final placeholder (or the whole string if there are none)
    if literals[-1]:
        concat_args.append(Value(literals[-1]))

    # Concat refuses fewer than two expressions; pad with empty strings so that
    # inputs such as "{}" or a placeholder-free string still build an expression.
    while len(concat_args) < 2:
        concat_args.append(Value(""))
    return Concat(*concat_args, output_field=TextField())


def replace_null(field: str, replacement: str = ""):
"""Replace null values with a replacement string
Expand Down
4 changes: 2 additions & 2 deletions hawc/apps/epiv2/api.py
Expand Up @@ -40,8 +40,8 @@ def export(self, request, pk):
.published_only(published_only)
.complete()
)
exporter = exports.EpiFlatComplete(qs, filename=f"{assessment}-epi")
return Response(exporter.build_export())
exporter = exports.EpiV2Exporter.flat_export(qs, filename=f"{assessment}-epi")
return Response(exporter)

@action(
detail=True,
Expand Down

0 comments on commit 1f9ad6c

Please sign in to comment.