Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rewrite epiv2 exports #870

Merged
merged 16 commits into from Sep 27, 2023
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
199 changes: 199 additions & 0 deletions hawc/apps/common/exports.py
@@ -0,0 +1,199 @@
import pandas as pd
from django.db.models import QuerySet


class ModelExport:
"""Model level export module for use in Exporter class."""

def __init__(
self,
key_prefix: str = "",
query_prefix: str = "",
include: tuple | None = None,
exclude: tuple | None = None,
):
self.key_prefix = key_prefix + "-" if key_prefix else key_prefix
self.query_prefix = query_prefix + "__" if query_prefix else query_prefix
self.include = (key_prefix + field for field in include) if include else tuple()
self.exclude = (key_prefix + field for field in exclude) if exclude else tuple()

@property
def value_map(self) -> dict:
"""Value map of column names to ORM field names.

This caches the result from get_value_map and applies any prefixes
to the column names and ORM field names. It is also filtered down
in compliance with any include/exclude parameters.

Returns:
dict: Value map
"""
if hasattr(self, "_value_map"):
return self._value_map

value_map = self.get_value_map()
# add key prefix
if self.key_prefix:
value_map = {self.key_prefix + k: v for k, v in value_map.items()}
# add query prefix
if self.query_prefix:
value_map = {k: self.query_prefix + v for k, v in value_map.items()}
# handle any includes
if self.include:
value_map = {k: v for k, v in value_map.items() if k in self.include}
# handle any excludes
if self.exclude:
value_map = {k: v for k, v in value_map.items() if k not in self.exclude}

self._value_map = value_map
return self._value_map

@property
def annotation_map(self) -> dict:
"""Annotation map of annotated names to ORM expressions.

This caches the result from get_annotation_map and applies any
query_prefix to the annotated names. It is also filtered down
in compliance with any include/exclude parameters.

Returns:
dict: Annotation map
"""
if hasattr(self, "_annotation_map"):
return self._annotation_map

annotation_map = self.get_annotation_map(self.query_prefix)
# add query prefix
if self.query_prefix:
annotation_map = {self.query_prefix + k: v for k, v in annotation_map.items()}
# handle any includes/excludes
if self.include or self.exclude:
annotation_map = {
k: v for k, v in annotation_map.items() if k in self.value_map.values()
}

self._annotation_map = annotation_map
return self._annotation_map

def get_value_map(self) -> dict:
"""Value map of column names to ORM field names.

This should be overridden by any subclass where applicable.
Prefixes and include/exclude should not be handled in this method;
they are handled by the value_map property.

Returns:
dict: Value map
"""
return {}

def get_annotation_map(self, query_prefix: str) -> dict:
"""Annotation map of annotated names to ORM expressions.

This should be overridden by any subclass where applicable.
query_prefix for the annotated names and any include/exclude parameters
are handled by the annotation_map property.
query_prefix should still be used in the custom ORM expression
values though, since there is no way to apply that through the
annotation_map property.

Returns:
dict: Annotation map
"""
return {}

def get_column_name(self, name: str) -> str:
"""Get column name with key_prefix applied.

Args:
name (str): Column name

Returns:
str: Column name with prefix
"""
return f"{self.key_prefix}{name}"

def prepare_qs(self, qs: QuerySet) -> QuerySet:
"""Prepare the queryset for export.

This includes applying any annotations if they exist.

Args:
qs (QuerySet): Queryset to prepare

Returns:
QuerySet: Prepared queryset
"""
if self.annotation_map:
return qs.annotate(**self.annotation_map)
return qs

def prepare_df(self, df: pd.DataFrame) -> pd.DataFrame:
"""Prepare the dataframe for export.

This should be overridden by any subclass where applicable.
Any data manipulations that couldn't be done by the ORM
should be done in this method.

Args:
df (pd.DataFrame): Dataframe to manipulate

Returns:
pd.DataFrame: Manipulated dataframe
"""
return df

def get_df(self, qs: QuerySet) -> pd.DataFrame:
"""Get dataframe export from queryset.

Args:
qs (QuerySet): Queryset

Returns:
pd.DataFrame: Dataframe
"""
qs = self.prepare_qs(qs)
df = pd.DataFrame(
data=qs.values_list(*self.value_map.values()), columns=list(self.value_map.keys())
)
return self.prepare_df(df)


class Exporter:
rabstejnek marked this conversation as resolved.
Show resolved Hide resolved
"""Data export for querysets.

This class runs multiple ModelExports on a queryset
and outputs a dataframe through the get_df method.
"""

def build_modules(self) -> list[ModelExport]:
"""ModelExport instances to use for exporter.

This should be overridden by any subclass.
A key_prefix and query_prefix should be given to
each ModelExport so that the column names don't clash
and the ORM correctly navigates relationships.

Returns:
list[ModelExport]: List of ModelExports to build export with
"""
raise NotImplementedError()

def get_df(self, qs: QuerySet) -> pd.DataFrame:
"""Get dataframe export from queryset.

Args:
qs (QuerySet): Queryset

Returns:
pd.DataFrame: Dataframe
"""
self._modules = self.build_modules()
for module in self._modules:
qs = module.prepare_qs(qs)
values = [value for module in self._modules for value in module.value_map.values()]
keys = [key for module in self._modules for key in module.value_map.keys()]
df = pd.DataFrame(data=qs.values_list(*values), columns=keys)
for module in self._modules:
df = module.prepare_df(df)
return df
24 changes: 22 additions & 2 deletions hawc/apps/common/models.py
Expand Up @@ -10,8 +10,8 @@
from django.core.exceptions import ObjectDoesNotExist, SuspiciousOperation
from django.core.files.storage import FileSystemStorage
from django.db import IntegrityError, connection, models, router, transaction
from django.db.models import Case, CharField, Choices, Q, QuerySet, URLField, Value, When
from django.db.models.functions import Coalesce
from django.db.models import Case, CharField, Choices, Q, QuerySet, TextField, URLField, Value, When
from django.db.models.functions import Coalesce, Concat
from django.template.defaultfilters import slugify as default_slugify
from django.utils.html import strip_tags
from treebeard.mp_tree import MP_Node
Expand Down Expand Up @@ -534,6 +534,26 @@ def sql_display(name: str, Choice: type[Choices]) -> Case:
)


def sql_format(format_str: str, *field_params) -> Concat:
"""Create an ORM expression to simulate a format string.

Args:
format_str (str): Format string. Any {} present in the string
will be replaced by field_params.

Returns:
Concat: An expression that generates a string
"""
value_params = format_str.split("{}")
replace_num = min(len(value_params) - 1, len(field_params))
concat_args = []
for i in range(replace_num):
concat_args.append(Value(value_params[i]))
concat_args.append(field_params[i])
concat_args.append(Value("".join(value_params[replace_num:])))
return Concat(*concat_args, output_field=TextField())


def replace_null(field: str, replacement: str = ""):
"""Replace null values with a replacement string

Expand Down