Skip to content

Commit

Permalink
Merge bdcad96 into a0ee1b3
Browse files Browse the repository at this point in the history
  • Loading branch information
twheys authored Aug 6, 2018
2 parents a0ee1b3 + bdcad96 commit 90561a2
Show file tree
Hide file tree
Showing 15 changed files with 1,684 additions and 90 deletions.
2 changes: 2 additions & 0 deletions fireant/formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

INFINITY = "Infinity"
NULL_VALUE = 'null'
TOTALS_VALUE = 'totals'
RAW_VALUE = 'raw'

NO_TIME = time(0)

Expand Down
2 changes: 1 addition & 1 deletion fireant/slicer/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def __init__(self, key, label=None, definition=None, display_definition=None):
used for querying labels.
"""
self.key = key
self.label = label or key
self.label = label if label is not None else key
self.definition = definition

self.display_definition = display_definition
Expand Down
2 changes: 1 addition & 1 deletion fireant/slicer/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def _apply(self, dimension_definition, patterns):
return definition

def __init__(self, dimension_definition, pattern, *patterns):
definition = self._apply(dimension_definition, [pattern, *patterns])
definition = self._apply(dimension_definition, (pattern,) + patterns)
super(PatternFilter, self).__init__(definition)


Expand Down
42 changes: 26 additions & 16 deletions fireant/slicer/queries/database.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import time
from functools import partial
from typing import Iterable

import pandas as pd

from fireant.database.base import Database
from fireant.formats import NULL_VALUE
from fireant.formats import (
NULL_VALUE,
TOTALS_VALUE,
)
from fireant.utils import format_dimension_key
from .logger import (
query_logger,
Expand Down Expand Up @@ -65,7 +69,7 @@ def clean_and_apply_index(data_frame: pd.DataFrame, dimensions: Iterable[Dimensi
continue

level = format_dimension_key(dimension.key)
data_frame[level] = fill_nans_in_level(data_frame, dimension, dimension_keys[:i]) \
data_frame[level] = fill_nans_in_level(data_frame, dimensions[:i + 1]) \
.apply(
# Handles an annoying case of pandas in which the ENTIRE data frame gets converted from int to float if
# there are NaNs, even if there are no NaNs in the column :/
Expand All @@ -76,7 +80,7 @@ def clean_and_apply_index(data_frame: pd.DataFrame, dimensions: Iterable[Dimensi
return data_frame.set_index(dimension_keys)


def fill_nans_in_level(data_frame, dimension, preceding_dimension_keys):
def fill_nans_in_level(data_frame, dimensions):
"""
In case there are NaN values representing both totals (from ROLLUP) and database nulls, we need to replace the real
nulls with an empty string in order to distinguish between them. We choose to replace the actual database nulls
Expand All @@ -85,33 +89,39 @@ def fill_nans_in_level(data_frame, dimension, preceding_dimension_keys):
:param data_frame:
The data_frame we are replacing values in.
:param dimension:
The level of the data frame to replace nulls in. This function should be called once per non-continuous
dimension, in the order of the dimensions.
:param preceding_dimension_keys:
:param dimensions:
A list of dimensions with the last item in the list being the dimension to fill NaNs for. This function requires
the dimension being processed as well as the preceding dimensions since a rollup in a higher level dimension
results in nulls for the lower level dimensions.
:return:
The level in the data_frame with the nulls replaced with empty string
"""
level = format_dimension_key(dimension.key)
level = format_dimension_key(dimensions[-1].key)

number_rollup_dimensions = sum(dimension.is_rollup for dimension in dimensions)
if 0 < number_rollup_dimensions:
fill_nan_for_nulls = partial(_fill_nan_for_nulls, offset=number_rollup_dimensions)

if 1 < len(dimensions):
preceding_dimension_keys = [format_dimension_key(d.key)
for d in dimensions[:-1]]

if dimension.is_rollup:
if preceding_dimension_keys:
return (data_frame
.groupby(preceding_dimension_keys)[level]
.apply(_fill_nan_for_nulls))
.apply(fill_nan_for_nulls))

return _fill_nan_for_nulls(data_frame[level])
return fill_nan_for_nulls(data_frame[level])

return data_frame[level].fillna(NULL_VALUE)


def _fill_nan_for_nulls(df):
def _fill_nan_for_nulls(df, offset=1):
"""
Fills the first NaN with the literal string "null" when there are more than `offset` NaN values; all remaining NaNs
are filled with the literal string "totals".
:param df:
:return:
"""
if 1 < pd.isnull(df).sum():
return df.fillna(NULL_VALUE, limit=1)
return df
if offset < pd.isnull(df).sum():
return df.fillna(NULL_VALUE, limit=1).fillna(TOTALS_VALUE)
return df.fillna(TOTALS_VALUE)
1 change: 1 addition & 0 deletions fireant/slicer/widgets/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from .base import Widget
from .csv import CSV
from .datatables import DataTablesJS
from .reacttable import ReactTable
from .highcharts import HighCharts
from .matplotlib import Matplotlib
from .pandas import Pandas
10 changes: 7 additions & 3 deletions fireant/slicer/widgets/base.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
from fireant import Metric
from typing import Union

from fireant import (
Metric,
Operation,
)
from fireant.slicer.exceptions import MetricRequiredException
from fireant.utils import immutable
from ..operations import Operation


class Widget:
def __init__(self, *items: Metric):
def __init__(self, *items: Union[Metric, Operation]):
self.items = list(items)

@immutable
Expand Down
1 change: 1 addition & 0 deletions fireant/slicer/widgets/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@
class CSV(Pandas):
    """Widget that serializes the result of the ``Pandas`` widget transform to CSV."""

    def transform(self, data_frame, slicer, dimensions, references):
        """
        Transform the data frame using the parent widget and render the result as CSV.

        :param data_frame:
            Passed through unchanged to the parent ``transform``.
        :param slicer:
            Passed through unchanged to the parent ``transform``.
        :param dimensions:
            Passed through unchanged to the parent ``transform``.
        :param references:
            Passed through unchanged to the parent ``transform``.
        :return:
            The return value of ``DataFrame.to_csv()`` called without a target path.
        """
        result_df = super(CSV, self).transform(data_frame, slicer, dimensions, references)

        # Clear the name of every column level. Assigning a one-element list only works for a flat column index;
        # a MultiIndex (such as a pivoted result may produce) requires exactly one name per level.
        result_df.columns.names = [None] * result_df.columns.nlevels

        return result_df.to_csv()
52 changes: 43 additions & 9 deletions fireant/slicer/widgets/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@


class Pandas(TransformableWidget):
def __init__(self, metric, *metrics: Metric, pivot=False, max_columns=None):
def __init__(self, metric, *metrics: Metric, pivot=(), transpose=False, max_columns=None):
super(Pandas, self).__init__(metric, *metrics)
self.pivot = pivot
self.transpose = transpose
self.max_columns = min(max_columns, HARD_MAX_COLUMNS) \
if max_columns is not None \
else HARD_MAX_COLUMNS
Expand Down Expand Up @@ -71,15 +72,46 @@ def transform(self, data_frame, slicer, dimensions, references):
result.index.names = [dimension.label or dimension.key
for dimension in dimensions]

result.columns = [reference_label(item, reference)
for reference in [None] + references
for item in self.items]
result.columns = pd.Index([reference_label(item, reference)
for item in self.items
for reference in [None] + references],
name='Metrics')

if not self.pivot:
return result
return self.pivot_data_frame(result, [d.label or d.key for d in self.pivot], self.transpose)

@staticmethod
def pivot_data_frame(data_frame, pivot=(), transpose=False):
"""
Pivot and transpose the data frame. Dimensions included in the `pivot` arg will be unstacked to columns. If
`transpose` is True the data frame will be transposed. If there is only one index level in the data frame (i.e. one
dimension), and that dimension is pivoted, then the data frame will just be transposed. If there is a single
metric in the data frame and at least one dimension pivoted, the metrics column level will be dropped for
simplicity.
:param data_frame:
:param pivot:
:param transpose:
:return:
"""
if not (pivot or transpose):
return data_frame

# NOTE: Don't pivot a single dimension data frame. This turns the data frame into a series and pivots the
# metrics anyway. Instead, transpose the data frame.
should_transpose_instead_of_pivot = len(pivot) == len(data_frame.index.names)

if pivot and not should_transpose_instead_of_pivot:
data_frame = data_frame.unstack(level=pivot)

if transpose or should_transpose_instead_of_pivot:
data_frame = data_frame.transpose()

# If there is more than one column level and the first level (the metrics level) has only one value, drop that level
if isinstance(data_frame.columns, pd.MultiIndex) and 1 == len(data_frame.columns.levels[0]):
data_frame.name = data_frame.columns.levels[0][0] # capture the name of the metrics column
data_frame.columns = data_frame.columns.droplevel(0) # drop the metrics level

pivot_levels = result.index.names[1:]
return result.unstack(level=pivot_levels)
return data_frame.fillna('')

def _replace_display_values_in_index(self, dimension, result):
"""
Expand All @@ -89,7 +121,9 @@ def _replace_display_values_in_index(self, dimension, result):
df_key = format_dimension_key(dimension.key)
values = [dimension.display_values.get(x, x)
for x in result.index.get_level_values(df_key)]
result.index.set_levels(level=df_key, levels=values)
result.index.set_levels(level=df_key,
levels=values,
inplace=True)
return result

values = [dimension.display_values.get(x, x)
Expand Down
Loading

0 comments on commit 90561a2

Please sign in to comment.