Merge 4cc80be into a0ee1b3

kayak · Aug 6, 2018 · cc9770d · cc9770d
2 parents a0ee1b3 + 4cc80be
commit cc9770d
Show file tree

Hide file tree

Showing 14 changed files with 1,683 additions and 89 deletions.
diff --git a/fireant/formats.py b/fireant/formats.py
@@ -10,6 +10,8 @@
 
 INFINITY = "Infinity"
 NULL_VALUE = 'null'
+TOTALS_VALUE = 'totals'
+RAW_VALUE = 'raw'
 
 NO_TIME = time(0)
 

diff --git a/fireant/slicer/base.py b/fireant/slicer/base.py
@@ -19,7 +19,7 @@ def __init__(self, key, label=None, definition=None, display_definition=None):
             used for querying labels.
         """
         self.key = key
-        self.label = label or key
+        self.label = label if label is not None else key
         self.definition = definition
 
         self.display_definition = display_definition

diff --git a/fireant/slicer/queries/database.py b/fireant/slicer/queries/database.py
@@ -1,10 +1,14 @@
 import time
+from functools import partial
 from typing import Iterable
 
 import pandas as pd
 
 from fireant.database.base import Database
-from fireant.formats import NULL_VALUE
+from fireant.formats import (
+    NULL_VALUE,
+    TOTALS_VALUE,
+)
 from fireant.utils import format_dimension_key
 from .logger import (
     query_logger,
@@ -65,7 +69,7 @@ def clean_and_apply_index(data_frame: pd.DataFrame, dimensions: Iterable[Dimensi
             continue
 
         level = format_dimension_key(dimension.key)
-        data_frame[level] = fill_nans_in_level(data_frame, dimension, dimension_keys[:i]) \
+        data_frame[level] = fill_nans_in_level(data_frame, dimensions[:i + 1]) \
             .apply(
               # Handles an annoying case of pandas in which the ENTIRE data frame gets converted from int to float if
              # there are NaNs, even if there are no NaNs in the column :/
@@ -76,7 +80,7 @@ def clean_and_apply_index(data_frame: pd.DataFrame, dimensions: Iterable[Dimensi
     return data_frame.set_index(dimension_keys)
 
 
-def fill_nans_in_level(data_frame, dimension, preceding_dimension_keys):
+def fill_nans_in_level(data_frame, dimensions):
     """
     In case there are NaN values representing both totals (from ROLLUP) and database nulls, we need to replace the real
     nulls with an empty string in order to distinguish between them.  We choose to replace the actual database nulls
@@ -85,33 +89,39 @@ def fill_nans_in_level(data_frame, dimension, preceding_dimension_keys):
 
     :param data_frame:
         The data_frame we are replacing values in.
-    :param dimension:
-        The level of the data frame to replace nulls in. This function should be called once per non-conitnuous
-        dimension, in the order of the dimensions.
-    :param preceding_dimension_keys:
+    :param dimensions:
+        A list of dimensions with the last item in the list being the dimension to fill nans for. This function requires
+        the dimension being processed as well as the preceding dimensions since a roll up in a higher level dimension
+        results in nulls for lower level dimension.
     :return:
         The level in the data_frame with the nulls replaced with empty string
     """
-    level = format_dimension_key(dimension.key)
+    level = format_dimension_key(dimensions[-1].key)
+
+    number_rollup_dimensions = sum(dimension.is_rollup for dimension in dimensions)
+    if 0 < number_rollup_dimensions:
+        fill_nan_for_nulls = partial(_fill_nan_for_nulls, offset=number_rollup_dimensions)
+
+        if 1 < len(dimensions):
+            preceding_dimension_keys = [format_dimension_key(d.key)
+                                        for d in dimensions[:-1]]
 
-    if dimension.is_rollup:
-        if preceding_dimension_keys:
             return (data_frame
                     .groupby(preceding_dimension_keys)[level]
-                    .apply(_fill_nan_for_nulls))
+                    .apply(fill_nan_for_nulls))
 
-        return _fill_nan_for_nulls(data_frame[level])
+        return fill_nan_for_nulls(data_frame[level])
 
     return data_frame[level].fillna(NULL_VALUE)
 
 
-def _fill_nan_for_nulls(df):
+def _fill_nan_for_nulls(df, offset=1):
     """
     Fills the first NaN with the literal string "null" if there are more NaN values than `offset`, then fills every
     remaining NaN with the literal string "totals".
 
     :param df:
     :return:
     """
-    if 1 < pd.isnull(df).sum():
-        return df.fillna(NULL_VALUE, limit=1)
-    return df
+    if offset < pd.isnull(df).sum():
+        return df.fillna(NULL_VALUE, limit=1).fillna(TOTALS_VALUE)
+    return df.fillna(TOTALS_VALUE)
diff --git a/fireant/slicer/widgets/__init__.py b/fireant/slicer/widgets/__init__.py
@@ -1,6 +1,7 @@
 from .base import Widget
 from .csv import CSV
 from .datatables import DataTablesJS
+from .reacttable import ReactTable
 from .highcharts import HighCharts
 from .matplotlib import Matplotlib
 from .pandas import Pandas
diff --git a/fireant/slicer/widgets/base.py b/fireant/slicer/widgets/base.py
@@ -1,11 +1,15 @@
-from fireant import Metric
+from typing import Union
+
+from fireant import (
+    Metric,
+    Operation,
+)
 from fireant.slicer.exceptions import MetricRequiredException
 from fireant.utils import immutable
-from ..operations import Operation
 
 
 class Widget:
-    def __init__(self, *items: Metric):
+    def __init__(self, *items: Union[Metric, Operation]):
         self.items = list(items)
 
     @immutable

diff --git a/fireant/slicer/widgets/csv.py b/fireant/slicer/widgets/csv.py
@@ -4,4 +4,5 @@
 class CSV(Pandas):
     def transform(self, data_frame, slicer, dimensions, references):
         result_df = super(CSV, self).transform(data_frame, slicer, dimensions, references)
+        result_df.columns.names = [None]
         return result_df.to_csv()
diff --git a/fireant/slicer/widgets/pandas.py b/fireant/slicer/widgets/pandas.py
@@ -18,9 +18,10 @@
 
 
 class Pandas(TransformableWidget):
-    def __init__(self, metric, *metrics: Metric, pivot=False, max_columns=None):
+    def __init__(self, metric, *metrics: Metric, pivot=(), transpose=False, max_columns=None):
         super(Pandas, self).__init__(metric, *metrics)
         self.pivot = pivot
+        self.transpose = transpose
         self.max_columns = min(max_columns, HARD_MAX_COLUMNS) \
             if max_columns is not None \
             else HARD_MAX_COLUMNS
@@ -71,15 +72,46 @@ def transform(self, data_frame, slicer, dimensions, references):
             result.index.names = [dimension.label or dimension.key
                                   for dimension in dimensions]
 
-        result.columns = [reference_label(item, reference)
-                          for reference in [None] + references
-                          for item in self.items]
+        result.columns = pd.Index([reference_label(item, reference)
+                                   for item in self.items
+                                   for reference in [None] + references],
+                                  name='Metrics')
 
-        if not self.pivot:
-            return result
+        return self.pivot_data_frame(result, [d.label or d.key for d in self.pivot], self.transpose)
+
+    @staticmethod
+    def pivot_data_frame(data_frame, pivot=(), transpose=False):
+        """
+        Pivot and transpose the data frame. Dimensions included in the `pivot` arg will be unstacked to columns. If
+        `transpose` is True the data frame will be transposed. If there is only one index level in the data frame (i.e.
+        one dimension), and that dimension is pivoted, then the data frame will just be transposed. If there is a single
+        metric in the data frame and at least one dimension pivoted, the metrics column level will be dropped for
+        simplicity.
+
+        :param data_frame:
+        :param pivot:
+        :param transpose:
+        :return:
+        """
+        if not (pivot or transpose):
+            return data_frame
+
+        # NOTE: Don't pivot a single dimension data frame. This turns the data frame into a series and pivots the
+        # metrics anyway. Instead, transpose the data frame.
+        should_transpose_instead_of_pivot = len(pivot) == len(data_frame.index.names)
+
+        if pivot and not should_transpose_instead_of_pivot:
+            data_frame = data_frame.unstack(level=pivot)
+
+        if transpose or should_transpose_instead_of_pivot:
+            data_frame = data_frame.transpose()
+
+        # If there is more than one column level and the first level holds a single metric, drop that level
+        if isinstance(data_frame.columns, pd.MultiIndex) and 1 == len(data_frame.columns.levels[0]):
+            data_frame.name = data_frame.columns.levels[0][0]  # capture the name of the metrics column
+            data_frame.columns = data_frame.columns.droplevel(0)  # drop the metrics level
 
-        pivot_levels = result.index.names[1:]
-        return result.unstack(level=pivot_levels)
+        return data_frame.fillna('')
 
     def _replace_display_values_in_index(self, dimension, result):
         """
@@ -89,7 +121,9 @@ def _replace_display_values_in_index(self, dimension, result):
             df_key = format_dimension_key(dimension.key)
             values = [dimension.display_values.get(x, x)
                       for x in result.index.get_level_values(df_key)]
-            result.index.set_levels(level=df_key, levels=values)
+            result.index.set_levels(level=df_key,
+                                    levels=values,
+                                    inplace=True)
             return result
 
         values = [dimension.display_values.get(x, x)