diff --git a/docs/Release Notes/Notices.md b/docs/Release Notes/Notices.md
index a7f86939a..dca3b11f6 100644
--- a/docs/Release Notes/Notices.md	
+++ b/docs/Release Notes/Notices.md	
@@ -4,6 +4,7 @@ Opteryx uses the following libraries and components:
 
 Component                                                  | Licence
 ---------------------------------------------------------- | -------
+[distogram](https://github.com/maki-nage/distogram)        | [MIT](https://github.com/maki-nage/distogram/blob/master/LICENSE.txt)
 [pyarrow](https://github.com/apache/arrow/)                | [Apache 2.0](https://github.com/apache/arrow/blob/master/LICENSE.txt)
 [sqloxide](https://github.com/wseaton/sqloxide)            | [MIT](https://github.com/wseaton/sqloxide/blob/master/LICENSE)
 [cython](https://github.com/cython/cython)                 | [Apache 2.0](https://github.com/cython/cython/blob/master/LICENSE.txt)
diff --git a/opteryx/engine/planner/operations/show_columns.py b/opteryx/engine/planner/operations/show_columns.py
index 200d70367..235b95564 100644
--- a/opteryx/engine/planner/operations/show_columns.py
+++ b/opteryx/engine/planner/operations/show_columns.py
@@ -21,12 +21,14 @@
 from typing import Iterable
 
 import numpy
+import orjson
 import pyarrow
 
 from opteryx.engine.query_statistics import QueryStatistics
 from opteryx.engine.attribute_types import OPTERYX_TYPES, determine_type
 from opteryx.engine.planner.operations.base_plan_node import BasePlanNode
 from opteryx.exceptions import SqlError
+from opteryx.third_party import distogram
 from opteryx.utils.columns import Columns
 
 
@@ -43,9 +45,213 @@ def myhash(any):
     return CityHash64(str(any))
 
 
+def _simple_collector(page):
+    """
+    Collect the very summary type information only, we read only a single page to do
+    this so it's pretty quick - helpful if you want to know what fields are available
+    programatically.
+    """
+    columns = Columns(page)
+
+    buffer = []
+    for column in page.column_names:
+        column_data = page.column(column)
+        _type = determine_type(str(column_data.type))
+        new_row = {"column_name": columns.get_preferred_name(column), "type": _type}
+        buffer.append(new_row)
+
+    table = pyarrow.Table.from_pylist(buffer)
+    table = Columns.create_table_metadata(
+        table=table,
+        expected_rows=len(buffer),
+        name="show_columns",
+        table_aliases=[],
+    )
+    return table
+
+
+def _full_collector(pages):
+    """
+    Collect basic count information about columns, to do this we read the entire
+    dataset.
+    """
+
+    EMPTY_PROFILE = orjson.dumps(
+        {
+            "column_name": None,
+            "type": [],
+            "count": 0,
+            "min": None,
+            "max": None,
+            "missing": 0,
+        }
+    )
+
+    columns = None
+    profile_collector = {}
+
+    for page in pages:
+        if columns is None:
+            columns = Columns(page)
+
+        for column in page.column_names:
+            column_data = page.column(column)
+            profile = profile_collector.get(column, orjson.loads(EMPTY_PROFILE))
+            _type = determine_type(str(column_data.type))
+            if _type not in profile["type"]:
+                profile["type"].append(_type)
+
+            profile["count"] += len(column_data)
+            profile["missing"] += column_data.null_count
+
+            if _type == OPTERYX_TYPES.NUMERIC:
+                if profile["min"]:
+                    profile["min"] = min(profile["min"], numpy.nanmin(column_data))
+                    profile["max"] = max(profile["max"], numpy.nanmax(column_data))
+                else:
+                    profile["min"] = numpy.nanmin(column_data)
+                    profile["max"] = numpy.nanmax(column_data)
+
+            profile_collector[column] = profile
+
+    buffer = []
+
+    for column, profile in profile_collector.items():
+        profile["column_name"] = columns.get_preferred_name(column)
+        profile["type"] = ", ".join(profile["type"])
+        buffer.append(profile)
+
+    table = pyarrow.Table.from_pylist(buffer)
+    table = Columns.create_table_metadata(
+        table=table,
+        expected_rows=len(buffer),
+        name="show_columns",
+        table_aliases=[],
+    )
+    return table
+
+
+def _extended_collector(pages):
+    """
+    Collect summary statistics about each column
+    """
+
+    EMPTY_PROFILE = orjson.dumps(
+        {
+            "column_name": None,
+            "type": [],
+            "count": 0,
+            "min": None,
+            "max": None,
+            "missing": 0,
+            "mean": None,
+            "quantiles": None,
+            "histogram": None,
+            "unique": 0,
+            "most_frequent_values": None,
+            "most_frequent_counts": None,
+            "distogram": None,
+        }
+    )
+
+    columns = None
+    profile_collector = {}
+
+    for page in pages:
+        if columns is None:
+            columns = Columns(page)
+
+        for column in page.column_names:
+            column_data = page.column(column)
+            profile = profile_collector.get(column, orjson.loads(EMPTY_PROFILE))
+            _type = determine_type(str(column_data.type))
+            if _type not in profile["type"]:
+                profile["type"].append(_type)
+
+            profile["count"] += len(column_data)
+
+            # calculate the missing count more robustly
+            missing = reduce(
+                lambda x, y: x + 1,
+                (i for i in column_data if i in (None, numpy.nan)),
+                0,
+            )
+            profile["missing"] += missing
+
+            if _type in (OPTERYX_TYPES.LIST, OPTERYX_TYPES.STRUCT, OPTERYX_TYPES.OTHER):
+                profile_collector[column] = profile
+                continue
+
+            # convert TIMESTAMP into a NUMERIC (seconds after Linux Epoch)
+            if _type == OPTERYX_TYPES.TIMESTAMP:
+                import datetime
+
+                to_linux_epoch = (
+                    lambda x: numpy.nan
+                    if x.as_py() is None
+                    else datetime.datetime.fromisoformat(
+                        x.as_py().isoformat()
+                    ).timestamp()
+                )
+                column_data = (to_linux_epoch(i) for i in column_data)
+            else:
+                column_data = (i.as_py() for i in column_data)
+
+            # remove empty values
+            column_data = numpy.array(
+                [i for i in column_data if i not in (None, numpy.nan)]
+            )
+
+            if _type in (OPTERYX_TYPES.BOOLEAN, OPTERYX_TYPES.VARCHAR):
+                if profile[""]
+
+            if _type in (OPTERYX_TYPES.NUMERIC, OPTERYX_TYPES.TIMESTAMP):
+                # populate the distogram
+                if profile["distogram"] is None:
+                    dgram = distogram.Distogram(10)
+                else:
+                    dgram = profile["distogram"]
+                values, counts = numpy.unique(column_data, return_counts=True)
+                for index, value in enumerate(values):
+                    dgram = distogram.update(dgram, value=value, count=counts[index])
+                profile["distogram"] = dgram
+
+            profile_collector[column] = profile
+
+    buffer = []
+
+    for column, profile in profile_collector.items():
+        profile["column_name"] = columns.get_preferred_name(column)
+        profile["type"] = ", ".join(profile["type"])
+        dgram = profile.pop("distogram")
+        if dgram:
+            profile["min"], profile["max"] = distogram.bounds(dgram)
+            profile["mean"] = distogram.mean(dgram)
+
+            histogram = distogram.histogram(dgram, bin_count=10)
+            if histogram:
+                profile["histogram"] = histogram[0]
+
+            profile["quantiles"] = (
+                distogram.quantile(dgram, value=0.25),
+                distogram.quantile(dgram, value=0.5),
+                distogram.quantile(dgram, value=0.75),
+            )
+        buffer.append(profile)
+
+    table = pyarrow.Table.from_pylist(buffer)
+    table = Columns.create_table_metadata(
+        table=table,
+        expected_rows=len(buffer),
+        name="show_columns",
+        table_aliases=[],
+    )
+    return table
+
+
 class ShowColumnsNode(BasePlanNode):
     def __init__(self, statistics: QueryStatistics, **config):
-        self._full = (config.get("full"),)
+        self._full = config.get("full")
         self._extended = config.get("extended")
         pass
 
@@ -67,136 +273,18 @@ def execute(self) -> Iterable:
         if data_pages is None:
             return None
 
-        if self._full:
-            dataset = pyarrow.concat_tables(data_pages.execute())
-        else:
-            dataset = next(data_pages.execute())
-
-        source_metadata = Columns(dataset)
-
-        buffer = []
-        for column in dataset.column_names:
-            column_data = dataset.column(column)
-            _type = determine_type(str(column_data.type))
-            new_row = {
-                "column_name": source_metadata.get_preferred_name(column),
-                "type": _type,
-                "nulls": (column_data.null_count) > 0,
-            }
-
-            if self._extended:
-
-                new_row = {
-                    **new_row,
-                    "count": -1,
-                    "min": None,
-                    "max": None,
-                    "mean": None,
-                    "quantiles": None,
-                    "histogram": None,
-                    "unique": -1,
-                    "missing": -1,
-                    "most_frequent_values": None,
-                    "most_frequent_counts": None,
-                }
-
-                # Basic counting statistics
-                new_row["count"] = len(column_data)
-                new_row["missing"] = reduce(
-                    lambda x, y: x + 1,
-                    (i for i in column_data if i in (None, numpy.nan)),
-                    0,
-                )
-                # Number of unique items in the column)
-                # We use hashes because some types don't play nicely
-                values = numpy.unique(
-                    [hash(i) for i in column_data if i not in (None, numpy.nan)]
-                )
-                unique_values = len(values)
-                new_row["unique"] = unique_values
-                del values
-
-                # LISTS and STRUCTS are complex, don't profile them
-                if _type in (OPTERYX_TYPES.LIST, OPTERYX_TYPES.STRUCT):
-                    continue
-
-                # convert TIMESTAMP into a NUMERIC (seconds after Linux Epoch)
-                if _type == OPTERYX_TYPES.TIMESTAMP:
-                    import datetime
-
-                    to_linux_epoch = (
-                        lambda x: numpy.nan
-                        if x.as_py() is None
-                        else datetime.datetime.fromisoformat(
-                            x.as_py().isoformat()
-                        ).timestamp()
-                    )
-                    column_data = (to_linux_epoch(i) for i in column_data)
-                else:
-                    column_data = (i.as_py() for i in column_data)
-
-                # remove empty values
-                column_data = numpy.array(
-                    [i for i in column_data if i not in (None, numpy.nan)]
-                )
-
-                # don't work with long strings
-                if _type == OPTERYX_TYPES.VARCHAR:
-                    if max(len(i) for i in column_data) > 32:
-                        continue
-
-                # For NUMERIC and TIMESTAMPS (now NUMERIC), get min, max, mean,
-                # quantiles and distribution
-                if _type in (
-                    OPTERYX_TYPES.NUMERIC,
-                    OPTERYX_TYPES.TIMESTAMP,
-                ):
-                    new_row["min"] = numpy.min(column_data)
-                    new_row["max"] = numpy.max(column_data)
-
-                    # Python has no practical limits on numbers, but Arrow does
-                    if (
-                        new_row["min"] < -9007199254740992
-                        or new_row["max"] > 9007199254740992
-                    ):
-                        new_row["min"] = None
-                        new_row["max"] = None
-                    else:
-                        new_row["mean"] = numpy.mean(column_data)
-                        new_row["quantiles"] = numpy.percentile(
-                            column_data, [25, 50, 75]
-                        )
-                        new_row["histogram"], boundaries = numpy.histogram(
-                            column_data, min(unique_values, 10)
-                        )
-                        del boundaries
-
-                # Don't work out frequencies for TIMESTAMPS
-                if _type not in (OPTERYX_TYPES.TIMESTAMP) and unique_values < 10:
-                    column_data, counts = numpy.unique(column_data, return_counts=True)
-                    # skip if everything occurs the same number of times
-                    if max(counts) != min(counts):
-                        top_counts = sorted(counts, reverse=True)[0:5]
-                        most_frequent = {
-                            str(v): c
-                            for v, c in zip(column_data, counts)
-                            if c in top_counts
-                        }
-                        new_row["most_frequent_values"] = list(most_frequent.keys())
-                        new_row["most_frequent_counts"] = most_frequent.values()
-                        del most_frequent
-                    del counts
-
-                del column_data
-
-            buffer.append(new_row)
-
-        table = pyarrow.Table.from_pylist(buffer)
-        table = Columns.create_table_metadata(
-            table=table,
-            expected_rows=len(buffer),
-            name="show_columns",
-            table_aliases=[],
-        )
-        yield table
-        return
+        if not (self._full or self._extended):
+            # if it's not full or extended, do just get the list of columns and their
+            # types
+            yield _simple_collector(next(data_pages.execute()))
+            return
+
+        if self._full and not self._extended:
+            # we're going to read the full table, so we can count stuff
+            yield _full_collector(data_pages.execute())
+            return
+
+        if self._extended:
+            # get everything we can reasonable get
+            yield _extended_collector(data_pages.execute())
+            return
diff --git a/opteryx/third_party/distogram/LICENSE.txt b/opteryx/third_party/distogram/LICENSE.txt
new file mode 100644
index 000000000..96b7dfe5d
--- /dev/null
+++ b/opteryx/third_party/distogram/LICENSE.txt
@@ -0,0 +1,19 @@
+Copyright (c) 2020 by Romain Picard
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
\ No newline at end of file
diff --git a/opteryx/third_party/distogram/README.rst b/opteryx/third_party/distogram/README.rst
new file mode 100644
index 000000000..71f96f375
--- /dev/null
+++ b/opteryx/third_party/distogram/README.rst
@@ -0,0 +1,152 @@
+==========
+DistoGram
+==========
+
+
+.. image:: https://badge.fury.io/py/distogram.svg
+    :target: https://badge.fury.io/py/distogram
+
+.. image:: https://github.com/maki-nage/distogram/workflows/Python%20package/badge.svg
+    :target: https://github.com/maki-nage/distogram/actions?query=workflow%3A%22Python+package%22
+    :alt: Github WorkFlows
+
+.. image:: https://img.shields.io/codecov/c/github/maki-nage/distogram?style=plastic&color=brightgreen&logo=codecov&style=for-the-badge
+    :target: https://codecov.io/gh/maki-nage/distogram
+    :alt: Coverage
+
+.. image:: https://readthedocs.org/projects/distogram/badge/?version=latest
+    :target: https://distogram.readthedocs.io/en/latest/?badge=latest
+    :alt: Documentation Status
+
+.. image:: https://mybinder.org/badge_logo.svg
+    :target: https://mybinder.org/v2/gh/maki-nage/distogram/master?urlpath=notebooks%2Fexamples%2Fdistogram.ipynb
+
+
+DistoGram is a library that allows to compute histogram on streaming data, in
+distributed environments. The implementation follows the algorithms described in
+Ben-Haim's `Streaming Parallel Decision Trees
+<http://jmlr.org/papers/volume11/ben-haim10a/ben-haim10a.pdf>`__
+
+Get Started
+============
+
+First create a compressed representation of a distribution:
+
+.. code:: python
+
+    import numpy as np
+    import distogram
+
+    distribution = np.random.normal(size=10000)
+
+    # Create and feed distogram from distribution
+    # on a real usage, data comes from an event stream
+    h = distogram.Distogram()
+    for i in distribution:
+        h = distogram.update(h, i)
+
+
+Compute statistics on the distribution:
+
+.. code:: python
+
+    nmin, nmax = distogram.bounds(h)
+    print("count: {}".format(distogram.count(h)))
+    print("mean: {}".format(distogram.mean(h)))
+    print("stddev: {}".format(distogram.stddev(h)))
+    print("min: {}".format(nmin))
+    print("5%: {}".format(distogram.quantile(h, 0.05)))
+    print("25%: {}".format(distogram.quantile(h, 0.25)))
+    print("50%: {}".format(distogram.quantile(h, 0.50)))
+    print("75%: {}".format(distogram.quantile(h, 0.75)))
+    print("95%: {}".format(distogram.quantile(h, 0.95)))
+    print("max: {}".format(nmax))
+
+
+.. code:: console
+
+    count: 10000
+    mean: -0.005082954640481095
+    stddev: 1.0028524290149186
+    min: -3.5691130319855047
+    5%: -1.6597242392338374
+    25%: -0.6785107421744653
+    50%: -0.008672960012168916
+    75%: 0.6720718926935414
+    95%: 1.6476822301131866
+    max: 3.8800560034877427
+
+Compute and display the histogram of the distribution:
+
+.. code:: python
+
+    hist = distogram.histogram(h)
+    df_hist = pd.DataFrame(np.array(hist), columns=["bin", "count"])
+    fig = px.bar(df_hist, x="bin", y="count", title="distogram")
+    fig.update_layout(height=300)
+    fig.show()
+
+.. image:: docs/normal_histogram.png
+  :scale: 60%
+  :align: center
+
+Install
+========
+
+DistoGram is available on PyPi and can be installed with pip:
+
+.. code:: console
+
+    pip install distogram
+
+
+Play With Me
+============
+
+You can test this library directly on this
+`live notebook <https://mybinder.org/v2/gh/maki-nage/distogram/master?urlpath=notebooks%2Fexamples%2Fdistogram.ipynb>`__.
+
+
+Performances
+=============
+
+Distogram is design for fast updates when using python types. The following
+numbers show the results of the benchmark program located in the examples. 
+
+On a i7-9800X Intel CPU, performances are:
+
+============  ==========  =======  ==========
+Interpreter   Operation   Numpy         Req/s
+============  ==========  =======  ==========
+pypy 7.3      update      no          6563311
+pypy 7.3      update      yes          111318
+CPython 3.7   update      no           436709
+CPython 3.7   update      yes          251603
+============  ==========  =======  ==========
+
+On a modest 2014 13" macbook pro, performances are:
+
+============  ==========  =======  ==========
+Interpreter   Operation   Numpy         Req/s
+============  ==========  =======  ==========
+pypy 7.3      update      no          3572436
+pypy 7.3      update      yes           37630
+CPython 3.7   update      no           112749
+CPython 3.7   update      yes           81005
+============  ==========  =======  ==========
+
+As you can see, your are encouraged to use pypy with python native types. Pypy's
+jit is penalised by numpy native types, causing a huge performance hit. Moreover
+the streaming phylosophy of Distogram is more adapted to python native types
+while numpy is optimized for batch computations, even with CPython.
+
+
+Credits
+========
+
+Although this code has been written by following the aforementioned research
+paper, some parts are also inspired by the implementation from
+`Carson Farmer <https://github.com/carsonfarmer/streamhist>`__.
+
+Thanks to `John Belmonte <https://github.com/belm0>`_ for his help on
+performances and accuracy improvements.
\ No newline at end of file
diff --git a/opteryx/third_party/distogram/__init__.py b/opteryx/third_party/distogram/__init__.py
new file mode 100644
index 000000000..8b5fd8b84
--- /dev/null
+++ b/opteryx/third_party/distogram/__init__.py
@@ -0,0 +1,425 @@
+# type:ignore
+__author__ = """Romain Picard"""
+__email__ = "romain.picard@oakbits.com"
+__version__ = "3.0.0"
+
+import math
+from bisect import bisect_left
+from functools import reduce
+from itertools import accumulate
+from operator import itemgetter
+from typing import List
+from typing import Optional
+from typing import Tuple
+
+EPSILON = 1e-5
+Bin = Tuple[float, int]
+
+
+# bins is a tuple of (cut point, count)
+class Distogram(object):
+    """Compressed representation of a distribution."""
+
+    __slots__ = "bin_count", "bins", "min", "max", "diffs", "min_diff", "weighted_diff"
+
+    def __init__(self, bin_count: int = 100, weighted_diff: bool = False):
+        """Creates a new Distogram object
+
+        Args:
+            bin_count: [Optional] the number of bins to use.
+            weighted_diff: [Optional] Whether to use weighted bin sizes.
+
+        Returns:
+            A Distogram object.
+        """
+        self.bin_count: int = bin_count
+        self.bins: List[Bin] = list()
+        self.min: Optional[float] = None
+        self.max: Optional[float] = None
+        self.diffs: Optional[List[float]] = None
+        self.min_diff: Optional[float] = None
+        self.weighted_diff: bool = weighted_diff
+
+
+def _linspace(start: float, stop: float, num: int) -> List[float]:
+    if num == 1:
+        return [start, stop]
+    step = (stop - start) / float(num)
+    values = [start + step * i for i in range(num)]
+    values.append(stop)
+    return values
+
+
+def _moment(x: List[float], counts: List[float], c: float, n: int) -> float:
+    m = (ci * (v - c) ** n for i, (ci, v) in enumerate(zip(counts, x)))
+    return sum(m) / sum(counts)
+
+
+def _weighted_diff(h: Distogram, left: Bin, right: Bin):
+    diff = left[0] - right[0]
+    if h.weighted_diff is True:
+        diff *= math.log(EPSILON + min(left[1], right[1]))
+    return diff
+
+
+def _update_diffs(h: Distogram, i: int) -> None:
+    if h.diffs is not None:
+        update_min = False
+
+        if i > 0:
+            if h.diffs[i - 1] == h.min_diff:
+                update_min = True
+
+            h.diffs[i - 1] = _weighted_diff(h, h.bins[i], h.bins[i - 1])
+            if h.diffs[i - 1] < h.min_diff:
+                h.min_diff = h.diffs[i - 1]
+
+        if i < len(h.bins) - 1:
+            if h.diffs[i] == h.min_diff:
+                update_min = True
+
+            h.diffs[i] = _weighted_diff(h, h.bins[i + 1], h.bins[i])
+            if h.diffs[i] < h.min_diff:
+                h.min_diff = h.diffs[i]
+
+        if update_min is True:
+            h.min_diff = min(h.diffs)
+
+    return
+
+
+def _trim(h: Distogram) -> Distogram:
+    while len(h.bins) > h.bin_count:
+        if h.diffs is not None:
+            i = h.diffs.index(h.min_diff)
+        else:
+            diffs = [
+                (i - 1, _weighted_diff(h, b, h.bins[i - 1]))
+                for i, b in enumerate(h.bins[1:], start=1)
+            ]
+            i, _ = min(diffs, key=itemgetter(1))
+
+        v1, f1 = h.bins[i]
+        v2, f2 = h.bins[i + 1]
+        h.bins[i] = (v1 * f1 + v2 * f2) / (f1 + f2), f1 + f2
+        del h.bins[i + 1]
+
+        if h.diffs is not None:
+            del h.diffs[i]
+            _update_diffs(h, i)
+            h.min_diff = min(h.diffs)
+
+    return h
+
+
+def _trim_in_place(h: Distogram, value: float, c: int, i: int):
+    v, f = h.bins[i]
+    h.bins[i] = (v * f + value * c) / (f + c), f + c
+    _update_diffs(h, i)
+    return h
+
+
+def _compute_diffs(h: Distogram) -> List[float]:
+    if h.weighted_diff is True:
+        diffs = [
+            (v2 - v1) * math.log(EPSILON + min(f1, f2))
+            for (v1, f1), (v2, f2) in zip(h.bins[:-1], h.bins[1:])
+        ]
+    else:
+        diffs = [v2 - v1 for (v1, _), (v2, _) in zip(h.bins[:-1], h.bins[1:])]
+    h.min_diff = min(diffs)
+
+    return diffs
+
+
+def _search_in_place_index(h: Distogram, new_value: float, index: int) -> int:
+    if h.diffs is None:
+        h.diffs = _compute_diffs(h)
+
+    if index > 0:
+        diff1 = _weighted_diff(h, (new_value, 1), h.bins[index - 1])
+        diff2 = _weighted_diff(h, h.bins[index], (new_value, 1))
+
+        i_bin, diff = (index - 1, diff1) if diff1 < diff2 else (index, diff2)
+
+        return i_bin if diff < h.min_diff else -1
+
+    return -1
+
+
+def update(h: Distogram, value: float, count: int = 1) -> Distogram:
+    """Adds a new element to the distribution.
+
+    Args:
+        h: A Distogram object.
+        value: The value to add on the histogram.
+        count: [Optional] The number of times that value must be added.
+
+    Returns:
+        A Distogram object where value as been processed.
+
+    Raises:
+        ValueError if count is not strictly positive.
+    """
+    if count <= 0:
+        raise ValueError("count must be strictly positive")
+
+    index = 0
+    if len(h.bins) > 0:
+        if value <= h.bins[0][0]:
+            index = 0
+        elif value >= h.bins[-1][0]:
+            index = -1
+        else:
+            index = bisect_left(h.bins, (value, 1))
+
+        vi, fi = h.bins[index]
+        if vi == value:
+            h.bins[index] = (vi, fi + count)
+            return h
+
+    if index > 0 and len(h.bins) >= h.bin_count:
+        in_place_index = _search_in_place_index(h, value, index)
+        if in_place_index > 0:
+            h = _trim_in_place(h, value, count, in_place_index)
+            return h
+
+    if index == -1:
+        h.bins.append((value, count))
+        if h.diffs is not None:
+            diff = _weighted_diff(h, h.bins[-1], h.bins[-2])
+            h.diffs.append(diff)
+            h.min_diff = min(h.min_diff, diff)
+    else:
+        h.bins.insert(index, (value, count))
+        if h.diffs is not None:
+            h.diffs.insert(index, 0)
+            _update_diffs(h, index)
+
+    if (h.min is None) or (h.min > value):
+        h.min = value
+    if (h.max is None) or (h.max < value):
+        h.max = value
+
+    return _trim(h)
+
+
+def merge(h1: Distogram, h2: Distogram) -> Distogram:
+    """Merges two Distogram objects
+
+    Args:
+        h1: First Distogram.
+        h2: Second Distogram.
+
+    Returns:
+        A Distogram object being the composition of h1 and h2. The number of
+        bins in this Distogram is equal to the number of bins in h1.
+    """
+    h = reduce(
+        lambda residual, b: update(residual, *b),
+        h2.bins,
+        h1,
+    )
+    return h
+
+
+def count_at(h: Distogram, value: float):
+    """Counts the number of elements present in the distribution up to value.
+
+    Args:
+        h: A Distogram object.
+        value: The value up to what elements must be counted.
+
+    Returns:
+        An estimation of the real count, computed from the compressed
+        representation of the distribution. Returns None if the Distogram
+        object contains no element or value is outside of the distribution
+        bounds.
+    """
+    if len(h.bins) == 0:
+        return None
+
+    if value < h.min or value > h.max:
+        return None
+
+    if value == h.min:
+        return 0
+
+    if value == h.max:
+        return count(h)
+
+    v0, f0 = h.bins[0]
+    vl, fl = h.bins[-1]
+    if value <= v0:  # left
+        ratio = (value - h.min) / (v0 - h.min)
+        result = ratio * v0 / 2
+    elif value >= vl:  # right
+        ratio = (value - vl) / (h.max - vl)
+        result = (1 + ratio) * fl / 2
+        result += sum((f for _, f in h.bins[:-1]))
+    else:
+        i = sum(((value > v) for v, _ in h.bins)) - 1
+        vi, fi = h.bins[i]
+        vj, fj = h.bins[i + 1]
+
+        mb = fi + (fj - fi) / (vj - vi) * (value - vi)
+        result = (fi + mb) / 2 * (value - vi) / (vj - vi)
+        result += sum((f for _, f in h.bins[:i]))
+
+        result = result + fi / 2
+
+    return result
+
+
+def count(h: Distogram) -> float:
+    """Counts the number of elements in the distribution.
+
+    Args:
+        h: A Distogram object.
+
+    Returns:
+        The number of elements in the distribution.
+    """
+    return sum((f for _, f in h.bins))
+
+
+def bounds(h: Distogram) -> Tuple[float, float]:
+    """Returns the min and max values of the distribution.
+
+    Args:
+        h: A Distogram object.
+
+    Returns:
+        A tuple containing the minimum and maximum values of the distribution.
+    """
+    return h.min, h.max
+
+
+def mean(h: Distogram) -> float:
+    """Returns the mean of the distribution.
+
+    Args:
+        h: A Distogram object.
+
+    Returns:
+        An estimation of the mean of the values in the distribution.
+    """
+    p, m = zip(*h.bins)
+    return _moment(p, m, 0, 1)
+
+
+def variance(h: Distogram) -> float:
+    """Returns the variance of the distribution.
+
+    Args:
+        h: A Distogram object.
+
+    Returns:
+        An estimation of the variance of the values in the distribution.
+    """
+    p, m = zip(*h.bins)
+    return _moment(p, m, mean(h), 2)
+
+
+def stddev(h: Distogram) -> float:
+    """Returns the standard deviation of the distribution.
+
+    Args:
+        h: A Distogram object.
+
+    Returns:
+        An estimation of the standard deviation of the values in the
+        distribution.
+    """
+    return math.sqrt(variance(h))
+
+
+def histogram(h: Distogram, bin_count: int = 100) -> Tuple[List[float], List[float]]:
+    """Returns a histogram of the distribution in numpy format.
+
+    Args:
+        h: A Distogram object.
+        bin_count: [Optional] The number of bins in the histogram.
+
+    Returns:
+        An estimation of the histogram of the distribution, or None
+        if there is not enough items in the distribution.
+    """
+    if len(h.bins) < bin_count:
+        return None
+
+    bin_bounds = _linspace(h.min, h.max, num=bin_count)
+    counts = [count_at(h, e) for e in bin_bounds]
+    counts = [new - last for new, last in zip(counts[1:], counts[:-1])]
+    u = tuple([counts, bin_bounds])
+    return u
+
+
+def frequency_density_distribution(h: Distogram) -> Tuple[List[float], List[float]]:
+    """Returns a histogram of the distribution
+
+    Args:
+        h: A Distogram object.
+
+    Returns:
+        An estimation of the frequency density distribution, or None if
+        there are not enough values in the distribution.
+    """
+
+    if count(h) < 2:
+        return None
+
+    bin_bounds = [float(i[0]) for i in h.bins]
+    bin_widths = [
+        (bin_bounds[i] - bin_bounds[i - 1]) for i in range(1, len(bin_bounds))
+    ]
+    counts = [0]
+    counts.extend([count_at(h, e) for e in bin_bounds[1:]])
+    densities = [
+        (new - last) / delta
+        for new, last, delta in zip(counts[1:], counts[:-1], bin_widths)
+    ]
+    return (densities, bin_bounds)
+
+
+def quantile(h: Distogram, value: float) -> Optional[float]:
+    """Returns a quantile of the distribution
+
+    Args:
+        h: A Distogram object.
+        value: The quantile to compute. Must be between 0 and 1
+
+    Returns:
+        An estimation of the quantile. Returns None if the Distogram
+        object contains no element or value is outside of [0, 1].
+    """
+    if len(h.bins) == 0:
+        return None
+
+    if not (0 <= value <= 1):
+        return None
+
+    total_count = count(h)
+    q_count = int(total_count * value)
+    v0, f0 = h.bins[0]
+    vl, fl = h.bins[-1]
+
+    if q_count <= (f0 / 2):  # left values
+        fraction = q_count / (f0 / 2)
+        result = h.min + (fraction * (v0 - h.min))
+
+    elif q_count >= (total_count - (fl / 2)):  # right values
+        base = q_count - (total_count - (fl / 2))
+        fraction = base / (fl / 2)
+        result = vl + (fraction * (h.max - vl))
+
+    else:
+        mb = q_count - f0 / 2
+        mids = [(fi + fj) / 2 for (_, fi), (_, fj) in zip(h.bins[:-1], h.bins[1:])]
+        i, _ = next(filter(lambda i_f: mb < i_f[1], enumerate(accumulate(mids))))
+
+        (vi, _), (vj, _) = h.bins[i], h.bins[i + 1]
+        fraction = (mb - sum(mids[:i])) / mids[i]
+        result = vi + (fraction * (vj - vi))
+
+    return result
diff --git a/opteryx/third_party/distogram/tests/test_bounds.py b/opteryx/third_party/distogram/tests/test_bounds.py
new file mode 100644
index 000000000..e4efa9c63
--- /dev/null
+++ b/opteryx/third_party/distogram/tests/test_bounds.py
@@ -0,0 +1,18 @@
+import sys
+import os
+
+sys.path.insert(1, os.path.join(sys.path[0], "../.."))
+import distogram
+import random
+
+
+def test_bounds():
+    normal = [random.normalvariate(0.0, 1.0) for _ in range(10000)]
+    h = distogram.Distogram()
+
+    for i in normal:
+        h = distogram.update(h, i)
+
+    dmin, dmax = distogram.bounds(h)
+    assert dmin == min(normal)
+    assert dmax == max(normal)
diff --git a/opteryx/third_party/distogram/tests/test_count.py b/opteryx/third_party/distogram/tests/test_count.py
new file mode 100644
index 000000000..1fd420b4a
--- /dev/null
+++ b/opteryx/third_party/distogram/tests/test_count.py
@@ -0,0 +1,17 @@
+import sys
+import os
+
+sys.path.insert(1, os.path.join(sys.path[0], "../.."))
+import distogram
+
+
+def test_count():
+    h = distogram.Distogram(bin_count=3)
+    assert distogram.count(h) == 0
+
+    h = distogram.update(h, 16, count=4)
+    assert distogram.count(h) == 4
+    h = distogram.update(h, 23, count=3)
+    assert distogram.count(h) == 7
+    h = distogram.update(h, 28, count=5)
+    assert distogram.count(h) == 12
diff --git a/opteryx/third_party/distogram/tests/test_count_at.py b/opteryx/third_party/distogram/tests/test_count_at.py
new file mode 100644
index 000000000..ccdd1d11e
--- /dev/null
+++ b/opteryx/third_party/distogram/tests/test_count_at.py
@@ -0,0 +1,77 @@
+# type:ignore
+import sys
+import os
+
+sys.path.insert(1, os.path.join(sys.path[0], "../.."))
+from pytest import approx
+import random
+import distogram
+
+
+def test_count_at():
+    h = distogram.Distogram(bin_count=3)
+    print(h)
+
+    # fill histogram
+    h = distogram.update(h, 16, count=4)
+    h = distogram.update(h, 23, count=3)
+    h = distogram.update(h, 28, count=5)
+    print(h)
+
+    actual_result = distogram.count_at(h, 25)
+    assert actual_result == approx(6.859999999)
+
+
+def test_count_at_normal():
+    points = 10000
+    normal = [random.normalvariate(0.0, 1.0) for _ in range(points)]
+    h = distogram.Distogram()
+
+    for i in normal:
+        h = distogram.update(h, i)
+
+    assert distogram.count_at(h, 0) == approx(points / 2, rel=0.05)
+
+
+def test_count_at_not_enough_elements():
+    h = distogram.Distogram()
+
+    h = distogram.update(h, 1)
+    h = distogram.update(h, 2)
+    h = distogram.update(h, 3)
+
+    assert distogram.count_at(h, 2.5) == 2
+
+
+def test_count_at_left():
+    h = distogram.Distogram(bin_count=6)
+
+    for i in [1, 2, 3, 4, 5, 6, 0.7, 1.1]:
+        h = distogram.update(h, i)
+
+    assert distogram.count_at(h, 0.77) == approx(0.14)
+
+
+def test_count_at_right():
+    h = distogram.Distogram(bin_count=6)
+
+    for i in [1, 2, 3, 4, 5, 6, 6.7, 6.1]:
+        h = distogram.update(h, i)
+
+    assert distogram.count_at(h, 6.5) == approx(7.307692307692308)
+
+
+def test_count_at_empty():
+    h = distogram.Distogram()
+
+    assert distogram.count_at(h, 6.5) is None
+
+
+def test_count_at_out_of_bouns():
+    h = distogram.Distogram()
+
+    for i in [1, 2, 3, 4, 5, 6, 6.7, 6.1]:
+        h = distogram.update(h, i)
+
+    assert distogram.count_at(h, 0.2) is None
+    assert distogram.count_at(h, 10) is None
diff --git a/opteryx/third_party/distogram/tests/test_histogram.py b/opteryx/third_party/distogram/tests/test_histogram.py
new file mode 100644
index 000000000..83144f5fe
--- /dev/null
+++ b/opteryx/third_party/distogram/tests/test_histogram.py
@@ -0,0 +1,50 @@
+# type:ignore
+import sys
+import os
+
+sys.path.insert(1, os.path.join(sys.path[0], "../.."))
+import random
+import numpy as np
+from pytest import approx
+import distogram
+
+
+def test_histogram():
+    normal = [random.normalvariate(0.0, 1.0) for _ in range(10000)]
+    h = distogram.Distogram(bin_count=64)
+
+    for i in normal:
+        h = distogram.update(h, i)
+
+    np_values, np_edges = np.histogram(normal, 10)
+    d_values, d_edges = distogram.histogram(h, 10)
+
+    h = distogram.Distogram(bin_count=3)
+    h = distogram.update(h, 23)
+    h = distogram.update(h, 28)
+    h = distogram.update(h, 16)
+    assert distogram.histogram(h, bin_count=3) == (
+        approx([1.0714285714285714, 0.6285714285714286, 1.3]),
+        [16.0, 20.0, 24.0, 28],
+    )
+    assert sum(distogram.histogram(h, bin_count=3)[0]) == approx(3.0)
+
+
+def test_histogram_on_too_small_distribution():
+    h = distogram.Distogram(bin_count=64)
+
+    for i in range(5):
+        h = distogram.update(h, i)
+
+    assert distogram.histogram(h, 10) == None
+
+
+def test_format_histogram():
+    bin_count = 4
+    h = distogram.Distogram(bin_count=bin_count)
+
+    for i in range(4):
+        h = distogram.update(h, i)
+
+    hist = distogram.histogram(h, bin_count=bin_count)
+    assert len(hist[1]) == len(hist[0]) + 1
diff --git a/opteryx/third_party/distogram/tests/test_quantile.py b/opteryx/third_party/distogram/tests/test_quantile.py
new file mode 100644
index 000000000..8e576c282
--- /dev/null
+++ b/opteryx/third_party/distogram/tests/test_quantile.py
@@ -0,0 +1,79 @@
+# type:ignore
+import sys
+import os
+
+sys.path.insert(1, os.path.join(sys.path[0], "../.."))
+from pytest import approx
+import distogram
+
+import numpy as np
+import random
+
+
+def test_quantile():
+    h = distogram.Distogram(bin_count=3)
+    h = distogram.update(h, 16, count=4)
+    h = distogram.update(h, 23, count=3)
+    h = distogram.update(h, 28, count=5)
+
+    assert distogram.quantile(h, 0.5) == approx(23.625)
+
+
+def test_quantile_not_enough_elemnts():
+    h = distogram.Distogram(bin_count=10)
+
+    for i in [12.3, 5.4, 8.2, 100.53, 23.5, 13.98]:
+        h = distogram.update(h, i)
+
+    assert distogram.quantile(h, 0.5) == approx(13.14)
+
+
+def test_quantile_on_left():
+    h = distogram.Distogram(bin_count=6)
+
+    data = [12.3, 5.2, 5.4, 4.9, 5.5, 5.6, 8.2, 30.53, 23.5, 13.98]
+    for i in data:
+        h = distogram.update(h, i)
+
+    assert distogram.quantile(h, 0.01) == approx(np.quantile(data, 0.01), rel=0.01)
+    assert distogram.quantile(h, 0.05) == approx(np.quantile(data, 0.05), rel=0.05)
+    assert distogram.quantile(h, 0.25) == approx(np.quantile(data, 0.25), rel=0.05)
+
+
+def test_quantile_on_right():
+    h = distogram.Distogram(bin_count=6)
+
+    data = [12.3, 8.2, 100.53, 23.5, 13.98, 200, 200.2, 200.8, 200.4, 200.1]
+    for i in data:
+        h = distogram.update(h, i)
+
+    assert distogram.quantile(h, 0.99) == approx(np.quantile(data, 0.99), rel=0.01)
+    assert distogram.quantile(h, 0.85) == approx(np.quantile(data, 0.85), rel=0.01)
+
+
+def test_normal():
+    # normal = np.random.normal(0,1, 1000)
+    normal = [random.normalvariate(0.0, 1.0) for _ in range(10000)]
+    h = distogram.Distogram(bin_count=64)
+
+    for i in normal:
+        h = distogram.update(h, i)
+
+    assert distogram.quantile(h, 0.5) == approx(np.quantile(normal, 0.5), abs=0.2)
+    assert distogram.quantile(h, 0.95) == approx(np.quantile(normal, 0.95), abs=0.2)
+
+
+def test_quantile_empty():
+    h = distogram.Distogram()
+
+    assert distogram.quantile(h, 0.3) is None
+
+
+def test_quantile_out_of_bouns():
+    h = distogram.Distogram()
+
+    for i in [1, 2, 3, 4, 5, 6, 6.7, 6.1]:
+        h = distogram.update(h, i)
+
+    assert distogram.quantile(h, -0.2) is None
+    assert distogram.quantile(h, 10) is None
diff --git a/opteryx/third_party/distogram/tests/test_stats.py b/opteryx/third_party/distogram/tests/test_stats.py
new file mode 100644
index 000000000..3a4434eab
--- /dev/null
+++ b/opteryx/third_party/distogram/tests/test_stats.py
@@ -0,0 +1,22 @@
+# type:ignore
+import sys
+import os
+
+sys.path.insert(1, os.path.join(sys.path[0], "../.."))
+from pytest import approx
+import distogram
+
+import numpy as np
+import random
+
+
+def test_stats():
+    normal = [random.normalvariate(0.0, 1.0) for _ in range(10000)]
+    h = distogram.Distogram()
+
+    for i in normal:
+        h = distogram.update(h, i)
+
+    assert distogram.mean(h) == approx(np.mean(normal), abs=0.1)
+    assert distogram.variance(h) == approx(np.var(normal), abs=0.1)
+    assert distogram.stddev(h) == approx(np.std(normal), abs=0.1)
diff --git a/opteryx/third_party/distogram/tests/test_update.py b/opteryx/third_party/distogram/tests/test_update.py
new file mode 100644
index 000000000..bec641e8b
--- /dev/null
+++ b/opteryx/third_party/distogram/tests/test_update.py
@@ -0,0 +1,42 @@
+# type:ignore
+import sys
+import os
+
+sys.path.insert(1, os.path.join(sys.path[0], "../.."))
+import pytest
+from pytest import approx
+import distogram
+
+
+def test_update():
+    h = distogram.Distogram(bin_count=3)
+
+    # fill histogram
+    h = distogram.update(h, 23)
+    assert h.bins == [(23, 1)]
+    h = distogram.update(h, 28)
+    assert h.bins == [(23, 1), (28, 1)]
+    h = distogram.update(h, 16)
+    assert h.bins == [(16, 1), (23, 1), (28, 1)]
+
+    # update count on existing value
+    h = distogram.update(h, 23)
+    assert h.bins == [(16, 1), (23, 2), (28, 1)]
+    h = distogram.update(h, 28)
+    assert h.bins == [(16, 1), (23, 2), (28, 2)]
+    h = distogram.update(h, 16)
+    assert h.bins == [(16, 2), (23, 2), (28, 2)]
+
+    # merge values
+    h = distogram.update(h, 26)
+    assert h.bins[0] == (16, 2)
+    assert h.bins[1] == (23, 2)
+    assert h.bins[2][0] == approx(27.33333)
+    assert h.bins[2][1] == 3
+
+
+def test_update_with_invalid_count():
+    h = distogram.Distogram(bin_count=3)
+
+    with pytest.raises(ValueError):
+        distogram.update(h, 23, count=0)