Skip to content

Integration with Datashader issues (on_change, layout.image updates) #1207

@pedroallenrevez

Description

@pedroallenrevez

First and foremost, awesome job with the tool, it looks terrific.

Jumping right in on the issue, I'm trying to integrate plotly with datashader, following mostly this article right here .

So, as far as I know there are two solutions to this problem:

  1. You keep updating the image itself (see update_image function below);
  2. You keep updating the data, aka doing the aggregation step again (see update_timeseries function below);

The two solutions produce different problems.
The first solution (updating the image), seems like not all points are displayed, being cutoff at a certain range. You can see it here:

screen shot 2018-09-28 at 17 04 48

As for the second solution (updating the data) the on_change callback is being called repeatedly, without any changes made to the plot whatsoever. This also happens when autoscaling or resetting axes. You can see it here:

screen shot 2018-09-28 at 17 08 50

The plot is being zoomed in bit by bit (without me doing anything) which calls the on_change over and over.

Below you can use the code that I'm using to reproduce this.

Thanks in advance!

import plotly.graph_objs as go
import pandas as pd
import numpy as np
import datashader as ds
from datetime import datetime
import datashader.transfer_functions as tf
import dask.dataframe as dd
from math import floor

global plot_obj
plot_obj = None

global df
df = None

def generate_df(size=100000):
    d = {    
        'dates' : pd.date_range('2015-01-01', periods=size, freq='1min'),
        'unique_id' : np.arange(0, size),
        'ints' : np.random.randint(0, size, size=size),
        'floats' : np.random.randn(size),
        'bools' : np.random.choice([0, 1], size=size),
        'int_nans' : np.random.choice([0, 1, np.nan], size=size),
        'float_nans' : np.random.choice([0.0, 1.0, np.nan], size=size),
        'constant' : 1, 
        'categorical' : np.random.choice([10, 20, 30, 40, 50], size=size) , 
        'categorical_binary' : np.random.choice(['a', 'b'], size=size), 
        #'categorical_nans' : np.random.choice(['a', 'b', np.nan], size=size)
        'categorical_nans' : np.random.choice(['a', 'b', 'c'], size=size)
    }

    df = pd.DataFrame(d)
    # df['hardbools'] = df['bools'] == 1
    df['categorical_nans'] = df['categorical_nans'].replace('c', np.nan)
    # df['hardcategorical_nans'] = df['categorical_nans'].astype('category')
    df['categorical_binary'] = df['categorical_binary'].astype('category')
    df['categorical_nans'] = df['categorical_nans'].astype('category')

    # df = df.set_index('dates')
    df['dates_int'] = df['dates'].astype('int64')
    globals()['df'] = df
    return df

# ===================================DATA==========================
def update_timeseries(layout, x_range, y_range, plot_width, plot_height):
    print(f"On change")
    
    x_range = [
            int(pd.to_datetime(plot_obj.layout.xaxis.range[0]).timestamp()*1000000000), 
            int(pd.to_datetime(plot_obj.layout.xaxis.range[1]).timestamp()*1000000000)
    ]
    y_range = [
            plot_obj.layout.yaxis.range[0],
            plot_obj.layout.yaxis.range[1]]

    plot_width = floor(plot_width)
    plot_height = floor(plot_height)

    cvs = ds.Canvas(x_range=x_range, y_range=y_range, plot_height=plot_height, plot_width=plot_width)

    agg_scatter = cvs.points(df, 'dates_int', 'floats', agg=ds.any())

    pq = agg_scatter.to_pandas().transpose()
    a = pq.stack().reset_index()
    q = a.loc[a[a.columns[2]]==True]

    ew = [datetime.fromtimestamp(item/1000000000) for item in q.dates_int.values]

    with plot_obj.batch_update():
        plot_obj.data[0].x = ew
        plot_obj.data[0].y = q.floats.values
        plot_obj.layout.xaxis.range = (ew[0], ew[-1])
        plot_obj.layout.yaxis.range = (q.floats.min(), q.floats.max())

def datashader_data(size=1000000):
    df = generate_df(size=size)

    x_range=[df.head(1).dates.values[0].astype('int64'), df.tail(1).dates.values[0].astype('int64')]
    y_range=[df.floats.min(), df.floats.max()]
    plot_height=300
    plot_width=300

    cvs = ds.Canvas(x_range=x_range, y_range=y_range, plot_height=plot_height, plot_width=plot_width)

    agg_scatter = cvs.points(df, 'dates_int', 'floats', agg=ds.any())


    agg_scatter = agg_scatter.to_pandas().transpose()
    agg_scatter = agg_scatter.stack().reset_index()
    # select only pixels where data points are present
    agg_scatter = agg_scatter.loc[agg_scatter[agg_scatter.columns[2]]==True]

    # init plot
    d = go.Scatter(
            x=agg_scatter.dates_int.astype('int64').astype('<M8[ns]'), 
            y=agg_scatter.floats)#, mode='markers')
    f = go.FigureWidget(data=[d])

    # add callback
    f.layout.on_change(update_timeseries, 'xaxis.range', 'yaxis.range', 'width', 'height')
    f.layout.dragmode = 'zoom'
    
    globals()['plot_obj'] = f

    return f

# ===============================IMAGE===============================
def gen_ds_image(x_range, y_range, plot_width, plot_height):
    if x_range is None or y_range is None or plot_width is None or plot_height is None:
        return None
    
    cvs = ds.Canvas(x_range=x_range, y_range=y_range, plot_height=plot_height, plot_width=plot_width)
    agg_scatter = cvs.points(df, 'dates_int', 'floats', agg=ds.any())

    img = tf.shade(agg_scatter)
    img = tf.dynspread(img, threshold=0.95, max_px=5, shape='circle')
    
    return img.to_pil()

def update_ds_image(layout, x_range, y_range, plot_width, plot_height):
    print(f"On change")
    img = plot_obj.layout.images[0]
    
    x_range = [
            int(pd.to_datetime(plot_obj.layout.xaxis.range[0]).timestamp()*1000000000), 
            int(pd.to_datetime(plot_obj.layout.xaxis.range[1]).timestamp()*1000000000)]
    y_range = [plot_obj.layout.yaxis.range[0], plot_obj.layout.yaxis.range[1]]

    # Update with batch_update so all updates happen simultaneously
    with plot_obj.batch_update():
        img.x = x_range[0]
        img.y = y_range[1]
        img.sizex = x_range[1] - x_range[0]
        img.sizey = y_range[1] - y_range[0]
        img.source = gen_ds_image(x_range, y_range, plot_width, plot_height)


def datashader_image(size=100000):
    df = generate_df(size=size)

    x_range=[df.head(1).dates.values[0].astype('int64'), df.tail(1).dates.values[0].astype('int64')]
    y_range=[df.floats.min(), df.floats.max()]
    plot_height=600
    plot_width=1200

    initial_img = gen_ds_image(x_range, y_range, plot_width, plot_height)

    # init plot
    f = go.FigureWidget(
        data=[{
            'x': [
                datetime.fromtimestamp(df.head(1).dates.values[0].astype('int64')/1000000000), 
                datetime.fromtimestamp(df.tail(1).dates.values[0].astype('int64')/1000000000)
                ], 
            'y': y_range, 
            'mode': 'markers',
            'marker': {'opacity': 0} # invisible trace to init axes and to support autoresize
        }], 
        layout={'width': plot_width, 'height': plot_height}
    )

    # add image to plot
    f.layout.images = [
            go.layout.Image(
                source = initial_img,  # plotly now performs auto conversion of PIL image to png data URI
                xref = "x",
                yref = "y",
                x = x_range[0],
                y = y_range[1],
                sizex = x_range[1] - x_range[0],
                sizey = y_range[1] - y_range[0],
                sizing = "contain",
                layer = "below")
            ]

    f.layout.on_change(update_ds_image, 'xaxis.range', 'yaxis.range', 'width', 'height')
    f.layout.dragmode = 'zoom'

    globals()['plot_obj'] = f
    return f

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions