In [154]:
import pandas as pd
import numpy as np
import glob

# Seed NumPy's global RNG: get_df_glob draws its coordinate jitter with
# np.random.normal, so this fixes the noise sequence for a given row order.
np.random.seed(491)

def get_df_glob(glob_pattern, random_std=0.5):
    """Concatenate and preprocess data from CSVs matching the given glob pattern.

    Every matching file is read as a tab-separated CSV and the frames are
    stacked row-wise (the per-file row labels are kept, so the resulting index
    may contain duplicate labels). The shape of the stacked frame is printed
    before any filtering.

    Rows with a missing ``Lest_X`` or ``Lest_Y`` are dropped. Both columns are
    expected to hold ``"<min>-<max>"`` bucket strings; for each of them three
    columns are added:

    * ``<col>_min`` / ``<col>_max`` -- the bucket bounds as integers.
    * ``<col>_rand`` -- a jittered coordinate: the bucket midpoint plus Gaussian
      noise with standard deviation ``random_std`` times the bucket width
      (so jittered values can fall outside the bucket).

    Parameters
    ----------
    glob_pattern : str
        Pattern passed to ``glob.glob``.
    random_std : float, optional
        Standard deviation of the jitter, in units of bucket width.

    Returns
    -------
    pandas.DataFrame
        The filtered frame with the six extra columns.

    Raises
    ------
    ValueError
        If no file matches ``glob_pattern``.
    """
    # glob.glob returns paths in arbitrary order; sorting keeps the row order
    # (and hence the jitter drawn from the seeded global RNG) reproducible.
    filepaths = sorted(glob.glob(glob_pattern))
    if not filepaths:
        raise ValueError("No files match glob pattern: {!r}".format(glob_pattern))

    df = pd.concat([pd.read_csv(filepath, sep="\t") for filepath in filepaths], axis=0)
    print(df.shape)

    # Remove missing. The explicit copy makes the column assignments below
    # operate on an independent frame rather than on a slice of the original.
    df = df[pd.notnull(df["Lest_X"]) & pd.notnull(df["Lest_Y"])].copy()

    # Extract coordinate buckets as integers
    re_coordbucket = r"(\d+)\-(\d+)"
    for column in ("Lest_X", "Lest_Y"):
        bounds = df[column].str.extract(re_coordbucket, expand=False)
        low = bounds.iloc[:, 0].astype(int)
        high = bounds.iloc[:, 1].astype(int)
        df[column + "_min"] = low
        df[column + "_max"] = high
        # Bucket midpoint (0.5) plus Gaussian noise, scaled by the bucket width.
        noise = np.random.normal(size=(df.shape[0],), scale=random_std)
        df[column + "_rand"] = low + (0.5 + noise) * (high - low)

    return df

In [155]:
from bokeh.plotting import figure, output_notebook
from datashader.bokeh_ext import InteractiveImage
from datashader.colors import colormap_select
from datashader.utils import export_image
from colorcet import fire
import datashader as ds
import datashader.transfer_functions as tf
from functools import partial



def plot_heatmap(df, jitter=False, height=2000, background="black", filename="heatmap.png"):
    """Render the coordinate columns of ``df`` as a datashader heatmap and save it.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Lest_X_min``, ``Lest_X_max``, ``Lest_Y_min`` and
        ``Lest_Y_max``; additionally ``Lest_X_rand`` and ``Lest_Y_rand`` when
        ``jitter`` is true.
    jitter : bool
        Plot the ``*_rand`` columns instead of the ``*_min`` columns.
    height : int
        Image height in pixels. The width is derived from the data extent.
    background : str
        Background colour of the image. The colormap is reversed for any
        value other than ``"black"``.
    filename : str
        Path the rendered image is saved to.

    Returns
    -------
    None
        The image is written to ``filename``.
    """
    cm = partial(colormap_select, reverse=(background != "black"))

    # Lest_Y goes on the horizontal axis and Lest_X on the vertical one, so
    # width / height is the Y extent over the X extent.
    y_extent = np.max(df["Lest_Y_max"]) - np.min(df["Lest_Y_min"])
    x_extent = np.max(df["Lest_X_max"]) - np.min(df["Lest_X_min"])
    plot_width = int(y_extent / x_extent * height)
    cvs = ds.Canvas(plot_width=plot_width, plot_height=height)

    suffix = "rand" if jitter else "min"
    agg = cvs.points(df, "Lest_Y_" + suffix, "Lest_X_" + suffix)

    img = tf.shade(agg, cmap=cm(fire, 0.5), how='eq_hist')
    # Use the requested background; it was previously hard-coded to "black",
    # which contradicted the colormap reversal chosen above.
    img = tf.set_background(img, background)

    img.to_pil().save(filename)

In [156]:
# Load the three datasets; the generator is consumed left to right, so the
# files are read (and their shapes printed) in the order the patterns appear.
df_avalik, df_vara, df_liiklus = (
    get_df_glob(pattern)
    for pattern in (
        "data/avalik_*.csv",
        "data/vara*.csv",
        "data/liiklusjarelevalve*.csv",
    )
)

(71799, 18)
(161327, 27)
(637530, 26)


In [157]:
# Source frames keyed by their category label, in concatenation order.
frames_by_cat = {"avalik": df_avalik, "vara": df_vara, "liiklus": df_liiklus}

df_all = pd.concat(tuple(frames_by_cat.values()))
# Label every row with the frame it came from. A Categorical is assigned by
# position; wrapping the labels in a pd.Series would instead align them on the
# frame's index labels, which repeat across the concatenated frames and would
# therefore attach labels by row number rather than by source.
df_all["cat"] = pd.Categorical(
    np.repeat(list(frames_by_cat), [frame.shape[0] for frame in frames_by_cat.values()])
)

jitter = False
height = 1200
# One heatmap per source frame, then one for the combined data.
for cat_name, frame in list(frames_by_cat.items()) + [("all", df_all)]:
    plot_heatmap(frame, jitter=jitter, filename="heat_{}.png".format(cat_name), height=height)