In [1]:
from jp_doodle import dual_canvas
from IPython.display import display
import numpy as np

In [64]:
def floatarray(seq):
    """Return ``seq`` converted to a NumPy array of floating point values.

    Parameters
    ----------
    seq : sequence of numbers (or array-like)
        The values to convert.

    Returns
    -------
    numpy.ndarray
        A new array built from ``seq`` with dtype ``float`` (float64).
    """
    # ``np.float`` was only an alias for the builtin ``float``; it was
    # deprecated in NumPy 1.20 and removed in NumPy 1.24, so use the builtin.
    return np.array(seq, dtype=float)

class BoxPlot:
    """A box plot glyph drawn along an arbitrary direction on a canvas frame.

    A data value ``v`` is placed at ``origin + dxdy * v``.  The plot consists
    of a thin whisker from the minimum to the 25% quartile, a thick box from
    the 25% to the 75% quartile, a short thicker tick centred on the median,
    and a thin whisker from the 75% quartile to the maximum.  Optional
    ``dots`` are drawn as unfilled circles on a line offset perpendicular to
    the plot direction.
    """

    def __init__(
        self,
        minimum,
        maximum,
        quartiles,
        xy_position=(0,0),
        dxdy=(0,1),
        dots=None,
        color="black",
        thin_line=1,
        thick_line=5,
        thicker_line=15,
        dmedian=0.01,
        ddots=0.05,
        ):
        """Store the statistics and styling of the plot.

        minimum, maximum -- extreme values (converted to ``float``).
        quartiles        -- exactly three values: 25%, 50% (median) and 75%.
        xy_position      -- origin used by ``draw`` when no ``at_xy`` is given.
        dxdy             -- direction vector along which values are laid out.
        dots             -- optional iterable of values drawn as circles.
        color            -- color passed to every line and circle.
        thin_line        -- line width of the two whiskers.
        thick_line       -- line width of the box; dot radius is half of it.
        thicker_line     -- line width of the median tick.
        dmedian          -- half-length of the median tick, as a fraction of
                            ``maximum - minimum``.
        ddots            -- perpendicular offset of the dots, as a fraction
                            of ``maximum - minimum``.
        """
        self.quartiles = floatarray(quartiles)
        self.minimum = float(minimum)
        self.maximum = float(maximum)
        # Unpacking enforces that exactly three quartiles were supplied.
        self.q25, self.q50, self.q75 = self.quartiles
        self.xy = floatarray(xy_position)
        self.dxdy = floatarray(dxdy)
        self.color = color
        self.thin, self.thick, self.thicker = thin_line, thick_line, thicker_line
        self.dmedian = dmedian
        self.dots = dots
        self.ddots = ddots

    def draw(self, on_frame, at_xy=None):
        """Draw the plot on ``on_frame`` with its zero value at ``at_xy``.

        ``on_frame`` must provide ``line(x0, y0, x1, y1, color=, lineWidth=)``
        and ``circle(x, y, r=, color=, fill=)``.  When ``at_xy`` is omitted
        the position given to the constructor is used.
        """
        origin = floatarray(self.xy) if at_xy is None else at_xy
        direction = self.dxdy
        value_range = self.maximum - self.minimum

        def place(value):
            # Map a data value to its canvas position.
            return origin + direction * value

        median_at = place(self.q50)
        # Vector from the median to either end of the median tick.
        half_tick = direction * (value_range * self.dmedian)
        segments = (
            (self.thin, place(self.minimum), place(self.q25)),
            (self.thick, place(self.q25), place(self.q75)),
            (self.thicker, median_at - half_tick, median_at + half_tick),
            (self.thin, place(self.q75), place(self.maximum)),
        )
        for (width, start, end) in segments:
            (x_start, y_start) = start
            (x_end, y_end) = end
            on_frame.line(x_start, y_start, x_end, y_end, color=self.color, lineWidth=width)

        if self.dots is None:
            return
        (along_x, along_y) = direction
        # Rotate the direction by 90 degrees to shift the dots off the plot.
        normal = floatarray([-along_y, along_x])
        dot_base = origin + (value_range * self.ddots) * normal
        for value in self.dots:
            center = dot_base + value * direction
            on_frame.circle(center[0], center[1], r=self.thick * 0.5, color=self.color, fill=False)

In [65]:
# Three sample plots: along +y with dots, along +x, and along the diagonal.
B = BoxPlot(minimum=10, maximum=90, quartiles=(30, 40, 70), dots=[10,22,44,55,77,90])
B2 = BoxPlot(minimum=20, maximum=80, quartiles=(30, 65, 70), dxdy=(1,0), color="green")
B3 = BoxPlot(minimum=14, maximum=95, quartiles=(20, 45, 70), dxdy=(1,1), color="blue")

In [66]:
# Create the swatch (pixels=300, model_height=120) that the sample plots are drawn on.
swatch = dual_canvas.swatch(pixels=300, model_height=120)

DualCanvasWidget(status='deferring flush until render')

In [67]:
# Render the three sample plots, each at its own default position.
for box_plot in (B, B2, B3):
    box_plot.draw(swatch)

# Pink reference axes anchored at (-10, -10).
swatch.lower_left_axes(
    x_anchor=-10,
    y_anchor=-10,
    color="pink",
    min_y=-20,
    min_x=-20,
    max_x=100,
    max_y=100,
)

swatch.fit(margin=10)

In [68]:
def random_box_plot(color):
    """Build a ``BoxPlot`` of the given color from random statistics.

    Five uniform random values in [0, 100) are sorted: the smallest and the
    largest become the minimum and maximum, and the middle three become the
    quartiles.  Eight further random values in [0, 100) are attached as dots.
    """
    summary = np.sort(np.random.random(5) * 100)
    scatter = np.random.random(8) * 100
    low, high = summary[0], summary[-1]
    return BoxPlot(low, high, summary[1:-1], color=color, dots=scatter)
    

In [69]:
# Fresh swatch holding a row of ten random box plots.
swatch = dual_canvas.swatch(pixels=300, model_height=120)

swatch.lower_left_axes(
    x_anchor=-10,
    y_anchor=-10,
    color="pink",
    min_y=-20,
    min_x=-20,
    max_x=100,
    max_y=100,
)

# One green plot every 10 model units along x, all starting at y = 0.
for (i, x) in enumerate(range(0, 100, 10)):
    b = random_box_plot("green")
    b.draw(swatch, at_xy=(x, 0))

swatch.fit(margin=10)


DualCanvasWidget(status='deferring flush until render')

In [70]:
# Layout constants for the three-category grid of box plots drawn in the next cell.

# Offset between neighbouring plots inside one group (one plot per age label).
dx0 = floatarray([20, 0])
x0Labels = "man woman teen infant".split()
# Plot colors, looked up by the index of the age label.
colors = "red green blue cyan magenta".split()

# Offset between groups: the width of one group plus two empty plot slots.
dx1 = dx0 * (len(x0Labels) + 2)
x1Labels = "NY NJ CA TX".split()

# Offset between rows of the grid (one row per food label).
dy = floatarray([0, 120])
yLabels = "meat vegetables fruit".split()

In [71]:
# Draw a grid of random box plots: one row per food (yLabels), one group per
# state (x1Labels), and one colored plot per age (x0Labels) inside each group.
swatch = dual_canvas.swatch(pixels=800, model_height=1200)

# The age legend sits one row step above the last row of plots.
guide_origin = dy * (len(yLabels))

swatch.text(guide_origin[0], guide_origin[1], "AGE", color="white", background="black")
swatch.text(-40, 0, "FOOD", degrees=90, color="white", background="black")
swatch.text(0, -40, "STATE", color="white", background="black")

# Axis for the 0..100 value range, positioned one group step past the last group.
axis_x = dx1 * len(x1Labels)
swatch.right_axis(min_value=0, max_value=100, axis_origin=dict(x=axis_x[0], y=axis_x[1]))
qlabel_x = axis_x + 4*dx0

swatch.text(qlabel_x[0], qlabel_x[1], "AMOUNT", color="white", background="black", degrees=90)

# Age legend: each label is written in the color used for that age's plots.
for (ix0, x0Label) in enumerate(x0Labels):
    x0 = dx0 * ix0
    p = guide_origin + x0
    color = colors[ix0]
    swatch.text(p[0], p[1]+20, x0Label, degrees=90, color=color)

# State labels do not depend on the row, so draw each one exactly once here
# instead of repeating the identical text for every food row.
for (ix1, x1Label) in enumerate(x1Labels):
    x1 = ix1 * dx1
    swatch.text(x1[0], -20, x1Label)

for (iy, yLabel) in enumerate(yLabels):
    y1 = iy * dy
    swatch.text(-20, y1[1], yLabel, degrees=90)
    for (ix1, x1Label) in enumerate(x1Labels):
        x1 = ix1 * dx1
        for (ix0, x0Label) in enumerate(x0Labels):
            color = colors[ix0]
            x0 = dx0 * ix0
            # Plot position = row offset + group offset + offset inside the group.
            p = y1 + x1 + x0
            b = random_box_plot(color)
            b.draw(swatch, at_xy=p)

swatch.fit(margin=20)

DualCanvasWidget(status='deferring flush until render')

## Goal: write a plot function that generates a figure like the one above, e.g.:

```
box_plot_3(data, cat1="age", cat2="food", cat3="state", quant="amount")
```

# Example dataset

In [None]:
# List the contents of the dataset directory.
%ls student_performance/

In [None]:
# Path of the semicolon-delimited CSV file read in the following cell.
fn = "student_performance/student-por.csv"
from jp_doodle import data_tables
# NOTE(review): presumably widens the notebook page so wide tables fit -- confirm in jp_doodle.
data_tables.widen_notebook()

In [None]:
import csv

# Read the semicolon-delimited file: the first row holds the column headers,
# every remaining row goes into ``body``.  The ``with`` block closes the file
# as soon as all rows are read, and newline="" is the mode the csv module
# documents for files passed to csv.reader.
with open(fn, newline="") as f:
    reader = csv.reader(f, delimiter=";")
    headers = next(reader)
    body = list(reader)
data_tables.Table1(headers, body)

In [None]:
# Print the text file stored next to the data; ``with`` makes sure the file
# handle is closed instead of being left open until garbage collection.
with open("student_performance/student.txt") as description_file:
    print(description_file.read())

In [None]:
# Collect the set of distinct values seen in each column, keyed by header.
values = dict((h, set()) for h in headers)
for row in body:
    # zip stops at the shorter of headers/row, so short rows are tolerated.
    for (h, v) in zip(headers, row):
        column_values = values[h]
        column_values.add(v)

In [None]:
values