<a href="https://colab.research.google.com/github/pathwaycom/pathway-examples/blob/main/documentation/from_jupyter_to_deploy/part1_jupyter_exploration.ipynb" target="_parent"><img src="https://pathway.com/assets/colab-badge.svg" alt="Run In Colab" class="inline"/></a>

# Part 1: Static data exploration in Jupyter

This notebook accompanies Part 1 of the tutorial [From interactive data exploration to deployment](https://pathway.com/developers/user-guide/from-jupyter-to-deploy/#part-1-static-data-exploration-in-jupyter).

## Installing dependencies

Uncomment the cell below to install Pathway into a Python 3.10+ Linux runtime.

> **If you are running in Google Colab, please run all cells of the Colab notebook (Ctrl+F9)**, disregarding the 'not authored by Google' warning.
> 
> **The installation and loading time is less than 1 minute**.

In [None]:
# %%capture --no-display
# !pip install pathway

In [None]:
# Download CSV file
!wget -nc https://gist.githubusercontent.com/janchorowski/e351af72ecd8d206a34763a428826ab7/raw/ticker.csv

## Loading data

In [None]:
import datetime

import pathway as pw

# Name of the CSV file to load, relative to the working directory.
fname = "ticker.csv"
# Build the table schema from the CSV file itself instead of declaring it by hand.
schema = pw.schema_from_csv(fname)
# mode="static" requests a one-off read of the file as a fixed dataset.
data = pw.io.csv.read(fname, schema=schema, mode="static")
# Bare last expression: the table becomes the cell's displayed output.
data

In [None]:
# `t` holds Unix timestamps in milliseconds. Convert them with Pathway's
# built-in converter: it produces a timezone-naive datetime that does not
# depend on the timezone of the machine running the notebook, unlike
# `datetime.datetime.fromtimestamp`, which converts to local time.
# NOTE: this rebinds `data`, so re-run the loading cell before re-running this
# one (the conversion expects the original numeric `t` column).
data = data.with_columns(t=data.t.dt.from_timestamp(unit="ms"))
data

In [None]:
import bokeh.plotting

def vwap_history(src):
    """Build a Bokeh line chart of the ``vwap`` column against ``t``.

    Args:
        src: Bokeh data source handed to the line glyph; it must expose
            the ``t`` and ``vwap`` columns.

    Returns:
        The configured Bokeh figure (600 wide, 400 high, datetime x-axis).
    """
    chart = bokeh.plotting.figure(
        title="Volume-weighted average price",
        x_axis_type="datetime",
        width=600,
        height=400,
    )
    chart.line(x="t", y="vwap", source=src)
    return chart

In [None]:
# Render the table with the `vwap_history` plotting function, using `t` as the
# sorting column.
data.plot(vwap_history, sorting_col="t")

## Designing the algorithm

In [None]:
# Per-ticker Bollinger statistics over 20-minute sliding windows that advance
# in one-minute hops. Each output row is stamped with the end of its window.
minute_20_stats = (
    data.windowby(
        pw.this.t,
        window=pw.temporal.sliding(
            hop=datetime.timedelta(minutes=1),
            duration=datetime.timedelta(minutes=20)
        ),
        instance=pw.this.ticker
    )
    .reduce(
        ticker=pw.this._pw_instance,
        t=pw.this._pw_window_end,
        volume=pw.reducers.sum(pw.this.volume),
        # Volume-weighted sums of the price and of the squared price; they are
        # normalized by the total volume in the steps below.
        transact_total=pw.reducers.sum(pw.this.volume * pw.this.vwap),
        transact_total2=pw.reducers.sum(pw.this.volume * pw.this.vwap**2)
    )
    .with_columns(
        vwap=pw.this.transact_total / pw.this.volume
    )
    .with_columns(
        # Volume-weighted variance is E[p^2] - E[p]^2. When the prices in a
        # window are (nearly) identical, floating-point rounding can push that
        # difference slightly below zero, which has no real square root.
        # Clamp it at zero so the standard deviation and the bands stay valid.
        vwstd=pw.if_else(
            pw.this.transact_total2 / pw.this.volume > pw.this.vwap**2,
            pw.this.transact_total2 / pw.this.volume - pw.this.vwap**2,
            0.0,
        )
        ** 0.5
    )
    .with_columns(
        # Bands sit two standard deviations above and below the window VWAP.
        bollinger_upper=pw.this.vwap + 2 * pw.this.vwstd,
        bollinger_lower=pw.this.vwap - 2 * pw.this.vwstd
    )
)
minute_20_stats

In [None]:
# Step 1: bucket trades per ticker into non-overlapping one-minute windows and
# total up the traded volume and the volume-weighted price within each bucket.
minute_1_stats = data.windowby(
    pw.this.t,
    window=pw.temporal.tumbling(duration=datetime.timedelta(minutes=1)),
    instance=pw.this.ticker,
).reduce(
    ticker=pw.this._pw_instance,
    t=pw.this._pw_window_end,
    volume=pw.reducers.sum(pw.this.volume),
    transact_total=pw.reducers.sum(pw.this.volume * pw.this.vwap),
)

# Step 2: normalize by the bucket volume to obtain the one-minute VWAP.
minute_1_stats = minute_1_stats.with_columns(
    vwap=pw.this.transact_total / pw.this.volume,
)
minute_1_stats

In [None]:
# Pair each one-minute row with the 20-minute row that has the same window end
# and the same ticker, keeping the one-minute columns plus the two bands.
joint_stats = minute_1_stats.join(
    minute_20_stats,
    pw.left.t == pw.right.t,
    pw.left.ticker == pw.right.ticker,
).select(
    *pw.left,
    bollinger_lower=pw.right.bollinger_lower,
    bollinger_upper=pw.right.bollinger_upper,
)

# A row is an alert when its volume exceeds 10000 and its VWAP lies outside
# the band on either side.
joint_stats = joint_stats.with_columns(
    is_alert=(
        (pw.this.volume > 10000)
        & (
            (pw.this.vwap > pw.this.bollinger_upper)
            | (pw.this.vwap < pw.this.bollinger_lower)
        )
    ),
)

# Alerts above the upper band become "sell", the remaining alerts "buy";
# every non-alert row is labelled "hodl".
joint_stats = joint_stats.with_columns(
    action=pw.if_else(
        pw.this.is_alert,
        pw.if_else(pw.this.vwap > pw.this.bollinger_upper, "sell", "buy"),
        "hodl",
    ),
)
joint_stats

In [None]:
# Keep only the rows flagged as alerts ...
alerts = joint_stats.filter(pw.this.is_alert)
# ... and trim them down to the columns needed to act on an alert.
alerts = alerts.select(
    pw.this.ticker,
    pw.this.t,
    pw.this.vwap,
    pw.this.action,
)
alerts

## Plotting Bollinger Bands

In [None]:
import bokeh.models

def stats_plotter(src):
    """Build a Bokeh chart of the VWAP line, the Bollinger band and action markers.

    Args:
        src: Bokeh data source shared by all glyphs; it must expose the
            ``t``, ``vwap``, ``bollinger_lower``, ``bollinger_upper`` and
            ``action`` columns.

    Returns:
        The configured Bokeh figure (600 wide, 400 high, datetime x-axis).
    """
    # Marker colour per action; the "hodl" colour has a zero alpha channel,
    # so those markers are fully transparent.
    action_colors = {
        "buy": "#00ff00",
        "sell": "#ff0000",
        "hodl": "#00000000",
    }
    action_mapper = bokeh.models.CategoricalColorMapper(
        factors=list(action_colors),
        palette=tuple(action_colors.values()),
    )

    chart = bokeh.plotting.figure(
        title="20 minutes Bollinger bands with last 1 minute average",
        x_axis_type="datetime",
        width=600,
        height=400,
    )

    # VWAP line first, then the shaded band between the two Bollinger limits.
    chart.line(x="t", y="vwap", source=src)
    chart.add_layout(
        bokeh.models.Band(
            base="t",
            lower="bollinger_lower",
            upper="bollinger_upper",
            source=src,
            fill_alpha=0.3,
            fill_color="gray",
            line_color="black",
        )
    )

    # One marker per row, coloured through the categorical mapper on `action`.
    chart.scatter(
        x="t",
        y="vwap",
        source=src,
        marker="circle",
        size=10,
        color={"field": "action", "transform": action_mapper},
    )

    return chart

# Render the joined statistics with `stats_plotter`, using `t` as the sorting
# column.
joint_stats.plot(stats_plotter, sorting_col="t")