# Reductions

Reductions allow us to compute summary statistics and other useful aggregations over our data.

In this set of exercises, we'll analyze a time series of simulated price and volume ticks for a stock.

In [2]:
import numpy as np
import pandas as pd


def us_eastern(cs):
    """Return a US/Eastern time as a nanosecond-resolution ``numpy.datetime64``.

    Parameters
    ----------
    cs : str or datetime-like
        Anything ``pd.Timestamp`` accepts, e.g. ``'2018-05-04 9:30'``.  A
        naive value is read as US/Eastern wall-clock time.

    Returns
    -------
    numpy.datetime64
        The corresponding instant (numpy stores it as UTC) with unit ``ns``.
    """
    stamp = pd.Timestamp(cs, tz='US/Eastern')
    # to_datetime64() keeps the Timestamp's own resolution, and newer pandas
    # infers that resolution from the input (a string such as '9:30' may give
    # seconds).  Code in this file reinterprets the result as an integer
    # number of nanoseconds, so pin the unit explicitly.
    return stamp.to_datetime64().astype('datetime64[ns]')


# Bounds of the simulated session, given as US/Eastern wall-clock times.
open_time, close_time = map(
    us_eastern,
    ('2018-05-04 9:30', '2018-05-04 16:00'),
)


def generate_fake_ticks(seed=42, start=None, stop=None):
    """Simulate price and volume ticks spaced roughly one second apart.

    Parameters
    ----------
    seed : int
        Seed for the ``np.random.RandomState`` that drives every draw.
    start, stop : numpy.datetime64, optional
        Bounds of the simulated session, at any datetime64 resolution.
        They default to the module-level ``open_time`` and ``close_time``.

    Returns
    -------
    timestamps : ndarray of datetime64[ns]
        Tick times, all strictly before ``stop``.
    prices : ndarray of float
        Running sum of the per-tick dollar returns; the first return is
        offset by 100 to set the starting price.
    volumes : ndarray of int64
        Per-tick volumes, one per timestamp.

    Raises
    ------
    ValueError
        If the session is shorter than one whole second.
    """
    ns_per_second = int(1e9)

    # Normalise both bounds to nanoseconds so the integer arithmetic below is
    # correct whatever resolution the bounds were created with.
    start = np.datetime64(open_time if start is None else start, 'ns')
    stop = np.datetime64(close_time if stop is None else stop, 'ns')

    # One tick per whole second of the session.
    n_ticks = int((stop - start).astype('int64') // ns_per_second)
    if n_ticks <= 0:
        raise ValueError('stop must be at least one second after start')

    rand = np.random.RandomState(seed)

    # Force int64: the epoch offset added below does not fit a 32-bit integer.
    time_between_ticks_ns = rand.poisson(ns_per_second, n_ticks).astype('int64')
    time_between_ticks_ns[0] += start.astype('int64')  # starting time
    timestamps = time_between_ticks_ns.cumsum().astype('datetime64[ns]')

    # we might get a sample or two past the end
    timestamps = timestamps[timestamps < stop]

    dollar_returns = (rand.standard_t(3.1, len(timestamps)) / 1000).round(3)
    dollar_returns[0] += 100  # starting price
    prices = dollar_returns.cumsum()

    volumes = rand.exponential(100, len(timestamps)).round().astype('int64')

    return timestamps, prices, volumes


# Simulate the session once with the default seed; the three arrays returned
# have one entry per tick.
timestamps, prices, volumes = generate_fake_ticks(seed=42)

# Compute the mean of the volumes.

In [3]:
np.shape(volumes)

(23400,)

In [4]:
volumes.mean()

101.79940170940171

# Compute the average time between ticks.

In [None]:
np.diff?

# Compute the number of ticks where the price increased and the number of ticks where the price decreased.

# Compute the 25th, 50th, and 75th percentiles of the price differences between consecutive ticks.

# Compute the volume-weighted average price.

# Compute the timestamps where the lowest and highest prices occur.

In [None]:
np.argmin?

# Compute the start timestamp and stop timestamp of the maximum drawdown.

# Compute the total volume for each minute.

In [None]:
# Minute marks 1..389 after the open (389 values; the range stops before 390).
# NOTE(review): a 9:30-16:00 session spans 390 minutes, so confirm whether
# these are meant as interior bucket boundaries or as per-bucket labels.
minute_labels = np.arange(1, 390, dtype='timedelta64[m]') + open_time