In [1]:
from functools import reduce
from dask import delayed
from ipywidgets import interact, SelectionSlider
from plotly import offline as plotly
import plotly.graph_objs as go
import pandas as pd
import fastparquet
import distributed
import dask
import json
import gzip
import os

In [2]:
# Root directory of the local data; the returns data set is written beneath it.
OUTPUT_DIRECTORY = '/home/mikeokslonger/data_unseen'
# Template for one pair's directory inside the trades data set; fill the
# placeholder with the pair name via BASE_PATH.format(pair).
BASE_PATH = OUTPUT_DIRECTORY + '/trades.parquet/pair={}'

In [3]:
# Initialise plotly's offline notebook mode before any figure is drawn.
plotly.init_notebook_mode(connected=True)
# Start a local Dask cluster and attach a client to it; the client is used
# further down to submit the per-pair write tasks.
cluster = distributed.LocalCluster()
client = distributed.Client(cluster)
# Leave the cluster as the cell's last expression so its widget is displayed.
cluster

VBox(children=(HTML(value='<h2>LocalCluster</h2>'), HBox(children=(HTML(value='\n<div>\n  <style scoped>\n    …

In [4]:
def all_pairs():
    """Return the names of all pairs present in the trades data set.

    The data set directory (the parent of ``BASE_PATH``) holds one
    ``pair=<NAME>`` entry per pair; the ``<NAME>`` parts are returned in
    sorted order.

    Returns:
        list[str]: Pair names, e.g. ``'ADX-USD'``.
    """
    # Derive the directory from BASE_PATH instead of repeating the literal
    # path, and list it in Python rather than through the IPython-only
    # `!ls | grep` capture so the function also works outside a notebook.
    dataset_dir = os.path.dirname(BASE_PATH)
    prefix = 'pair='
    # Only entries that really start with the partition prefix are pairs; a
    # plain substring match would also pick up unrelated names containing "pair".
    return sorted(entry[len(prefix):]
                  for entry in os.listdir(dataset_dir)
                  if entry.startswith(prefix))

def all_parts_for_pair(pair):
    """Return the paths of every parquet part file stored for ``pair``.

    Each sub-directory of the pair's directory is checked for a
    ``part.0.parquet`` file; sub-directories without one are skipped.

    Args:
        pair: Pair name used to fill ``BASE_PATH``, e.g. ``'ADX-USD'``.

    Returns:
        list[str]: Existing ``part.0.parquet`` paths, ordered by sub-directory
        name. Empty if the pair has no directory or no part files.
    """
    pair_dir = BASE_PATH.format(pair)
    # An unknown pair yields no paths rather than an error from os.listdir.
    if not os.path.isdir(pair_dir):
        return []
    # List the requested pair's own sub-directories. Listing a fixed pair here
    # would silently drop every partition that this fixed pair does not have.
    candidates = (os.path.join(pair_dir, part_dir, 'part.0.parquet')
                  for part_dir in sorted(os.listdir(pair_dir)))
    return [path for path in candidates if os.path.exists(path)]

def read_pair(pair, columns=None):
    """Load every part file of ``pair`` into one frame ordered by ``timestamp``.

    Args:
        pair: Pair name; its part files are located with ``all_parts_for_pair``.
        columns: Optional list of columns forwarded to ``pd.read_parquet``;
            ``None`` leaves the selection to ``pd.read_parquet``.

    Returns:
        pandas.DataFrame: The concatenated parts sorted by ``timestamp``, with
        the index reset and a constant ``pair`` column added.
    """
    frames = []
    for part_path in all_parts_for_pair(pair):
        frames.append(pd.read_parquet(part_path, columns=columns))
    ordered = pd.concat(frames).sort_values('timestamp')
    # Replace the per-part index with a fresh one and discard the old values.
    renumbered = ordered.reset_index().drop('index', axis=1)
    return renumbered.assign(pair=pair)

def create_returns_series(pair, write=True):
    """Build the per-row returns columns for ``pair`` and optionally persist them.

    Added columns:
        relative_returns: ``(price[t] - price[t-1]) / price[t]``; the first row
            is NaN because it has no predecessor.
        relative_returns_int16: ``relative_returns`` scaled by 2**14 and stored
            as int16. NaN becomes 0 and values outside the int16 range saturate
            at its bounds.
        normalized_relative_returns: ``(price - mean(price)) / std(price)``.

    Args:
        pair: Pair name, passed to ``read_pair``.
        write: When true (the default) the frame is written to
            ``<OUTPUT_DIRECTORY>/returns.parquet`` partitioned on ``pair`` and
            nothing is returned; when false the frame is returned instead.

    Returns:
        pandas.DataFrame or None: The augmented frame if ``write`` is false,
        otherwise ``None``.
    """
    int16_scale = 2 ** 14
    int16_min, int16_max = -2 ** 15, 2 ** 15 - 1

    df = read_pair(pair)
    # NOTE(review): the price change is divided by the current price, not the
    # previous one as in the usual definition of a return -- confirm intended.
    df['relative_returns'] = (df.price - df.price.shift(1)) / df.price
    # Saturate before casting. A price falling to less than a third of the
    # previous one gives a ratio below -2, which no longer fits int16 after
    # scaling; without the clip the cast stores an unrelated value instead.
    df['relative_returns_int16'] = (
        (df.relative_returns * int16_scale)
        .fillna(0)
        .clip(int16_min, int16_max)
        .astype('int16')
    )
    # NOTE(review): despite its name this column standardises the price, not
    # the relative returns -- confirm which one consumers expect.
    df['normalized_relative_returns'] = (df['price'] - df['price'].mean()) / df.price.std()

    if not write:
        return df

    # NOTE(review): every pair is written into the same dataset directory
    # without append; confirm the dataset-level metadata ends up covering all
    # pairs, especially when several pairs are written concurrently.
    fastparquet.write(f'{OUTPUT_DIRECTORY}/returns.parquet',
                      df, compression='snappy', file_scheme='hive',
                      partition_on=['pair'], write_index=False)

In [5]:
# Discover the available pairs once; the slider widgets and the write job
# further down all reuse this list.
pairs = all_pairs()
@interact(pair=SelectionSlider(options=pairs))
def plot(pair):
    """Plot price and two derived series for ``pair`` on a 500-second grid.

    The first price in each 500-second bin is kept and empty bins are dropped.
    Three traces are drawn against the bin timestamps:

    * ``price``: the sampled price,
    * ``returns``: ``(price[t] - price[t-1]) / price[t]`` between kept bins,
    * ``normalized_returns``: ``(price - mean(price)) / std(price)``, i.e. a
      z-score of the price itself rather than of the returns.

    Args:
        pair: Pair name chosen with the slider.
    """
    # A Timedelta rule selects the same 500-second bins as the '500S' string
    # but does not rely on the upper-case 'S' alias, which newer pandas
    # versions deprecate.
    bin_width = pd.Timedelta(seconds=500)
    prices = read_pair(pair, ['timestamp', 'price']).set_index('timestamp')
    df = prices.resample(bin_width).first().dropna()
    df['returns'] = (df.price - df.price.shift(1)) / df.price
    df['normalized_returns'] = (df['price'] - df['price'].mean()) / df.price.std()
    plotly.iplot([go.Scatter(x=df.index, y=df.price, name='price'),
                  go.Scatter(x=df.index, y=df.returns, name='returns'),
                  go.Scatter(x=df.index, y=df.normalized_returns, name='normalized_returns')])

interactive(children=(SelectionSlider(description='pair', options=('ADX-USD', 'AIR-USD', 'AMM-USD', 'ATB-USD',…

In [7]:
@interact(pair=SelectionSlider(options=pairs))
def show_returns_series(pair):
    """Preview the first 20 rows of the returns frame for the selected pair.

    The frame is computed with ``write=False``, so nothing is written to disk.
    """
    preview = create_returns_series(pair, write=False)
    return preview[:20]

interactive(children=(SelectionSlider(description='pair', options=('ADX-USD', 'AIR-USD', 'AMM-USD', 'ATB-USD',…

In [7]:
### Write Returns Series
# Wrap the writer as a lazy Dask function and build one task per pair; each
# task calls create_returns_series(pair) with `write` left at its default.
# NOTE(review): all tasks write into the same returns.parquet directory --
# confirm that concurrent writes from several workers are safe.
tasks = list(map(delayed(create_returns_series), pairs))
# Submit the tasks through the cluster client and show progress for the futures.
futures = client.compute(tasks)
distributed.progress(futures)

VBox()