In [119]:
from math import sqrt, pi, e
import json
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
import pandas as pd
from random import randint

In [120]:
def print_json(dictionary):
    """Print ``dictionary`` as JSON with sorted keys and a 4-space indent."""
    formatted = json.dumps(dictionary, indent=4, sort_keys=True)
    print(formatted)

In [128]:
# Pick the capture to analyse by leaving exactly one assignment uncommented.
SLIPPAGES_PATH = 'slippages.json'
# SLIPPAGES_PATH = 'slippages_5m.json'
# SLIPPAGES_PATH = 'slippages_10m.json'
# SLIPPAGES_PATH = 'slippages_20m.json'
# SLIPPAGES_PATH = 'slippages_8h.json'

# The context manager closes the file even when parsing raises, which the
# previous open()/close() pair did not guarantee.
with open(SLIPPAGES_PATH, 'r') as f:
    slippages_data = json.load(f)

# The file is expected to provide these three keys; a missing one raises KeyError.
sample_size = slippages_data['sample_size']
bids_slippages = slippages_data['bids']
asks_slippages = slippages_data['asks']

def calculate_mean(l):
  """Return the arithmetic mean of the values in ``l``.

  Raises ZeroDivisionError when ``l`` is empty.
  """
  total = sum(l)
  count = len(l)
  return total / count

In [129]:
def plot_slippages_over_time(time_start, time_sample):
  """Plot bid and ask slippages as two lines over a window of samples.

  Reads the module-level ``bids_slippages`` and ``asks_slippages`` lists and
  shows the figure; nothing is returned.

  Args:
    time_start: Index of the first sample to plot.
    time_sample: Number of samples to plot, starting at ``time_start``.
  """
  time_end = time_start + time_sample

  bids_window = bids_slippages[time_start:time_end]
  asks_window = asks_slippages[time_start:time_end]

  # Slicing silently truncates a window that runs past the end of the data, so
  # the x values are sized from the slices themselves. Building them from
  # range(time_start, time_end) would hand pandas columns of unequal length.
  time = (
    list(range(time_start, time_start + len(bids_window)))
    + list(range(time_start, time_start + len(asks_window)))
  )
  slippages = bids_window + asks_window
  sides = ['Bid'] * len(bids_window) + ['Ask'] * len(asks_window)

  # Long-format frame: one row per (time, side) so plotly can colour by side.
  df = pd.DataFrame(data={'time': time, 'slippage': slippages, 'side': sides})

  fig = px.line(
    df, x='time', y='slippage', color='side', title='Slippages over time',
    labels={'time': 'Time (seconds)', 'slippage': 'Slippage', 'side': 'Side'},
  )

  fig.show()

# Plot every sample in the loaded file.
plot_slippages_over_time(time_start=0, time_sample=sample_size)

In [123]:
def plot_slippages_frequency(time_start, time_sample):
    """Show a 200-bin histogram of bid and ask slippages pooled together.

    Reads ``slippages_data`` from module scope and shows the figure; nothing
    is returned.

    Args:
        time_start: Index of the first sample to include.
        time_sample: Number of samples to include, starting at ``time_start``.
    """
    window = slice(time_start, time_start + time_sample)

    bids_window = slippages_data['bids'][window]
    asks_window = slippages_data['asks'][window]
    bids_count = len(bids_window)
    asks_count = len(asks_window)

    df = pd.DataFrame(data={
        'slippage': bids_window + asks_window,
        # Each sample gets a frequency of 1; the histogram is given this column as y.
        'frequency': [1] * (bids_count + asks_count),
        # Kept in the frame, although the histogram below does not colour by it.
        'side': ['Bid'] * bids_count + ['Ask'] * asks_count,
    })

    fig = px.histogram(
        df, x='slippage', y='frequency', nbins=200,
        labels={'frequency': 'Frequency', 'slippage': 'Slippage'},
        title="Slippages' frequency",
    )

    fig.update_layout(yaxis_title="Frequency")

    fig.show()

# Histogram over every sample in the loaded file.
plot_slippages_frequency(time_start=0, time_sample=sample_size)

In [124]:
def calculate_statistics(slippages, z=3.89):
  """Summarise a sequence of slippages.

  Args:
    slippages: Non-empty sequence of numbers.
    z: Number of standard deviations the reported interval extends on each
      side of the mean. The default, 3.89, is the value this function
      previously hard-coded for 99.99% confidence; 2.58 was the value noted
      for 99% confidence.

  Returns:
    dict with the keys ``mean``, ``variance``, ``standard_deviation``,
    ``sample_size`` and ``confidence_interval`` (a two-element list
    ``[lower, upper]``).

  Raises:
    ZeroDivisionError: If ``slippages`` is empty.
  """
  sample_size = len(slippages)
  mean = sum(slippages)/sample_size

  # Population variance: the squared deviations are divided by n, not n - 1.
  variance = 0
  for slippage in slippages:
    variance += (slippage - mean)**2
  variance /= sample_size

  standard_deviation = sqrt(variance)

  # The interval is mean +/- z standard deviations, i.e. it describes the
  # spread of individual samples. An interval for the mean itself would
  # divide the standard deviation by sqrt(sample_size).
  margin = z*standard_deviation
  confidence_interval = [mean - margin, mean + margin]

  return {
    'mean': mean,
    'variance': variance,
    'standard_deviation': standard_deviation,
    'sample_size': sample_size,
    'confidence_interval': confidence_interval
  }

# Summarise the bids and the asks over the whole data set, then print both reports.
bids_statistics, asks_statistics = (
    calculate_statistics(series) for series in (bids_slippages, asks_slippages)
)

print_json(bids_statistics)
print()
print_json(asks_statistics)

{
    "confidence_interval": [
        -4.224110384117059,
        4.228301331760053
    ],
    "mean": 0.0020954738214972756,
    "sample_size": 28795,
    "standard_deviation": 1.08642824111531,
    "variance": 1.1803263230929057
}

{
    "confidence_interval": [
        -4.260480624974034,
        4.264821536753856
    ],
    "mean": 0.002170455889911493,
    "sample_size": 28795,
    "standard_deviation": 1.0957971930241504,
    "variance": 1.2007714882396072
}


In [125]:
# Fraction of all samples that fall at or above the lower confidence bound computed from the whole data set

# Statistics over the whole series; only the lower bound of each interval is used below.
bids_statistics = calculate_statistics(bids_slippages)
asks_statistics = calculate_statistics(asks_slippages)

bids_confidence_interval = bids_statistics['confidence_interval']
asks_confidence_interval = asks_statistics['confidence_interval']

# Counters for samples that are at or above the lower bound.
bids_predictions = 0
asks_predictions = 0

for time in range(sample_size):
    bids_sample, asks_sample = bids_slippages[time], asks_slippages[time]

    # A comparison yields a bool, which adds to the counter as 0 or 1.
    bids_predictions += bids_confidence_interval[0] <= bids_sample
    asks_predictions += asks_confidence_interval[0] <= asks_sample

print(bids_predictions/sample_size, asks_predictions/sample_size)

0.9924292411877061 0.9915957631533252


In [126]:
# Check whether the slippage at time_end is at or above the lower confidence bound computed from the preceding window [time_start, time_end)

def calculate_prediction(time_start, time_end):
    """Test the samples at ``time_end`` against the preceding window.

    Statistics are computed from the slices ``[time_start:time_end]`` of the
    module-level ``bids_slippages`` and ``asks_slippages``; the slices exclude
    index ``time_end``, whose sample is then compared with the lower bound of
    the resulting interval.

    Returns:
        Tuple ``(bid, ask)``; each entry is 1 when the sample at ``time_end``
        is at or above the lower bound for its side, otherwise 0.
    """
    windows = (
        bids_slippages[time_start:time_end],
        asks_slippages[time_start:time_end],
    )
    lower_bounds = [
        calculate_statistics(window)['confidence_interval'][0]
        for window in windows
    ]

    observed = (bids_slippages[time_end], asks_slippages[time_end])

    return tuple(
        1 if bound <= sample else 0
        for bound, sample in zip(lower_bounds, observed)
    )

# Use every sample but the last as the window and test the last sample against it.
print(calculate_prediction(time_start=0, time_end=sample_size - 1))

(1, 1)


In [127]:
# Calculate many predictions for a given time sample

def calculate_predictions(attempts, time_sample):
    """Average ``calculate_prediction`` over randomly placed windows.

    Args:
        attempts: Number of random windows to evaluate.
        time_sample: Length of each window, in samples.

    Returns:
        Tuple ``(bids, asks)`` with the fraction of attempts in which
        ``calculate_prediction`` returned 1 for that side.
    """
    # Largest start for which the tested index, start + time_sample, is still
    # at most sample_size - 1.
    last_start = sample_size - time_sample - 1

    outcomes = [
        calculate_prediction(start, start + time_sample)
        for start in (randint(0, last_start) for _ in range(attempts))
    ]

    bid_hits = sum(bid for bid, _ in outcomes)
    ask_hits = sum(ask for _, ask in outcomes)

    return (bid_hits/attempts, ask_hits/attempts)

def plot_predictions_by_time_sample(attempts, time_samples):
    """Plot the bid and ask prediction rates against window length.

    Calls ``calculate_predictions(attempts, time_sample)`` once per entry of
    ``time_samples`` and shows the two resulting series as lines; nothing is
    returned.

    Args:
        attempts: Number of random windows evaluated per window length.
        time_samples: Window lengths to evaluate; concatenated with ``+``
            below to build the x column.
    """
    results = [
        calculate_predictions(attempts, time_sample)
        for time_sample in time_samples
    ]
    bid_rates = [bid for bid, _ in results]
    ask_rates = [ask for _, ask in results]

    # Long-format frame: the window lengths appear twice, once per side.
    df = pd.DataFrame(data={
        'time': time_samples + time_samples,
        'prediction': bid_rates + ask_rates,
        'side': ['Bid'] * len(bid_rates) + ['Ask'] * len(ask_rates),
    })

    fig = px.line(
        df, x='time', y='prediction', color='side',
        title='Predictions by Time Sample',
        labels={'time': 'Time (seconds)', 'prediction': 'Prediction', 'side': 'Side'},
    )

    fig.show()

# 1000 random windows for each window length from 1 to 119 samples.
attempts = 1000
time_samples = list(range(1, 120))

plot_predictions_by_time_sample(attempts, time_samples)