# coinbase API Getting step one in pipeline, order book digest

the steps for acquring order book data are demonstrated in this notebook. These are API calls from coinbase's websocket API, with endpoint at [wss://advanced-trade-ws.coinbase.com](wss://advanced-trade-ws.coinbase.com), and detailed in documentation [here](https://docs.cloud.coinbase.com/advanced-trade-api/docs/ws-overview)

In [2]:
import csv
import cbpro
import numpy as np
import pandas as pd
from scipy.stats import skew
public_client = cbpro.PublicClient()
book = public_client.get_product_order_book('AVAX-USD', level=3)

In [166]:
RANGE = 0.025

In [176]:
da = book['asks']
df_asks = pd.DataFrame(da, columns=['price','qty','id'])
# df_asks.head(2)

In [177]:
db = book['bids']
df_bids = pd.DataFrame(db, columns=['price','qty','id'])

In [191]:
df_asks.columns

Index(['price', 'qty', 'id'], dtype='object')

In [202]:
def sum_qty_asks(df):
    df['price'] = df['price'].astype(float)
    df['qty'] = df['qty'].astype(float)
    top_price = float(df['price'].min())
    threshold = top_price * RANGE
    return df[df['price'] >= top_price - threshold]['qty'].sum()

In [203]:
def sum_qty_bids(df):
    df['price'] = df['price'].astype(float)
    df['qty'] = df['qty'].astype(float)
    top_price = float(df['price'].max())
    threshold = top_price * RANGE
    return df[df['price'] >= top_price - threshold]['qty'].sum()

In [204]:
tav = sum_qty_asks(df_asks)

In [205]:
tbv = sum_qty_bids(df_bids) 

In [206]:
def sum_bids_cap(df):
    df['price'] = df['price'].astype(float)
    df['qty'] = df['qty'].astype(float)
    top_price = float(df['price'].max())
    threshold = top_price * RANGE
    df['sum']= df['price'] * df['qty']
    caps = df[df['price'] >= top_price - threshold]['sum'].sum()
    return caps

In [207]:
def sum_asks_cap(df):
    df['price'] = df['price'].astype(float)
    df['qty'] = df['qty'].astype(float)
    top_price = float(df['price'].min())
    threshold = top_price * RANGE
    df['sum']= df['price'] * df['qty']
    caps = df[df['price'] >= top_price - threshold]['sum'].sum()
    return caps

In [208]:
bc = sum_bids_cap(df_bids)
bc

500717.5435899999

In [210]:
ac = sum_asks_cap(df_asks)
ac

12882208.18142

In [215]:
mp = df_asks.iloc[0]
# dfbids.iloc[0]

## Summarize the LOB sample statistics
utilize a dictionary, pushed to .csv. 

Typically, the update takes place on a live .csv, continuously updated on a write basis, over  a 24 hour period, every 5 seconds or so.

In [217]:
summary_dictionary = {"mp":mp,
                      "tav":tav,
                      "tbv":tbv,
                      "bc":bc,
                      "ac":ac}
summary_dictionary

{'mp': price                                   10.81
 qty                                      10.8
 id       6a6aa3bb-f264-4a46-aff0-89f1e0cbe157
 sum                                   116.748
 Name: 0, dtype: object,
 'tav': 259206.037,
 'tbv': 46662.55700000001,
 'bc': 500717.5435899999,
 'ac': 12882208.18142}

## Complete assembly

In [1]:
# #part two:
# skewness = skew(bidDist)
# print("skew segment:")
# if skewness > 1:
#     print(f"The distribution {skewness} is highly skewed right.")
# elif skewness < -1:
#     print(f"The distribution {skewness} is highly skewed left.")
# elif skewness > -1 and skewness < -0.5:
#     print(f"The distribution {skewness} is moderately skewed left.")
# elif skewness > -0.5 and skewness < -0.2:
#     print(f"The distribution {skewness} is slightly skewed left.")
# elif skewness > -0.2 and skewness < 0.2:
#     print(f"The distribution {skewness} is approximately symmetric.")
# elif skewness > 0.2 and skewness < 0.5:
#     print(f"The distribution {skewness} is slightly skewed right.")
# elif skewness > 0.5 and skewness <1:
#     print(f"The distribution {skewness} is moderately skewed right.")
# else:
#     print("Invalid data")

In [218]:
import csv
import time

def generateSummaryonLOB():
    tav = sum_qty_asks(df_asks)
    tbv = sum_qty_bids(df_bids) 
    bc = sum_bids_cap(df_bids)
    ac = sum_asks_cap(df_asks)
    mp = df_asks.iloc[0]
    summary_dictionary = {"mp":mp,
                      "tav":tav,
                      "tbv":tbv,
                      "bc":bc,
                      "ac":ac}

    filename  = date.now()
    
    with open(filename, mode='w', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=['name', 'age'])
        writer.writeheader()
    
        while True:
            # data = {'name': 'John', 'age': 25}
            writer.writerow(summary_dictionary)
            file.flush()
while True:
    generateSummaryonLOB()
    time.sleep(10)

## Sample csv content

- mp: the price at the time the LOB was sampled
- tav: total ask volume, sum of volume of orders of type ask, or sell (volume of sell orders)
- tbv: total bid volume, sum of volume of orders of type bid, or buy (volume of buy orders)
- bc: bid capitaliazation. The dollar value of all bid orders, quantity multipled by price and summed by row
- ac: ask capitalization. The dollar value of all ask orders, quantity multipled by price and summed, by row
- time: unix timestamp, time the sample was taken


In [3]:
formation = pd.read_csv('2023-08-20.csv')
print(formation.columns)
formation.head(2)

Index(['mp', 'tav', 'tbv', 'bc', 'ac', 'time'], dtype='object')


Unnamed: 0,mp,tav,tbv,bc,ac,time
0,10.88,259554.671,25226.788,273778.637,12762330.0,1692541818
1,10.88,259558.34,24855.335,269749.689,12762370.0,1692541821


In [None]:
#  https://github.com/stefanbund/grus-code.git  

#  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

In [20]:
# ## successful csv writes, during a for loop?
# import csv
# import time  

# with open('file.csv', mode='w', newline='') as csv_file:
#     writer = csv.writer(csv_file)
#     writer.writerow(['time', 'string', 'month'])

#     for i in range(3):
#         # name = input('Enter name: ')
#         # department = input('Enter department: ')
#         # month = input('Enter month: ')
#         time = int(time.time())
#         string = 'this'
#         month = 'january'
#         writer.writerow([time, string, month])
