<a href="https://colab.research.google.com/github/tguinot/gestion-quant/blob/master/Lake_Example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Welcome to Lake

Lake is a service that provides highly detailed historical cryptocurrency market data, including L2 depth snapshots. It is tuned for quantitative research and machine-learning workflows, offering high performance, caching, and parallelization.

- Website: https://crypto-lake.com/
- API home: https://github.com/crypto-lake/lake-api
- Documentation: https://lake-api.readthedocs.io/

To experiment with the notebook, click _File -> Open in playground mode_ and then _Runtime -> Run all_.

<!-- Google tag (gtag.js) loader followed by its inline configuration calls.
     NOTE(review): '{{ site.gtag }}' looks like an unrendered template placeholder;
     confirm it is substituted before this cell is served. -->
<script async src="https://www.googletagmanager.com/gtag/js?id={{ site.gtag }}"></script>
<script>
  window.dataLayer = window.dataLayer || [];
  function gtag(){dataLayer.push(arguments);}
  gtag('js', new Date());
  gtag('config', '{{ site.gtag }}');
  gtag('config', 'UA-246341294-1');
</script>

In [None]:
!pip install -q lakeapi==0.8.0 cufflinks awscli

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.3/4.3 MB[0m [31m44.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m267.1/267.1 kB[0m [31m30.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.8/135.8 kB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.2/11.2 MB[0m [31m41.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m548.2/548.2 kB[0m [31m42.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.8/79.8 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.1/143.1 kB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.0/75.0 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━

## Example: trades

In [None]:
import datetime

import lakeapi
import plotly
import cufflinks

# Switch cufflinks to offline mode — presumably so that DataFrame.iplot() renders
# locally in the notebook; confirm against the cufflinks documentation.
cufflinks.go_offline()
# Make "colab" the default Plotly renderer (this notebook targets Google Colab).
plotly.io.renderers.default = "colab"

In [None]:
# Point lakeapi at the sample dataset, requesting anonymous access.
lakeapi.use_sample_data(anonymous_access=True)

# Load BTC-USDT trades from BINANCE for the requested window and index the
# frame by receive timestamp, which time-based resampling requires.
trades = lakeapi.load_data(
    table="trades",
    start=datetime.datetime(2022, 10, 1),
    end=datetime.datetime(2022, 10, 4),
    symbols=["BTC-USDT"],
    exchanges=["BINANCE"],
).set_index("received_time")

# Bare last expression: show the (truncated) frame as the cell output.
trades

  0%|          | 0/3 [00:00<?, ?it/s]

Unnamed: 0_level_0,side,quantity,price,trade_id,origin_time,exchange,symbol
received_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-10-01 00:00:00.009630208,buy,0.00206,19423.93,1640546242,2022-09-30 23:59:59.989000192,BINANCE,BTC-USDT
2022-10-01 00:00:00.009739520,buy,0.00163,19423.93,1640546243,2022-09-30 23:59:59.992999936,BINANCE,BTC-USDT
2022-10-01 00:00:00.009781760,buy,0.01677,19423.92,1640546244,2022-09-30 23:59:59.994999808,BINANCE,BTC-USDT
2022-10-01 00:00:00.009803008,buy,0.00654,19423.93,1640546245,2022-09-30 23:59:59.994999808,BINANCE,BTC-USDT
2022-10-01 00:00:00.009821696,buy,0.00294,19423.93,1640546246,2022-09-30 23:59:59.994999808,BINANCE,BTC-USDT
...,...,...,...,...,...,...,...
2022-10-03 23:59:59.993817600,buy,0.01568,19628.56,1649526367,2022-10-03 23:59:59.989000192,BINANCE,BTC-USDT
2022-10-03 23:59:59.993905152,buy,0.00754,19628.82,1649526368,2022-10-03 23:59:59.989000192,BINANCE,BTC-USDT
2022-10-03 23:59:59.994291968,buy,0.02031,19628.90,1649526369,2022-10-03 23:59:59.989000192,BINANCE,BTC-USDT
2022-10-03 23:59:59.994332160,buy,0.00075,19628.90,1649526370,2022-10-03 23:59:59.990000128,BINANCE,BTC-USDT


In [None]:
# Aggregate trade prices into 15-minute open/high/low/close bars and render
# them as a candlestick chart.
(
    trades["price"]
    .resample("15Min")
    .ohlc()
    .iplot(kind="candle", title="15m candles from trades")
)

## Example: depth snapshots

In [None]:
# Load BTC-USDT order-book snapshots for the window 2022-10-01 .. 2022-10-02
# and index the frame by receive timestamp.
books = lakeapi.load_data(
    table="book",
    start=datetime.datetime(2022, 10, 1),
    end=datetime.datetime(2022, 10, 2),
    symbols=["BTC-USDT"],
    # Optional column subset, left disabled so that every column is loaded:
    # columns=['receipt_time', 'bid_0_price', 'ask_0_price'],
    # NOTE(review): 'receipt_time' does not match the 'received_time' column
    # used for the index on this frame — confirm the name before enabling.
    exchanges=None,
).set_index("received_time")

In [None]:
# Plot the level-0 bid and ask prices for the first 2000 snapshots only,
# taking the positional row slice before selecting the two columns.
(
    books
    .iloc[:2000][["bid_0_price", "ask_0_price"]]
    .iplot(title="BTC-USDT bid-ask spread")
)

## Internals / performance

In [None]:
# (rows, columns) of the order-book frame.
books.shape

(863465, 84)

In [None]:
# Per-column dtypes of the order-book frame.
books.dtypes

origin_time        datetime64[ns]
sequence_number             Int64
bid_0_price               float64
bid_0_size                float64
bid_1_price               float64
                        ...      
ask_18_size               float64
ask_19_price              float64
ask_19_size               float64
exchange                 category
symbol                   category
Length: 84, dtype: object

In [None]:
# Approximate RAM usage in megabytes: sum the byte counts reported by
# memory_usage() and divide by 1e6 (decimal MB, not MiB).
books.memory_usage().sum() / 1e6

575.931387

The storage backend is S3. The sample data bucket contains:

In [None]:
# List the sample bucket recursively. --no-sign-request makes the AWS CLI send
# unsigned requests, so no AWS credentials are needed for this listing.
!aws s3 ls --recursive --no-sign-request 's3://sample.crypto.lake/'

2022-12-06 19:06:22     902713 book/exchange=ASCENDEX/symbol=FTRB-USDT/dt=2022-11-01/1.snappy.parquet
2022-12-06 19:06:25    2093568 book/exchange=ASCENDEX/symbol=FTRB-USDT/dt=2022-11-02/1.snappy.parquet
2022-12-06 19:06:29    1899431 book/exchange=ASCENDEX/symbol=FTRB-USDT/dt=2022-11-03/1.snappy.parquet
2022-12-06 19:06:32    1917420 book/exchange=ASCENDEX/symbol=FTRB-USDT/dt=2022-11-04/1.snappy.parquet
2022-12-06 19:06:36    2018916 book/exchange=ASCENDEX/symbol=FTRB-USDT/dt=2022-11-05/1.snappy.parquet
2022-12-06 19:06:40    2152204 book/exchange=ASCENDEX/symbol=FTRB-USDT/dt=2022-11-06/1.snappy.parquet
2022-12-06 19:06:43    2445310 book/exchange=ASCENDEX/symbol=FTRB-USDT/dt=2022-11-07/1.snappy.parquet
2022-12-06 19:06:47    2074912 book/exchange=ASCENDEX/symbol=FTRB-USDT/dt=2022-11-08/1.snappy.parquet
2022-12-06 19:06:51    1794744 book/exchange=ASCENDEX/symbol=FTRB-USDT/dt=2022-11-09/1.snappy.parquet
2022-12-06 19:06:54    2567519 book/exchange=ASCENDEX/symbol=FTRB-USDT/dt=2022-11-