# Imports

In [2]:
import arcticdb as adb
from pathlib import Path
import polars as pl
import pandas as pd, numpy as np
from dotenv import find_dotenv, dotenv_values
from datetime import datetime
from tqdm.notebook import tqdm

from crypto_trading.arctic import ArcticHandler

In [3]:
%load_ext autoreload
%autoreload 2

# Config / Params

## Credentials

In [4]:
config = dotenv_values(find_dotenv())
user = config.get('USERNAME_PG')
password = config.get('PASSWORD_PG')

## Database Connection

In [5]:
uri = f"postgresql://{user}:{password}@localhost:5432/crypto_data"

# Initiate Data Store

## Create db

In [6]:
db_name = 'crypto_trading'
arctic_handler = ArcticHandler(db_name=db_name)

In [21]:
arctic_handler.arctic_db.list_libraries()

['ohlc_1m']

In [7]:
lib_name = 'ohlc_1m'
arctic_handler.set_lib(lib_name=lib_name)
lib_ohlc_1m = arctic_handler.get_lib(lib_name)

# Ingest Data

## Get Existing Symbol Info

In [14]:
query_symbol_info_mv = """
    SELECT
        symbol,
        last_datetime
    FROM perps_last_datetime
    WHERE 
        exchange = 'binance'
        AND symbol NOT IN ('ETHUSDT', 'BTCUSDT', 'SOLUSDT', 'BNBUSDT')
"""
df_symbol_info = pl.read_database_uri(query=query_symbol_info_mv, uri=uri)

In [13]:
list(df_symbol_info.filter(~df_symbol_info["symbol"].isin(['ETHUSDT', 'BTCUSDT', 'SOLUSDT', 'BNBUSDT']))['symbol'])

AttributeError: 'Series' object has no attribute 'isin'

## Pull in some data from postgres

In [19]:
# symbols = ['ETHUSDT', 'BNBUSDT']
symbols = list(df_symbol_info['symbol'])

In [14]:
symbols = [raw_symbol.split('_')[1] for raw_symbol in lib_ohlc_1m.list_symbols()]

{item.symbol.split('_')[1]:item.metadata.get('last_datetime') for item in lib_ohlc_1m.read_metadata_batch(symbols=lib_ohlc_1m.list_symbols())}

{'USDCUSDT': Timestamp('2024-06-12 20:34:00'),
 'WAVESUSDT': Timestamp('2024-06-12 20:34:00'),
 '1000SATSUSDT': Timestamp('2024-06-12 20:31:00'),
 'STORJUSDT': Timestamp('2024-06-12 20:33:00'),
 'NEOUSDC': Timestamp('2024-06-12 20:33:00'),
 'ARKUSDT': Timestamp('2024-06-12 20:31:00'),
 'LUNABUSD': Timestamp('2022-05-13 01:35:00'),
 'NTRNUSDT': Timestamp('2024-06-12 20:33:00'),
 'WIFUSDT': Timestamp('2024-06-12 20:34:00'),
 'DGBUSDT': Timestamp('2024-06-11 23:36:00'),
 'DOTECOUSDT': Timestamp('2021-02-19 09:37:00'),
 'ROSEUSDT': Timestamp('2024-06-12 20:33:00'),
 'MOVRUSDT': Timestamp('2024-06-12 20:33:00'),
 'TIAUSDT': Timestamp('2024-06-12 20:34:00'),
 'ICPBUSD': Timestamp('2023-03-22 09:28:00'),
 'ALTUSDT': Timestamp('2024-06-12 20:31:00'),
 'MINAUSDT': Timestamp('2024-06-12 20:33:00'),
 'DODOBUSD': Timestamp('2023-12-19 09:37:00'),
 '1000XECUSDT': Timestamp('2024-06-12 20:31:00'),
 'FTMUSDT': Timestamp('2024-06-12 20:32:00'),
 'IOTAUSDT': Timestamp('2024-06-12 20:33:00'),
 'YFIIUSDT

In [19]:
lib_ohlc_1m.tail('binance_BTCUSDT').data

Unnamed: 0_level_0,exchange,symbol,open,high,low,close,base_volume,quote_volume,base_buy_volume,quote_buy_volume
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2024-06-12 20:28:00,binance,BTCUSDT,67907.6,67932.0,67885.6,67885.6,259.329,17612030.0,135.397,9195219.0
2024-06-12 20:29:00,binance,BTCUSDT,67885.6,67949.6,67882.5,67941.1,167.231,11359120.0,88.757,6028229.0
2024-06-12 20:30:00,binance,BTCUSDT,67941.2,67941.2,67890.4,67937.7,183.672,12473050.0,58.497,3972918.0
2024-06-12 20:31:00,binance,BTCUSDT,67937.8,68022.1,67931.2,68017.0,342.47,23284030.0,247.4,16819670.0
2024-06-12 20:32:00,binance,BTCUSDT,68016.9,68022.5,67982.4,67994.7,97.896,6656807.0,34.93,2375071.0


In [13]:
q = adb.QueryBuilder()
q = q[q.index >= pd.to_datetime('6/1/2024')]

lib_ohlc_1m.read_batch(symbols=symbols[:])#, query_builder=q)

[VersionedItem(symbol='BTCUSDT', library='ohlc_1m', data=<class 'pandas.core.frame.DataFrame'>, version=0, metadata={'last_datetime': Timestamp('2024-06-08 19:18:00')}, host='LMDB(path=//home/quantfiction/arctic_demo)', timestamp=1718044451970394066),
 VersionedItem(symbol='SOLUSDT', library='ohlc_1m', data=<class 'pandas.core.frame.DataFrame'>, version=0, metadata={'last_datetime': Timestamp('2024-06-09 06:18:00')}, host='LMDB(path=//home/quantfiction/arctic_demo)', timestamp=1718044458210463834)]

In [22]:
import ccxt

In [23]:
bnf_client = ccxt.binanceusdm(config={
    'enableRateLimit':True,
})

In [40]:
list_ohlc_data = bnf_client.fapipublic_get_klines(params={
    'interval':'1m',
    'symbol':'BTCUSDT',
    'startTime':0,
    'limit':1500,
})

df_ohlc_data = (
    pd.DataFrame(
        data=list_ohlc_data,
        columns=[
            'start_time',
            'open',
            'high',
            'low',
            'close',
            'base_volume',
            'end_time',
            'quote_volume',
            'num_trades',
            'base_buy_volume',
            'quote_buy_volume',
            'ignore',
        ],
    )
    .astype({
        'start_time':np.int64,
        'open':np.float64,
        'high':np.float64,
        'low':np.float64,
        'close':np.float64,
        'quote_volume':np.float64,
        'base_volume':np.float64,
        'num_trades':np.int64,
        'base_buy_volume':np.float64,
        'quote_buy_volume':np.float64,
    })        
    .assign(datetime = lambda x: pd.to_datetime(x['start_time'], unit='ms'))
    .drop(['start_time','end_time','ignore'], axis='columns')
    .set_index('datetime')
    .sort_index()
)

display(df_ohlc_data.head())

Unnamed: 0_level_0,open,high,low,close,base_volume,quote_volume,num_trades,base_buy_volume,quote_buy_volume
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2019-09-08 17:57:00,10000.0,10000.0,10000.0,10000.0,0.001,10.0,1,0.0,0.0
2019-09-08 17:58:00,10000.0,10000.0,10000.0,10000.0,0.0,0.0,0,0.0,0.0
2019-09-08 17:59:00,10000.0,10000.0,10000.0,10000.0,0.001,10.0,1,0.001,10.0
2019-09-08 18:00:00,10000.0,10000.0,10000.0,10000.0,0.0,0.0,0,0.0,0.0
2019-09-08 18:01:00,10000.0,10000.0,10000.0,10000.0,0.0,0.0,0,0.0,0.0


In [41]:
(
    df_ohlc_data
    .resample('D')
    .agg({
        'open':'first',
        'high':'max',
        'low':'min',
        'close':'last',
        'quote_volume':'sum',
        'base_volume':'sum',
        'num_trades':'sum',
        'quote_buy_volume':'sum',
        'base_buy_volume':'sum',
    })
)

Unnamed: 0_level_0,open,high,low,close,quote_volume,base_volume,num_trades,quote_buy_volume,base_buy_volume
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2019-09-08,10000.0,10412.65,10000.0,10391.63,32096300.0,3096.291,3754,403.3562,0.039
2019-09-09,10391.63,10475.54,10077.22,10261.32,125611900.0,12215.285,9604,38353830.0,3718.024
