## Data Acquisition

In [None]:
import time
from collections import deque
from datetime import date, timedelta, datetime
from concurrent.futures import ThreadPoolExecutor

import requests
from loguru import logger

import pandas as pd

In [None]:
# URL template for the Mercado Bitcoin v4 order book endpoint; `{symbol}` is
# filled in with the trading pair at request time.
# NOTE(review): the variable name says "trades" but the path targets
# `/orderbook` — consider renaming together with its users.
api_v4_trades = "https://api.mercadobitcoin.net/api/v4/{symbol}/orderbook"

In [None]:
# fetch the current order book snapshot
# Request a snapshot of the BTC-BRL order book. The timeout keeps the cell
# from hanging indefinitely if the API never answers.
response_orderbook = requests.get(
    url=api_v4_trades.format(symbol="BTC-BRL"),
    timeout=30,
)

if response_orderbook.status_code != 200:
    logger.error(f"Error to fetch orderbook data: {response_orderbook.text}")
    # Stop here instead of parsing an error response: continuing would only
    # fail further down (JSON decoding or the "asks"/"bids" lookups) with a
    # far less helpful message.
    raise RuntimeError(
        f"Orderbook request failed with HTTP {response_orderbook.status_code}"
    )

response_json = response_orderbook.json()

# Flatten both sides of the book into a single list of records. The first two
# items of every entry are stored as price and volume; entries under "asks"
# are tagged "sell" and entries under "bids" are tagged "buy".
orderbook = [
    {"price": data[0], "volume": data[1], "type": "sell"}
    for data in response_json["asks"]
]
orderbook.extend(
    {"price": data[0], "volume": data[1], "type": "buy"}
    for data in response_json["bids"]
)

In [None]:
# Dump every order book record to stdout (all records are unpacked into a
# single print call).
print(*orderbook)

In [None]:
# `orderbook` is a list of row dicts, so the plain DataFrame constructor is
# the matching entry point (DataFrame.from_dict is documented for dict input).
# Each dict becomes one row; the dict keys become the columns.
btc_orderbook_df = pd.DataFrame(orderbook)

In [None]:
# Preview the first rows of the order book frame.
btc_orderbook_df.head()

In [None]:
# Print the frame summary (columns, dtypes, non-null counts) before the
# numeric conversion below.
btc_orderbook_df.info()

In [None]:
# Convert the price and volume columns to numeric dtypes, one column at a time.
for numeric_col in ("price", "volume"):
    btc_orderbook_df[numeric_col] = pd.to_numeric(btc_orderbook_df[numeric_col])

In [None]:
# drop_duplicates() returns a new frame and leaves the original untouched, so
# the result has to be assigned back for the de-duplication to take effect.
btc_orderbook_df = btc_orderbook_df.drop_duplicates()
# Keep the cell's displayed output: show the de-duplicated frame.
btc_orderbook_df

In [None]:
# Print the frame summary again to check the dtypes and row count at this
# point of the notebook.
btc_orderbook_df.info()

In [None]:
# Persist the order book twice: a tab-separated text file without the index
# column, and a Parquet file written through the fastparquet engine.
btc_orderbook_df.to_csv(
    "btc_orderbook_df.csv",
    sep='\t',
    index=False,
)
btc_orderbook_df.to_parquet(
    "btc_orderbook_df.parquet",
    engine="fastparquet",
)

## Exploratory Data Analysis

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
from pycaret.clustering import *
# Set matplotlib's global figure resolution to 300 DPI for the plots below.
mpl.rcParams['figure.dpi'] = 300

In [None]:
# Draw per-column histograms of the order book frame: 30 bins each on a
# 12x10 inch canvas, with grid lines turned off.
btc_orderbook_df.hist(
    bins=30,
    figsize=(12, 10),
    grid=False,
)
plt.show()

In [None]:
plt.figure(figsize=(10, 8))
# Correlate the numeric columns only: "type" holds strings, and
# DataFrame.corr() raises on non-numeric data in pandas >= 2.0. Selecting the
# numeric dtypes explicitly works the same way across pandas versions.
sns.heatmap(
    btc_orderbook_df.select_dtypes(include="number").corr().round(decimals=2),
    annot=True,  # write the rounded coefficient inside each cell
)
plt.show()

In [None]:
# Show the first two column labels; the same slice selects the variables of
# the pair plot in the next cell.
btc_orderbook_df.columns[:2]

In [None]:
# Styling forwarded to the regression plots: small scatter markers (size 2)
# and a red fitted line.
plot_kws = dict(
    scatter_kws=dict(s=2),
    line_kws=dict(color='red'),
)
# Pairwise regression plots restricted to the frame's first two columns.
sns.pairplot(
    btc_orderbook_df,
    vars=btc_orderbook_df.columns[:2],
    kind='reg',
    plot_kws=plot_kws,
)
plt.show()

## Model

In [None]:
# Initialise the PyCaret clustering experiment on the order book frame.
cluster = setup(
    btc_orderbook_df,
    session_id=7652,           # fixed seed so runs are repeatable
    index=False,
    normalize=True,            # scale the features before clustering
    ignore_features=["type"],  # leave the buy/sell label out of the model
    use_gpu=True,
)

In [None]:
# Create the k-means model through PyCaret with create_model's default
# arguments.
kmeans = create_model('kmeans')

In [None]:
# Run PyCaret's evaluate_model on the k-means model.
evaluate_model(kmeans)

In [None]:
# Print the k-means model object returned by create_model.
print(kmeans)

In [None]:
# Elbow plot for the k-means model; the plot type is passed by keyword, in
# line with the other plot_model calls in this notebook.
plot_model(kmeans, plot='elbow')

In [None]:
# Plot the k-means model using plot_model's default plot type.
plot_model(kmeans)

In [None]:
# Silhouette plot for the k-means model.
plot_model(kmeans, plot='silhouette')

In [None]:
# Distribution plot for the k-means model.
plot_model(kmeans, plot='distribution')

In [None]:
# Distance plot for the k-means model.
plot_model(kmeans, plot="distance")

In [None]:
# Display what models() returns — presumably the catalogue of clustering
# algorithms PyCaret offers; confirm against the PyCaret docs.
models()

In [None]:
# Create a DBSCAN model for comparison with k-means. DBSCAN derives its
# clusters from point density rather than from a requested count, so no
# cluster count is passed and create_model's defaults apply.
model = create_model('dbscan')

In [None]:
# Print the DBSCAN model object returned by create_model.
print(model)

In [None]:
# Plot the DBSCAN model using plot_model's default plot type.
plot_model(model)

In [None]:
# Distribution plot for the DBSCAN model.
plot_model(model, plot='distribution')