## Data Acquisition

In [None]:
import time
from collections import deque
from datetime import date, timedelta, datetime
from concurrent.futures import ThreadPoolExecutor

import requests
from loguru import logger

import pandas as pd

In [None]:
# URL template for the trades endpoint; `{symbol}` is filled in with
# `str.format` at each request.
api_v4_trades = "https://api.mercadobitcoin.net/api/v4/{symbol}/trades"
# Accumulator for every trade record fetched. A deque is used so later pages
# can be prepended cheaply with `extendleft`.
trades = deque()

In [None]:
 # fetch most recent trades
# Seed the collection with the page the endpoint returns when no query
# parameters are given. A timeout keeps the notebook from hanging forever on
# a stalled connection.
response_trades = requests.get(
    url=api_v4_trades.format(symbol="BTC-BRL"),
    timeout=30,
)

if response_trades.status_code != 200:
    extra = {"error": response_trades}
    logger.bind(**extra).error("Error to fetch initial trade data")
    # Stop here: without a valid payload there is no trade id to paginate
    # from, and decoding the error body would only fail later and less clearly.
    raise requests.HTTPError(
        f"Unexpected status {response_trades.status_code} "
        "while fetching the initial trades",
        response=response_trades,
    )

trades.extend(response_trades.json())

if not trades:
    # `trades[0]` below would raise a bare IndexError; fail with context.
    raise RuntimeError("Initial trades request returned no records")

# The id of the first record anchors the backwards pagination: each later
# request asks for trades "since" an id 1000 lower than the previous one.
initial_trade = trades[0]["tid"]
next_trades = initial_trade - 1000

logger.info(f"Initial trade: {initial_trade}")

In [None]:
def fetch_trades(payload):
    """Fetch one page of BTC-BRL trades from the trades endpoint.

    Args:
        payload: Mapping of query-string parameters, forwarded unchanged to
            ``requests.get`` as ``params`` (for example ``{"since": tid}``).

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the response status code is not 200. The
            error body is logged first, so it is never handed back to the
            caller as if it were trade data.
        requests.Timeout: If the server does not answer within the timeout.
    """
    response_trades = requests.get(
        url=api_v4_trades.format(symbol="BTC-BRL"),
        params=payload,
        timeout=30,  # avoid blocking indefinitely on a stalled connection
    )

    if response_trades.status_code != 200:
        logger.error(f"Error fetching trade data: {response_trades.text}")
        raise requests.HTTPError(
            f"Unexpected status {response_trades.status_code} "
            f"while fetching trades with params {payload!r}",
            response=response_trades,
        )

    return response_trades.json()

In [None]:
# Stop once a page's last trade is older than this timestamp. Read as Unix
# epoch seconds it is 2023-09-30 23:59:09 UTC -- this assumes the API's
# `date` field uses that unit (TODO confirm).
oldest_wanted_ts = 1696118349

# Walk backwards through the history, one 1000-id window per request. The
# requests are strictly sequential (each depends on the loop state), so the
# fetch is called directly instead of going through a one-task thread pool.
while next_trades > 0:
    page = fetch_trades({"since": next_trades})
    next_trades -= 1000

    if not page:
        # Nothing to prepend and no date to compare; move on to the next
        # window instead of failing on `page[-1]`.
        logger.warning(f"Empty page returned before trade id {next_trades + 1000}")
        continue

    # `extendleft` pushes items one by one, reversing them; feeding it the
    # reversed page keeps the page's own order at the front of the deque.
    trades.extendleft(reversed(page))

    if page[-1]['date'] < oldest_wanted_ts:
        logger.info("Done!")
        break


In [None]:
# Sanity check: show the record currently at the left end of the deque.
print(trades[0])

In [None]:
# Fields kept from each trade record, in table column order.
columns = ["tid", "date", "type", "price", "amount"]

# Build the table from the collected records and key it by trade id.
btc_trades_df = pd.DataFrame(trades, columns=columns).set_index('tid')

# Convert both columns to numeric dtypes with `pd.to_numeric`.
for numeric_col in ('price', 'amount'):
    btc_trades_df[numeric_col] = pd.to_numeric(btc_trades_df[numeric_col])

In [None]:
# Keep one row per trade id and store the result back. `drop_duplicates()`
# returns a new frame (which was being discarded) and compares column values
# only, so it would also merge distinct trades that share date, type, price
# and amount. The trade id is the index, so de-duplicate on the index.
# NOTE(review): repeated ids presumably come from overlapping fetch windows.
btc_trades_df = btc_trades_df[~btc_trades_df.index.duplicated(keep="first")]
btc_trades_df

In [None]:
# Print the frame's summary (index, columns, dtypes, memory usage).
btc_trades_df.info()

In [None]:
# Preview the first rows of the trades table.
btc_trades_df.head()

In [None]:
# Persist the dataset in both formats. The trade id lives in the index (it
# was moved there with `set_index('tid')`), so the CSV has to write the index
# as well; otherwise the ids are lost and the two files disagree.
btc_trades_df.to_csv("btc_trades_df.csv", sep='\t', index=True)
btc_trades_df.to_parquet("btc_trades_df.parquet", engine="fastparquet")

## Exploratory Data Analysis

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
from pycaret.clustering import *
# Set matplotlib's default figure resolution to 300 DPI.
mpl.rcParams['figure.dpi'] = 300

In [None]:
# Per-column histograms with 30 bins each, drawn without grid lines.
btc_trades_df.hist(bins=30, figsize=(12, 10), grid=False)
plt.show()

In [None]:
# Correlation heatmap of the numeric columns. Non-numeric columns are dropped
# explicitly first: `DataFrame.corr()` raises on them in pandas >= 2.0
# instead of skipping them silently as older releases did.
# NOTE(review): `type` is never converted to a number, so it is presumably
# the column affected here.
plt.figure(figsize=(10, 8))
numeric_corr = btc_trades_df.select_dtypes(include="number").corr()
sns.heatmap(numeric_corr.round(decimals=2), annot=True)
plt.show()

In [None]:
# plot_kws = {'scatter_kws': {'s': 2}, 'line_kws': {'color': 'red'}}
# sns.pairplot(btc_trades_df, kind='reg', vars=btc_trades_df['price'], plot_kws=plot_kws)
# plt.show()

## Model

In [None]:
# Initialise the PyCaret clustering experiment on the trades table.
# NOTE(review): `tid` was moved into the index earlier with `set_index`, so it
# may no longer be a column that `ignore_features` can match -- confirm that
# `setup` accepts it together with `index=False`.
cluster = setup(
    btc_trades_df,
    session_id=7652,
    index=False,
    normalize=True,
    ignore_features=["tid", "date"],
    use_gpu=True,
)

In [None]:
# Create the model registered under the 'kmeans' id in the current experiment.
kmeans = create_model('kmeans')

In [None]:
# Elbow plot for the k-means model; `plot` is passed by keyword like the
# other plot cells.
plot_model(kmeans, plot='elbow')

In [None]:
# No `plot` argument is given, so `plot_model` uses its default plot type.
plot_model(kmeans)

In [None]:
# Silhouette plot for the k-means model.
plot_model(kmeans, plot='silhouette')

In [None]:
# Distribution plot for the k-means model.
plot_model(kmeans, plot='distribution')