## Data Acquisition

In [1]:
import pandas as pd

In [2]:
# Read the raw BTC trade records from the local parquet snapshot.
trades_path = '../datasets/btc_trades_df.parquet'
btc_trades_df = pd.read_parquet(trades_path, engine='fastparquet')

In [3]:
# The 'date' column is parsed as epoch seconds (unit='s') and replaced
# by the corresponding pandas datetimes.
epoch_seconds = btc_trades_df['date']
btc_trades_df['date'] = pd.to_datetime(epoch_seconds, unit='s')

In [4]:
# Use the trade timestamp as the row index (enables date-string slicing with .loc).
btc_trades_df = btc_trades_df.set_index('date')

In [5]:
# Preview the first ten trades to sanity-check the datetime index and the columns.
btc_trades_df.head(10)

Unnamed: 0_level_0,type,price,amount
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-09-29 17:03:28,buy,135078.45,0.0155
2023-09-29 17:03:28,buy,135078.45,0.0444
2023-09-29 17:03:28,buy,135083.0,0.159676
2023-09-29 17:03:32,buy,135083.0,0.0686
2023-09-29 17:04:08,buy,135187.0,0.000148
2023-09-29 17:04:18,sell,135187.0,0.0057
2023-09-29 17:04:18,sell,135184.411374,0.0125
2023-09-29 17:04:18,sell,135150.627619,0.0302
2023-09-29 17:04:18,sell,135083.04,0.018883
2023-09-29 17:05:10,buy,135187.0,5.8e-05


## Exploratory Data Analysis

In [6]:
# TODO: exploratory data analysis has not been written yet.

## Isolation Forest

In [7]:
import numpy as np
from sklearn.ensemble import IsolationForest

In [8]:
# Isolation Forest used to flag unusual trades.
#   n_estimators=1_000 : number of trees in the ensemble.
#   max_samples='auto' : let scikit-learn choose the per-tree subsample size.
#   contamination=0.1  : expected share of outliers; sets the threshold used by
#                        decision_function / predict.
#   max_features=1.0   : every tree may draw from all input features.
#   random_state=42    : fixed seed so that Restart & Run All reproduces the same
#                        trees, scores and flagged trades.
model = IsolationForest(
    n_estimators=1_000,
    max_samples='auto',
    contamination=0.1,
    max_features=1.0,
    random_state=42,
)

In [9]:
# Feature frame for a single day of trades (2023-09-29), restricted to the two
# numeric columns fed to the model.
# Rows and columns are selected in one .loc call and the result is copied, so
# that adding columns to `df` later works on an independent frame instead of a
# possible view of `btc_trades_df` (avoids pandas' SettingWithCopyWarning).
df = btc_trades_df.loc['2023-09-29':'2023-09-29', ['price', 'amount']].copy()

In [10]:
# Show the selected day's feature frame (pandas elides the middle rows in the display).
df

Unnamed: 0_level_0,price,amount
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-09-29 17:03:28,135078.450000,0.015500
2023-09-29 17:03:28,135078.450000,0.044400
2023-09-29 17:03:28,135083.000000,0.159676
2023-09-29 17:03:32,135083.000000,0.068600
2023-09-29 17:04:08,135187.000000,0.000148
...,...,...
2023-09-29 23:53:43,135573.140000,0.000221
2023-09-29 23:55:02,135479.486492,0.000023
2023-09-29 23:57:12,135479.486492,0.000015
2023-09-29 23:58:26,135535.520000,0.000015


In [11]:
# Train the forest on the two numeric trade features only.
training_features = df[['price', 'amount']]
model.fit(training_features)

In [12]:
# Score every trade with the fitted model and store both results on the frame:
#   'scores'  - output of decision_function
#   'anomaly' - output of predict (rows equal to -1 are filtered out as anomalies later)
feature_frame = df[['price', 'amount']]
df['scores'] = model.decision_function(feature_frame)
df['anomaly'] = model.predict(feature_frame)

In [13]:
# Inspect the first nine fitted trees of the ensemble.
model.estimators_[:9]

[ExtraTreeRegressor(max_depth=8, max_features=1, random_state=897128210),
 ExtraTreeRegressor(max_depth=8, max_features=1, random_state=856178413),
 ExtraTreeRegressor(max_depth=8, max_features=1, random_state=1726614300),
 ExtraTreeRegressor(max_depth=8, max_features=1, random_state=649024386),
 ExtraTreeRegressor(max_depth=8, max_features=1, random_state=1496796870),
 ExtraTreeRegressor(max_depth=8, max_features=1, random_state=1944051816),
 ExtraTreeRegressor(max_depth=8, max_features=1, random_state=310155059),
 ExtraTreeRegressor(max_depth=8, max_features=1, random_state=658607616),
 ExtraTreeRegressor(max_depth=8, max_features=1, random_state=1302072862)]

In [14]:
# Per-trade output of score_samples, kept as a standalone array
# (separate from the 'scores' column, which holds decision_function values).
feature_columns = ['price', 'amount']
scores = model.score_samples(df[feature_columns])

In [15]:
# Average score_samples value over all trades of the day.
scores.mean()

-0.44040340681027323

In [16]:
# Keep only the trades the model labelled -1, remember their timestamps,
# and print the resulting table.
flagged_rows = df['anomaly'].eq(-1)
anomaly = df.loc[flagged_rows]
anomaly_index = [timestamp for timestamp in anomaly.index]
print(anomaly)

                             price    amount    scores  anomaly
date                                                           
2023-09-29 17:03:28  135078.450000  0.044400 -0.066770       -1
2023-09-29 17:03:28  135083.000000  0.159676 -0.168207       -1
2023-09-29 17:03:32  135083.000000  0.068600 -0.095548       -1
2023-09-29 17:04:18  135150.627619  0.030200 -0.049890       -1
2023-09-29 17:15:11  135266.000000  0.017043 -0.008749       -1
...                            ...       ...       ...      ...
2023-09-29 22:14:21  135669.000000  0.092699 -0.061568       -1
2023-09-29 22:17:48  135669.000000  0.040504 -0.019226       -1
2023-09-29 23:04:02  135811.920000  0.097879 -0.093959       -1
2023-09-29 23:04:02  135815.250000  0.067767 -0.072863       -1
2023-09-29 23:35:33  135429.040000  0.037892 -0.060123       -1

[112 rows x 4 columns]
