# Arbitrage Detection Inference 
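Use two trained models, an LGBM model that forecasts the 1-minute percent change and an XGBoost model that forecasts the 1-minute gas fees, to determine whether there will be an arbitrage opportunity.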

In [1]:
import sys
import os
import pandas as pd
from sklearn.metrics import root_mean_squared_error, r2_score

import matplotlib.pyplot as plt
import matplotlib.dates as mdates

from IPython.display import display

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Print the kernel's current working directory so the starting location is
# visible in the output.
print(os.getcwd())

/Users/das/DATASCI210/arbitrage_playground/notebooks


In [2]:
# Change the active path to the parent directory.
#
# The kernel starts in the `notebooks` directory, while the project-local
# package `src` is imported relative to its parent. The move is guarded so
# that re-running this cell is a no-op instead of climbing one directory
# higher on every execution.
if os.path.basename(os.getcwd()) == "notebooks":
    print("Moving active path to parent directory")
    os.chdir('..')
# Always report where we ended up, whether or not a move was needed.
print(os.getcwd())

Moving active path to parent directory
/Users/das/DATASCI210/arbitrage_playground


In [7]:
# import test code
from src.arbutils import load_model, etherscan_request, merge_pool_data, LGBM_Preprocessing, XGB_preprocessing, calculate_min_investment


In [28]:
# API inputs: TODO - put these in a config file...
#
# The Etherscan API key is a credential, so it is read from the
# ETHERSCAN_API_KEY environment variable instead of being stored in the
# notebook.
# NOTE(review): the key that was previously hard-coded in this cell should be
# treated as exposed and rotated.
api_key = os.environ.get("ETHERSCAN_API_KEY", "")
if not api_key:
    # Warn rather than raise so that the remaining settings are still defined;
    # the etherscan requests made later are expected to need a valid key.
    print(
        "WARNING: ETHERSCAN_API_KEY is not set; "
        "etherscan requests will be sent without an API key.",
        file=sys.stderr,
    )

# Addresses of the two pools whose data is fetched and merged.
pool0_address = "0x8ad599c3a0ff1de082011efddc58f1908eb6e6d8"
pool1_address = "0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640"

# Names of the saved models handed to `load_model`.
price_model_name = "percent_change_1min_forecast_LGBM"
gasfee_model_name = "gas_fees_1min_forecast_XGBoost"

In [29]:
# --- Data acquisition --------------------------------------------------
# Request the 'tokentx' records for each pool address from etherscan.io.
p0 = etherscan_request('tokentx', api_key, address=pool0_address)
p1 = etherscan_request('tokentx', api_key, address=pool1_address)

# Combine the two pools into a single data set shared by both models.
both_pools = merge_pool_data(p0, p1)

# --- Percent-change forecast inputs (LGBM) -----------------------------
# The preprocessing helper returns four values; only the second one (the
# feature frame) is needed for inference, so the rest are discarded.
_, lgbm_features, _, _ = LGBM_Preprocessing(
    both_pools,
    forecast_window_min=1,
)
# Keep the timestamps aside before narrowing the frame to the model inputs.
X_time = lgbm_features['time']
lgbm_feature_columns = ['percent_change', 'rolling_mean_8', 'lag_1', 'lag_2']
X_pct = lgbm_features[lgbm_feature_columns]
lgbm_model = load_model(price_model_name)

# --- Gas-fee forecast inputs (XGBoost) ---------------------------------
# Three values come back here; only the first (the feature set) is used.
X_gas_fees, _, _ = XGB_preprocessing(
    both_pools,
    forecast_window_min=1,
)
xgb_model = load_model(gasfee_model_name)

# --- Inference ---------------------------------------------------------
y_pct_pred = lgbm_model.predict(X_pct)
y_gas_fees_pred = xgb_model.predict(X_gas_fees)



Model percent_change_1min_forecast_LGBM loaded successfully from /Users/das/DATASCI210/arbitrage_playground/models/percent_change_1min_forecast_LGBM.pkl
Model gas_fees_1min_forecast_XGBoost loaded successfully from /Users/das/DATASCI210/arbitrage_playground/models/gas_fees_1min_forecast_XGBoost.pkl


In [30]:
# Gather the timestamps and both model forecasts as named columns.
pred_dict = dict(
    time=X_time,
    total_gas_fees_prediction=y_gas_fees_pred,
    percent_change_prediction=y_pct_pred,
)
predictions = pd.DataFrame(pred_dict)

# Hand the frame to the helper together with the names of the gas-fee and
# percent-change columns it should read.
# NOTE(review): judging by the displayed output, the helper returns the frame
# with a `min_amount_to_invest` column added - confirm against src.arbutils.
df = calculate_min_investment(
    predictions,
    'total_gas_fees_prediction',
    'percent_change_prediction',
)
df.head()

Unnamed: 0,time,total_gas_fees_prediction,percent_change_prediction,min_amount_to_invest
2906,2024-12-30 06:30:47,25.508982,4e-05,10044.197637
2907,2024-12-30 06:30:59,25.574257,-0.000727,-14405.022298
2908,2024-12-30 06:31:23,24.625664,-0.001054,-16994.747003
2909,2024-12-30 06:31:35,26.080437,-0.001004,-17400.94338
