# Imports

In [1]:
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO
from scipy.optimize import minimize, Bounds, LinearConstraint
import plotly.graph_objs as go
import pandas as pd
import requests
import numpy as np
import yfinance as yf
import matplotlib
%matplotlib inline
import random
import cvxpy as cp
import matplotlib.pyplot as plt
import datetime as dt
from prophet import Prophet
from sklearn.metrics import r2_score, mean_absolute_error
from stable_baselines3.common.vec_env import DummyVecEnv
import torch
from flipside import Flipside

import os
from dotenv import load_dotenv

import datetime as dt
from datetime import timedelta

from sklearn.linear_model import LinearRegression

Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md



In [2]:
# Step up to the parent directory before the project-local imports in the next
# cell (`python_scripts`, `sql_scripts`); presumably the parent is the project
# root where those packages live — confirm.
os.chdir('..')

In [3]:
from python_scripts.utils import flipside_api_results, set_random_seed, to_time, clean_prices
from sql_scripts.queries import prices, volume 

In [4]:
# Move back into the `notebooks` directory; the relative '../data/...' paths
# used further down are resolved from here.
# NOTE(review): assumes the kernel originally started in a directory named
# 'notebooks' — confirm, otherwise this chdir fails or lands somewhere else.
os.chdir('notebooks')

# Environment Variables

In [5]:
# Fix the seed (20) through the project helper imported from python_scripts.utils.
# NOTE(review): which random generators the helper seeds is not visible here — confirm.
set_random_seed(20)

In [6]:
# Pull variables from a .env file into the process environment, then look up
# the Flipside key; the lookup yields None when the variable is not set.
load_dotenv()
flipside_api_key = os.environ.get("FLIPSIDE_API_KEY")

# Data Collection

## BTC Volume

In [7]:
# Fetch the full available BTC-USD history (period='max') and keep only the
# 'Volume' series, re-labelled as a one-column frame named 'BTC Volume'.
btc = yf.Ticker('BTC-USD')
btc_df = btc.history(period='max')['Volume'].to_frame('BTC Volume')
btc_df

Unnamed: 0_level_0,BTC Volume
Date,Unnamed: 1_level_1
2014-09-17 00:00:00+00:00,21056800
2014-09-18 00:00:00+00:00,34483200
2014-09-19 00:00:00+00:00,37919700
2014-09-20 00:00:00+00:00,36863600
2014-09-21 00:00:00+00:00,26580100
...,...
2024-08-19 00:00:00+00:00,25911207712
2024-08-20 00:00:00+00:00,31613400008
2024-08-21 00:00:00+00:00,32731154072
2024-08-22 00:00:00+00:00,27625734377


## DEX Volume & Asset Prices (Onchain Data)

In [8]:
def pull_data(api=False):
    """Load the asset-price table and the DEX-volume table.

    Parameters
    ----------
    api : bool, default False
        When truthy, run the `prices` and `volume` queries through
        `flipside_api_results` with `flipside_api_key`. Otherwise read the
        cached CSV files '../data/prices.csv' and '../data/volume.csv'
        (paths are relative to the current working directory).

    Returns
    -------
    tuple
        `(prices_df, volume_df)`. On the CSV path both are pandas DataFrames;
        on the API path they are whatever `flipside_api_results` returns.
    """
    if api:
        prices_df = flipside_api_results(prices, flipside_api_key)
        # Fixed: this call used to re-run the `prices` query, so the API path
        # returned the price table twice and never fetched the volume data.
        volume_df = flipside_api_results(volume, flipside_api_key)
    else:
        prices_df = pd.read_csv('../data/prices.csv')
        volume_df = pd.read_csv('../data/volume.csv')

    return prices_df, volume_df

# Refresh the on-disk CSV caches: run each Flipside query and write the result
# (without the index) to the file that pull_data(api=False) reads back.
# NOTE(review): this runs on every execution of the cell, so it needs a valid
# API key and network access each time — confirm that is intended.
prices_path = '../data/prices.csv'
prices_df = flipside_api_results(prices, flipside_api_key)
prices_df.to_csv(prices_path, index=False)

volume_path = '../data/volume.csv'
volume_df = flipside_api_results(volume, flipside_api_key)
volume_df.to_csv(volume_path, index=False)

In [9]:
# Load both tables from the cached CSV files (api=False takes the read_csv branch).
prices_df, volume_df = pull_data(api=False)

In [10]:
# Display the raw price table exactly as loaded, before any cleaning.
prices_df

Unnamed: 0,hour,symbol,price,__row_index
0,2024-08-23T21:00:00.000Z,WETH,2747.320,0
1,2024-08-23T21:00:00.000Z,WBTC,63507.000,1
2,2024-08-23T20:00:00.000Z,WETH,2732.160,2
3,2024-08-23T20:00:00.000Z,WBTC,63270.000,3
4,2024-08-23T19:00:00.000Z,WETH,2727.840,4
...,...,...,...,...
105948,2018-02-14T05:00:00.000Z,WETH,839.535,105948
105949,2018-02-14T04:00:00.000Z,WETH,839.535,105949
105950,2018-02-14T03:00:00.000Z,WETH,839.535,105950
105951,2018-02-14T02:00:00.000Z,WETH,839.535,105951


In [11]:
# Run the raw rows through clean_prices, then through to_time (the result shown
# below is indexed by a `dt` DatetimeIndex).
clean_prices_df = to_time(clean_prices(prices_df))

# '__row_index' appears as a column in the raw table; drop it in place if it
# is still present after cleaning.
if '__row_index' in clean_prices_df:
    clean_prices_df.drop(columns='__row_index', inplace=True)

clean_prices_df

DatetimeIndex(['2018-02-14 01:00:00+00:00', '2018-02-14 02:00:00+00:00',
               '2018-02-14 03:00:00+00:00', '2018-02-14 04:00:00+00:00',
               '2018-02-14 05:00:00+00:00', '2018-02-14 06:00:00+00:00',
               '2018-02-14 07:00:00+00:00', '2018-02-14 08:00:00+00:00',
               '2018-02-14 09:00:00+00:00', '2018-02-14 10:00:00+00:00',
               ...
               '2024-08-23 12:00:00+00:00', '2024-08-23 13:00:00+00:00',
               '2024-08-23 14:00:00+00:00', '2024-08-23 15:00:00+00:00',
               '2024-08-23 16:00:00+00:00', '2024-08-23 17:00:00+00:00',
               '2024-08-23 18:00:00+00:00', '2024-08-23 19:00:00+00:00',
               '2024-08-23 20:00:00+00:00', '2024-08-23 21:00:00+00:00'],
              dtype='datetime64[ns, UTC]', name='dt', length=57189, freq=None)


Unnamed: 0_level_0,BTC Price,ETH Price
dt,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-02-14 01:00:00+00:00,,839.535
2018-02-14 02:00:00+00:00,,839.535
2018-02-14 03:00:00+00:00,,839.535
2018-02-14 04:00:00+00:00,,839.535
2018-02-14 05:00:00+00:00,,839.535
...,...,...
2024-08-23 17:00:00+00:00,61413.0,2674.400
2024-08-23 18:00:00+00:00,61564.0,2675.120
2024-08-23 19:00:00+00:00,62716.0,2727.840
2024-08-23 20:00:00+00:00,63270.0,2732.160


In [12]:
# Pass the volume table through to_time, then give the value column its
# display name; both follow-up edits are applied in place on that frame.
# NOTE(review): the index shown below is newest-first and skips some hours
# (e.g. 2019-09-26 15:00 and 16:00) — confirm downstream code sorts/fills it.
volume_df = to_time(volume_df)
volume_df.rename(columns={"volume": "DEX Volume"}, inplace=True)

# Remove the '__row_index' helper column when the loaded data carries one.
if '__row_index' in volume_df:
    volume_df.drop(columns='__row_index', inplace=True)

volume_df

DatetimeIndex(['2024-08-23 20:00:00+00:00', '2024-08-23 19:00:00+00:00',
               '2024-08-23 18:00:00+00:00', '2024-08-23 17:00:00+00:00',
               '2024-08-23 16:00:00+00:00', '2024-08-23 15:00:00+00:00',
               '2024-08-23 14:00:00+00:00', '2024-08-23 13:00:00+00:00',
               '2024-08-23 12:00:00+00:00', '2024-08-23 11:00:00+00:00',
               ...
               '2019-09-27 03:00:00+00:00', '2019-09-27 00:00:00+00:00',
               '2019-09-26 22:00:00+00:00', '2019-09-26 21:00:00+00:00',
               '2019-09-26 19:00:00+00:00', '2019-09-26 18:00:00+00:00',
               '2019-09-26 17:00:00+00:00', '2019-09-26 14:00:00+00:00',
               '2019-09-26 13:00:00+00:00', '2019-09-26 10:00:00+00:00'],
              dtype='datetime64[ns, UTC]', name='dt', length=42228, freq=None)


Unnamed: 0_level_0,DEX Volume
dt,Unnamed: 1_level_1
2024-08-23 20:00:00+00:00,5.588734e+07
2024-08-23 19:00:00+00:00,2.136648e+08
2024-08-23 18:00:00+00:00,3.260078e+08
2024-08-23 17:00:00+00:00,2.633585e+08
2024-08-23 16:00:00+00:00,1.448834e+08
...,...
2019-09-26 18:00:00+00:00,1.400500e+03
2019-09-26 17:00:00+00:00,1.549980e+03
2019-09-26 14:00:00+00:00,1.137598e+04
2019-09-26 13:00:00+00:00,2.274700e+02


# Feature Engineering