In [1]:
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

## Gas price data

#### This notebooks combines Gas price data, Binance ETH/USD data, and block gaslimit/gas usage

#### Gas price data was retrieved from google big query, which provides a queryable database of Ethereum blockchaind data

#### Retrieved from the GCP BigQuery API using the following query;

In [None]:
SELECT t0.avg_gas_price,

t0.block_timestamp,

t0.max_gas_price,

t0.min_gas_price 

FROM (SELECT
  block_timestamp, MIN(gas_price) as min_gas_price, MAX(gas_price) as max_gas_price, AVG(gas_price) as avg_gas_price
  
FROM 'bigquery-public-data.crypto_ethereum.transactions'

WHERE
  DATE(block_timestamp) between  "2021-11-26"  AND "2022-01-26"
  
GROUP BY block_timestamp) AS t0

ORDER BY block_timestamp DESC 

## Process dataset for later modelling 

In [1]:
#Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

In [3]:
#Load Query CSV generate by above query
df = pd.read_csv (r'C:\Users\conal\Desktop\MCM\Practicum\data\min,max,average gas 11-26 to 01-26 QUERY 2.csv', header=0)

#Read datetime, sort by chronologcailly and index data by datetime
df['datetime'] = pd.to_datetime(df['block_timestamp'], format = '%Y-%m-%d %H:%M:%S UTC')
df['timestamp'] = df['datetime'].astype(np.int64) // 10**9
df = df.sort_values(by='block_timestamp',ascending=False)
df = df.set_index('datetime')

#Resampel data, taking mean over 1 minute window labelled with the tiem at the left side of the window
df_block_gas_price = df.resample('1T').mean()
df_block_gas_price.tail()


  df['timestamp'] = df['datetime'].astype(np.int64) // 10**9


Unnamed: 0_level_0,avg_gas_price,max_gas_price,min_gas_price,timestamp
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-01-26 23:55:00,151250800000.0,307852000000.0,143456600000.0,1643241000.0
2022-01-26 23:56:00,161142400000.0,307971800000.0,148508000000.0,1643241000.0
2022-01-26 23:57:00,139859800000.0,672555600000.0,121381600000.0,1643241000.0
2022-01-26 23:58:00,155558300000.0,522729700000.0,142239000000.0,1643242000.0
2022-01-26 23:59:00,184758000000.0,931700000000.0,163241000000.0,1643242000.0


In [4]:
#Load ETH/USD data previously downloaded from binance and merged
df = pd.read_csv (r'C:\Users\conal\Desktop\MCM\Practicum\data\ETH\eth_price_data_2021-01-01_2022-03-02.csv')

#Read datetime, sort by chronologcailly and index data by datetime, unix timestamp will be used later for coherence plots
df['Open_time'] = df['Open_time'].floordiv(1000)
df['datetime'] = pd.to_datetime(df['Open_time'], unit='s')
df= df.sort_values(by='Open_time',ascending=False)

df = df.set_index('datetime')
df_eth = df[['Open_time', 'Open']]
df_eth.tail()

Unnamed: 0_level_0,Open_time,Open
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-01-01 00:04:00,1609459440,737.38
2021-01-01 00:03:00,1609459380,737.74
2021-01-01 00:02:00,1609459320,738.78
2021-01-01 00:01:00,1609459260,737.12
2021-01-01 00:00:00,1609459200,736.42


## Block gas limit, and block gas used data were also retrieved

In [None]:
##Below query used to retrieve data from bigquery 

SELECT timestamp, gas_limit, gas_used
FROM
  `bigquery-public-data.crypto_ethereum.blocks`
WHERE
  DATE(timestamp) between  "2021-11-26"  AND "2022-01-26"

ORDER BY timestamp DESC 

In [5]:
#Load data, convert account for datetime formatting, add unix timestamp, sort by datetime, resample
df = pd.read_csv (r'C:\Users\conal\Desktop\MCM\Practicum\data\gas limit, gas used 11-26 to 01-26.csv', header=0)
df['datetime'] = pd.to_datetime(df['timestamp'], format = '%Y-%m-%d %H:%M:%S UTC')
df['timestamp'] = df['datetime'].astype(np.int64) // 10**9
df = df.sort_values(by='timestamp',ascending=False)
df = df.set_index('datetime')
df_block_gas_usage = df.resample('1T').mean()
df_block_gas_usage.tail()

  df['timestamp'] = df['datetime'].astype(np.int64) // 10**9


Unnamed: 0_level_0,timestamp,gas_limit,gas_used
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-01-26 23:55:00,1643241000.0,30014650.0,17536640.0
2022-01-26 23:56:00,1643241000.0,30017560.0,9751164.0
2022-01-26 23:57:00,1643241000.0,30005850.0,14760700.0
2022-01-26 23:58:00,1643242000.0,30009760.0,26145420.0
2022-01-26 23:59:00,1643242000.0,30029300.0,30026290.0


# Merging datasets

We will merge datsets for for simple access in later modelling

In [6]:
#Merge block gas limit/usage with ETH/USD exchagne data
merged_eth_usage = df_block_gas_usage.merge(df_eth, left_index=True, right_index=True)

In [7]:
merged_eth_usage.head()

Unnamed: 0_level_0,timestamp,gas_limit,gas_used,Open_time,Open
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-11-26 00:00:00,1637885000.0,29992670.0,13471630.0,1637884800,4524.48
2021-11-26 00:01:00,1637885000.0,29999990.0,15168190.0,1637884860,4529.9
2021-11-26 00:02:00,1637885000.0,30043940.0,18203550.0,1637884920,4528.51
2021-11-26 00:03:00,1637885000.0,29999990.0,18020440.0,1637884980,4533.54
2021-11-26 00:04:00,1637885000.0,30014630.0,15731360.0,1637885040,4534.97


In [8]:
#Merge Gas price data
df_merged = merged_eth_usage.merge(df_block_gas_price, left_index=True, right_index=True)
df_merged.head()

Unnamed: 0_level_0,timestamp_x,gas_limit,gas_used,Open_time,Open,avg_gas_price,max_gas_price,min_gas_price,timestamp_y
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2021-11-26 00:00:00,1637885000.0,29992670.0,13471630.0,1637884800,4524.48,96879510000.0,340147800000.0,81685530000.0,1637885000.0
2021-11-26 00:01:00,1637885000.0,29999990.0,15168190.0,1637884860,4529.9,100555400000.0,391827700000.0,82407920000.0,1637885000.0
2021-11-26 00:02:00,1637885000.0,30043940.0,18203550.0,1637884920,4528.51,91898400000.0,435102000000.0,75408290000.0,1637885000.0
2021-11-26 00:03:00,1637885000.0,29999990.0,18020440.0,1637884980,4533.54,91047110000.0,378714800000.0,83793990000.0,1637885000.0
2021-11-26 00:04:00,1637885000.0,30014630.0,15731360.0,1637885040,4534.97,108141500000.0,535510500000.0,96352650000.0,1637885000.0


In [11]:
#Convert gas price data from wei to gwei, to giga-wei, or gwei, the most commonly used ETH denominator when referencing gas price 
df_merged_gwei = df_merged
df_merged_gwei['min_gas_price'] = df_merged['min_gas_price'].apply(lambda x: x/1000000000)
df_merged_gwei['avg_gas_price'] = df_merged['avg_gas_price'].apply(lambda x: x/1000000000)
df_merged_gwei['max_gas_price'] = df_merged['max_gas_price'].apply(lambda x: x/1000000000)
#Get proportion of allocated block gas that was actually used
df_merged_gwei['block_utilisation'] = df_merged['gas_used']/df_merged['gas_limit']
df_merged_gwei = df_merged_gwei.drop(['timestamp_x', 'Open_time', 'timestamp_y'], axis=1)
df_merged_gwei.head()


Unnamed: 0_level_0,gas_limit,gas_used,Open,avg_gas_price,max_gas_price,min_gas_price,block_utilisation
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-11-26 00:00:00,29992670.0,13471630.0,4524.48,9.687951e-08,3.401477e-07,8.168553e-08,0.449164
2021-11-26 00:01:00,29999990.0,15168190.0,4529.9,1.005554e-07,3.918277e-07,8.240792e-08,0.505607
2021-11-26 00:02:00,30043940.0,18203550.0,4528.51,9.18984e-08,4.35102e-07,7.540829e-08,0.605898
2021-11-26 00:03:00,29999990.0,18020440.0,4533.54,9.104711e-08,3.787148e-07,8.379399e-08,0.600682
2021-11-26 00:04:00,30014630.0,15731360.0,4534.97,1.081415e-07,5.355105e-07,9.635265e-08,0.524123


## Save merged data at different sampling rates

In [76]:
df_merged_gwei.to_csv(r'C:\Users\conal\Desktop\MCM\Practicum\data\ETH,gas,usage merged 11-26 to 01-26.csv', sep=',')

In [13]:
df_merged_gwei_5min = df_merged_gwei.resample('5T').mean()
df_merged_gwei_60min = df_merged_gwei.resample('60T').mean()
df_merged_gwei_10min = df_merged_gwei.resample('10T').mean()

In [78]:
df_merged_gwei_5min.to_csv(r'C:\Users\conal\Desktop\MCM\Practicum\data\ETH,gas,usage merged_5min 11-26 to 01-26.csv', sep=',')

In [79]:
df_merged_gwei_60min.to_csv(r'C:\Users\conal\Desktop\MCM\Practicum\data\ETH,gas,usage merged_60min 11-26 to 01-26.csv', sep=',')

In [14]:

df_merged_gwei_10min.to_csv(r'C:\Users\conal\Desktop\MCM\Practicum\data\ETH,gas,usage merged_10min 11-26 to 01-26.csv', sep=',')