In [115]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

### Data loads

In [116]:
# Every price CSV is read with the same two columns: a timestamp and a price.
col_names = ['Entry Date UTC', 'Price']


def _with_utc_timestamps(frame, fmt=None):
    """Return a copy of `frame` with 'Entry Date UTC' parsed to tz-aware UTC datetimes.

    `fmt` is forwarded to `pd.to_datetime(format=...)`; None (the pandas
    default) lets pandas infer the timestamp layout.
    """
    parsed = pd.to_datetime(frame['Entry Date UTC'], format=fmt, utc=True)
    return frame.assign(**{'Entry Date UTC': parsed})


# --- CME ETH from Trading View (1h) ---
cme_data_1h_tv = (
    pd.read_csv('../data/cme/cme_1h_tv.csv', usecols=col_names)
    .pipe(_with_utc_timestamps)
)

# --- CME ETH from CME Group (1m file; 1h and 5m series are derived from it) ---
# The 1m frame stays indexed by timestamp so the minute-of-hour filters
# below can read it straight from the index.
cme_data_1m_cme = (
    pd.read_csv('../data/cme/cme_1m_cme.csv', usecols=col_names)
    .pipe(_with_utc_timestamps, fmt='%Y%m%dT%H:%M:%S')
    .set_index('Entry Date UTC')
)
# Rows stamped on the hour (minute == 0) form the 1h series; rows stamped on
# every fifth minute form the 5m series. Both get the timestamp back as a column.
cme_data_1h_cme = cme_data_1m_cme[cme_data_1m_cme.index.minute == 0].reset_index()
cme_data_5m_cme = cme_data_1m_cme[cme_data_1m_cme.index.minute % 5 == 0].reset_index()

# --- ETH from Trading View (1h) ---
eth_data_1h_tv = (
    pd.read_csv('../data/eth/eth_1h_tv.csv', usecols=col_names)
    .pipe(_with_utc_timestamps)
)

# --- ETH from Dune prices.usd (5m file; the 1h series is derived from it) ---
eth_data_5m_dune = (
    pd.read_csv('../data/eth/eth_5m_dune.csv', usecols=col_names)
    .pipe(_with_utc_timestamps)
    .set_index('Entry Date UTC')
)
eth_data_1h_dune = eth_data_5m_dune[eth_data_5m_dune.index.minute == 0].reset_index()

# --- stETH from Dune dex.trades ---
# Disabled 1m-based load (would derive 1h and 5m series), kept for reference:
# steth_data_1m_dunest = pd.read_csv('../data/steth/steth_1m.csv', usecols=col_names)
# steth_data_1m_dunest = steth_data_1m_dunest['Entry Date UTC'] = pd.to_datetime(steth_data_1m_dunest['Entry Date UTC'])
# steth_data_1m_dunest.set_index('Entry Date UTC', inplace=True)
# steth_data_1h_dunest = steth_data_1m_dunest[steth_data_1m_dunest.index.minute == 0]
# steth_data_5m_dunest = steth_data_1m_dunest[steth_data_1m_dunest.index.minute % 5 == 0]
# steth_data_1h_dunest.reset_index(inplace=True)
# steth_data_5m_dunest.reset_index(inplace=True)
# Active load: the 1h file is read directly; its timestamps use a
# 'YYYY-MM-DDTHH:MM:SS' layout, hence the explicit format.
steth_data_1h_dune = (
    pd.read_csv('../data/steth/steth_1h.csv', usecols=col_names)
    .pipe(_with_utc_timestamps, fmt='%Y-%m-%dT%H:%M:%S')
)


In [117]:
# Preview the first five rows of the hourly stETH frame.
steth_data_1h_dune.head(n=5)

Unnamed: 0,Entry Date UTC,Price
0,2022-01-01 01:00:00+00:00,3706.18
1,2022-01-01 02:00:00+00:00,3721.96
2,2022-01-01 03:00:00+00:00,3713.88
3,2022-01-01 04:00:00+00:00,3713.68
4,2022-01-01 05:00:00+00:00,3704.22


### Merge CME ETH [Trading View] & ETH [Trading View]

In [118]:
# Build the CME [Trading View] vs. ETH [Trading View] hourly dataset in one chain:
#   1. join on the shared timestamp (merge's default join keeps only timestamps
#      present in both frames); the two 'Price' columns become Price_cme / Price_eth
#   2. add row-over-row absolute changes (diff) and relative changes (pct_change)
#   3. drop rows where either return is null (at least the first row, which has
#      no predecessor)
# NOTE(review): diff()/pct_change() compare each row with the previous *merged*
# row, so a change spans more than one hour wherever a timestamp is missing
# from either source - confirm this is acceptable for a "1h" return.
dataset_1h_tv = (
    cme_data_1h_tv
    .merge(eth_data_1h_tv, on='Entry Date UTC', suffixes=('_cme', '_eth'))
    .assign(
        Price_cme_delta=lambda df: df['Price_cme'].diff(),
        Price_eth_delta=lambda df: df['Price_eth'].diff(),
        Price_cme_return=lambda df: df['Price_cme'].pct_change(),
        Price_eth_return=lambda df: df['Price_eth'].pct_change(),
    )
    .dropna(subset=['Price_cme_return', 'Price_eth_return'])
)

### Merge CME ETH [CME Group] & ETH [Trading View]

In [119]:
# Build the CME [CME Group] vs. ETH [Trading View] hourly dataset in one chain:
#   1. join on the shared timestamp (merge's default join keeps only timestamps
#      present in both frames); the two 'Price' columns become Price_cme / Price_eth
#   2. add row-over-row absolute changes (diff) and relative changes (pct_change)
#   3. drop rows where either return is null (at least the first row, which has
#      no predecessor)
# NOTE(review): diff()/pct_change() compare each row with the previous *merged*
# row. The preview of this frame shows consecutive rows at 03:00 and 06:00, so
# some "1h" changes actually span several hours - confirm this is intended.
dataset_1h_cme_tv = (
    cme_data_1h_cme
    .merge(eth_data_1h_tv, on='Entry Date UTC', suffixes=('_cme', '_eth'))
    .assign(
        Price_cme_delta=lambda df: df['Price_cme'].diff(),
        Price_eth_delta=lambda df: df['Price_eth'].diff(),
        Price_cme_return=lambda df: df['Price_cme'].pct_change(),
        Price_eth_return=lambda df: df['Price_eth'].pct_change(),
    )
    .dropna(subset=['Price_cme_return', 'Price_eth_return'])
)

### Merge CME ETH [Trading View] & ETH [Dune's prices.usd]

In [120]:
# Build the CME [Trading View] vs. ETH [Dune prices.usd] hourly dataset in one chain:
#   1. join on the shared timestamp (merge's default join keeps only timestamps
#      present in both frames); the two 'Price' columns become Price_cme / Price_eth
#   2. add row-over-row absolute changes (diff) and relative changes (pct_change)
#   3. drop rows where either return is null (at least the first row, which has
#      no predecessor)
# NOTE(review): diff()/pct_change() compare each row with the previous *merged*
# row, so a change spans more than one hour wherever a timestamp is missing
# from either source - confirm this is acceptable for a "1h" return.
dataset_1h_tv_dune = (
    cme_data_1h_tv
    .merge(eth_data_1h_dune, on='Entry Date UTC', suffixes=('_cme', '_eth'))
    .assign(
        Price_cme_delta=lambda df: df['Price_cme'].diff(),
        Price_eth_delta=lambda df: df['Price_eth'].diff(),
        Price_cme_return=lambda df: df['Price_cme'].pct_change(),
        Price_eth_return=lambda df: df['Price_eth'].pct_change(),
    )
    .dropna(subset=['Price_cme_return', 'Price_eth_return'])
)

### Merge CME ETH [Trading View] & stETH [Dune's dex.trades]

In [121]:
# Preview the first five rows of the hourly CME [Trading View] frame.
cme_data_1h_tv.head(n=5)

Unnamed: 0,Entry Date UTC,Price
0,2022-01-03 00:00:00+00:00,3839.0
1,2022-01-03 01:00:00+00:00,3824.5
2,2022-01-03 02:00:00+00:00,3810.5
3,2022-01-03 03:00:00+00:00,3805.0
4,2022-01-03 04:00:00+00:00,3795.0


In [122]:
# Preview the first five rows of the hourly stETH frame before merging it.
steth_data_1h_dune.head(n=5)

Unnamed: 0,Entry Date UTC,Price
0,2022-01-01 01:00:00+00:00,3706.18
1,2022-01-01 02:00:00+00:00,3721.96
2,2022-01-01 03:00:00+00:00,3713.88
3,2022-01-01 04:00:00+00:00,3713.68
4,2022-01-01 05:00:00+00:00,3704.22


In [123]:
# Build the CME [Trading View] vs. stETH [Dune dex.trades] hourly dataset in one chain:
#   1. join on the shared timestamp (merge's default join keeps only timestamps
#      present in both frames); the two 'Price' columns become Price_cme / Price_eth
#      (here the '_eth' columns hold the stETH prices)
#   2. add row-over-row absolute changes (diff) and relative changes (pct_change)
#   3. drop rows where either return is null (at least the first row, which has
#      no predecessor)
# NOTE(review): diff()/pct_change() compare each row with the previous *merged*
# row, so a change spans more than one hour wherever a timestamp is missing
# from either source - confirm this is acceptable for a "1h" return.
dataset_1h_tv_dunest = (
    cme_data_1h_tv
    .merge(steth_data_1h_dune, on='Entry Date UTC', suffixes=('_cme', '_eth'))
    .assign(
        Price_cme_delta=lambda df: df['Price_cme'].diff(),
        Price_eth_delta=lambda df: df['Price_eth'].diff(),
        Price_cme_return=lambda df: df['Price_cme'].pct_change(),
        Price_eth_return=lambda df: df['Price_eth'].pct_change(),
    )
    .dropna(subset=['Price_cme_return', 'Price_eth_return'])
)

### Correlations

In [127]:
# Correlation between the CME return and the ETH-side return, computed
# separately for each of the four merged hourly datasets.
correlation_1h_tv = dataset_1h_tv.Price_cme_return.corr(
    dataset_1h_tv.Price_eth_return
)
correlation_1h_cme_tv = dataset_1h_cme_tv.Price_cme_return.corr(
    dataset_1h_cme_tv.Price_eth_return
)
correlation_1h_tv_dune = dataset_1h_tv_dune.Price_cme_return.corr(
    dataset_1h_tv_dune.Price_eth_return
)
correlation_1h_tv_dunest = dataset_1h_tv_dunest.Price_cme_return.corr(
    dataset_1h_tv_dunest.Price_eth_return
)

# Emit one "label / value" pair per dataset. The trailing "\n" inside the first
# three value strings yields the blank line separating consecutive pairs.
# NOTE(review): the last label says 'eth [dune dex.trades]', but
# dataset_1h_tv_dunest is built from the stETH frame - consider relabelling.
print(
    'cme eth [trading view] vs. eth [trading view]',
    f"1h corr: {correlation_1h_tv}\n",
    'cme eth [cme group] vs. eth [trading view]',
    f"1h corr: {correlation_1h_cme_tv}\n",
    'cme eth [trading view] vs. eth [dune prices.usd]',
    f"1h corr: {correlation_1h_tv_dune}\n",
    'cme eth [trading view] vs. eth [dune dex.trades]',
    f"1h corr: {correlation_1h_tv_dunest}",
    sep='\n',
)

cme eth [trading view] vs. eth [trading view]
1h corr: 0.9850574832964637

cme eth [cme group] vs. eth [trading view]
1h corr: 0.9808721585756048

cme eth [trading view] vs. eth [dune prices.usd]
1h corr: 0.9191664770716793

cme eth [trading view] vs. eth [dune dex.trades]
1h corr: 0.4907356385732146


In [130]:
# dataset_1h_tv_dunest.head(15)

In [129]:
# Inspect the first 25 rows of the CME [CME Group] vs. ETH [Trading View] dataset.
dataset_1h_cme_tv.head(n=25)

Unnamed: 0,Entry Date UTC,Price_cme,Price_eth,Price_cme_delta,Price_eth_delta,Price_cme_return,Price_eth_return
1,2023-01-03 00:00:00+00:00,1211.5,1213.69,-2.5,-1.22,-0.002059,-0.001004
2,2023-01-03 01:00:00+00:00,1212.0,1215.41,0.5,1.72,0.000413,0.001417
3,2023-01-03 02:00:00+00:00,1210.0,1213.73,-2.0,-1.68,-0.00165,-0.001382
4,2023-01-03 03:00:00+00:00,1209.5,1213.8,-0.5,0.07,-0.000413,5.8e-05
5,2023-01-03 06:00:00+00:00,1214.0,1217.47,4.5,3.67,0.003721,0.003024
6,2023-01-03 08:00:00+00:00,1213.5,1216.34,-0.5,-1.13,-0.000412,-0.000928
7,2023-01-03 09:00:00+00:00,1212.0,1215.34,-1.5,-1.0,-0.001236,-0.000822
8,2023-01-03 10:00:00+00:00,1214.0,1217.31,2.0,1.97,0.00165,0.001621
9,2023-01-03 11:00:00+00:00,1211.5,1215.5,-2.5,-1.81,-0.002059,-0.001487
10,2023-01-03 12:00:00+00:00,1210.5,1214.68,-1.0,-0.82,-0.000825,-0.000675
