In [1]:
import pandas as pd

In [2]:
prices = pd.read_csv('prices.log', header=None, names=["Price", "Timestamp"])
predictions = pd.read_csv('predictions.log', header=None, names=["Price", "Timestamp"])

In [3]:
prices

Unnamed: 0,Price,Timestamp
0,2868.38,2025-02-02T19:06:58.0000000
1,2868.76,2025-02-02T19:06:43.0000000
2,2866.99,2025-02-02T19:06:28.0000000
3,2865.53,2025-02-02T19:06:13.0000000
4,2863.27,2025-02-02T19:05:58.0000000
...,...,...
1055,2487.67,2025-02-02T23:31:42.0000000
1056,2486.44,2025-02-02T23:31:27.0000000
1057,2487.56,2025-02-02T23:31:12.0000000
1058,2496.49,2025-02-02T23:30:57.0000000


In [4]:
predictions

Unnamed: 0,Price,Timestamp
0,2920.47,2025-02-02T19:08:15.4451460-05:00
1,2915.56,2025-02-02T19:10:47.2129620-05:00
2,2911.10,2025-02-02T19:13:18.9226220-05:00
3,2907.30,2025-02-02T19:15:50.9208080-05:00
4,2894.95,2025-02-02T19:18:23.6867150-05:00
...,...,...
101,2572.13,2025-02-02T23:24:07.1765690-05:00
102,2565.23,2025-02-02T23:26:38.9509820-05:00
103,2552.77,2025-02-02T23:29:10.4187690-05:00
104,2554.74,2025-02-02T23:31:42.0567440-05:00


In [5]:
try:
    prices['Timestamp'] = pd.to_datetime(prices['Timestamp']).dt.tz_localize('US/Eastern')
    predictions['Timestamp'] = pd.to_datetime(predictions['Timestamp']).dt.tz_localize('US/Eastern')
except: 
    print("conversion error. continuing...")

prices['Timestamp'] = pd.to_datetime(prices['Timestamp']).dt.tz_convert('UTC')
predictions['Timestamp'] = pd.to_datetime(predictions['Timestamp']).dt.tz_convert('UTC')

conversion error. continuing...


In [6]:
# Sort both DataFrames by Timestamp
prices = prices.sort_values('Timestamp')
predictions = predictions.sort_values('Timestamp')

# Perform the merge with a 2-second tolerance
merged_df = pd.merge_asof(
    prices,
    predictions,
    on='Timestamp',
    tolerance=pd.Timedelta(seconds=2),
    direction='nearest',  # Options: 'backward', 'forward', 'nearest'
    suffixes=('_real', '_pred')
)

# Display the merged DataFrame
df = merged_df.dropna()
df

Unnamed: 0,Price_real,Timestamp,Price_pred
14,2858.75,2025-02-03 00:08:14+00:00,2920.47
24,2855.15,2025-02-03 00:10:47+00:00,2915.56
34,2853.70,2025-02-03 00:13:18+00:00,2911.10
44,2843.14,2025-02-03 00:15:50+00:00,2907.30
54,2823.70,2025-02-03 00:18:23+00:00,2894.95
...,...,...,...
1014,2504.56,2025-02-03 04:21:35+00:00,2575.25
1024,2505.74,2025-02-03 04:24:07+00:00,2572.13
1034,2503.44,2025-02-03 04:26:38+00:00,2565.23
1044,2497.35,2025-02-03 04:29:10+00:00,2552.77


##### Calculate % Error

In [7]:
# Calculate % Difference
df['% Difference'] = ((df['Price_real'] - df['Price_pred']) / df['Price_pred']) * 100

# Calculate the average of the absolute values
average_abs = df['% Difference'].abs().mean()
print(f"Average difference: {average_abs}%")

Average difference: 2.5506125640742896%


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['% Difference'] = ((df['Price_real'] - df['Price_pred']) / df['Price_pred']) * 100


##### Calculate % Correct BUY/SELL predictions

In [8]:
prices_prices = list(prices['Price'])
prices_timestamp = list(prices['Timestamp'])
prediction_timestamps = list(df['Timestamp'])
predictions_prices = list(df['Price_pred'])

In [9]:
correct_buy_predictions = 0
incorrect_buy_predictions = 0
correct_sell_predictions = 0
incorrect_sell_predictions = 0

for i in range(len(prediction_timestamps)):
    timestamp = prediction_timestamps[i]

    # Find index of real price
    index = prices_timestamp.index(timestamp)
    # Real price of ETH
    real_price = prices_prices[index]
    #Predicted price of ETH
    predicted_price = predictions_prices[i]
    if index - 5 < 0:
        continue
    #Price 5 timestamps ago
    previous_price = prices_prices[index - 5]

    #Check if both prediction and real price were up or down
    if (real_price - previous_price > 0 and predicted_price - previous_price > 0):
        correct_buy_predictions += 1
    elif( real_price - previous_price < 0 and predicted_price - previous_price > 0):
        incorrect_buy_predictions += 1
    elif (real_price - previous_price < 0 and predicted_price - previous_price < 0):
        correct_sell_predictions += 1
    elif (real_price - previous_price > 0 and predicted_price - previous_price < 0):
        incorrect_sell_predictions += 1

print(f"SELL Predictions: {correct_sell_predictions + incorrect_sell_predictions} Correct: {correct_sell_predictions} Incorrect: {incorrect_sell_predictions}")
print(f"BUY Predictions: {correct_buy_predictions + incorrect_buy_predictions} Correct: {correct_buy_predictions} Incorrect: {incorrect_buy_predictions}")

SELL Predictions: 1 Correct: 0 Incorrect: 1
BUY Predictions: 103 Correct: 37 Incorrect: 66
