In [235]:
import pandas as pd

In [236]:
prices = pd.read_csv('prices.log', header=None, names=["Price", "Timestamp"])
predictions = pd.read_csv('predictions.log', header=None, names=["Price", "Timestamp"])

In [237]:
prices

Unnamed: 0,Price,Timestamp
0,3255.00,2025-02-01T12:16:01.0000000
1,3252.21,2025-02-01T12:15:45.0000000
2,3252.61,2025-02-01T12:15:28.0000000
3,3251.29,2025-02-01T12:15:13.0000000
4,3253.54,2025-02-01T12:14:58.0000000
...,...,...
2745,3099.71,2025-02-01T23:53:45.0000000
2746,3100.30,2025-02-01T23:53:30.0000000
2747,3099.60,2025-02-01T23:53:13.0000000
2748,3101.97,2025-02-01T23:52:58.0000000


In [238]:
predictions

Unnamed: 0,Price,Timestamp
0,3253.11,2025-02-01T12:17:19.3159310-05:00
1,3260.11,2025-02-01T12:19:51.3749920-05:00
2,3264.04,2025-02-01T12:22:22.9350380-05:00
3,3264.75,2025-02-01T12:24:54.7937520-05:00
4,3262.08,2025-02-01T12:27:26.9266930-05:00
...,...,...
270,3099.93,2025-02-01T23:46:12.7103430-05:00
271,3102.03,2025-02-01T23:48:44.4935200-05:00
272,3106.09,2025-02-01T23:51:16.1968970-05:00
273,3108.51,2025-02-01T23:53:48.2663370-05:00


In [239]:
try:
    prices['Timestamp'] = pd.to_datetime(prices['Timestamp']).dt.tz_localize('US/Eastern')
    predictions['Timestamp'] = pd.to_datetime(predictions['Timestamp']).dt.tz_localize('US/Eastern')
except: 
    print("conversion error. continuing...")

prices['Timestamp'] = pd.to_datetime(prices['Timestamp']).dt.tz_convert('UTC')
predictions['Timestamp'] = pd.to_datetime(predictions['Timestamp']).dt.tz_convert('UTC')

conversion error. continuing...


In [240]:
# Sort both DataFrames by Timestamp
prices = prices.sort_values('Timestamp')
predictions = predictions.sort_values('Timestamp')

# Perform the merge with a 2-second tolerance
merged_df = pd.merge_asof(
    prices,
    predictions,
    on='Timestamp',
    tolerance=pd.Timedelta(seconds=2),
    direction='nearest',  # Options: 'backward', 'forward', 'nearest'
    suffixes=('_real', '_pred')
)

# Display the merged DataFrame
df = merged_df.dropna()
df

Unnamed: 0,Price_real,Timestamp,Price_pred
24,3264.16,2025-02-01 17:19:50+00:00,3260.11
44,3263.37,2025-02-01 17:24:53+00:00,3264.75
74,3256.57,2025-02-01 17:32:31+00:00,3260.34
84,3256.20,2025-02-01 17:35:02+00:00,3257.33
104,3258.30,2025-02-01 17:40:07+00:00,3258.47
...,...,...,...
2694,3093.99,2025-02-02 04:41:08+00:00,3094.60
2704,3099.25,2025-02-02 04:43:41+00:00,3097.50
2714,3098.14,2025-02-02 04:46:12+00:00,3099.93
2724,3103.74,2025-02-02 04:48:44+00:00,3102.03


##### Calculate % Error

In [241]:
# Calculate % Difference
df['% Difference'] = ((df['Price_real'] - df['Price_pred']) / df['Price_pred']) * 100

# Calculate the average of the absolute values
average_abs = df['% Difference'].abs().mean()
print(f"Average difference: {average_abs}%")

Average difference: 0.13976567995043715%


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['% Difference'] = ((df['Price_real'] - df['Price_pred']) / df['Price_pred']) * 100


##### Calculate % Correct BUY/SELL predictions

In [242]:
prices_prices = list(prices['Price'])
prices_timestamp = list(prices['Timestamp'])
prediction_timestamps = list(df['Timestamp'])
predictions_prices = list(df['Price_pred'])

In [244]:
correct_buy_predictions = 0
incorrect_buy_predictions = 0
correct_sell_predictions = 0
incorrect_sell_predictions = 0

for i in range(len(prediction_timestamps)):
    timestamp = prediction_timestamps[i]

    # Find index of real price
    index = prices_timestamp.index(timestamp)
    # Real price of ETH
    real_price = prices_prices[index]
    #Predicted price of ETH
    predicted_price = predictions_prices[i]
    if index - 5 < 0:
        continue
    #Price 5 timestamps ago
    previous_price = prices_prices[index - 5]

    #Check if both prediction and real price were up or down
    if (real_price - previous_price > 0 and predicted_price - previous_price > 0):
        correct_buy_predictions += 1
    elif( real_price - previous_price < 0 and predicted_price - previous_price > 0):
        incorrect_buy_predictions += 1
    elif (real_price - previous_price < 0 and predicted_price - previous_price < 0):
        correct_sell_predictions += 1
    elif (real_price - previous_price > 0 and predicted_price - previous_price < 0):
        incorrect_sell_predictions += 1

print(f"SELL Predictions: {correct_sell_predictions + incorrect_sell_predictions} Correct: {correct_sell_predictions} Incorrect: {incorrect_sell_predictions}")
print(f"BUY Predictions: {correct_buy_predictions + incorrect_buy_predictions} Correct: {correct_buy_predictions} Incorrect: {incorrect_buy_predictions}")

SELL Predictions: 20 Correct: 10 Incorrect: 10
BUY Predictions: 160 Correct: 76 Incorrect: 84
