In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the two prediction files and place them side by side in one frame.
# Each frame gets a fresh 0..n-1 index first, because the column-wise concat
# below aligns rows on index labels.
btc = pd.read_csv('btc_preds.csv').reset_index(drop=True)
macro = pd.read_csv('macro_preds.csv').reset_index(drop=True)

df = pd.concat([btc, macro], axis='columns')

# Quick look at which columns the combined frame ended up with.
print(df.columns)

In [None]:
# Read the preprocessed data and keep only rows dated after 2023-12-31.
# NOTE(review): 'Date' is compared as a string here, which presumably relies
# on ISO-formatted (YYYY-MM-DD) dates in the CSV — confirm.
data = pd.read_csv('Preprocessed/data.csv')
sliced_data = data.loc[data['Date'] > '2023-12-31']
test = sliced_data  # same object under the name used by the later cells

In [None]:
# Discard every row that contains at least one missing value, in both the
# combined predictions frame and the test frame. Row labels are left
# untouched (no re-indexing happens here).
df = df.dropna(axis=0, how='any')
test = test.dropna(axis=0, how='any')

In [None]:
# Blend the two model outputs into a single prediction column:
# 5% weight on the macro model, 95% weight on the BTC model.
df['pred_sum'] = 0.05 * df['macro_predictions'] + 0.95 * df['btc_predictions']

# Draw 3 random rows as a spot check. A fixed random_state keeps the printed
# rows the same on every Restart & Run All.
random_samples = df.sample(n=3, random_state=42)

# Show the individual predictions, the blended prediction and the actual value.
print(random_samples[['btc_predictions',
                      'macro_predictions', 'pred_sum', 'btc_actual']])


In [None]:
# Randomly sample 3 days from the first 200 days and 3 days from the
# remaining days (up to day 365).
# This is because the first 200 fit alright but the last days fit poorly.

from tabulate import tabulate

FIRST_SEGMENT_DAYS = 200   # positions 0..199 form the first segment
TOTAL_DAYS = 365           # upper bound of the second segment
SAMPLES_PER_SEGMENT = 3
SAMPLE_SEED = 42           # fixed so the table is reproducible across runs

# iloc slices are half-open, so [:200] covers exactly the first 200 rows
# (the previous [1:199] silently excluded the first and the 200th row).
random_sample_1 = df.iloc[:FIRST_SEGMENT_DAYS].sample(
    n=SAMPLES_PER_SEGMENT, random_state=SAMPLE_SEED)

random_sample_2 = df.iloc[FIRST_SEGMENT_DAYS:TOTAL_DAYS].sample(
    n=SAMPLES_PER_SEGMENT, random_state=SAMPLE_SEED)

# Relabel each sampled row as "Day <index label + 1>" for display.
random_sample_1.index = [f"Day {i+1}" for i in random_sample_1.index]
random_sample_2.index = [f"Day {i+1}" for i in random_sample_2.index]
random_samples_combined = pd.concat([random_sample_1, random_sample_2])

table = random_samples_combined[['btc_predictions', 'macro_predictions', 'pred_sum', 'btc_actual']]

print(tabulate(table, headers='keys', tablefmt='pretty', showindex=True))

In [None]:
# Signed percent error of the blended prediction (pred_sum) relative to the
# actual value, stored as a new column on the sampled rows.
predicted = random_samples_combined['pred_sum']
actual = random_samples_combined['btc_actual']
random_samples_combined['percent_error'] = (predicted - actual) / actual * 100

# Columns shown in the styled table, in display order.
report_columns = ['btc_predictions', 'macro_predictions', 'pred_sum', 'btc_actual', 'percent_error']

# CSS rules: green header with white text, alternating grey/white body rows.
table_css = [
    {'selector': 'thead th',
     'props': [('background-color', '#4CAF50'), ('color', 'white')]},
    {'selector': 'tbody tr:nth-child(even)',
     'props': [('background-color', '#f2f2f2')]},
    {'selector': 'tbody tr:nth-child(odd)',
     'props': [('background-color', '#ffffff')]},
]

styled_df = random_samples_combined[report_columns].style.set_table_styles(table_css)

# Display the styled table as the cell output.
styled_df

In [None]:
import matplotlib.pyplot as plt

# Recompute the blended prediction (5% macro, 95% BTC) so this cell does not
# depend on the column having been created earlier.
df['pred_sum'] = 0.05 * df['macro_predictions'] + 0.95 * df['btc_predictions']

print(df['pred_sum'][:5])

# Overlay the blended prediction and the actual closing price on one axis.
# The actual series is re-indexed from 0; the prediction series is plotted
# against its own index labels.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df['pred_sum'], label='BTC + Macro Predictions', color='blue')
ax.plot(test['btc_close'].reset_index(drop=True), label='Actual Price', color='red', linestyle='--')
ax.set_xlabel('Time (Days)')
ax.set_ylabel('Price')
ax.set_title('Weighted Sum vs Actual Bitcoin Price')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

# Actual closing prices, re-indexed from 0 so they pair up with the
# predictions by position.
y_true = test['btc_close'].reset_index(drop=True)
y_pred = df['pred_sum']

# Trim both series to their common length. This replaces a hard-coded cut at
# 363 rows, which made the metric calls fail with an inconsistent-length error
# whenever the number of prediction rows was anything other than 363.
n_eval = min(len(y_true), len(y_pred))
y_true = y_true.iloc[:n_eval]
y_pred = y_pred.iloc[:n_eval]

mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)  # RMSE is reported instead of the raw MSE
mae = mean_absolute_error(y_true, y_pred)
r2 = r2_score(y_true, y_pred)

# Print the metrics
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"R² (Coefficient of Determination): {r2:.2f}")