In [None]:
from prophet import Prophet
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tqdm import tqdm
import matplotlib.pyplot as plt

from prophetmodel import ProphetModel
from seriesdata import SeriesDataset
from residualnn import ResidualNetwork

In [None]:
# Pick the compute device: CUDA when torch can see a GPU, otherwise fall back to CPU

if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

In [None]:
# Training Loop

def trainm(model, loader, optimizer, scheduler, loss_fn, epochs):
    """Train ``model`` on ``loader`` for ``epochs`` epochs.

    Args:
        model: ``nn.Module`` whose output has shape ``(batch, 1)``; dimension 1
            is squeezed before the loss is computed.
        loader: Iterable yielding ``(x, y)`` tensor batches.
        optimizer: Optimizer stepped once per batch.
        scheduler: LR scheduler stepped once per epoch.
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a scalar tensor.
        epochs: Number of passes over ``loader``.

    Returns:
        The same ``model`` object. Training stops early, before the offending
        batch is back-propagated, if a NaN loss is encountered.
    """
    model.train()

    # Send each batch to wherever the model's weights live instead of relying
    # on a notebook-level global being in sync with the model.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(epochs):

        total_loss = 0.0
        progress = tqdm(loader, desc=f'Epoch {epoch+1}/{epochs}', leave=True)

        for batch_idx, (x, y) in enumerate(progress):
            x, y = x.to(target_device), y.to(target_device)

            optimizer.zero_grad()
            out = model(x).squeeze(1)
            loss = loss_fn(out, y)

            # Bail out BEFORE backward()/step() so NaN gradients never reach the weights.
            if torch.isnan(loss):
                print(f"NaN loss encountered in epoch {epoch+1}, batch {batch_idx+1}. Stopping.")
                return model

            loss.backward()
            optimizer.step()
            total_loss += loss.item()

            # Each loss value is already a per-batch figure, so average over the
            # batches seen so far (same convention as eval()), not over the dataset size.
            avg_loss = total_loss / (batch_idx + 1)

            progress.set_postfix({
                'Loss': f'{avg_loss:.4f}'
            })

        scheduler.step()

    return model


def eval(model, loader, loss_fn):
    """Return the mean per-batch loss of ``model`` over ``loader``.

    Note: this name shadows the built-in ``eval``; it is kept so existing
    callers keep working.

    Args:
        model: ``nn.Module`` whose output has shape ``(batch, 1)``; dimension 1
            is squeezed before the loss is computed.
        loader: Iterable yielding ``(x, y)`` tensor batches.
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a scalar tensor.

    Returns:
        Sum of the per-batch losses divided by the number of batches.

    Raises:
        ValueError: If ``loader`` yields no batches.
    """
    model.eval()

    # Evaluate on the device that holds the model's weights.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    total_loss = 0.0
    batches = 0

    with torch.no_grad():
        # Iterate the loader that was passed in, not a notebook-level global.
        for x, y in loader:
            x, y = x.to(target_device), y.to(target_device)

            out = model(x).squeeze(1)
            loss = loss_fn(out, y)
            total_loss += loss.item()
            batches += 1

    if batches == 0:
        raise ValueError("eval() received an empty loader; there are no batches to average over")

    return total_loss / batches

In [None]:
# Sentiment extraction

import pandas as pd


# Inclusive calendar window of VIX observations to keep
window_start = pd.to_datetime('2019-03-18').date()
window_end = pd.to_datetime('2024-12-31').date()

vix_raw = pd.read_csv('vix_data.csv')
vix_raw['date'] = pd.to_datetime(vix_raw['date']).dt.date
in_window = vix_raw['date'].between(window_start, window_end)

# The value column in the CSV header carries a leading space (' value')
macro_sent = vix_raw[in_window].rename(columns={'date': 'Date', ' value': 'vix'})

# Expand to one row per calendar day; days missing from the CSV take the
# most recent earlier VIX value
start_date = macro_sent['Date'].min()
end_date = macro_sent['Date'].max()
date_range = pd.date_range(start=start_date, end=end_date, freq='D')

macro_sent = macro_sent.set_index('Date').reindex(date_range)
macro_sent['vix'] = macro_sent['vix'].ffill()
macro_sent = macro_sent.reset_index().rename(columns={'index': 'Date'})
macro_sent['Date'] = pd.to_datetime(macro_sent['Date']).dt.date

print(macro_sent.head(5))
print(len(macro_sent))

In [None]:
# Convert VIX to sentiment score

def vix_tos(vix, rv, high=30, low=15):
    """Map a VIX level to a sentiment score.

    Args:
        vix: VIX value for the day.
        rv: Return value whose sign is used when the VIX is at or below ``low``.
        high: At or above this level the score is -1.
        low: At or below this level the score is ``np.sign(rv)``.

    Returns:
        -1 when ``vix >= high``, 0 when ``low < vix < high``, ``np.sign(rv)``
        when ``vix <= low``, and ``np.nan`` when ``vix`` is NaN.
    """
    # A NaN fails every comparison below; report it explicitly as missing
    # instead of silently falling off the end of the function with None.
    if np.isnan(vix):
        return np.nan
    if vix >= high:
        return -1
    if vix > low:
        return 0
    return np.sign(rv)

In [None]:
# Forward fill empty slots in data

# The first 7 rows of the CSV are skipped
# (presumably rows without complete 7-day rolling features — TODO confirm)
data_raw = (
    pd.read_csv('Preprocessed/data.csv')
    .iloc[7:]
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
)

full_date_range = pd.date_range(
    start=data_raw['Date'].min(),
    end=data_raw['Date'].max(),
    freq='D',
)

# One row per calendar day; gaps inherit the values of the previous available row
data = (
    data_raw
    .set_index('Date')
    .reindex(full_date_range)
    .ffill()
    .reset_index()
    .rename(columns={'index': 'Date'})
)

In [None]:
# Merge data with sentiment

# The VIX frame and the price frame are combined purely by row position, so
# verify they cover exactly the same calendar days before copying columns across.
if len(macro_sent) != len(data):
    raise ValueError(
        f"macro_sent has {len(macro_sent)} rows but data has {len(data)}; "
        "they must cover the same daily date range"
    )
if not (pd.to_datetime(macro_sent['Date']).to_numpy() == pd.to_datetime(data['Date']).to_numpy()).all():
    raise ValueError("macro_sent and data are not aligned on the same dates")

macro_sent['sp500_log_return'] = data['sp500_log_return'].to_numpy()
newdf = macro_sent.copy()

# One sentiment score per day from that day's VIX level and S&P 500 log return
sentiments = [
    vix_tos(vix, log_return)
    for vix, log_return in zip(macro_sent['vix'], macro_sent['sp500_log_return'])
]

data['macro_sentiment'] = sentiments
print(newdf.head())

# Chronological split: everything through 2023 trains, later rows are held out
train = data[(data['Date'] >= '2019-03-11') & (data['Date'] <= '2023-12-31')]
test = sliced_data = data[(data['Date'] > '2023-12-31')]
print(test.columns)

In [None]:
# Extra regressor columns handed to the Prophet wrapper
sp500_regressors = [
    'sp500_rolling_mean_return_7',
    'sp500_prev_log_return_1',
    'sp500_prev_log_return_2',
    'sp500_prev_log_return_3',
    'sp500_log_return_volatility',
    'sp500_log_return',
    'sp500_close',
    'Inflation',
    'bonds_close',
    'macro_sentiment',
]

# Order of array:
#   [0] changepoint_range
#   [1] changepoint_prior_scale
#   [2] seasonality_prior_scale
changepoints = [0.9, 0.1, 0.1]

pmodel = ProphetModel(train, 'btc_close', 'Date', sp500_regressors, changepoints)
# 1461 days = 4 * 365.25, i.e. a four-year cycle
pmodel.add_seasonality(name='quadrennial', period=1461, fourier_order=5)
pmodel.fit()

# Despite its name, ptest is the TRAINING frame renamed to ds / y;
# the forecast below is therefore an in-sample fit.
ptest = train.rename(columns={'Date': "ds", 'btc_close': "y"})
# 1748 is presumably the number of daily rows to predict over — TODO confirm against len(train)
forecast = pmodel.predict_past(1748, ptest, 'D')

In [None]:
# Graph predictions on train data

# Attach the in-sample Prophet fit to the training frame; rows with any missing value are dropped
merged = pd.merge(ptest, forecast[['ds', 'yhat']], on='ds', how='left').dropna()

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(merged['ds'], merged['y'], label='Log Actual Return', color='blue')
ax.plot(merged['ds'], merged['yhat'], label='Predicted Return', color='red', linestyle='--')
ax.set_xlabel('Date')
ax.set_ylabel('Log Return')
ax.set_title('Log-Transformed Actual vs Predicted BTC Returns')

ax.legend()
plt.xticks(rotation=45)
ax.grid(True)
plt.show()


In [None]:
# Compute MAE, MSE, and R2

# In-sample fit quality: observed target vs. Prophet's fitted value
actual_train = merged['y']
fitted_train = merged['yhat']

mae, mse, r2 = (
    metric(actual_train, fitted_train)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

print(f"MAE (Mean Absolute Error): {mae}")
print(f"MSE (Mean Squared Error): {mse}")
print(f"R² (R-squared): {r2}")

In [None]:
# Compute residuals

# residual = prediction minus actual, so (yhat - residual) recovers the actual value.
# The target column goes back to its original name afterwards.
merged = (
    merged
    .assign(residual=merged['yhat'] - merged['y'])
    .rename(columns={'y': 'btc_close'})
)

In [None]:
# Create dataframe for MLP, set regressors

# MLP inputs: the Prophet prediction plus the exogenous regressors
regressors = [
    'yhat',
    'sp500_log_return',
    'sp500_prev_log_return_1',
    'sp500_prev_log_return_2',
    'sp500_prev_log_return_3',
    'sp500_rolling_mean_return_7',
    'sp500_log_return_volatility',
    'Inflation',
    'bonds_close',
    'macro_sentiment',
]

# Training frame: the same columns with the target 'residual' slotted in as the third column
result_columns = regressors[:2] + ['residual'] + regressors[2:]
result = merged[result_columns]

In [None]:
class TimeSeriesDataset(Dataset):
    """Map-style dataset exposing DataFrame rows as float32 ``(features, target)`` pairs."""

    def __init__(self, df, in_cols, target_column):
        """Convert the selected columns of ``df`` to float32 tensors.

        Args:
            df: Source DataFrame.
            in_cols: Column names used as model inputs, in feature order.
            target_column: Name of the column to predict.
        """
        feature_values = df[in_cols].values
        target_values = df[target_column].values
        self.features = torch.tensor(feature_values, dtype=torch.float32)
        self.target = torch.tensor(target_values, dtype=torch.float32)

    def __len__(self):
        """Number of rows in the dataset."""
        return self.target.shape[0]

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at ``idx``."""
        sample = (self.features[idx], self.target[idx])
        return sample
        
# Fit the MLP to predict Prophet's residual from the regressor columns
dataset = TimeSeriesDataset(df=result, in_cols=regressors, target_column='residual')
dataloader = DataLoader(dataset, batch_size=64)

# First argument is presumably the input width (10 regressor columns) and the
# list the hidden-layer sizes — TODO confirm against ResidualNetwork
network = ResidualNetwork(10, [128, 64, 16]).to(device)

loss_fn = nn.MSELoss()
optimizer = optim.Adam(network.parameters(), lr=5e-4)
# Halve the learning rate every 10 epochs
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

trainm(
    model=network,
    loader=dataloader,
    optimizer=optimizer,
    scheduler=scheduler,
    loss_fn=loss_fn,
    epochs=150,
)

In [None]:
# Autoregressive predictor

def pre(prophet, mlp, dat, span=2):
    """Predict rows ``1 .. span-1`` of ``dat`` with Prophet, then correct with the MLP.

    For each row the Prophet wrapper produces ``yhat``; the MLP receives
    ``yhat`` plus that row's regressor values and its output is subtracted
    from ``yhat`` to give the final prediction.

    Args:
        prophet: Object exposing ``predict(periods, frame, freq, start=...)``
            that returns a frame with a ``'yhat'`` column.
        mlp: Trained ``nn.Module`` taking a 10-element feature vector.
        dat: DataFrame with ``'ds'``, ``'y'`` and the regressor columns.
        span: Exclusive upper row index; ``span - 1`` predictions are produced.

    Returns:
        Tuple ``(preds, vpreds, residuals)`` of equal-length lists holding
        pandas Series (as returned by ``forecast['yhat']``):
        ``preds`` is ``yhat - mlp_output``, ``vpreds`` is the raw ``yhat``,
        and ``residuals`` is ``yhat`` minus the PREVIOUS row's ``y``.
    """
    mlp.eval()

    # Run on the device holding the MLP's weights rather than a notebook global.
    first_param = next(mlp.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    # Must stay in the same order as the feature columns used to train the MLP
    # (yhat first, then these).
    feature_columns = (
        'sp500_log_return',
        'sp500_prev_log_return_1',
        'sp500_prev_log_return_2',
        'sp500_prev_log_return_3',
        'sp500_rolling_mean_return_7',
        'sp500_log_return_volatility',
        'Inflation',
        'bonds_close',
        'macro_sentiment',
    )

    preds = []
    vpreds = []
    residuals = []

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        for i in range(1, span):

            # Prophet portion
            current = dat.iloc[i]
            prev = dat.iloc[i - 1]
            forecast = prophet.predict(1, pd.DataFrame([current]), 'D', start=current['ds'])
            yhat = forecast['yhat']
            residuals.append(yhat - prev['y'])
            vpreds.append(yhat)

            # MLP portion; positional access so the forecast's index labels do not matter
            vector = np.array(
                [yhat.iloc[0]] + [current[name] for name in feature_columns],
                dtype=np.float32,
            )
            vector = torch.tensor(vector, dtype=torch.float32).to(target_device)

            out_cpu = mlp(vector).cpu().detach().numpy()

            # Training residual was (yhat - actual), so subtracting it recovers the estimate
            preds.append(yhat - out_cpu)

    return preds, vpreds, residuals

In [None]:
# Held-out rows restricted to the columns Prophet needs, renamed to its ds / y convention
prophet_columns = ['Date', 'btc_close', *sp500_regressors]
test_set = test[prophet_columns]
prophet_subset = test_set.rename(columns={'Date': 'ds', 'btc_close': 'y'})

In [None]:
# Graph predictions
# preds = Overall prediction (Prophet yhat minus the MLP's predicted residual)
# vpreds = Prophet raw predictions
# residuals = Prophet prediction minus the previous day's actual value (as computed in pre())

preds, vpreds, residuals = pre(pmodel, network, prophet_subset, span=365)

# pre() starts at row 1, so prediction k belongs to test row k + 1.
# Take the actuals from rows 1..len(preds) so each prediction is compared
# with the value of the day it was made for.
actual_rows = test['btc_close'].iloc[1:len(preds) + 1]

for i in range(min(10, len(preds))):
    print(f"Pred: {preds[i][0]} | VPred: {vpreds[i][0]} | Residual: {residuals[i][0]} | Test: {actual_rows.iloc[i]}")

# Name kept for the cells below; these are actual prices, not residuals
test_residuals = actual_rows.values

plt.figure(figsize=(10, 6))
plt.plot(range(len(preds)), np.array(preds), label='Predictions', color='blue')
plt.plot(range(len(test_residuals)), test_residuals, label='Actual Price', color='red', linestyle='--')
plt.xlabel('Time')
plt.ylabel('Price')
plt.title('Predicted vs Actual BTC Price')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Each entry returned by pre() is a one-element Series; keep only the scalar
preds_flat = [item[0] for item in preds]
vpreds_flat = [item[0] for item in vpreds]
residuals_flat = [item[0] for item in residuals]

df = pd.DataFrame({
    'macro_predictions': preds_flat,
    'macro_vpredictions': vpreds_flat,
    'macro_residuals': residuals_flat,
    # Trim the actuals to the number of predictions instead of a hard-coded length
    'macro_actual': test_residuals[:len(preds_flat)]
})

df.to_csv('macro_preds.csv', index=False)

# Last expression of the cell so the preview is actually rendered
df.head()

In [None]:
# scikit-learn metrics take (y_true, y_pred). MAE and MSE are symmetric, but
# R² is not, so the argument order matters.
y_true = np.asarray(test_residuals, dtype=float).reshape(-1)
y_pred = np.asarray(preds, dtype=float).reshape(-1)

mae = mean_absolute_error(y_true, y_pred)
mse = mean_squared_error(y_true, y_pred)
r2 = r2_score(y_true, y_pred)

print(f"MAE (Mean Absolute Error): {mae}")
print(f"MSE (Mean Squared Error): {mse}")
print(f"RMSE (Root Mean Squared Error): {np.sqrt(mse)}")
print(f"R² (R-squared): {r2}")

In [None]:
import pandas as pd

# One scalar per prediction (each entry of preds is a one-element Series)
macro_prediction_values = [series.iloc[0] for series in preds]

df = pd.DataFrame({'macro_predictions': macro_prediction_values})
df.to_csv('macro_predictions.csv', index=False)

print("Predictions have been saved as 'macro_predictions.csv'.")

In [None]:
# Save models

from pathlib import Path

import joblib

# Neither joblib.dump nor torch.save creates missing parent directories
Path('Models').mkdir(parents=True, exist_ok=True)

joblib.dump(pmodel, 'Models/macro_prophet.pkl')
# NOTE(review): this pickles the whole module object, so loading it needs the
# ResidualNetwork class importable under the same path. Saving
# network.state_dict() is the more portable format, but switching would change
# the file layout for anything that already loads this checkpoint.
torch.save(network, 'Models/macro_mlp.pth')