In [None]:
import re
from pathlib import Path

from prophet import Prophet
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tqdm import tqdm

from prophetmodel import ProphetModel
from lstmmodel import LSTMModel
from seriesdata import SeriesDataset
from residualnn import ResidualNetwork

In [None]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

In [None]:
# Training Loop

def trainm(model, loader, optimizer, scheduler, loss_fn, epochs):
    """Train ``model`` for ``epochs`` passes over ``loader``.

    Each batch is moved to the notebook-level ``device``, the model output is
    squeezed on dim 1 and compared with the target via ``loss_fn``. The
    scheduler is stepped once per epoch.

    Args:
        model: network to optimise (switched to training mode here).
        loader: iterable yielding ``(x, y)`` tensor batches.
        optimizer: optimiser bound to ``model``'s parameters.
        scheduler: learning-rate scheduler, stepped after every epoch.
        loss_fn: callable ``loss_fn(out, y)`` returning a scalar tensor.
        epochs: number of passes over ``loader``.

    Returns:
        The trained model. If a NaN loss shows up, training stops early and
        the model is returned as it was *before* that batch's update.
    """
    model.train()

    for epoch in range(epochs):
        total_loss = 0.0
        progress = tqdm(loader, desc=f'Epoch {epoch+1}/{epochs}', leave=True)

        for batch_idx, (x, y) in enumerate(progress, start=1):
            x, y = x.to(device), y.to(device)

            optimizer.zero_grad()
            out = model(x).squeeze(1)
            loss = loss_fn(out, y)

            # Bail out before backward()/step() so NaN gradients never reach the weights.
            if torch.isnan(loss):
                print(f"NaN loss encountered in epoch {epoch+1}, batch {batch_idx}. Stopping.")
                return model

            loss.backward()
            optimizer.step()
            total_loss += loss.item()

            # Running mean over the batches seen so far. This assumes loss_fn
            # already averages within a batch (nn.MSELoss() does by default).
            avg_loss = total_loss / batch_idx
            progress.set_postfix({
                'Loss': f'{avg_loss:.4f}'
            })

        scheduler.step()

    return model



def eval(model, loader, loss_fn):
    """Return the mean per-batch loss of ``model`` over ``loader``.

    The model is put in evaluation mode and gradients are disabled. Batches
    are moved to the device the model's parameters live on (CPU when the
    model has no parameters).

    Args:
        model: network to evaluate.
        loader: iterable yielding ``(x, y)`` tensor batches.
        loss_fn: callable ``loss_fn(out, y)`` returning a scalar tensor.

    Returns:
        Sum of the batch losses divided by the number of batches.

    Raises:
        ValueError: if ``loader`` yields no batches.
    """
    model.eval()

    # Follow the model's own placement instead of relying on a global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    total_loss = 0.0
    batches = 0

    with torch.no_grad():
        # Iterate the loader that was passed in (previously an undefined global).
        for x, y in loader:
            x, y = x.to(target_device), y.to(target_device)

            out = model(x).squeeze(1)
            total_loss += loss_fn(out, y).item()
            batches += 1

    if batches == 0:
        raise ValueError("eval() received an empty loader")

    return total_loss / batches

In [None]:
# Load the headlines and discard the 'Accurate Sentiments' column.
btc_sent = pd.read_csv('btc_sent.csv').drop(columns=['Accurate Sentiments'])

# Reduce timestamps to calendar dates and keep the first row for each date.
btc_sent['Date'] = pd.to_datetime(btc_sent['Date']).dt.date
btc_sent = btc_sent.drop_duplicates(subset=['Date'])

# Gap-free daily calendar covering the observed date span.
start_date, end_date = btc_sent['Date'].min(), btc_sent['Date'].max()
date_range = pd.date_range(start=start_date, end=end_date, freq='D')

# Align onto that calendar; days without a headline inherit the previous one.
btc_sent = btc_sent.set_index('Date').reindex(date_range, method=None)
btc_sent['Short Description'] = btc_sent['Short Description'].ffill()
btc_sent = btc_sent.reset_index().rename(columns={'index': 'Date'})

print(btc_sent.head(5))
print(len(btc_sent))

In [None]:
# Sentiment analysis 

import lmstudio as lms

# LLM served through LM Studio, used here as a sentiment classifier.
model = lms.llm("gemma-3-12b-it-qat")
# Spell out which number means which label; the bare list "1, 0, or -1" left
# the mapping to the model's guess.
sys_prompt = 'Analyze this statement and determine if it is positive, neutral, or negative. Output only 1 for positive, 0 for neutral, or -1 for negative'

def get_sent(stmt):
    """Ask the LLM for the sentiment of ``stmt`` and return it as an int.

    Args:
        stmt: text to classify.

    Returns:
        ``1``, ``0`` or ``-1`` as parsed from the model's reply.

    Raises:
        ValueError: if the reply contains no standalone ``-1``/``0``/``1``.
    """
    reply = str(model.respond(f'{sys_prompt}: {stmt}'))
    # Tolerate surrounding text or markup ("Sentiment: -1", "**1**", "1.")
    # but reject numbers that merely contain the digit ("10", "0.7").
    match = re.search(r'(?<![\w.\-])-?[01](?!\w|\.\d)', reply)
    if match is None:
        raise ValueError(f"Could not parse a sentiment label from model reply: {reply!r}")
    return int(match.group())

In [None]:
sentiments = []
# Forward-filled days repeat the same headline, so reuse the label per distinct
# text instead of asking the LLM again.
sentiment_cache = {}

# Only the headline text is needed, so walk that column rather than full rows.
for index, statement in btc_sent['Short Description'].items():
    if statement not in sentiment_cache:
        sentiment_cache[statement] = get_sent(statement)
    sentiment = sentiment_cache[statement]
    sentiments.append(sentiment)

    # Just to keep track of progress
    if index % 100 == 0:
        print(f"Iteration {index}: {sentiment}")

btc_sent['btc_sentiment'] = sentiments

print(btc_sent.head())

In [None]:
# Report the first and last date covered by the sentiment frame.
sentiment_dates = btc_sent['Date']
start_date, end_date = sentiment_dates.min(), sentiment_dates.max()

print(start_date, end_date)

In [None]:
# Convert volatility to sentiment when necessary

def vtos(volatility, rv, high=0.05, low=0.01):
    """Derive a sentiment stand-in from volatility and the return's sign.

    Returns ``0`` when ``volatility`` lies strictly between ``low`` and
    ``high``; otherwise (at or above ``high``, or at or below ``low``) returns
    ``np.sign(rv)``. If no comparison holds (e.g. NaN volatility) the result
    is ``None``.
    """
    direction = np.sign(rv)

    if low < volatility < high:
        return 0
    if volatility >= high or volatility <= low:
        return direction
    # No branch matched (e.g. NaN volatility).
    return None

In [None]:
# Skip the first seven rows of the preprocessed file (presumably the rolling
# features' warm-up window - confirm). Copy so the column assignment below
# does not write into a slice of the freshly read frame.
data = pd.read_csv('Preprocessed/data.csv').iloc[7:].copy()
data['Date'] = pd.to_datetime(data['Date'])
btc_sent['Date'] = pd.to_datetime(btc_sent['Date'])
# Outer join keeps every date that appears in either frame.
data = pd.merge(data, btc_sent, on='Date', how='outer')

def compute_if_nan(row):
    """Return the row's ``btc_sentiment``, deriving one via ``vtos`` when it is missing."""
    existing = row['btc_sentiment']
    if not pd.isna(existing):
        return existing
    # No label for this row: derive one from volatility and return.
    return vtos(row['btc_log_return_volatility'], row['btc_log_return'])

# Fill every missing sentiment row-by-row.
data['btc_sentiment'] = data.apply(compute_if_nan, axis=1)

data['Date'] = pd.to_datetime(data['Date'])
data = data.drop(columns=['Short Description'])

# Chronological split: training window is inclusive on both ends,
# everything after it is held out.
in_train_window = data['Date'].between('2019-03-11', '2023-12-31')
after_train_window = data['Date'] > '2023-12-31'

train = data[in_train_window]
test = sliced_data = data[after_train_window]
print(test.columns)

In [None]:
# Extra regressor columns handed to the Prophet wrapper.
btc_regressors = [
    'btc_rolling_mean_return_7',
    'btc_prev_log_return_1',
    'btc_prev_log_return_2',
    'btc_prev_log_return_3',
    'btc_log_return_volatility',
    'btc_log_return',
    'btc_sentiment',
]

# Hyper-parameters passed to ProphetModel - presumably in the order
# changepoint_range, changepoint_prior_scale, seasonality_prior_scale
# (confirm against ProphetModel).
changepoints = [0.9, 0.1, 0.1]

pmodel = ProphetModel(train, 'btc_close', 'Date', btc_regressors, changepoints)
pmodel.add_seasonality(name='quadrennial', period=1461, fourier_order=5)
pmodel.fit()

# Both splits get the ds / y column names before being passed to the wrapper.
prophet_columns = {'Date': "ds", 'btc_close': "y"}
ptest = train.rename(columns=prophet_columns)
tt2 = test.rename(columns=prophet_columns)

# Forecast over the training frame.
forecast = pmodel.predict_past(1748, ptest, 'D')
print(len(forecast))
pmodel.plot(forecast)

# Forecast over the held-out frame.
forecast2 = pmodel.predict(365, tt2, 'D')
pmodel.plot(forecast2)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Attach Prophet's in-sample prediction to each training row and drop rows
# with any missing value.
merged = pd.merge(ptest, forecast[['ds', 'yhat']], on='ds', how='left').dropna()

# `y` is the renamed btc_close column, so label the series as the close
# rather than as a return.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(merged['ds'], merged['y'], label='Actual BTC Close', color='blue')
ax.plot(merged['ds'], merged['yhat'], label='Prophet Predicted BTC Close', color='red', linestyle='--')
ax.set_xlabel('Date')
ax.set_ylabel('BTC Close')
ax.set_title('Actual vs Prophet-Predicted BTC Close (Training Period)')

ax.legend()
ax.tick_params(axis='x', labelrotation=45)
ax.grid(True)
plt.show()


In [None]:
# In-sample fit quality of the Prophet model.
y_true, y_pred = merged['y'], merged['yhat']

mae = mean_absolute_error(y_true, y_pred)
mse = mean_squared_error(y_true, y_pred)
r2 = r2_score(y_true, y_pred)

for label, value in (
    ("MAE (Mean Absolute Error)", mae),
    ("MSE (Mean Squared Error)", mse),
    ("R² (R-squared)", r2),
):
    print(f"{label}: {value}")

In [None]:
# Residual is prediction minus actual (yhat - y); afterwards the target
# column gets its original name back.
merged = (
    merged
    .assign(residual=merged['yhat'] - merged['y'])
    .rename(columns={'y': 'btc_close'})
)

In [None]:
# Columns for the residual model: the 'residual' target plus its eight inputs.
# Column order is kept as-is because the dataset built from this frame
# preserves it.
network_columns = [
    'yhat',
    'btc_log_return',
    'residual',
    'btc_prev_log_return_1',
    'btc_prev_log_return_2',
    'btc_prev_log_return_3',
    'btc_rolling_mean_return_7',
    'btc_log_return_volatility',
    'btc_sentiment',
]
result = merged[network_columns]

In [None]:
import torch.optim as optim

class TimeSeriesDataset(Dataset):
    """Tabular dataset: one sample per DataFrame row.

    Every column except ``target_column`` becomes a float32 feature vector;
    ``target_column`` becomes the float32 target.
    """

    def __init__(self, df, target_column):
        feature_frame = df.drop(columns=[target_column])
        target_series = df[target_column]
        self.features = torch.tensor(feature_frame.values, dtype=torch.float32)
        self.target = torch.tensor(target_series.values, dtype=torch.float32)

    def __len__(self):
        return len(self.target)

    def __getitem__(self, idx):
        sample = (self.features[idx], self.target[idx])
        return sample
        
# Wrap the residual frame; every column except 'residual' is a feature.
dataset = TimeSeriesDataset(result, target_column='residual')
dataloader = DataLoader(dataset=dataset, batch_size=64)

# Residual model plus its loss, optimiser and step-wise LR decay
# (learning rate is halved every 10 scheduler steps).
network = ResidualNetwork(8, [128, 64, 16]).to(device)
loss_fn = nn.MSELoss()
optimizer = optim.Adam(params=network.parameters(), lr=5e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=10, gamma=0.5)

trainm(
    model=network,
    loader=dataloader,
    optimizer=optimizer,
    scheduler=scheduler,
    loss_fn=loss_fn,
    epochs=150,
)

In [None]:
def pre(prophet, lstm, dat, span=1):
    """Roll the Prophet + residual-model hybrid forward ``span`` days.

    Rows 0-6 of ``dat`` seed the history and row 7 is the first day that gets
    forecast. On each step Prophet produces ``yhat``, the residual model
    predicts the residual, and the corrected price is appended to the history
    used to build the next step's features.

    Args:
        prophet: fitted wrapper exposing
            ``predict(periods, frame, freq, start=...)`` whose result has a
            ``yhat`` column.
        lstm: residual model; must offer ``.eval()`` and return a
            single-element tensor when called on the feature tensor. (The
            parameter name is historical.)
        dat: frame with columns ``ds``, ``y``, ``btc_log_return``,
            ``btc_prev_log_return_1``..``3``, ``btc_rolling_mean_return_7``,
            ``btc_log_return_volatility`` and ``btc_sentiment``; at least
            eight rows.
        span: number of days to forecast.

    Returns:
        ``(preds, vpreds, residuals)``: corrected prices (one-element arrays),
        raw Prophet ``yhat`` arrays, and predicted residuals (floats), one
        entry per step.
    """
    lstm.eval()
    preds = []
    vpreds = []
    residuals = []

    # Copy so that writing `y` below never targets a view of the caller's frame.
    current = dat.iloc[7:8].copy()  # first row to forecast (position 7)
    new_date = current['ds'].values[0]
    prev = dat.iloc[0:7]            # history: positions 0 through 6
    previmm = dat.iloc[6:7]         # most recent history row

    for _ in range(span):

        # Prophet portion
        forecast = prophet.predict(1, current, 'D', start=new_date)
        yhat = forecast['yhat'].values
        vpreds.append(yhat)
        log_return = previmm['btc_log_return'].values
        btc_close = previmm['y'].values

        # MLP portion: same column order as the `result` frame the residual
        # model was trained on (minus the `residual` target).
        vector = np.stack([
            yhat,
            log_return,
            current['btc_prev_log_return_1'].values,
            current['btc_prev_log_return_2'].values,
            current['btc_prev_log_return_3'].values,
            current['btc_rolling_mean_return_7'].values,
            current['btc_log_return_volatility'].values,
            current['btc_sentiment'].values,
        ], axis=1)

        tensor = torch.tensor(vector, dtype=torch.float32).to(device)
        tensor = tensor.unsqueeze(0)

        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            pred_residual = lstm(tensor).item()
        residuals.append(pred_residual)

        # The training target is residual = yhat - y (prediction minus actual),
        # so the corrected price is yhat - residual, not yhat + residual.
        pred_price = yhat - pred_residual
        preds.append(pred_price)

        last_date = pd.to_datetime(current['ds'].iloc[-1])
        new_date = last_date + pd.Timedelta(days=1)

        # Because the model is fully autoregressive, we recompute all of these values each iteration
        new_row = {
            'ds': new_date,
            'btc_prev_log_return_1': log_return,
            'btc_prev_log_return_2': current['btc_prev_log_return_1'].iloc[-1],
            'btc_prev_log_return_3': current['btc_prev_log_return_2'].iloc[-1],
            # The next two are placeholders, overwritten from the history below.
            'btc_rolling_mean_return_7': current['btc_rolling_mean_return_7'].iloc[-1],
            'btc_log_return_volatility': current['btc_log_return_volatility'].iloc[-1],
            'btc_log_return': np.log(pred_price / btc_close),
            'btc_sentiment': vtos(current['btc_log_return_volatility'].iloc[-1], log_return),
        }

        # Commit the forecast to the history, then start the next day's row.
        current['y'] = pred_price
        prev = pd.concat([prev, current])
        previmm = prev.iloc[-1:]
        current = pd.DataFrame(new_row, index=[0])
        current['y'] = 0
        last_returns = prev['btc_log_return'].iloc[-7:].tolist()
        current['btc_rolling_mean_return_7'] = sum(last_returns) / 7
        current['btc_log_return_volatility'] = np.std(last_returns)

    return preds, vpreds, residuals


In [None]:
from sklearn.preprocessing import MinMaxScaler

# Columns the autoregressive forecaster needs: date, target and every feature.
regressors = [
    'Date',
    'btc_rolling_mean_return_7',
    'btc_prev_log_return_1',
    'btc_prev_log_return_2',
    'btc_prev_log_return_3',
    'btc_log_return_volatility',
    'btc_log_return',
    'btc_close',
    'btc_sentiment',
]

# Held-out rows with the date/target columns renamed to ds / y.
initial = test[regressors].rename(columns={'Date': 'ds', 'btc_close': 'y'})

In [None]:
import matplotlib.pyplot as plt

# pre() treats rows 0-6 of its input as warm-up history, so preds[i] is the
# forecast for test row i + 7.
HISTORY_ROWS = 7

preds, vpreds, residuals = pre(pmodel, network, initial, span=365)

actual_close = test['btc_close']
rows_to_show = min(10, len(preds), max(len(actual_close) - HISTORY_ROWS, 0))
for i in range(rows_to_show):
    print(f"Pred: {preds[i]} | VPred: {vpreds[i]} | Residual: {residuals[i]} | Test: {actual_close.iloc[i + HISTORY_ROWS]}")

# NOTE: despite the name this holds the actual close prices; the name is kept
# because later cells read it.
test_residuals = test['btc_close'].values

plt.figure(figsize=(10, 6))
# Shift the forecast along the x-axis so each point sits over the day it predicts.
plt.plot(range(HISTORY_ROWS, HISTORY_ROWS + len(preds)), np.array(preds).ravel(), label='Predictions', color='blue')
plt.plot(range(len(test_residuals)), test_residuals, label='Actual Price', color='red', linestyle='--')
plt.xlabel('Time (Days)')
plt.ylabel('Price')
plt.title('Prophet + Residual Network Prediction vs Actual Bitcoin Price')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# pre() treats rows 0-6 of its input as warm-up history, so the first forecast
# belongs to test row 7.
HISTORY_ROWS = 7

# pre() returns one-element arrays; flatten them to plain 1-D float arrays.
preds_flat = np.asarray(list(preds), dtype=float).ravel()
vpreds_flat = np.asarray(list(vpreds), dtype=float).ravel()

# Actual prices for the forecast days. reindex pads with NaN where the
# forecast runs past the available data, so all columns share one length.
actual_aligned = (
    pd.Series(np.asarray(test_residuals, dtype=float)[HISTORY_ROWS:HISTORY_ROWS + len(preds_flat)], dtype=float)
    .reindex(range(len(preds_flat)))
    .to_numpy()
)

df = pd.DataFrame({
    'btc_predictions': preds_flat,
    'btc_vpredictions': vpreds_flat,
    'btc_residuals': residuals,
    'btc_actual': actual_aligned
})

df.to_csv('btc_preds.csv', index=False)

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# pre() treats rows 0-6 of its input as warm-up history, so preds[i] is the
# forecast for test row i + 7.
HISTORY_ROWS = 7

# pre() returns one-element arrays; flatten them to plain floats before scoring.
pred_values = np.asarray(list(preds), dtype=float).ravel()
actual_values = np.asarray(test_residuals, dtype=float)[HISTORY_ROWS:HISTORY_ROWS + len(pred_values)]

# Pair forecast and actual by day, then drop incomplete pairs together so the
# two series can never drift out of step.
paired = pd.DataFrame({
    'actual': actual_values,
    'pred': pred_values[:len(actual_values)],
}).dropna()

# scikit-learn metrics take (y_true, y_pred); the order matters for R².
mae = mean_absolute_error(paired['actual'], paired['pred'])
mse = mean_squared_error(paired['actual'], paired['pred'])
r2 = r2_score(paired['actual'], paired['pred'])
rmse = np.sqrt(mse)

# Later cells expect `preds` to be a Series.
preds = pd.Series(pred_values).dropna()
test_residuals = pd.Series(test_residuals).dropna()

print(f"Mean Absolute Error: {mae}")
print(f"Root Mean Squared Error: {rmse}")
print(f"R² (R-squared): {r2}")

In [None]:
import pandas as pd

# Accept either the raw list returned by pre() or an already-built Series.
preds = pd.Series(preds, name='btc_predictions')
# Unwrap one-element containers so the CSV holds plain numbers.
preds = preds.apply(lambda x: x[0] if isinstance(x, (list, np.ndarray)) else x)
preds.to_csv('btc_predictions.csv', index=False)

# Report the file name that was actually written.
print("Predictions have been saved as 'btc_predictions.csv'.")

In [None]:
# Save models

import joblib

# Create the output directory first so a fresh checkout does not fail here.
Path('Models').mkdir(parents=True, exist_ok=True)

joblib.dump(pmodel, 'Models/btc_prophet.pkl')
# NOTE(review): this pickles the whole module object, so loading it requires
# the ResidualNetwork class to be importable under the same path. Saving
# network.state_dict() would be more portable, but changes how the file is loaded.
torch.save(network, 'Models/btc_mlp.pth')