In [None]:
%pip install numpy pandas matplotlib statsmodels scikit-learn joblib tensorflow


In [None]:
import os
import glob
import json
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from datetime import datetime, timedelta

from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler

import joblib
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# ─── suppress warnings ─────────────────────────────────────────────────────────
# Silence every warning category for the rest of the session.
warnings.filterwarnings("ignore")

# ─── directories ───────────────────────────────────────────────────────────────
# All inputs and outputs live under one project root in the user's home folder.
PROJECT_DIR = os.path.expanduser("~/User/crypto proj")
PREPROC_DIR = os.path.join(PROJECT_DIR, "data", "preprocessed")

# Models and plots share the same "<kind>/time_series" layout.
MODEL_DIR, VIS_DIR = (
    os.path.join(PROJECT_DIR, top_level, "time_series")
    for top_level in ("models", "visualizations")
)
SUMMARY_PATH = os.path.join(MODEL_DIR, "time_series_results.json")

# Output folders are created up front; existing folders are left alone.
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(VIS_DIR, exist_ok=True)

# ─── which coins ────────────────────────────────────────────────────────────────
# Maps ticker symbol -> display name (the display name is also used in file names).
CRYPTOS = {
    "BTC-USD": "Bitcoin",
    "ETH-USD": "Ethereum",
    "DOGE-USD": "Dogecoin",
}

# ─── helpers ────────────────────────────────────────────────────────────────────
def load_latest_preprocessed(symbol):
    """Read the newest preprocessed CSV for ``symbol`` into a DataFrame.

    The part of ``symbol`` before the first ``-`` (e.g. ``BTC`` for
    ``BTC-USD``) is used to match ``<base>_preprocessed_*.csv`` inside
    ``PREPROC_DIR``. "Newest" means the file with the greatest modification
    time. The ``Date`` column is parsed as dates and becomes the index.

    Raises:
        FileNotFoundError: if no file matches the pattern.
    """
    ticker = symbol.split('-')[0]
    candidates = glob.glob(
        os.path.join(PREPROC_DIR, f"{ticker}_preprocessed_*.csv")
    )
    if len(candidates) == 0:
        raise FileNotFoundError(f"No preprocessed files found for {ticker}")

    newest_path = max(candidates, key=os.path.getmtime)
    print(f"Loaded {os.path.basename(newest_path)}")
    frame = pd.read_csv(newest_path, parse_dates=['Date'], index_col='Date')
    return frame

def check_stationarity(ts, window=12):
    """Run an ADF test on ``ts``, print the result, and save a diagnostic plot.

    The printed table lists the test statistic, p-value, lag count,
    observation count and the critical values reported by ``adfuller``.
    The plot overlays the series with its rolling mean and rolling standard
    deviation (over ``window`` points) and is written to
    ``VIS_DIR/<ts.name>_stationarity.png``.

    Returns:
        True when the ADF p-value is below 0.05, otherwise False.
    """
    adf_result = adfuller(ts.dropna(), autolag='AIC')
    p_value = adf_result[1]

    # Assemble the report rows: four headline numbers, then critical values.
    labels = ['Test Statistic', 'p-value', '#Lags', '#Observations']
    values = list(adf_result[:4])
    for level, threshold in adf_result[4].items():
        labels.append(f'Crit Value ({level})')
        values.append(threshold)
    print(pd.Series(values, index=labels).to_string(), "\n")

    rolling = ts.rolling(window)
    curves = (
        (ts, 'Original'),
        (rolling.mean(), 'Rolling Mean'),
        (rolling.std(), 'Rolling Std'),
    )
    plt.figure(figsize=(10, 4))
    for curve, label in curves:
        plt.plot(curve, label=label)
    plt.legend()
    plt.title(f"{ts.name} Stationarity")
    plt.savefig(os.path.join(VIS_DIR, f"{ts.name}_stationarity.png"))
    plt.close()

    return p_value < 0.05

def make_stationary(ts):
    """Difference ``ts`` up to order 2 until the ADF test reports stationarity.

    Differencing is applied repeatedly (first difference, then the first
    difference of that), so the returned order ``d`` has the same meaning
    as the ``d`` in an ARIMA ``(p, d, q)`` order. Note that
    ``Series.diff(2)`` is a lag-2 difference (``x[t] - x[t-2]``), which is
    not a second-order difference, hence the repeated ``diff()`` calls.

    Args:
        ts: pandas Series to test.

    Returns:
        ``(series, d)``: the (possibly differenced) series and the order of
        differencing applied. If the series is still non-stationary after
        ``d=2``, the first difference is returned with ``d=1``.
    """
    if check_stationarity(ts):
        return ts, 0

    diff = ts
    for d in (1, 2):
        # Each pass adds one more order of differencing to the previous result.
        diff = diff.diff().dropna()
        if check_stationarity(diff):
            print(f"Stationary after differencing d={d}\n")
            return diff, d

    print("Still non-stationary after d=2; using d=1\n")
    return ts.diff().dropna(), 1

def determine_arima_parameters(ts, d):
    """Save ACF/PACF plots for ``ts`` and return the ARIMA order to use.

    The two correlograms (30 lags each) are stacked in one figure written
    to ``VIS_DIR/<ts.name>_acf_pacf.png`` for visual inspection. The
    returned order is always ``(1, d, 1)``; ``d`` is passed through
    unchanged and the plots do not influence ``p`` or ``q``.
    """
    fig, (acf_ax, pacf_ax) = plt.subplots(2, 1, figsize=(10, 6))
    plot_acf(ts, ax=acf_ax, lags=30)
    plot_pacf(ts, ax=pacf_ax, lags=30)
    fig.tight_layout()
    fig.savefig(os.path.join(VIS_DIR, f"{ts.name}_acf_pacf.png"))
    plt.close(fig)

    p, q = 1, 1
    return p, d, q

def train_arima(ts, order, train_frac=0.8):
    """Fit ARIMA on the leading part of ``ts`` and score it on the remainder.

    Args:
        ts: pandas Series of observations in chronological order.
        order: ARIMA ``(p, d, q)`` order.
        train_frac: fraction of ``ts`` (from the start) used for fitting.

    Returns:
        ``(model, preds, rmse, mae, r2)``: the fitted results object, the
        hold-out predictions as a Series indexed like the hold-out data,
        and the three error metrics on the hold-out.

    Raises:
        ValueError: if the split leaves the train or the test part empty.

    Side effects:
        Saves a train/test/prediction plot to
        ``VIS_DIR/<ts.name>_arima_fit.png``.
    """
    split = int(len(ts) * train_frac)
    if split <= 0 or split >= len(ts):
        raise ValueError(
            f"train_frac={train_frac} leaves an empty train or test set "
            f"for a series of length {len(ts)}"
        )
    # Positional split, independent of the index type.
    train, test = ts.iloc[:split], ts.iloc[split:]

    model = ARIMA(train, order=order).fit()
    raw_preds = model.forecast(steps=len(test))
    # The forecast may carry a positional index when no date frequency could
    # be attached to the training index. Re-label it with the test index so
    # the metrics and the plot line up with the actual observations.
    preds = pd.Series(
        np.asarray(raw_preds),
        index=test.index,
        name=getattr(raw_preds, 'name', None),
    )

    rmse = np.sqrt(mean_squared_error(test, preds))
    mae  = mean_absolute_error(test, preds)
    r2   = r2_score(test, preds)

    plt.figure(figsize=(10,4))
    plt.plot(train, label='Train')
    plt.plot(test, label='Test')
    plt.plot(preds, label=f'ARIMA{order}', color='red')
    plt.legend(); plt.title(f"ARIMA{order} Fit")
    plt.savefig(os.path.join(VIS_DIR, f"{ts.name}_arima_fit.png"))
    plt.close()

    return model, preds, rmse, mae, r2

def forecast_arima(model, steps=30):
    """Forecast ``steps`` periods ahead and label them with daily dates.

    Args:
        model: fitted ARIMA results object exposing ``forecast(steps=...)``
            and ``data.dates`` (the dates of the data it was fit on).
        steps: number of future points to forecast.

    Returns:
        Series named ``'ARIMA_Forecast'`` whose daily index starts the day
        after the last date in ``model.data.dates``.
    """
    fcast = model.forecast(steps=steps)
    last = model.data.dates[-1]
    idx = pd.date_range(start=last+timedelta(days=1), periods=steps, freq='D')
    # Use the raw values: passing a Series together with ``index=`` makes
    # pandas reindex by label, which yields NaN wherever the forecast's own
    # labels differ from ``idx``.
    return pd.Series(np.asarray(fcast), index=idx, name='ARIMA_Forecast')

def prepare_lstm(ts, look_back=60, train_frac=0.8):
    """Scale ``ts`` and build sliding-window samples split into train/test.

    Each sample is a window of ``look_back`` consecutive scaled values and
    its target is the value that immediately follows the window. The
    scaler is fit only on the observations that feed the training samples,
    so the min/max of the test period does not leak into training. Test
    values may therefore fall outside ``[0, 1]``.

    Args:
        ts: pandas Series of observations in chronological order.
        look_back: window length (number of past values per sample).
        train_frac: fraction of the samples (from the start) used for
            training.

    Returns:
        ``(X_train, X_test, y_train, y_test, scaler)`` where the ``X``
        arrays have shape ``(samples, look_back, 1)`` and ``scaler`` is the
        fitted ``MinMaxScaler``.

    Raises:
        ValueError: if ``ts`` is not longer than ``look_back``, so no
            sample could be built.
    """
    arr = ts.values.reshape(-1, 1)
    n_samples = len(arr) - look_back
    if n_samples <= 0:
        raise ValueError(
            f"Series of length {len(arr)} is too short for look_back={look_back}"
        )
    split = int(n_samples * train_frac)

    # Training samples draw on the first ``look_back + split`` observations
    # (windows plus targets); fit the scaler on exactly that range.
    scaler = MinMaxScaler()
    scaler.fit(arr[:look_back + split])
    scaled = scaler.transform(arr)[:, 0]

    X = np.array([scaled[i - look_back:i] for i in range(look_back, len(scaled))])
    y = scaled[look_back:].copy()
    X = X.reshape((X.shape[0], X.shape[1], 1))
    return X[:split], X[split:], y[:split], y[split:], scaler

def train_lstm(X_tr, y_tr, X_te, y_te, scaler, name, epochs=50, batch_size=32):
    """Fit a two-layer LSTM regressor, score it on the test set, save plots.

    The network is LSTM(50) -> Dropout(0.2) -> LSTM(50) -> Dropout(0.2) ->
    Dense(1), trained with Adam on mean squared error. The test arrays are
    also passed as validation data during fitting. Metrics are computed
    after mapping targets and predictions back through ``scaler``.

    Two figures are written to ``VIS_DIR``: ``<name>_lstm_fit.png``
    (actual vs. predicted) and ``<name>_lstm_loss.png`` (training and
    validation loss per epoch).

    Returns:
        ``(model, rmse, mae, r2, scaler)``; ``scaler`` is returned unchanged.
    """
    window_len = X_tr.shape[1]
    layers = [
        LSTM(50, return_sequences=True, input_shape=(window_len, 1)),
        Dropout(0.2),
        LSTM(50),
        Dropout(0.2),
        Dense(1),
    ]
    model = Sequential(layers)
    model.compile(optimizer='adam', loss='mse')

    history = model.fit(
        X_tr,
        y_tr,
        validation_data=(X_te, y_te),
        epochs=epochs,
        batch_size=batch_size,
        verbose=1,
    )

    # Bring both targets and predictions back to the original price scale.
    scaled_pred = model.predict(X_te)
    actual = scaler.inverse_transform(y_te.reshape(-1, 1))
    predicted = scaler.inverse_transform(scaled_pred)

    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, predicted)
    r2 = r2_score(actual, predicted)

    # fit plot
    plt.figure(figsize=(8, 4))
    plt.plot(actual, label='Actual')
    plt.plot(predicted, label='Pred', color='green')
    plt.title(f'LSTM Fit ({name})')
    plt.legend()
    plt.savefig(os.path.join(VIS_DIR, f"{name}_lstm_fit.png"))
    plt.close()

    # loss plot
    plt.figure(figsize=(6, 3))
    for history_key, label in (('loss', 'train'), ('val_loss', 'val')):
        plt.plot(history.history[history_key], label=label)
    plt.title(f'LSTM Loss ({name})')
    plt.legend()
    plt.savefig(os.path.join(VIS_DIR, f"{name}_lstm_loss.png"))
    plt.close()

    return model, rmse, mae, r2, scaler

def forecast_lstm(model, last_seq, scaler, steps=30, last_date=None):
    """Forecast ``steps`` values by feeding each prediction back as input.

    Args:
        model: object whose ``predict`` accepts an array of shape
            ``(1, window, 1)`` and returns an array indexable as ``[0, 0]``.
        last_seq: 1-D sequence of scaled values used as the initial window.
            It is copied, not modified.
        scaler: fitted scaler whose ``inverse_transform`` maps predictions
            back to the original scale.
        steps: number of future points to forecast.
        last_date: date of the last observation; the forecast index starts
            on the following day. Defaults to today when omitted.

    Returns:
        Series named ``'LSTM_Forecast'`` with a daily index at midnight, so
        it lines up with daily-indexed data on a shared axis.
    """
    # Work on a float copy so writing predictions into the window neither
    # truncates them nor touches the caller's array.
    seq = np.array(last_seq, dtype=float).ravel()
    preds = []
    for _ in range(steps):
        p = float(model.predict(seq.reshape(1, -1, 1))[0, 0])
        preds.append(p)
        # Slide the window: drop the oldest value, append the new prediction.
        seq = np.roll(seq, -1)
        seq[-1] = p
    inv = scaler.inverse_transform(np.array(preds).reshape(-1, 1))

    anchor = datetime.now() if last_date is None else last_date
    # Normalize to midnight; a time-of-day component would offset the
    # forecast from daily data.
    first_day = pd.Timestamp(anchor).normalize() + timedelta(days=1)
    idx = pd.date_range(start=first_day, periods=steps, freq='D')
    return pd.Series(np.asarray(inv).ravel(), index=idx, name='LSTM_Forecast')

# ─── main ───────────────────────────────────────────────────────────────────────
# For every coin in CRYPTOS: load the data, evaluate ARIMA and LSTM on a
# hold-out, produce 30-day forecasts, save plots/models, and finally write a
# JSON summary of the hold-out metrics.
if __name__ == "__main__":
    summary = {}

    print("Starting time series modeling...\n")
    for symbol, nice in CRYPTOS.items():
        print(f"=== {nice} ===")

        df = load_latest_preprocessed(symbol)
        # The display name doubles as the series name used in plot file names.
        ts = df['Close'].rename(nice)

        # raw plot
        plt.figure(figsize=(8,3))
        plt.plot(ts); plt.title(f"{nice} Close")
        plt.savefig(os.path.join(VIS_DIR, f"{nice}_raw.png"))
        plt.close()

        # stationarity
        ts_stat, d = make_stationary(ts)

        # ARIMA: evaluate on a hold-out first
        p, _, q = determine_arima_parameters(ts_stat, d)
        order = (p, d, q)
        print(f"→ ARIMA order: ({p},{d},{q})")
        ar_model, ar_preds, ar_rmse, ar_mae, ar_r2 = train_arima(ts, order)

        # The evaluation model only saw the training part, so its forecast
        # would start at the train/test boundary. Refit on the full series so
        # the 30-day forecast begins after the last observation.
        ar_final = ARIMA(ts, order=order).fit()
        ar_fcast = forecast_arima(ar_final, steps=30)

        # ARIMA forecast plot
        plt.figure(figsize=(8,3))
        plt.plot(ts[-90:], label='Last 90d')
        plt.plot(ar_fcast, label='ARIMA')
        plt.legend(); plt.title(f"{nice} ARIMA 30d")
        plt.savefig(os.path.join(VIS_DIR, f"{nice}_arima_fcast.png"))
        plt.close()

        # LSTM
        X_tr, X_te, y_tr, y_te, scaler = prepare_lstm(ts)
        lstm_model, lstm_rmse, lstm_mae, lstm_r2, scaler = train_lstm(
            X_tr, y_tr, X_te, y_te, scaler, nice
        )
        # The last test window stops one step before the final observation
        # (that observation is its target). Shift it forward by one so the
        # seed ends at the most recent value and the first forecast is for
        # the next, unseen day.
        last_seq = np.append(X_te[-1].flatten()[1:], y_te[-1])
        lstm_fcast = forecast_lstm(lstm_model, last_seq, scaler, steps=30)
        # Anchor the forecast dates to the data rather than the wall clock.
        lstm_fcast.index = pd.date_range(
            start=ts.index[-1] + timedelta(days=1),
            periods=len(lstm_fcast), freq='D'
        )

        # forecast comparison
        plt.figure(figsize=(8,3))
        plt.plot(ts[-90:], label='Last 90d')
        plt.plot(ar_fcast, label='ARIMA')
        plt.plot(lstm_fcast, label='LSTM')
        plt.legend(); plt.title(f"{nice} 30d Forecasts")
        plt.savefig(os.path.join(VIS_DIR, f"{nice}_forecast_comp.png"))
        plt.close()

        # save models and scaler (the ARIMA saved is the full-series refit
        # that produced the forecast)
        joblib.dump(ar_final, os.path.join(MODEL_DIR, f"{nice}_arima.pkl"))
        lstm_model.save(os.path.join(MODEL_DIR, f"{nice}_lstm.h5"))
        joblib.dump(scaler, os.path.join(MODEL_DIR, f"{nice}_scaler.pkl"))

        # Cast metrics to built-in floats so json.dump never meets a NumPy
        # scalar type it cannot serialize.
        summary[nice] = {
            'ARIMA': {'order': order, 'rmse': float(ar_rmse),
                      'mae': float(ar_mae), 'r2': float(ar_r2)},
            'LSTM':  {'rmse': float(lstm_rmse), 'mae': float(lstm_mae),
                      'r2': float(lstm_r2)}
        }

    # write summary
    with open(SUMMARY_PATH, 'w') as f:
        json.dump(summary, f, indent=2)

    print(f"\nDone! Models in: {MODEL_DIR}\nPlots in: {VIS_DIR}\nSummary at: {SUMMARY_PATH}")
