In [None]:
from tensorflow import keras

# Path depends on where you uploaded
model = keras.models.load_model("/content/model.keras")

# `summary()` prints the layer table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the cell output.
model.summary()

In [10]:
import os

# Feature names grouped by origin. Dict insertion order is the order in which
# the groups are flattened into FEATURE_COLS, so it defines the column order.
_FEATURE_GROUPS = {
    "base_ohlcv": ["Open", "High", "Low", "Close", "TickVolume"],
    "returns": ["Close_ret", "Open_ret", "High_ret", "Low_ret"],
    "volatility": ["Range"],
    "time": [
        "minute_of_day", "slot5", "slot5_sin", "slot5_cos",
        "minutes_from_open", "minutes_to_close", "percent_session_elapsed",
        "is_open", "is_close",
    ],
    "day_of_week": ["day_of_week", "dow_sin", "dow_cos", "is_monday", "is_friday"],
    "technical": [
        "SMA10", "SMA20", "EMA10", "VWAP",
        "RSI14", "MACD", "MACD_signal", "MACD_diff",
        "Bollinger_high", "Bollinger_low", "Bollinger_mavg",
        "ATR14",
    ],
}

# Flat, ordered list of every model input column.
FEATURE_COLS = [name for group in _FEATURE_GROUPS.values() for name in group]

# The prediction targets are exactly the raw OHLCV columns (independent copy).
TARGET_COLS = list(_FEATURE_GROUPS["base_ohlcv"])

MODEL_PATH = "artifacts_saved"

In [None]:
import joblib
import json
def load_model_predict(
    csv_path: str = "/content/xauusd_M1_exness_2025-08-01.csv",
    art_dir: str = "/content",
    art_name: str = "artifacts_20250907",
    future_steps: int = 30,
) -> None:
    """
    Load the saved artifacts and write a recursive forecast for a new CSV.

    Args:
        csv_path: CSV file to preprocess and forecast from.
        art_dir: Directory holding ``model.keras``, ``x_scaler.joblib``,
            ``y_scaler.joblib`` and ``meta.json``.
        art_name: Suffix used in the output file name.
        future_steps: Number of future rows to predict.

    Side effects:
        Writes ``future_predictions_{art_name}.csv`` to the current working
        directory and prints the path that was written.

    All defaults reproduce the previously hard-coded values, so calling the
    function with no arguments behaves as before.
    """
    # 1) Load model & scalers from the same artifact directory
    #    (the model path used to be hard-coded separately from the directory).
    model = keras.models.load_model(os.path.join(art_dir, "model.keras"))
    x_scaler = joblib.load(os.path.join(art_dir, "x_scaler.joblib"))
    y_scaler = joblib.load(os.path.join(art_dir, "y_scaler.joblib"))

    # 2) Load metadata describing the columns and window the model expects
    with open(os.path.join(art_dir, "meta.json"), "r") as f:
        meta = json.load(f)
    feature_cols = meta["feature_cols"]
    target_cols = meta["target_cols"]
    window_size = int(meta["window_size"])

    # 3) Build predictor
    predictor = Predictor(
        model=model,
        feature_cols=feature_cols,
        target_cols=target_cols,
        window_size=window_size,
        x_scaler=x_scaler,
        y_scaler=y_scaler,
    )

    # 4) Load & preprocess new data with the same preprocessing class
    cp = CsvPreprocessor()
    df_new = cp.preprocess(cp.load(csv_path))

    # 5) Predict future
    future_df = predictor.predict_future(df_new, steps=future_steps)

    # 6) Save, and report the path that was actually written
    #    (the old message named a file that was never created).
    out_path = f"future_predictions_{art_name}.csv"
    future_df.to_csv(out_path, index=False)
    print(f"Predictions saved to {out_path}")

# NOTE(review): Predictor and CsvPreprocessor are defined in a later cell, so this
# call only succeeds if that cell has already been executed in the current kernel.
load_model_predict()


In [None]:
!pip install ta

In [28]:
from typing import Sequence
import numpy as np
import pandas as pd
import ta
from __future__ import annotations
from dataclasses import dataclass
from typing import Tuple, Dict, Any, Optional
import numpy as np


@dataclass(frozen=True)
class SplitConfig:
    """Immutable windowing / partitioning settings (window length and split ratios)."""
    window_size: int = 60
    # NOTE(review): the inline label says "train, test, val", but
    # SequenceSplitter.split treats ratios[0] as train, ratios[1] as val and gives
    # the remainder to test — confirm which order is intended.
    ratios: Tuple[float, float, float] = (0.7, 0.2, 0.1)  # train, test, val


@dataclass(frozen=True)
class TrainConfig:
    """Immutable training settings.

    Not referenced by the code visible in this notebook section; presumably
    forwarded to the model's training call — TODO confirm.
    """
    epochs: int = 100
    batch_size: int = 32
    verbose: int = 1


@dataclass(frozen=True)
class ScalerBundle:
    """Immutable pair of the scaler applied to features and the scaler applied to targets."""
    x_scaler: Any  # scaler used to transform the feature matrix
    y_scaler: Any  # scaler used to transform the target matrix


@dataclass(frozen=True)
class DatasetSplit:
    """Immutable container for the train / test / val partitions of a windowed dataset.

    For each partition, as populated by SequenceSplitter.split:
      X_*   : scaled feature windows
      y_*   : scaled targets for the row following each window
      z_*   : the same targets, unscaled
      idx_* : row position of each target in the source frame

    Field order matters: instances are also constructed positionally
    (train, then test, then val).
    """
    X_train: np.ndarray
    y_train: np.ndarray
    z_train: np.ndarray
    idx_train: np.ndarray

    X_test: np.ndarray
    y_test: np.ndarray
    z_test: np.ndarray
    idx_test: np.ndarray

    X_val: np.ndarray
    y_val: np.ndarray
    z_val: np.ndarray
    idx_val: np.ndarray

    # Metadata needed to interpret the arrays above.
    feature_cols: Tuple[str, ...]
    target_cols: Tuple[str, ...]
    window_size: int
    scalers: ScalerBundle


@dataclass(frozen=True)
class TrainReport:
    """Immutable summary of a training / evaluation run.

    Not populated by the code visible in this notebook section; field meanings
    below are inferred from their names — TODO confirm against the trainer.
    """
    test_loss_scaled_mse: float  # presumably MSE on the test split, in scaled units
    mape_per_target: Dict[str, float]  # presumably keyed by target column name
    accuracy_per_target: Dict[str, float]  # presumably keyed by target column name
    history: Optional[Dict[str, list]] = None  # optional per-epoch metric history

class Predictor:
    """
    Runs a trained windowed model on new data.

    The predictor engineers any missing feature columns, scales the features
    with ``x_scaler``, feeds windows of ``window_size`` rows to ``model`` and
    maps the output back to original units with ``y_scaler``.

    Input frames must contain a datetime ``Time`` column plus the raw
    ``Open``/``High``/``Low``/``Close``/``TickVolume`` columns.
    """

    # Every column `_compute_features` can derive. Any other column in a frame
    # is treated as a raw input when features are rebuilt during forecasting.
    _ENGINEERED_COLS = (
        "Close_ret", "Open_ret", "High_ret", "Low_ret",
        "Range",
        "minute_of_day", "slot5", "slot5_sin", "slot5_cos",
        "minutes_from_open", "minutes_to_close", "percent_session_elapsed",
        "is_open", "is_close",
        "day_of_week", "dow_sin", "dow_cos", "is_monday", "is_friday",
        "SMA10", "SMA20", "EMA10", "VWAP",
        "RSI14", "MACD", "MACD_signal", "MACD_diff",
        "Bollinger_high", "Bollinger_low", "Bollinger_mavg",
        "ATR14",
    )

    def __init__(self, model, feature_cols: Sequence[str], target_cols: Sequence[str],
                 window_size: int, x_scaler, y_scaler):
        """
        Args:
            model: Object exposing ``predict(X, verbose=0)``.
            feature_cols: Ordered input column names the model expects.
            target_cols: Ordered output column names the model produces.
            window_size: Number of consecutive rows per model input.
            x_scaler: Fitted scaler exposing ``transform`` for features.
            y_scaler: Fitted scaler exposing ``inverse_transform`` for targets.
        """
        self.model = model
        self.feature_cols = list(feature_cols)   # stored as lists so they can index DataFrames
        self.target_cols = list(target_cols)
        self.window_size = int(window_size)
        self.x_scaler = x_scaler
        self.y_scaler = y_scaler

    # --- Helper: derive engineered features, keeping NaN warm-up rows ---
    def _compute_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Return a copy of ``df`` with engineered features added.

        Columns guarded by a "not in df.columns" check are left untouched when
        already present; session and day-of-week columns are always recomputed.
        Rows with NaN (indicator warm-up) are NOT dropped here.
        """
        df = df.copy()

        # --- Returns ---
        for col in ["Close", "Open", "High", "Low"]:
            if f"{col}_ret" not in df.columns:
                df[f"{col}_ret"] = df[col].pct_change()

        # --- Volatility (Range) ---
        if "Range" not in df.columns:
            df["Range"] = df["High"] - df["Low"]

        # --- Time features ---
        if "minute_of_day" not in df.columns:
            df["minute_of_day"] = df["Time"].dt.hour * 60 + df["Time"].dt.minute
        if "slot5" not in df.columns:
            df["slot5"] = df["minute_of_day"] // 5
            df["slot5_sin"] = np.sin(2 * np.pi * df["slot5"] / 288)
            df["slot5_cos"] = np.cos(2 * np.pi * df["slot5"] / 288)

        # Session boundaries (UTC 01:05 → 23:54), expressed in minutes of day
        session_open, session_close = 65, 1434
        df["minutes_from_open"] = df["minute_of_day"] - session_open
        df["minutes_to_close"] = session_close - df["minute_of_day"]
        df["percent_session_elapsed"] = (
            df["minutes_from_open"] / (session_close - session_open)
        ).clip(0, 1)

        # Flags
        df["is_open"] = (df["minute_of_day"] == session_open).astype(int)
        df["is_close"] = (df["minute_of_day"] == session_close).astype(int)

        # --- Day-of-week features ---
        df["day_of_week"] = df["Time"].dt.dayofweek
        df["dow_sin"] = np.sin(2 * np.pi * df["day_of_week"] / 5)
        df["dow_cos"] = np.cos(2 * np.pi * df["day_of_week"] / 5)
        df["is_monday"] = (df["day_of_week"] == 0).astype(int)
        df["is_friday"] = (df["day_of_week"] == 4).astype(int)

        # --- Technical Indicators ---
        if "SMA10" not in df.columns:
            df["SMA10"] = df["Close"].rolling(10).mean()
        if "SMA20" not in df.columns:
            df["SMA20"] = df["Close"].rolling(20).mean()
        if "EMA10" not in df.columns:
            df["EMA10"] = df["Close"].ewm(span=10, adjust=False).mean()
        if "VWAP" not in df.columns:
            df["VWAP"] = (df["Close"] * df["TickVolume"]).cumsum() / df["TickVolume"].cumsum()

        if "RSI14" not in df.columns:
            df["RSI14"] = ta.momentum.RSIIndicator(df["Close"], window=14).rsi()

        if "MACD" not in df.columns:
            macd = ta.trend.MACD(close=df["Close"])
            df["MACD"] = macd.macd()
            df["MACD_signal"] = macd.macd_signal()
            df["MACD_diff"] = macd.macd_diff()

        if "Bollinger_high" not in df.columns:
            boll = ta.volatility.BollingerBands(close=df["Close"], window=20, window_dev=2)
            df["Bollinger_high"] = boll.bollinger_hband()
            df["Bollinger_low"] = boll.bollinger_lband()
            df["Bollinger_mavg"] = boll.bollinger_mavg()

        if "ATR14" not in df.columns:
            atr = ta.volatility.AverageTrueRange(
                high=df["High"], low=df["Low"], close=df["Close"], window=14
            )
            df["ATR14"] = atr.average_true_range()

        return df

    # --- Helper: add engineered features automatically ---
    def _add_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add engineered features and drop every row that contains a NaN."""
        return self._compute_features(df).dropna()

    # --- Helper: convert to sliding windows ---
    def _to_windows(self, scaled_X: np.ndarray) -> np.ndarray:
        """
        Stack ``N - window_size`` overlapping windows of ``window_size`` rows.

        Window ``i`` covers rows ``i .. i + window_size - 1``; the final
        possible window (ending at the last row) is intentionally not produced.

        Raises:
            ValueError: if ``scaled_X`` has ``window_size`` rows or fewer.
        """
        w = self.window_size
        N = scaled_X.shape[0]
        if N <= w:
            raise ValueError(f"Need N > window_size. Got N={N}, window_size={w}")
        M = N - w
        return np.stack([scaled_X[i:i + w, :] for i in range(M)], axis=0)

    def predict_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Predict one target row for every window that has a following row in ``df``.

        Returns:
            DataFrame of unscaled predictions (columns ``target_cols``) indexed
            by the index label of the row each window precedes.

        Raises:
            KeyError: if a required feature column is missing.
            ValueError: if too few rows remain after feature engineering.
        """
        df = self._add_features(df)

        for c in self.feature_cols:
            if c not in df.columns:
                raise KeyError(f"Missing feature '{c}' in new dataframe (expected {c})")

        raw_X = df[list(self.feature_cols)].to_numpy(dtype=float)
        scaled_X = self.x_scaler.transform(raw_X)
        X_win = self._to_windows(scaled_X)

        y_pred_s = self.model.predict(X_win, verbose=0)
        y_pred_inv = self.y_scaler.inverse_transform(y_pred_s)

        out_idx = df.index[self.window_size: self.window_size + y_pred_inv.shape[0]]
        return pd.DataFrame(y_pred_inv, index=out_idx, columns=self.target_cols)

    def predict_next_from_tail(self, df_tail: pd.DataFrame) -> np.ndarray:
        """
        Predict the single next target row from exactly one window of data.

        The length check runs AFTER feature engineering, which drops rows
        containing NaN; ``df_tail`` therefore needs its engineered columns
        already populated for all ``window_size`` rows.

        Raises:
            ValueError: if the engineered frame does not have exactly
                ``window_size`` rows.
        """
        df_tail = self._add_features(df_tail)

        if len(df_tail) != self.window_size:
            raise ValueError(f"df_tail must have exactly window_size={self.window_size} rows")

        raw = df_tail[list(self.feature_cols)].to_numpy(dtype=float)
        scaled = self.x_scaler.transform(raw)
        X = np.expand_dims(scaled, axis=0)

        y_pred_s = self.model.predict(X, verbose=0)[0]
        return self.y_scaler.inverse_transform(y_pred_s.reshape(1, -1))[0]

    def predict_future(self, df: pd.DataFrame, steps: int = 30) -> pd.DataFrame:
        """
        Recursive multi-step forecast.

        Each predicted row is stamped one minute after the previous row,
        enriched with engineered features, appended to the history and used as
        input for the next step.

        Features for a predicted row are rebuilt from the raw columns of the
        whole working history. (Calling ``_add_features`` on the already
        enriched frame skipped every existing column, left the new row full of
        NaN, dropped it, and so copied the previous row's stale features.)
        Cumulative / recursive indicators (VWAP, EMA, RSI, MACD, ATR) are
        therefore evaluated over the rows retained after the initial NaN drop.
        Recomputing over the full history each step costs time proportional to
        ``steps * len(df)``.

        Returns:
            DataFrame with one row per step: target columns, ``Time`` and the
            remaining feature columns.

        Raises:
            KeyError: if a required feature column is missing.
            ValueError: if fewer than ``window_size`` rows remain after
                feature engineering.
        """
        df = self._add_features(df)

        for c in self.feature_cols:
            if c not in df.columns:
                raise KeyError(f"Missing feature '{c}' in new dataframe (expected {c})")

        if len(df) < self.window_size:
            raise ValueError(
                f"Need at least window_size={self.window_size} rows after feature "
                f"engineering, got {len(df)}"
            )

        engineered = set(self._ENGINEERED_COLS)
        raw_cols = [c for c in df.columns if c not in engineered]

        df_work = df.copy()
        preds = []

        for _ in range(steps):
            # Build input from last window
            seq = df_work.tail(self.window_size)
            X_in = seq[self.feature_cols].to_numpy(dtype=float)
            X_in = self.x_scaler.transform(X_in).reshape(1, self.window_size, len(self.feature_cols))

            # Predict next target row and map it back to original units
            y_pred_s = self.model.predict(X_in, verbose=0)[0]
            y_pred = self.y_scaler.inverse_transform(y_pred_s.reshape(1, -1))[0]

            # Create new row (with predicted targets)
            next_row = dict(zip(self.target_cols, y_pred))
            next_row["Time"] = df_work["Time"].iloc[-1] + pd.Timedelta(minutes=1)

            # Rebuild features from raw inputs only, so nothing is skipped as
            # "already present". Raw columns the model does not predict are
            # carried forward from the previous row.
            raw_hist = pd.concat(
                [df_work[raw_cols], pd.DataFrame([next_row])], ignore_index=True
            ).ffill()
            fresh = self._compute_features(raw_hist).iloc[-1]

            # Merge features into next_row without overwriting predicted values
            for c in self.feature_cols:
                if c in next_row:
                    continue
                val = fresh[c] if c in fresh.index else np.nan
                if pd.isna(val):
                    # Not computable from the available history (indicator
                    # warm-up): fall back to the last known value.
                    val = df_work[c].iloc[-1]
                next_row[c] = val

            # Append the enriched prediction
            df_work = pd.concat([df_work, pd.DataFrame([next_row])], ignore_index=True)
            preds.append(next_row)

            print("Next row:", next_row)

        return pd.DataFrame(preds)



from __future__ import annotations
from dataclasses import dataclass
from typing import Sequence
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler


@dataclass
class SequenceSplitter:
    """
    Cuts a time-ordered frame into train / val / test window sets.

    The scalers passed to `split` are applied as-is; they are never refit here.
    """
    config: SplitConfig

    def split(
        self,
        df: pd.DataFrame,
        feature_cols: Sequence[str],
        target_cols: Sequence[str],
        window_size: int,
        ratios: Sequence[float],
        x_scaler: object,
        y_scaler: object,
    ) -> DatasetSplit:
        """
        Build windowed, scaled partitions in chronological order.

        `ratios` is read as (train, val, ...): the first two entries size the
        train and val partitions and test receives whatever targets remain.
        Each sample pairs the `window_size` rows before position i with the
        target row at position i.

        Raises:
            AssertionError: if `ratios` does not sum to 1.0.
            KeyError: if a feature or target column is missing from `df`.
            ValueError: if `df` has `window_size` rows or fewer.
        """
        w = window_size
        assert abs(sum(ratios) - 1.0) < 1e-9, "ratios must sum to 1.0"

        feature_names = list(feature_cols)
        target_names = list(target_cols)
        for name in feature_names + target_names:
            if name not in df.columns:
                raise KeyError(f"Column '{name}' not in DataFrame. Available: {list(df.columns)}")

        raw_X = df[feature_names].to_numpy(dtype=float)
        raw_y = df[target_names].to_numpy(dtype=float)

        # Apply the supplied scalers without fitting them
        scaled_X = x_scaler.transform(raw_X)
        scaled_y = y_scaler.transform(raw_y)

        n_rows = len(df)
        if n_rows <= w:
            raise ValueError(f"Need N > window_size. Got N={n_rows}, window_size={w}")

        # Count of positions that have a full window in front of them
        n_targets = n_rows - w

        # Partition sizes, in the order train, val, test
        n_train = int(n_targets * ratios[0])
        n_val = int(n_targets * ratios[1])
        n_test = n_targets - n_train - n_val

        def build(target_indices: range):
            positions = list(target_indices)
            windows = np.asarray([scaled_X[i - w:i, :] for i in positions], dtype=np.float32)
            scaled_targets = np.asarray([scaled_y[i, :] for i in positions], dtype=np.float32)
            # unscaled targets, kept for inspection
            raw_targets = np.asarray([raw_y[i, :] for i in positions], dtype=np.float32)
            index = np.asarray(positions, dtype=np.int64)
            return windows, scaled_targets, raw_targets, index

        train_start = w
        val_start = train_start + n_train
        test_start = val_start + n_val

        X_train, y_train, z_train, idx_train = build(range(train_start, val_start))
        X_val, y_val, z_val, idx_val = build(range(val_start, test_start))
        X_test, y_test, z_test, idx_test = build(range(test_start, test_start + n_test))

        return DatasetSplit(
            X_train=X_train, y_train=y_train, z_train=z_train, idx_train=idx_train,
            X_test=X_test, y_test=y_test, z_test=z_test, idx_test=idx_test,
            X_val=X_val, y_val=y_val, z_val=z_val, idx_val=idx_val,
            feature_cols=tuple(feature_cols),
            target_cols=tuple(target_cols),
            window_size=w,
            scalers=ScalerBundle(x_scaler=x_scaler, y_scaler=y_scaler),
        )

from __future__ import annotations
import pandas as pd
import numpy as np
from typing import Iterable
import ta


class CsvPreprocessor:
    """
    Reads and cleans an Exness M1 CSV, then derives model features.

    Steps performed by `preprocess`:
    - parse `Time` as UTC, sort chronologically and renumber the index
    - remove the configured columns (Spread / RealVolume by default) if present
    - strip thousands separators and cast every non-Time column to float64
    - add return, range, session-time, day-of-week and technical-indicator columns
    - drop rows that still contain NaN

    The trading session is taken to run 01:05–23:54 UTC.
    """

    def __init__(self, drop_cols: Iterable[str] = ("Spread", "RealVolume")):
        self.drop_cols = tuple(drop_cols)

        # Names of every engineered column (31 in total), in creation order.
        returns = ["Close_ret", "Open_ret", "High_ret", "Low_ret"]
        volatility = ["Range"]
        time_of_day = [
            "minute_of_day", "slot5", "slot5_sin", "slot5_cos",
            "minutes_from_open", "minutes_to_close", "percent_session_elapsed",
            "is_open", "is_close",
        ]
        weekday = ["day_of_week", "dow_sin", "dow_cos", "is_monday", "is_friday"]
        technical = [
            "SMA10", "SMA20", "EMA10", "VWAP",
            "RSI14", "MACD", "MACD_signal", "MACD_diff",
            "Bollinger_high", "Bollinger_low", "Bollinger_mavg",
            "ATR14",
        ]
        self.feature_cols = [*returns, *volatility, *time_of_day, *weekday, *technical]

    def load(self, path: str) -> pd.DataFrame:
        """Read the CSV at `path` and report (by printing) any fully empty rows."""
        frame = pd.read_csv(path)
        blank_mask = frame.isnull().all(axis=1)
        if not blank_mask.any():
            print("No empty rows found in the dataset.")
            return frame
        print("Empty rows found at indices:")
        print(blank_mask[blank_mask].index)
        return frame

    def preprocess(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Return a cleaned, feature-enriched copy of `df`; the input is not modified.

        Raises:
            KeyError: if `df` has no 'Time' column.
        """
        if "Time" not in df.columns:
            raise KeyError("Expected 'Time' column in CSV")

        frame = df.copy()

        # Chronological order with a fresh 0..n-1 index
        frame["Time"] = pd.to_datetime(frame["Time"], utc=True)
        frame = frame.sort_values(by="Time", ascending=True).reset_index(drop=True)

        # Discard unwanted columns; absent ones are ignored
        frame = frame.drop(columns=list(self.drop_cols), errors="ignore")

        # Everything except Time is numeric: remove "," separators, then cast
        numeric = [name for name in frame.columns if name != "Time"]
        frame[numeric] = frame[numeric].replace({',': ''}, regex=True)
        frame[numeric] = frame[numeric].astype("float64")

        enriched = self._add_features(frame)

        # Indicator warm-up rows contain NaN and are removed
        return enriched.dropna().reset_index(drop=True)

    def _add_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add every engineered column to `df` in place and return the same frame."""
        close = df["Close"]
        volume = df["TickVolume"]

        # Bar-over-bar percentage change of each price column
        for price in ("Close", "Open", "High", "Low"):
            df[f"{price}_ret"] = df[price].pct_change()

        # High-low spread of the bar
        df["Range"] = df["High"] - df["Low"]

        # Minute of day, its 5-minute slot and a cyclic encoding of the slot
        minute = df["Time"].dt.hour * 60 + df["Time"].dt.minute
        df["minute_of_day"] = minute
        slot = minute // 5
        df["slot5"] = slot
        df["slot5_sin"] = np.sin(2 * np.pi * slot / 288)
        df["slot5_cos"] = np.cos(2 * np.pi * slot / 288)

        # Session runs 01:05 (minute 65) to 23:54 (minute 1434), UTC
        session_open = 65
        session_close = 1434
        session_length = session_close - session_open

        since_open = minute - session_open
        df["minutes_from_open"] = since_open
        df["minutes_to_close"] = session_close - minute
        df["percent_session_elapsed"] = (since_open / session_length).clip(0, 1)

        # Markers for the first and last minute of the session
        df["is_open"] = (minute == session_open).astype(int)
        df["is_close"] = (minute == session_close).astype(int)

        # Weekday (0 = Monday) with a cyclic encoding over a 5-day week
        weekday = df["Time"].dt.dayofweek
        df["day_of_week"] = weekday
        df["dow_sin"] = np.sin(2 * np.pi * weekday / 5)
        df["dow_cos"] = np.cos(2 * np.pi * weekday / 5)
        df["is_monday"] = (weekday == 0).astype(int)
        df["is_friday"] = (weekday == 4).astype(int)

        # Moving averages and a volume-weighted average price accumulated over the whole frame
        df["SMA10"] = close.rolling(10).mean()
        df["SMA20"] = close.rolling(20).mean()
        df["EMA10"] = close.ewm(span=10, adjust=False).mean()
        df["VWAP"] = (close * volume).cumsum() / volume.cumsum()

        # RSI over 14 bars
        rsi = ta.momentum.RSIIndicator(close, window=14)
        df["RSI14"] = rsi.rsi()

        # MACD with the library's default periods
        macd_ind = ta.trend.MACD(close=close)
        df["MACD"] = macd_ind.macd()
        df["MACD_signal"] = macd_ind.macd_signal()
        df["MACD_diff"] = macd_ind.macd_diff()

        # Bollinger Bands: 20-bar window, 2 standard deviations
        bands = ta.volatility.BollingerBands(close=close, window=20, window_dev=2)
        df["Bollinger_high"] = bands.bollinger_hband()
        df["Bollinger_low"] = bands.bollinger_lband()
        df["Bollinger_mavg"] = bands.bollinger_mavg()

        # Average True Range over 14 bars
        true_range = ta.volatility.AverageTrueRange(
            high=df["High"], low=df["Low"], close=close, window=14
        )
        df["ATR14"] = true_range.average_true_range()

        return df

In [None]:
# Run the forecast; Predictor and CsvPreprocessor are defined in the cell above.
load_model_predict()