In [2]:
import pickle
import numpy as np
import pandas as pd
import os
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import load_model
import util
import importlib
importlib.reload(util)



<module 'util' from '/Users/kolten/Desktop/optiver_project/models/util.py'>

In [3]:



MODEL_PATH = 'config_v256d03.h5'
SCALER_PATH = 'config_v256d03_scalers.pkl'

with open(SCALER_PATH, 'rb') as f:
    config = pickle.load(f)
model = load_model(MODEL_PATH, compile=False)

# Step 1: Compute snapshot features
snapshot_features_df = pd.read_csv("snapshot_features_df.csv")

rolling_features_df = pd.read_csv("stock_22753_df.csv")

# Preview
print("Rolling features:", rolling_features_df.shape)
display(rolling_features_df.head())

print("Rolling features:", snapshot_features_df.shape)
display(snapshot_features_df.head())

Rolling features: (746066, 25)


Unnamed: 0,stock_id,time_id,start_time,wap_mean,wap_std,wap_max,wap_min,spread_pct_mean,spread_pct_std,spread_pct_max,...,depth_ratio_mean,depth_ratio_std,depth_ratio_max,depth_ratio_min,log_return_mean,log_return_std,log_return_max,log_return_min,realized_volatility,datetime
0,22753.0,6.0,1800.0,126.41733,0.073325,126.578742,126.305623,0.000149,4.6e-05,0.000237,...,10.330577,37.81343,299.9997,0.006536,-6e-06,8.4e-05,0.00044,-0.000264,0.000185,
1,22753.0,6.0,1802.0,126.41386,0.071664,126.578742,126.305623,0.000149,4.5e-05,0.000237,...,10.19528,37.800391,299.9997,0.006536,-5e-06,8.4e-05,0.00044,-0.000264,0.000129,
2,22753.0,6.0,1805.0,126.410701,0.070046,126.578742,126.305623,0.000151,4.6e-05,0.000237,...,10.656852,38.531688,299.9997,0.006536,-5e-06,8.3e-05,0.00044,-0.000264,0.000329,
3,22753.0,6.0,1807.0,126.40781,0.069242,126.578742,126.305623,0.000151,4.5e-05,0.000237,...,10.453838,38.506543,299.9997,0.006536,-5e-06,8.2e-05,0.00044,-0.000264,0.000446,
4,22753.0,6.0,1810.0,126.405019,0.067859,126.578742,126.305623,0.000152,4.6e-05,0.000237,...,10.422791,38.512527,299.9997,0.006536,-4e-06,8.4e-05,0.00044,-0.000264,0.000438,


Rolling features: (3730669, 18)


Unnamed: 0,stock_id,time_id,seconds_in_bucket,bid_price1,ask_price1,bid_price2,ask_price2,bid_size1,ask_size1,bid_size2,ask_size2,mid_price,wap,bid_ask_spread,spread_pct,imbalance,depth_ratio,log_return
0,22753,6,1800.0,126.57,126.58,126.56,126.59,1,100,26,118,126.575,126.570098,0.01,7.9e-05,-0.980198,0.01,0.0
1,22753,6,1800.0,126.57,126.58,126.56,126.59,1,100,26,118,126.575,126.570098,0.01,7.9e-05,-0.980198,0.01,0.0
2,22753,6,1801.0,126.54,126.56,126.53,126.57,100,4,29,226,126.55,126.55923,0.02,0.000158,0.923077,24.999994,-8.6e-05
3,22753,6,1801.0,126.54,126.56,126.53,126.57,100,4,29,226,126.55,126.55923,0.02,0.000158,0.923077,24.999994,0.0
4,22753,6,1802.0,126.56,126.57,126.55,126.58,100,104,110,108,126.565,126.564901,0.01,7.9e-05,-0.019608,0.961538,4.5e-05


In [4]:
seq_df = util.generate_tick_sequences(
    snapshot_features_df,
    feature_cols=["wap","spread_pct","imbalance","depth_ratio","log_return"],
    window=config.get('window', util.WINDOW_SIZE),
    horizon=config.get('horizon', util.FORECAST_HORIZON),
    step=config.get('step', util.STEP)
)


In [5]:
subset = seq_df.iloc[-len(seq_df) // 5:]  # 取最近20%
X = np.stack(subset['X'].values)         # shape: (n_samples, window, n_features)
y_true = subset['y'].values              # shape: (n_samples,)



In [6]:
n_samples, window, n_feats = X.shape
X_scaled = config['x_scaler'].transform(X.reshape(-1, n_feats)).reshape(n_samples, window, n_feats)
y_scaled = config['y_scaler'].transform(y_true.reshape(-1, 1)).flatten()


In [9]:
y_pred_scaled = model.predict(X_scaled)
y_pred = config['y_scaler'].inverse_transform(y_pred_scaled).flatten()

results_df = pd.DataFrame({
    'time_id': subset['time_id'].values,
    'start_time':subset['start_time'].values,
    'y_true': subset['y'].values,
    'y_pred': y_pred
})

mse = mean_squared_error(y_true, y_pred)
qlike = np.mean((y_true - y_pred) ** 2 / (y_true ** 2))

print(f"Performance:")
print(f"  • MSE   = {mse:.6e}")
print(f"  • QLIKE = {qlike:.6e}")

[1m4176/4176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m394s[0m 94ms/step
Performance:
  • MSE   = 5.298305e-08
  • QLIKE = 2.746045e+01


In [8]:
print(len(seq_df['time_id'].values))
print(len(seq_df['start_time'].values))
print(len(seq_df['y'].values))
print(len(y_pred))


668071
668071
668071
133615
