# Week 4 — Training notebook: train and save a SimpleModelWrapper

This notebook trains a small linear ridge model (the repo's `SimpleModelWrapper`) on a demo price series, evaluates it, and saves a model artifact to `models/week4_model.npz`.

Notes:
- scikit-learn is optional. To enable the scikit-learn-based improvements, install the dev extras (`pip install -e '.[dev]'`) or scikit-learn on its own (`pip install scikit-learn`).
- Run this notebook from the repo root so relative paths resolve correctly.


In [None]:
# 1) Setup: imports, env, plotting, seed

import sys
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import time

# Resolve the repo root from the current working directory (the notebook is
# expected to be launched from the repo root) and put it on sys.path, so a
# package that lives directly under the repo root can be imported below.
ROOT = Path('.').resolve()
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

# Directory that receives the saved model artifact.
MODELS_DIR = ROOT / 'models'
MODELS_DIR.mkdir(parents=True, exist_ok=True)

# Seed NumPy's global RNG; the synthetic price fallback draws from it.
np.random.seed(42)
sns.set(style='whitegrid')

# Import our repo pipeline. On failure, print a hint and re-raise so the
# original traceback stays visible instead of being swallowed.
try:
    from qt.ml.pipeline import FeatureBuilder, SimpleModelWrapper
except Exception as e:
    print('Could not import qt.ml.pipeline (is the package on PYTHONPATH?).', e)
    raise

print('Imports OK — notebook ready')


In [None]:
# 2) Helper functions: loaders, feature builder wrapper, metrics

def load_demo_prices(csv_path=None, n_synthetic=1000):
    """Load a demo price series from a CSV file, or synthesize one if the file is missing.

    Parameters
    ----------
    csv_path : str or path-like, optional
        CSV file to read. Defaults to ``notebooks/equity_s1.0_i0.0.csv``,
        resolved relative to the current working directory.
    n_synthetic : int, default 1000
        Number of points generated when falling back to the synthetic series.

    Returns
    -------
    pd.Series
        Indexed by integer steps, dtype float.

    Raises
    ------
    ValueError
        If the CSV exists but has neither a known price column nor any
        numeric column, or if the chosen column cannot be converted to float.

    Notes
    -----
    The synthetic series draws from NumPy's global RNG, so call
    ``np.random.seed`` beforehand for reproducible output.
    """
    if csv_path is None:
        csv_path = Path('notebooks') / 'equity_s1.0_i0.0.csv'
    csv_path = Path(csv_path)

    if csv_path.exists():
        df = pd.read_csv(csv_path)
        # Prefer a conventionally named price column, in this priority order.
        col = next(
            (c for c in ('price', 'close', 'mid', 'mid_price') if c in df.columns),
            None,
        )
        if col is None:
            # Fallback: first numeric column; fail with a clear message if none exists.
            numeric_cols = df.select_dtypes(include='number').columns
            if len(numeric_cols) == 0:
                raise ValueError(f'No numeric price column found in {csv_path}')
            col = numeric_cols[0]
        # Cast explicitly: integer-valued CSV columns would otherwise come back
        # as int, contradicting the documented float dtype.
        return pd.Series(df[col].to_numpy(dtype=float))

    # Synthetic GBM-like series: cumulative sum of normally distributed log-increments.
    dt = 1 / 252
    mu = 0.05
    sigma = 0.2
    S0 = 100.0
    increments = np.random.normal(
        loc=(mu - 0.5 * sigma**2) * dt,
        scale=sigma * np.sqrt(dt),
        size=n_synthetic,
    )
    logS = np.log(S0) + np.cumsum(increments)
    return pd.Series(np.exp(logS))


def build_features(prices, window=10):
    """Build the ``(X, y)`` pair for a price series via the repo's ``FeatureBuilder``.

    Thin wrapper: constructs ``FeatureBuilder(window=window)`` and passes it
    ``prices.values``, so ``prices`` must expose a ``.values`` attribute
    (e.g. a ``pd.Series``).
    """
    features, targets = FeatureBuilder(window=window).build(prices.values)
    return features, targets


def mse(a, b):
    """Return the mean squared error between two array-likes.

    Both inputs are converted with ``np.asarray`` and subtracted element-wise
    (NumPy broadcasting applies); the squared residuals are then averaged
    over all elements.
    """
    lhs = np.asarray(a)
    rhs = np.asarray(b)
    residual = lhs - rhs
    return np.mean(residual ** 2)


# Usage sketch for the helpers above. The `if False` guard keeps it from
# executing, so it serves purely as inline documentation.
if False:
    demo_prices = load_demo_prices()
    demo_X, demo_y = build_features(demo_prices, window=10)
    print(demo_X.shape, demo_y.shape)


In [None]:
# 3) Generate or load sample data

prices = load_demo_prices()
print('Loaded prices — length:', len(prices))
print(prices.head())

# Explicit figure/axes handles instead of the implicit pyplot state machine.
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(prices.values)
ax.set_title('Demo price series')
ax.set_ylabel('Price')
plt.show()


In [None]:
# 4) Data preprocessing & train/validation split

# Window size handed to FeatureBuilder through build_features.
window = 10
X, y = build_features(prices, window=window)
assert X.shape[0] == y.shape[0], 'features and targets must be aligned row-for-row'

# Ordered 80/20 split: the first 80% of rows train, the remaining rows validate.
# There is no shuffling, so validation rows always follow training rows.
# NOTE: no normalization is applied in this notebook. If scaling is added
# later, fit it on X_train only so validation statistics do not leak.
n = X.shape[0]
train_size = int(n * 0.8)
assert 0 < train_size < n, f'not enough samples ({n}) for a train/validation split'
X_train, X_val = X[:train_size], X[train_size:]
y_train, y_val = y[:train_size], y[train_size:]

print('X_train', X_train.shape, 'y_train', y_train.shape)
print('X_val', X_val.shape, 'y_val', y_val.shape)


In [None]:
# 5) Core algorithm: fit SimpleModelWrapper and evaluate

model = SimpleModelWrapper(alpha=1.0)
# perf_counter is the monotonic, high-resolution clock meant for measuring
# durations; time.time() follows the wall clock and can jump.
start = time.perf_counter()
model.fit(X_train, y_train)
print(f'Fitted model in {time.perf_counter()-start:.3f}s')

# Predict on validation
y_pred = model.predict(X_val)
val_mse = mse(y_val, y_pred)
print('Validation MSE:', val_mse)

# Show a few values side by side (pandas is already imported in the setup cell).
# Kept as the last expression so the notebook renders the table.
pd.DataFrame({'y_val': y_val[:10], 'y_pred': y_pred[:10]})


In [None]:
# 6) Visualization: predicted vs actual returns

# Line plot: actual and predicted series over the validation rows.
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(y_val, label='actual')
ax.plot(y_pred, label='pred')
ax.legend()
ax.set_title('Validation: actual vs predicted next-step returns')
plt.show()

# Scatter: one point per validation row, actual on x and predicted on y.
fig, ax = plt.subplots(figsize=(6, 6))
sns.scatterplot(x=y_val, y=y_pred, ax=ax)
ax.set_xlabel('Actual')
ax.set_ylabel('Predicted')
ax.set_title('Predicted vs Actual Returns')
plt.show()


In [None]:
# 7) Save and load artifact

# Save coefficients (numpy savez)
model_path = MODELS_DIR / 'week4_model.npz'
np.savez(model_path, coef=model.coef_, intercept=model.intercept_)
print('Saved model to', model_path)

# Load back. np.load on an .npz returns an NpzFile that keeps the archive
# open, so read the arrays inside a `with` block to close it deterministically.
with np.load(model_path) as loaded:
    coef_loaded = loaded['coef']
    intercept_loaded = float(loaded['intercept'])
print('Loaded coef shape', coef_loaded.shape, 'intercept', intercept_loaded)

# Demonstrate restoring into a new SimpleModelWrapper
m2 = SimpleModelWrapper(alpha=1.0)
m2.coef_ = coef_loaded
m2.intercept_ = intercept_loaded
# Quick sanity check: the restored model must reproduce the original predictions.
assert np.allclose(m2.predict(X_val), y_pred)
print('Restored model predicts same as saved model')


In [None]:
# 8) Unit tests (in-notebook examples) — small pytest-style checks

# We'll write some tiny tests to `tests/unit/test_week4_training.py`

test_file = Path('tests/unit/test_week4_training.py')

# Source of the generated pytest module, written verbatim to `test_file`.
TEST_MODULE_SOURCE = '''\
import numpy as np
from qt.ml.pipeline import FeatureBuilder, SimpleModelWrapper

def test_feature_alignment():
    prices = np.linspace(100, 110, 50)
    X, y = FeatureBuilder(window=5).build(prices)
    assert X.shape[0] == y.shape[0]

def test_model_fit_predict_shape():
    prices = np.linspace(100, 110, 100)
    X, y = FeatureBuilder(window=5).build(prices)
    m = SimpleModelWrapper(alpha=1.0)
    m.fit(X[:50], y[:50])
    p = m.predict(X[50:60])
    assert p.shape[0] == 10
'''

# Never overwrite an existing test file; only create it when absent.
if test_file.exists():
    print('Test file already exists:', test_file)
else:
    test_file.parent.mkdir(parents=True, exist_ok=True)
    test_file.write_text(TEST_MODULE_SOURCE)
    print('Wrote example pytest tests to', test_file)

print('\nYou can run `pytest tests/unit/test_week4_training.py -q` to run these checks.')


In [None]:
# 9) Run tests from notebook (example)

print('Running pytest for the small generated tests...')
import subprocess

# Run pytest with the kernel's own interpreter so it sees the same environment.
res = subprocess.run(
    [sys.executable, '-m', 'pytest', 'tests/unit/test_week4_training.py', '-q'],
    capture_output=True,
    text=True,
)
print(res.stdout)
# stderr is captured as well; surface it, otherwise a failure such as pytest
# not being installed shows up only as a bare non-zero return code.
if res.stderr:
    print(res.stderr)
print('Return code:', res.returncode)


In [None]:
# 10) Performance profiling example (cProfile)

import cProfile
import io
import pstats

pr = cProfile.Profile()
pr.enable()
try:
    # run a short training
    model_tmp = SimpleModelWrapper(alpha=1.0)
    model_tmp.fit(X_train[:200], y_train[:200])
finally:
    # Always stop the profiler, even if fit() raises; a profiler left enabled
    # would keep instrumenting whatever runs afterwards.
    pr.disable()

# Render the 10 most expensive entries, sorted by cumulative time, into a buffer.
stats_buffer = io.StringIO()
stats = pstats.Stats(pr, stream=stats_buffer).sort_stats('cumulative')
stats.print_stats(10)
print(stats_buffer.getvalue())


In [None]:
# 11) Usage example: how to load model from `models/week4_model.npz` in strategy

# The snippet below is printed for copy/paste, not executed here. It loads the
# artifact inside a `with` block so the recommended usage does not leave the
# .npz archive open.
print('Example snippet to integrate in `qt/strategies/momentum.py`:')
print('''
from pathlib import Path
import numpy as np
from qt.ml.pipeline import SimpleModelWrapper

model_path = Path('models') / 'week4_model.npz'
m = SimpleModelWrapper(alpha=1.0)
# np.load keeps the .npz archive open; the with-block closes it after reading
with np.load(model_path) as loaded:
    m.coef_ = loaded['coef']
    m.intercept_ = float(loaded['intercept'])
# then call m.predict(X) where X is the features built from latest prices
''')


In [None]:
# Final instructions

# Running this notebook non-interactively from the repo root (venv activated):
#   1) Optional: `pip install -e '.[dev]'` to get scikit-learn via the dev extras
#   2) jupyter nbconvert --to notebook --execute notebooks/week4_train.ipynb --inplace
# NOTE(review): nbconvert may execute with the notebook's own directory as the
# working directory — confirm that relative paths (ROOT, models/) resolve as intended.

print('Notebook complete — model saved to', model_path)
