In [12]:
import pandas as pd
import numpy as np

In [13]:
# Input/output configuration: adjust these to match your dataset.
CSV_PATH = "Data/SBIN.csv"  # tick-data CSV to load
TIMESTAMP_COL = "DateTime"  # name of the timestamp column in the CSV
PRICE_COL = "SBINEQN"  # name of the price column in the CSV
OUTPUT_CSV = "sbin_metrics.csv"  # file the metrics table is written to

In [14]:
def compute_stylized_facts(df, intervals, price_col=None):
    """Compute realized volatility, skewness and kurtosis of log-returns per bin size.

    For every entry of ``intervals`` the price column is resampled to the last
    observation of each bin, empty bins are dropped, and log-returns are taken
    between consecutive remaining bins.

    Parameters
    ----------
    df : pandas.DataFrame
        Price data; ``resample`` requires a datetime-like index.
    intervals : dict
        Maps an output label to a pandas resampling rule,
        e.g. ``{"5min": "5min"}``.
    price_col : str, optional
        Name of the price column. Defaults to the module-level ``PRICE_COL``
        so existing two-argument calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per interval, with columns:

        * ``interval``     - the label from ``intervals``
        * ``n_points``     - number of non-empty price bins (one more than
          the number of returns)
        * ``realized_vol`` - square root of the sum of squared log-returns;
          NaN when there are no returns at all
        * ``skewness``     - ``Series.skew()`` of the log-returns
        * ``kurtosis``     - ``Series.kurtosis()`` of the log-returns
    """
    if price_col is None:
        # Fall back to the notebook-level configuration constant.
        price_col = PRICE_COL

    columns = ["interval", "n_points", "realized_vol", "skewness", "kurtosis"]
    metrics = []
    for label, rule in intervals.items():
        # Last tick in each bin; bins without any tick are dropped.
        prices = df[price_col].resample(rule).last().dropna()
        # Keep returns in their own Series: assigning them back into a frame
        # would realign on the index and re-introduce the leading NaN.
        log_ret = np.log(prices).diff().dropna()
        # min_count=1 yields NaN instead of a misleading 0.0 volatility when
        # fewer than two price bins exist.
        realized_vol = np.sqrt((log_ret ** 2).sum(min_count=1))
        metrics.append({
            "interval":     label,
            "n_points":     len(prices),
            "realized_vol": realized_vol,
            "skewness":     log_ret.skew(),
            "kurtosis":     log_ret.kurtosis(),
        })
    # Explicit columns keep the schema stable even when `intervals` is empty.
    return pd.DataFrame(metrics, columns=columns)

In [15]:
# 1) Load the tick data, index it by timestamp and sort it by that index
df = (
    pd.read_csv(CSV_PATH, parse_dates=[TIMESTAMP_COL])
    .set_index(TIMESTAMP_COL)
    .sort_index()
)

# 2) Aggregation intervals: label -> pandas resampling rule (identical here)
intervals = {rule: rule for rule in ("5s", "20s", "5min", "15min", "30min")}

# 3) One row of metrics per aggregation interval
metrics_df = compute_stylized_facts(df, intervals)

# 4) Print the table and persist it as CSV
print("\nSBIN Stylized‐Fact Metrics:\n")
print(metrics_df.to_string(index=False))
metrics_df.to_csv(OUTPUT_CSV, index=False)
print(f"\nMetrics saved to {OUTPUT_CSV}")


SBIN Stylized‐Fact Metrics:

interval  n_points  realized_vol  skewness  kurtosis
      5s      1697      0.011131  0.532386  7.869298
     20s       457      0.009913  0.008707  1.610792
    5min        32      0.011413 -0.169364  0.552102
   15min        11      0.009430 -0.116998 -0.094001
   30min         6      0.005332 -0.441496 -2.785880

Metrics saved to sbin_metrics.csv


In [16]:
# In your notebook:

import numpy as np
from skopt import gp_minimize
from skopt.space import Real, Integer
import pandas as pd

# 1) Your target vols from Step 1 (realized_vol column of the SBIN metrics
#    table, as printed to six decimals):
target = {
    "5s":   0.011131,
    "20s":  0.009913,
    "5min": 0.011413,
    "15min":0.009430,
    "30min":0.005332
}

# 2) Wrap your existing functions:
#    run_one_day_return_ticks(mu, sigma, num_bots, vol_thresh, momentum_aggression)
#    compute_metrics(df_ticks, intervals)

# Label -> pandas resampling rule. The lowercase "s"/"min" aliases match the
# rules used for the real-data metrics; the "S"/"T" spellings are deprecated
# since pandas 2.2 and emit FutureWarning.
intervals = {
    "5s":   "5s",
    "20s":  "20s",
    "5min": "5min",
    "15min":"15min",
    "30min":"30min"
}

def objective(x):
    """Return the sum of squared errors between simulated and target vols.

    ``x`` is a five-element sequence
    ``(mu, sigma, num_bots, vol_thresh, momentum_aggression)``. ``num_bots``
    is converted with ``int`` before the simulator is called. The simulated
    ticks are passed to ``compute_metrics`` together with the module-level
    ``intervals``, and for every key ``k`` of the module-level ``target`` the
    entry ``"<k>_vol"`` of the result is compared against ``target[k]``.
    """
    drift, volatility, bots, threshold, aggression = x

    # 1) simulate one day of ticks
    ticks = run_one_day_return_ticks(drift, volatility, int(bots), threshold, aggression)

    # 2) metrics of the simulated series
    simulated = compute_metrics(ticks, intervals)

    # 3) accumulate squared deviations of the vols, in target order
    sse = 0
    for key in target:
        deviation = simulated[f"{key}_vol"] - target[key]
        sse = sse + deviation ** 2
    return sse

# 3) Search space: one dimension per element of the vector passed to objective()
space = [
    Real(0.0, 0.001, name="mu"),
    Real(0.0001, 0.002, name="sigma"),
    Integer(10, 5000, name="num_bots"),
    Real(0.0001, 0.005, name="vol_thresh"),
    Real(0.01, 0.2, name="agg"),
]

# 4) Bayesian optimization: 30 objective evaluations (simulations) in total,
#    the first 5 being initial points; random_state fixes the optimizer's seed
res = gp_minimize(
    objective,
    space,
    n_calls=30,
    n_initial_points=5,
    random_state=42,
)

# 5) Best objective value and the parameter vector that produced it
best_error = res.fun
best_mu, best_sigma, best_bots, best_thresh, best_agg = res.x

print("Best params:")
print(f"  mu={best_mu:.6f}, sigma={best_sigma:.6f}, num_bots={best_bots},")
print(f"  vol_thresh={best_thresh:.6f}, momentum_aggression={best_agg:.3f}")
print("Objective (SSE):", best_error)

# Optionally, tabulate every evaluated point alongside its objective value
records = pd.DataFrame(
    res.x_iters,
    columns=[d.name for d in space],
).assign(error=res.func_vals)
display(records)


NameError: name 'run_one_day_return_ticks' is not defined