In [None]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
import joblib

# -------------------------------
# Data Loading and Preprocessing
# -------------------------------
# Load test.csv (path is relative to the current working directory).
df = pd.read_csv("datasets/test.csv")

# Feature columns: the bid and ask price columns for indices 0..levels-1,
# named as in the ML files (bids[i].price / asks[i].price).
levels = 5
bid_price_cols = [f"bids[{i}].price" for i in range(levels)]
ask_price_cols = [f"asks[{i}].price" for i in range(levels)]
feature_columns = bid_price_cols + ask_price_cols

# Target: the mark_price of the following row (one-step-ahead prediction).
# NOTE(review): assumes the rows are already in chronological order - confirm.
df["target"] = df["mark_price"].shift(-1)

# Drop only the rows that are unusable for training: a missing feature or a
# missing target (the last row always lacks a target after the shift).
# A blanket dropna() would also discard rows whose only NaNs sit in columns
# the models never read, silently shrinking (or emptying) the training set.
df = df.dropna(subset=[*feature_columns, "target"])

# Use the entire test.csv as the training dataset
X = df[feature_columns]
y = df["target"]

# -------------------------------
# Train and Save the Models
# -------------------------------

# 1. Linear Regression (refer to lr.py)
# Build and fit in one expression; fit() returns the fitted estimator itself.
lr_model = LinearRegression().fit(X, y)
# Persist the fitted model so it can be reloaded later with joblib.load.
joblib.dump(value=lr_model, filename="ML/lr_model.pkl")
print("Linear Regression model saved as ML/lr_model.pkl")

# 2. AdaBoost Regressor (refer to adaboost.py)
# random_state is fixed so repeated runs produce the same ensemble.
ada_model = AdaBoostRegressor(random_state=42).fit(X, y)
# Persist the fitted model so it can be reloaded later with joblib.load.
joblib.dump(value=ada_model, filename="ML/adaboost_model.pkl")
print("AdaBoost model saved as ML/adaboost_model.pkl")

# 3. Gradient Boosting Regressor (refer to GBR.py)
# random_state is fixed so repeated runs produce the same ensemble.
gb_model = GradientBoostingRegressor(random_state=42).fit(X, y)
# Persist the fitted model so it can be reloaded later with joblib.load.
joblib.dump(value=gb_model, filename="ML/GB_model.pkl")
print("Gradient Boosting model saved as ML/GB_model.pkl")

# 4. Support Vector Regressor (refer to svr.py)
# NOTE(review): the price features are fed to the RBF kernel unscaled;
# confirm whether whatever loads this model is expected to scale its inputs.
svr_model = SVR(kernel='rbf').fit(X, y)
# Persist the fitted model so it can be reloaded later with joblib.load.
joblib.dump(value=svr_model, filename="ML/svr_model.pkl")
print("SVR model saved as ML/svr_model.pkl")
