# Model Training

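Train baseline logistic regression and XGBoost models on the campaign data (`response` target), using a preprocessor built from the inferred numeric and categorical feature types and a stratified 80/20 train/test split.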

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

from src.models.logistic_model import LogisticModel
from src.models.xgboost_model import XGBoostModel
from src.services.feature_engineering import add_features
from src.services.preprocessing import build_preprocessor, infer_feature_types
from src.services.trainer import train_model

# --- Configuration ---------------------------------------------------------
# Path is relative to the notebook's working directory.
DATA_PATH = "../data/raw/campaign_data.csv"
TARGET = "response"

# --- Load the raw CSV, then run it through the feature-engineering step ----
df = pd.read_csv(DATA_PATH)
df = add_features(df)

# Separate the label column from the predictors.
y = df[TARGET]
X = df.drop(columns=[TARGET])

# --- Preprocessing ---------------------------------------------------------
# Column types are inferred from the full engineered frame (TARGET is passed
# so the helper can account for it), then used to build the preprocessor.
numeric_cols, categorical_cols = infer_feature_types(df, TARGET)
preprocessor = build_preprocessor(numeric_cols, categorical_cols)

# --- Train/test split ------------------------------------------------------
# Hold out 20% of the rows; stratify on the label and fix the seed so the
# split is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# --- Fit both baselines on the training partition --------------------------
# train_model returns a pair; only the first element (the pipeline) is kept.
# NOTE(review): the same `preprocessor` instance is handed to both calls.
# If train_model embeds it without cloning, the two pipelines share one
# transformer object -- confirm against src.services.trainer.
logistic_pipeline, _ = train_model(LogisticModel(), X_train, y_train, preprocessor)
xgb_pipeline, _ = train_model(XGBoostModel(), X_train, y_train, preprocessor)

# Last expression of the cell: display both pipelines in the notebook output.
(logistic_pipeline, xgb_pipeline)