# Model Prototyping Notebook

Start from the gold feature tables or silver facts to build new models.

In [None]:
# ruff: noqa: E402
import os  # noqa: F401
import sys
from pathlib import Path

# Make repo modules importable.
# When this code runs as an exported script, __file__ is defined and the
# notebook directory is the script's folder; otherwise fall back to the
# current working directory.
if "__file__" in globals():
    NOTEBOOK_DIR = Path(__file__).resolve().parent
else:
    NOTEBOOK_DIR = Path.cwd()

# The repo root is the notebook directory itself when it contains params.py,
# and its parent directory in every other case.
if (NOTEBOOK_DIR / "params.py").exists():
    REPO_ROOT = NOTEBOOK_DIR
else:
    REPO_ROOT = NOTEBOOK_DIR.parent

# Append (not prepend) the root exactly once so re-running the cell does not
# keep growing sys.path.
if sys.path.count(str(REPO_ROOT)) == 0:
    sys.path.append(str(REPO_ROOT))

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb  # noqa: F401

import params  # noqa: F401
from Utils.db_utils import create_sql_engine

# Open a database connection through the project's engine factory.
engine = create_sql_engine()

# Load every row and column of the gold castaway/episode feature table.
features = pd.read_sql(
    sql="select * from gold.castaway_episode_features",
    con=engine,
)

# Last expression of the cell: the notebook renders the first rows.
features.head()

## Training example
Split features and train a baseline model.

In [None]:
# ruff: noqa: E402
# Example baseline: a random forest trained on the flattened feature payload.

# Key under feature_payload["misc"] that holds the target flag.
# NOTE(review): "winer" looks like a typo for "winner" -- confirm against the
# gold.castaway_episode_features payload schema. If the key never exists,
# every label silently falls back to False and the model learns nothing.
LABEL_KEY = "winer"
# json_normalize joins nested keys with ".", so the label lands in this column.
LABEL_COLUMN = f"misc.{LABEL_KEY}"

payloads = features["feature_payload"]

# Flatten all payload dicts in a single call instead of one DataFrame per row
# followed by a concat; this also yields a clean 0..n-1 index. Keys absent
# from a given payload become NaN and are filled with 0.
X = pd.json_normalize(payloads.tolist()).fillna(0)

# The target is read from the same payload, so its flattened column must be
# removed from the features; otherwise the model is handed the answer
# (target leakage). errors="ignore" keeps the cell working when the key is
# absent from every payload.
X = X.drop(columns=[LABEL_COLUMN], errors="ignore")

# Label: the flag under misc, defaulting to False when missing.
y = payloads.apply(lambda payload: payload.get("misc", {}).get(LABEL_KEY, False))

# Hold out 20% of rows for evaluation; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Per-class precision / recall / F1 on the held-out rows.
preds = clf.predict(X_test)
print(classification_report(y_test, preds))