In [1]:
from titanic_model_processor.main import make_pipeline, fit, export
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler

In [2]:
# Pull version 1 of the "titanic" dataset from OpenML. With as_frame and
# return_X_y set, the features and the target come back as separate pandas
# objects.
X, y = fetch_openml(
    name="titanic",
    version=1,
    return_X_y=True,
    as_frame=True,
)

In [3]:
# Rename the "home.dest" column to "homedest" (drops the dot from the name).
# The result is rebound instead of using `inplace=True`, so the cell does not
# mutate the frame object created by the loading cell and stays chain-friendly.
X = X.rename(columns={"home.dest": "homedest"})

In [4]:
# Hold out 20% of the rows for evaluation. The fixed random_state puts the
# same rows in train and test on every Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [5]:
# Build the pipeline through the project helper.
# NOTE(review): the meaning of each argument is defined in
# titanic_model_processor.main, which is not visible here — confirm there.
# The random forest is seeded so repeated runs on the same data fit the same
# model; without random_state every run produces different trees and scores.
pipeline = make_pipeline(
    num_vars=("age", "fare"),
    cat_vars=("pclass", "sex", "embarked"),
    use_pca=True,
    components=8,
    classifier=RandomForestClassifier(n_estimators=100, random_state=42),
    scaler=StandardScaler(),
    num_imp_strategy="median",
)

In [6]:
# Train on the training split; whatever `fit` returns replaces the unfitted
# pipeline under the same name.
pipeline = fit(
    pipeline=pipeline,
    x_train=X_train,
    y_train=y_train,
)

In [7]:
# Score the fitted pipeline on the same rows it was trained on.
# NOTE(review): presumably mean accuracy if `pipeline` is a scikit-learn
# estimator — confirm against what `fit` returns.
pipeline.score(X_train, y_train)

0.9694364851957975

In [8]:
# Score the fitted pipeline on the held-out rows that were not used for
# training; compare with the training score to gauge overfitting.
pipeline.score(X_test, y_test)

0.7824427480916031

In [9]:
# Persist the fitted pipeline to "titanic.pkl" (path is relative to the
# kernel's working directory) via the project's export helper.
# NOTE(review): the .pkl extension suggests a pickle artifact — if so, only
# load it back from trusted sources; confirm the format in export().
export(pipeline=pipeline, file="titanic.pkl")