In [None]:
import pathlib

import great_expectations as gx
import mlflow
import pandas as pd
import sklearn
import sklearn.metrics
import sklearn.model_selection
import sklearn.preprocessing
from sklearn.ensemble import RandomForestClassifier

import demo_code as demo

In [None]:
# Turn on MLflow autologging up front, before any estimator is fitted
# (presumably so the scikit-learn training later in this notebook is
# recorded — confirm against the MLflow tracking setup).
mlflow.autolog()

## Load cleaned data

In [None]:
# Root of the notebook's data volume and the sub-folder holding the cleaned
# dataset that is loaded below.
DATA_DIR = pathlib.Path("/notebooks/data")
CLEANED_DATA_DIR = DATA_DIR.joinpath("cleaned")

In [None]:
# Read the cleaned heart dataset and preview its first rows.
heart_csv_path = CLEANED_DATA_DIR / "heart_data.csv"
df_features = pd.read_csv(heart_csv_path)
df_features.head()

## Feature engineering

In [None]:
# Drop dataset annotation.
df_features = df_features.drop(columns=["dataset"])

# Scale (standardize) numeric features in a single pass. StandardScaler
# standardizes every column independently, so the values match scaling the
# columns one at a time, while the fitted scaler now keeps the statistics of
# all numeric columns instead of only the last one processed.
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split further down, so test-row statistics leak into the features. Consider
# fitting on the training rows only.
numeric_columns = ["age", "trestbps", "chol", "thalach", "oldpeak"]
standard_scalar = sklearn.preprocessing.StandardScaler()
df_features[numeric_columns] = standard_scalar.fit_transform(
    df_features[numeric_columns]
)

# One hot encode cp.
cp_encoded = sklearn.preprocessing.OneHotEncoder().fit_transform(df_features[["cp"]])
cp_ohe = pd.DataFrame(
    # toarray() yields a plain ndarray (todense() returns the legacy np.matrix).
    cp_encoded.toarray(),
    # Four hard-coded names: construction fails loudly if `cp` does not have
    # exactly four distinct categories.
    columns=["cp_0", "cp_1", "cp_2", "cp_3"],
    # Reuse the source index so the concat below lines rows up one-to-one even
    # if df_features does not carry a default RangeIndex.
    index=df_features.index,
)
df_features = pd.concat([df_features, cp_ohe], axis=1)

# Keep only the columns listed in demo.data.FEATURE_COLUMNS, in that order.
df_features = df_features[demo.data.FEATURE_COLUMNS]

df_features.head()

## Train/test set

In [None]:
# Display the engineered feature table that X and y are sliced from below.
df_features

In [None]:
# Use the final column as the label and every column before it as model input.
# NOTE(review): relies on demo.data.FEATURE_COLUMNS listing the target last — confirm.
X, y = df_features.iloc[:, :-1], df_features.iloc[:, -1]

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
split_options = {"train_size": 0.8, "test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, **split_options
)

In [None]:
# Display the training features produced by the split above.
X_train

## Model training

In [None]:
# Train a shallow random forest (seeded for reproducibility) on the training split.
# RandomForestClassifier is imported in the notebook's top import cell.
classifier = RandomForestClassifier(max_depth=4, random_state=0)
classifier.fit(X_train, y_train)

# Hard class labels are what accuracy needs.
prediction = classifier.predict(X_test)

# ROC AUC ranks samples by score, so it needs the positive-class probability,
# not thresholded labels; this also matches the "roc_auc" cross-validation
# scorer used below.
# Assumes a binary target whose positive class is classifier.classes_[1] — TODO confirm.
positive_proba = classifier.predict_proba(X_test)[:, 1]

# 10-fold stratified CV repeated 3 times, evaluated on the training split only.
cv = sklearn.model_selection.RepeatedStratifiedKFold(
    n_splits=10, n_repeats=3, random_state=1
)

accuracy = sklearn.metrics.accuracy_score(y_test, prediction)
cv_roc_auc = sklearn.model_selection.cross_val_score(
    classifier, X_train, y_train, cv=cv, scoring="roc_auc"
).mean()
test_roc_auc = sklearn.metrics.roc_auc_score(y_test, positive_proba)

print("Accuracy : ", f"{accuracy:.2%}")
print("Cross Validation Score : ", f"{cv_roc_auc:.2%}")
print("ROC_AUC Score : ", f"{test_roc_auc:.2%}")