### Packages

In [None]:
import pandas as pd
import joblib 

from sklearn.model_selection import train_test_split 

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer

from sklearn.decomposition import PCA

from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import accuracy_score

### Data and train/test split

In [None]:
# Load the dataset from disk.
df = pd.read_csv('data/heart.csv')

# Split the frame into predictors (X) and the 'HeartDisease' column (y).
X = df.drop(columns='HeartDisease')
y = df['HeartDisease']

# Hold out 30% of the rows for testing. A fixed random_state makes the split,
# and every score computed from it, reproducible across notebook runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

### Preprocessing and column transformer

In [16]:
# Sub-pipeline for numeric columns: standardise each feature.
numeric_scaling = Pipeline([('scaler', StandardScaler())])

# Sub-pipeline for categorical columns: one-hot encode.
# handle_unknown='ignore' encodes a category that was not seen during fit as
# all zeros instead of raising, so transform/predict does not fail when a
# rare level only appears in the held-out data.
categoric_encoding = Pipeline(
    [('encoder', OneHotEncoder(handle_unknown='ignore'))]
)

In [17]:
# Select feature columns from X (the frame the pipeline is fitted on) rather
# than df, so the 'HeartDisease' target can never appear in either list and
# no manual .drop() of the target is needed.
num_cols = X.select_dtypes(include=['int64', 'float64']).columns

cat_cols = X.select_dtypes(include=['object']).columns

In [18]:
# Route each column group through its own sub-pipeline. Columns that are in
# neither list are kept unchanged (remainder='passthrough').
preprocessing = ColumnTransformer(
    transformers=[
        ('numeric_features', numeric_scaling, num_cols),
        ('categoric_features', categoric_encoding, cat_cols),
    ],
    remainder='passthrough',
)

### Random Forest Classifier 

In [None]:
# Baseline model: preprocessing followed by a random forest.
# random_state is fixed so repeated fits of this baseline give the same
# forest and therefore comparable scores between runs.
base = Pipeline(steps=[
    ('processing', preprocessing),
    ('classifier', RandomForestClassifier(random_state=42)),
])