In [1]:
import pandas as pd
import numpy as np
import sklearn
import yaml
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.base import BaseEstimator, TransformerMixin
import os
import sys

In [2]:
# Make the parent directory importable so that `src.*` modules resolve.
# The membership check keeps this cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
project_root = os.path.abspath('../')
if project_root not in sys.path:
    sys.path.append(project_root)

In [3]:
from src.data.dataloader import DataLoader

In [4]:
# Load IPython's autoreload extension and switch it to mode 2, so edits to
# imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Path of the CSV file (relative to the notebook's working directory) that is
# handed to data_reader.read_data() further down.
DATA_PATH = "../data/heart_cleveland_upload.csv"

In [None]:
# YAML text describing the dataset: a `target` entry plus a list of
# `categorical` entries, each with a `name` and its `values`.
# Further down it is parsed with yaml.safe_load and unpacked into DataLoader
# as keyword arguments.
reader_conf = '''
target: condition
categorical:
    - name: sex
      values: [0,1]
    - name: cp
      values: [0, 1, 2, 3]
    - name: fbs
      values: [0, 1]
    - name: restecg
      values: [0, 1, 2]
    - name: exang
      values: [0, 1]
    - name: slope
      values: [0, 1, 2]
    - name: ca
      values: [0, 1, 2, 3]
    - name: thal
      values: [0, 1, 2]
'''

In [None]:
class PowerTransformer(BaseEstimator, TransformerMixin):
  """Append element-wise powers of the features to the feature matrix.

  For a 2-D input with ``n`` columns, ``transform`` returns ``2 * n``
  columns: the original values followed by the same values raised to
  ``power``.

  NOTE: this class is unrelated to ``sklearn.preprocessing.PowerTransformer``,
  which happens to share the name.

  Every method prints a short trace message when it is called; these
  messages are part of the class's observable behavior.

  Parameters
  ----------
  power : default=2
      Exponent used in ``X ** power``.
  """

  def __init__(self, power=2):
    print(f'Call to power transformer init() with power = {power} called')
    self.power = power

  def fit(self, X, y=None):
    """Stateless fit: nothing is learned from ``X`` or ``y``; returns ``self``."""
    print("PowerTransformer fit() method")
    return self

  def transform(self, X, y=None):
    """Return ``X`` with ``X ** self.power`` concatenated along axis 1.

    ``X`` may be any 2-D array-like (ndarray, nested list, DataFrame);
    the result is always a NumPy array. ``y`` is accepted but not used.
    """
    print('transform() called.\n')
    # Coerce first: a plain nested list does not support `**`, so without
    # this the method would raise TypeError for list input.
    X = np.asarray(X)
    return np.concatenate((X, X ** self.power), axis=1)

In [None]:
# Parse the YAML text into a dict of DataLoader keyword arguments.
# The name is rebound from str to dict, so only parse while it is still text:
# re-running this cell would otherwise pass the already-parsed dict to
# yaml.safe_load, which expects a string or stream and fails.
if isinstance(reader_conf, str):
    reader_conf = yaml.safe_load(reader_conf)

In [None]:
# Build the project's DataLoader, unpacking the top-level keys of the parsed
# config (`target`, `categorical`) as keyword arguments.
data_reader = DataLoader(**reader_conf)

In [None]:
# Load the CSV at DATA_PATH; read_data returns a (features, target) pair.
# NOTE(review): the concrete return types are defined by the project's
# DataLoader and are not visible from this notebook.
X, target = data_reader.read_data(DATA_PATH)

In [None]:
# Convert features and target to NumPy arrays.
# np.asarray (rather than `.values`) keeps this cell idempotent: the names are
# rebound to ndarrays, which have no `.values` attribute, so a second run of
# the original cell would raise AttributeError.
X = np.asarray(X)
target = np.asarray(target)

In [None]:
# Split into 80% train / remaining rows test; the fixed random_state makes
# the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    target,
    train_size=0.8,
    random_state=42,
)

In [None]:
# Three-step pipeline: feature expansion -> standardization -> classifier.
pipe = Pipeline(
    steps=[
        ('power', PowerTransformer(power=2)),
        ('scaler', StandardScaler()),
        ('model', LogisticRegression(solver='liblinear')),
    ]
)

In [None]:
# Fit every pipeline step on the training split; the fitted pipeline is the
# cell's displayed value.
pipe.fit(X=X_train, y=y_train)

In [None]:
# Predict labels for both splits, train first and then test.
y_train_pred, y_test_pred = (
    pipe.predict(split) for split in (X_train, X_test)
)

In [None]:
# Accuracy on the training split.
accuracy_score(y_true=y_train, y_pred=y_train_pred)

In [None]:
# Accuracy on the held-out test split.
accuracy_score(y_true=y_test, y_pred=y_test_pred)

In [None]:
# F1 score on the training split.
f1_score(y_true=y_train, y_pred=y_train_pred)

In [None]:
# F1 score on the held-out test split.
f1_score(y_true=y_test, y_pred=y_test_pred)