In [39]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.pipeline import Pipeline, TransformerMixin, make_pipeline
from sklearn.base import BaseEstimator
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.svm import SVC
from sklearn.compose import TransformedTargetRegressor

In [2]:
pima_data = pd.read_csv('pima-indians-diabetes.csv')
pima_data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DPF,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [4]:
X = pima_data.drop('Outcome', axis=1)
y = pima_data['Outcome']
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3, random_state=7)

In [6]:
pipeline = Pipeline(
            [
                ('sclaer', MinMaxScaler()),
                ('log_reg', LogisticRegression())
            ]    
            )
pipeline.fit(X_train, y_train)

Pipeline(steps=[('sclaer', MinMaxScaler()), ('log_reg', LogisticRegression())])

In [10]:
y_predict = pipeline.predict(X_test)
print('Score', pipeline.score(X_test, y_test))
print('Confusion Matrix: ', confusion_matrix(y_test, y_predict))

Score 0.7619047619047619
Confusion Matrix:  [[132  15]
 [ 40  44]]


# Make Pipeline

In [12]:
pipe = make_pipeline(
            MinMaxScaler(),
            LogisticRegression()
        )
pipe.fit(X_train, y_train)

Pipeline(steps=[('minmaxscaler', MinMaxScaler()),
                ('logisticregression', LogisticRegression())])

In [14]:
y_predict = pipe.predict(X_test)
print('Score', pipe.score(X_test, y_test))
print('Confusion Matrix: ', confusion_matrix(y_test, y_predict))

Score 0.7619047619047619
Confusion Matrix:  [[132  15]
 [ 40  44]]


In [16]:
pipe = make_pipeline(
            MinMaxScaler(),
            SVC()
        )
pipe.fit(X_train, y_train)

Pipeline(steps=[('minmaxscaler', MinMaxScaler()), ('svc', SVC())])

In [17]:
y_predict = pipe.predict(X_test)
print('Score', pipe.score(X_test, y_test))
print('Confusion Matrix: ', confusion_matrix(y_test, y_predict))

Score 0.7445887445887446
Confusion Matrix:  [[124  23]
 [ 36  48]]


# Pipeline with Custom functions

In [22]:
pima_data[pima_data.Insulin == 0]

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DPF,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
5,5,116,74,0,0,25.6,0.201,30,0
7,10,115,0,0,0,35.3,0.134,29,0
...,...,...,...,...,...,...,...,...,...
761,9,170,74,31,0,44.0,0.403,43,1
762,9,89,62,0,0,22.5,0.142,33,0
764,2,122,70,27,0,36.8,0.340,27,0
766,1,126,60,0,0,30.1,0.349,47,1


In [31]:
class ZeroValueTransformer(BaseEstimator, TransformerMixin):
    
    def __init__(self, feature):
#         self.df = df
        self.feature = feature
    
    def fit(self, X,y):
        print('Fit Method called>>>>>>>>>>>>>>')
        return self
    
    def transform(self, X,y=None):
        print('Transform Method called>>>>>>>>>>')
        X[self.feature] = np.where(X[self.feature]==0, X[self.feature].median(), X[self.feature])
        return X

In [32]:
pipe_custom_func = make_pipeline(
                   ZeroValueTransformer('Insulin'),
                   MinMaxScaler(),
                   SVC()
                )
pipe_custom_func.fit(X_train, y_train)

Fit Method called>>>>>>>>>>>>>>
Transform Method called>>>>>>>>>>


Pipeline(steps=[('zerovaluetransformer',
                 ZeroValueTransformer(feature='Insulin')),
                ('minmaxscaler', MinMaxScaler()), ('svc', SVC())])

In [33]:
y_predict = pipe_custom_func.predict(X_test)
print('Score', pipe_custom_func.score(X_test, y_test))
print('Confusion Matrix: ', confusion_matrix(y_test, y_predict))

Transform Method called>>>>>>>>>>
Transform Method called>>>>>>>>>>
Score 0.7575757575757576
Confusion Matrix:  [[126  21]
 [ 35  49]]


# Custom Transformation of Target Variable

In [34]:
car_data = pd.read_csv('practice/car-mpg-3.csv')
car_data.head()

Unnamed: 0,mpg,cyl,disp,hp,wt,acc,yr,origin,car_type,car_name
0,18.0,8,307.0,130,3504,12.0,70,1,0,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,0,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,0,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,0,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,0,ford torino


In [35]:
car_data.drop('car_name', axis=1, inplace=True)

In [37]:
def target_transform(target):
    print('target_transform called >>>>>>>>>>>>>>')
    return np.sqrt(target)

def inverse_target_transform(target):
    print('<<<<<<<<<<<<<<<inverse target_transform called >>>>>>>>>>>>>>')
    return target**2

In [41]:
pipe_transform_target = make_pipeline(
                            LinearRegression()
                        )
model = TransformedTargetRegressor(
        regressor=pipe_transform_target,
        func=target_transform,
        inverse_func=inverse_target_transform
        )

print('fit pipeline with target transform')
model.fit(X_train, y_train)
print('Predicted Score: {}'.format(model.score(X_test,y_test)))


fit pipeline with target transform
target_transform called >>>>>>>>>>>>>>
<<<<<<<<<<<<<<<inverse target_transform called >>>>>>>>>>>>>>
target_transform called >>>>>>>>>>>>>>
<<<<<<<<<<<<<<<inverse target_transform called >>>>>>>>>>>>>>
target_transform called >>>>>>>>>>>>>>
<<<<<<<<<<<<<<<inverse target_transform called >>>>>>>>>>>>>>
Predicted Score: 0.1290751439195511


In [42]:
# instead of individual functions, lets create a class

class CustomTargetTransformer(BaseEstimator, TransformerMixin):
    def __init_(self):
        pass
    def fit(self, target):
        return target
    def transform(self, target):
        print('target_transform called >>>>>>>>>>>>>>')
        return np.sqrt(target)
    def inverse_transform(self, target):
        print('<<<<<<<<<<<<<<<inverse target_transform called >>>>>>>>>>>>>>')
        return target**2

In [43]:
pipe_transform_target = make_pipeline(
                            LinearRegression()
                        )
model = TransformedTargetRegressor(
        regressor=pipe_transform_target,
        transformer=CustomTargetTransformer(),
        check_inverse=False
        )

print('fit pipeline with target transform')
model.fit(X_train, y_train)
print('Predicted Score: {}'.format(model.score(X_test,y_test)))

fit pipeline with target transform
target_transform called >>>>>>>>>>>>>>
<<<<<<<<<<<<<<<inverse target_transform called >>>>>>>>>>>>>>
Predicted Score: 0.1290751439195511
