In [97]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.model_selection import train_test_split, GridSearchCV



In [54]:
# Load the daily bike-share records from the CSV that sits next to the notebook.
bikes_renal = pd.read_csv(filepath_or_buffer='daily-bike-share.csv')

# Preview the first five rows as the cell output.
bikes_renal.head(5)

Unnamed: 0,instant,dteday,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,rentals
0,1,1/1/2011,1,0,1,0,6,0,2,0.344167,0.363625,0.805833,0.160446,331
1,2,1/2/2011,1,0,1,0,0,0,2,0.363478,0.353739,0.696087,0.248539,131
2,3,1/3/2011,1,0,1,0,1,1,1,0.196364,0.189405,0.437273,0.248309,120
3,4,1/4/2011,1,0,1,0,2,1,1,0.2,0.212122,0.590435,0.160296,108
4,5,1/5/2011,1,0,1,0,3,1,1,0.226957,0.22927,0.436957,0.1869,82


In [86]:
# Report how many missing values each column contains.
print(bikes_renal.isna().sum())

# Separate the predictors from the 'rentals' target; copies keep the loaded
# frame untouched by anything done to X or Y later.
X = bikes_renal.drop(columns=['rentals']).copy()
Y = bikes_renal['rentals'].copy()

# Hold out 25% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=42,
)

instant       0
dteday        0
season        0
yr            0
mnth          0
holiday       0
weekday       0
workingday    0
weathersit    0
temp          0
atemp         0
hum           0
windspeed     0
rentals       0
dtype: int64


### Remove columns

In [56]:
# Column roles used by the preprocessing objects defined in the cells below.
# They must hold their real values at this point: a pipeline step that is given
# `numeric_features + categorical_features` evaluates that expression once, when
# the step is constructed, so empty placeholders here would produce a column
# selector that keeps no columns at all.
numeric_features = ['temp', 'atemp', 'hum', 'windspeed']
categorical_features = ['season', 'mnth', 'holiday', 'weekday', 'workingday', 'weathersit']
target = 'rentals'

In [67]:
class TempDiff_Filler(BaseEstimator, TransformerMixin):
    """Add the relative gap between ``atemp`` and ``temp`` as a new column.

    For every row the new column holds ``(atemp - temp) / temp``.

    Parameters
    ----------
    column : str, default='diff_temp'
        Name of the column that receives the computed ratio.
    """

    def __init__(self, column='diff_temp'):
        self.column = column

    def fit(self, X, y=None):
        """Learn nothing; return the transformer itself."""
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with the relative temperature difference added.

        Parameters
        ----------
        X : pandas.DataFrame
            Must contain the columns ``'temp'`` and ``'atemp'``.
        y : ignored

        Returns
        -------
        pandas.DataFrame
            Copy of ``X`` with ``self.column`` added (or overwritten). Rows whose
            ``temp`` is exactly 0 get NaN rather than +/-inf.

        Raises
        ------
        KeyError
            If ``'temp'`` or ``'atemp'`` is missing from ``X``.
        """
        X_transformed = X.copy()
        temp = X_transformed['temp']
        # Series.where keeps values where the condition holds and puts NaN
        # elsewhere, so a zero denominator yields NaN. A later imputation step
        # can fill NaN, whereas an infinity is not treated as a missing value.
        safe_temp = temp.where(temp != 0)
        X_transformed[self.column] = (X_transformed['atemp'] - temp) / safe_temp
        return X_transformed

In [59]:
class Redundant_Remover(BaseEstimator, TransformerMixin):
    """Keep only the configured columns of the incoming frame.

    Parameters
    ----------
    columns
        Column label(s) used to index ``X`` in ``transform``; everything not
        selected by this indexer is left out of the result.
    """

    def __init__(self, columns):
        self.columns = columns

    def fit(self, X, y=None):
        """Learn nothing; return the transformer itself."""
        return self

    def transform(self, X, y=None):
        """Return an independent copy of ``X`` restricted to ``self.columns``."""
        kept = X[self.columns]
        return kept.copy()

In [68]:
# Quick check of the transformer on the full feature frame: the cell output
# shows X with the extra 'diff_temp' column appended.
tempDiff = TempDiff_Filler()
tempDiff.fit_transform(X)

Unnamed: 0,instant,dteday,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,diff_temp
0,1,1/1/2011,1,0,1,0,6,0,2,0.344167,0.363625,0.805833,0.160446,0.056537
1,2,1/2/2011,1,0,1,0,0,0,2,0.363478,0.353739,0.696087,0.248539,-0.026794
2,3,1/3/2011,1,0,1,0,1,1,1,0.196364,0.189405,0.437273,0.248309,-0.035439
3,4,1/4/2011,1,0,1,0,2,1,1,0.200000,0.212122,0.590435,0.160296,0.060610
4,5,1/5/2011,1,0,1,0,3,1,1,0.226957,0.229270,0.436957,0.186900,0.010191
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
726,727,12/27/2012,1,1,12,0,4,1,2,0.254167,0.226642,0.652917,0.350133,-0.108295
727,728,12/28/2012,1,1,12,0,5,1,2,0.253333,0.255046,0.590000,0.155471,0.006762
728,729,12/29/2012,1,1,12,0,6,0,2,0.253333,0.242400,0.752917,0.124383,-0.043157
729,730,12/30/2012,1,1,12,0,0,0,1,0.255833,0.231700,0.483333,0.350754,-0.094331


In [71]:
# Numeric branch: first derive the relative temperature difference, then
# replace any missing values with the column mean.
num_preparation = Pipeline(
    steps=[
        ('temp_diff_fill', TempDiff_Filler()),
        ('fill_missings', SimpleImputer(strategy='mean')),
    ]
)

In [62]:
# Single-step pipeline that narrows a frame down to the modelling columns.
# The column list is built right here, at construction time, from the current
# contents of numeric_features and categorical_features.
all_preparation = Pipeline(
    steps=[
        (
            'del_redundant',
            Redundant_Remover(columns=[*numeric_features, *categorical_features]),
        ),
    ]
)

In [76]:
# Column roles for the bike-share data.
# Continuous measurements.
numeric_features = [
    'temp',
    'atemp',
    'hum',
    'windspeed',
]
# Integer-coded calendar and weather categories.
categorical_features = [
    'season',
    'mnth',
    'holiday',
    'weekday',
    'workingday',
    'weathersit',
]
# Column the model is trained to predict.
target = 'rentals'

In [102]:
# Column-wise preprocessing.
# Each ColumnTransformer branch slices its columns out of the frame handed to
# fit/transform, so every name listed must already exist in that frame.
# 'diff_temp' is only created inside num_preparation, which means it cannot be
# selected by a sibling branch; asking for it makes fit fail on an unknown column.
# Columns not named in any branch (e.g. 'instant', 'dteday') are dropped, which
# is the ColumnTransformer default (remainder='drop').
data_preparation = ColumnTransformer(transformers=[
    # Numeric inputs: add the relative temperature difference, then mean-impute.
    ('numeric_preprocessing', num_preparation, numeric_features),
    # Categorical codes get no preprocessing and are forwarded unchanged.
    ('categorical_preprocessing', 'passthrough', categorical_features),
])

In [103]:
# End-to-end estimator: column preprocessing followed by a linear regression.
model_pipeline_v1 = Pipeline(
    steps=[
        ('preprocessor', data_preparation),
        ('model', LinearRegression()),
    ]
)

In [1]:
# Train the full pipeline on the training split. This cell needs every cell
# above it to have been run in the current kernel session first.
model_pipeline_v1.fit(X=X_train, y=y_train)

NameError: name 'model_pipeline_v1' is not defined