# Bike Pipeline

In [104]:
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.base import BaseEstimator, TransformerMixin

from sklearn.metrics import mean_absolute_error, mean_squared_error, root_mean_squared_error, mean_absolute_percentage_error
import pickle

In [105]:
df = pd.read_csv("bike.csv")
df.head()

Unnamed: 0,datetime,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,casual,registered,count
0,2011-01-01 00:00:00,1,0,0,1,9.84,14.395,81,0.0,3,13,16
1,2011-01-01 01:00:00,1,0,0,1,9.02,13.635,80,0.0,8,32,40
2,2011-01-01 02:00:00,1,0,0,1,9.02,13.635,80,0.0,5,27,32
3,2011-01-01 03:00:00,1,0,0,1,9.84,14.395,75,0.0,3,10,13
4,2011-01-01 04:00:00,1,0,0,1,9.84,14.395,75,0.0,0,1,1


## Preprocessing

In [106]:
df["datetime"] = pd.to_datetime(df["datetime"],
                                    format="%Y-%m-%d %H:%M:%S")

In [107]:
df["year"] = df["datetime"].dt.year
df["month"] = df["datetime"].dt.month
df["day"] = df["datetime"].dt.day
df["weekday"] = df["datetime"].dt.weekday
df["hour"] = df["datetime"].dt.hour

df.sample(5)

Unnamed: 0,datetime,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,casual,registered,count,year,month,day,weekday,hour
892,2011-03-01 15:00:00,1,0,1,1,13.94,18.18,34,0.0,7,57,64,2011,3,1,1,15
4479,2011-10-18 16:00:00,4,0,1,1,25.42,31.06,57,16.9979,58,246,304,2011,10,18,1,16
4536,2011-11-02 02:00:00,4,0,1,1,12.3,16.665,87,0.0,0,2,2,2011,11,2,2,2
9992,2012-11-01 17:00:00,4,0,1,3,16.4,20.455,50,15.0013,37,652,689,2012,11,1,3,17
457,2011-02-02 03:00:00,1,0,1,3,9.02,11.365,93,8.9981,0,1,1,2011,2,2,2,3


## Log Transformation

In [108]:
df["count"] = np.log1p(df["count"])

## Feature Selection

In [109]:
features = ['season', 'holiday', 'workingday', 'weather', 'temp',
            'atemp', 'humidity', 'windspeed',
            'month', 'day', 'weekday', 'hour']
label = 'count'

X = df[features]
y = df[label]

## Train/Test Split

In [110]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

print(f"Train Shape: {X_train.shape}")
print(f"Test Shape: {X_test.shape}")

Train Shape: (8708, 12)
Test Shape: (2178, 12)


## Cyclical Transformation

In [111]:
class CyclicalTransformation(BaseEstimator, TransformerMixin):
    def __init__(self, drop=True):
        self.drop = drop
        self.columns = None
        self.new_columns = []

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        X_prim = X.copy()
        self.columns = X_prim.columns
        
        for col in self.columns:
            n_max = X_prim[col].max()
            sin_value = np.sin(2 * np.pi * X_prim[col] / n_max)
            cos_value = np.cos(2 * np.pi * X_prim[col] / n_max)
            self.new_columns.extend([col + "_sin", col + "_cos"])
            X_prim[col + "_sin"] = sin_value
            X_prim[col + "_cos"] = cos_value

        if self.drop:
            X_prim.drop(columns=self.columns, inplace=True)
        print("*** TRANSFORMATION ***")
        print(X_prim.columns)
        return X_prim

    def get_feature_names_out(self, input_features=None):
        return self.new_columns


class Debug(BaseEstimator, TransformerMixin):
    def __init__(self):
        pass
        
    def fit(self, X, y=None):
        print("*** DEBUG FIT ***")
        print(X.columns)
        return self

    def transform(self, X):
        print("*** DEBUG TRANSFORM ***")
        print(X.columns)
        return X

    def get_feature_names_out(self):
        pass

In [130]:
cyclical_pipe = make_pipeline(CyclicalTransformation(drop=True))

trans_cyclical = ColumnTransformer(transformers=[("scaler",
                                              StandardScaler(),
                                              ["weather", "temp", "atemp", "humidity", "windspeed"]),
                                                 ("cyclical",
                                               CyclicalTransformation(drop=True),
                                               ["season", "month", "hour", "day", "weekday"]),
                                                ],
                               remainder="passthrough",
                               force_int_remainder_cols=False)

# trans_scaler = ColumnTransformer(transformers=[
#                                              ("scaler",
#                                               StandardScaler(),
#                                               ["weather", "temp", "atemp", "humidity", "windspeed"])],
#                                remainder="passthrough",
#                                force_int_remainder_cols=False)

# to get the output of ColumnTransformer in pandas DataFrame
trans_cyclical.set_output(transform="pandas")
trans_scaler.set_output(transform="pandas")


# data = trans_cyclical.fit_transform(X_train)

# # transformer.get_feature_names_out()
# data

In [131]:
nn_params = {"random_state": 42,
             "max_iter": 1000,
             "hidden_layer_sizes": (100, 50, ),
             "n_iter_no_change": 50,
             "early_stopping": True,
             "verbose": False}

pipe = Pipeline(steps=[("transformer_1", trans_cyclical),
                       # ("transformer_2", trans_scaler),
                       ("debug", Debug()),
                       ("model", MLPRegressor(**nn_params))])
pipe

## Training

In [132]:
X_train.columns

Index(['season', 'holiday', 'workingday', 'weather', 'temp', 'atemp',
       'humidity', 'windspeed', 'month', 'day', 'weekday', 'hour'],
      dtype='object')

In [133]:
pipe.fit(X_train, y_train)

*** TRANSFORMATION ***
Index(['season_sin', 'season_cos', 'month_sin', 'month_cos', 'hour_sin',
       'hour_cos', 'day_sin', 'day_cos', 'weekday_sin', 'weekday_cos'],
      dtype='object')
*** DEBUG FIT ***
Index(['scaler__weather', 'scaler__temp', 'scaler__atemp', 'scaler__humidity',
       'scaler__windspeed', 'cyclical__season_sin', 'cyclical__season_cos',
       'cyclical__month_sin', 'cyclical__month_cos', 'cyclical__hour_sin',
       'cyclical__hour_cos', 'cyclical__day_sin', 'cyclical__day_cos',
       'cyclical__weekday_sin', 'cyclical__weekday_cos', 'remainder__holiday',
       'remainder__workingday'],
      dtype='object')
*** DEBUG TRANSFORM ***
Index(['scaler__weather', 'scaler__temp', 'scaler__atemp', 'scaler__humidity',
       'scaler__windspeed', 'cyclical__season_sin', 'cyclical__season_cos',
       'cyclical__month_sin', 'cyclical__month_cos', 'cyclical__hour_sin',
       'cyclical__hour_cos', 'cyclical__day_sin', 'cyclical__day_cos',
       'cyclical__weekday_sin', 

# Testing

In [134]:
X_test.shape

(2178, 12)

In [135]:
X_test.head()

Unnamed: 0,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,month,day,weekday,hour
3133,3,0,1,1,33.62,40.15,59,0.0,7,19,1,11
5786,1,1,0,1,4.1,6.82,54,6.0032,1,16,0,6
5224,4,0,0,1,9.84,11.365,48,12.998,12,11,6,18
8953,3,0,1,2,29.52,34.09,62,12.998,8,15,2,10
8054,2,0,1,1,25.42,31.06,53,16.9979,6,15,4,23


In [136]:
pipe.predict(X_test)

*** TRANSFORMATION ***
Index(['season_sin', 'season_cos', 'month_sin', 'month_cos', 'hour_sin',
       'hour_cos', 'day_sin', 'day_cos', 'weekday_sin', 'weekday_cos'],
      dtype='object')


ValueError: Length mismatch: Expected axis has 10 elements, new values have 20 elements