# Bike v. 2

In [153]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import Pipeline

In [154]:
bike = pd.read_csv('../data/train.csv', index_col=0, parse_dates=True)
bike.head()

Unnamed: 0_level_0,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,casual,registered,count
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2011-01-01 00:00:00,1,0,0,1,9.84,14.395,81,0.0,3,13,16
2011-01-01 01:00:00,1,0,0,1,9.02,13.635,80,0.0,8,32,40
2011-01-01 02:00:00,1,0,0,1,9.02,13.635,80,0.0,5,27,32
2011-01-01 03:00:00,1,0,0,1,9.84,14.395,75,0.0,3,10,13
2011-01-01 04:00:00,1,0,0,1,9.84,14.395,75,0.0,0,1,1


In [155]:
bike.shape

(10886, 11)

In [156]:
# splitting the data 
X = bike.drop(['casual', 'registered', 'count'], axis = 1)
y = bike['count']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

In [157]:
# transform the datetime into hour, day, month and a one-hot-encoded year 
def transform_date_in_columns(X):
    result = pd.DataFrame()
    result['hour'] = X.index.hour / 24
    result['day'] = X.index.day
    result['month'] = X.index.month / 12
    result['2012'] = X.index.year - 2011

    return result[['hour', 'day', 'month', '2012']]

In [158]:
# Column Transformer 
linear_transformer = ColumnTransformer([
    ('pass the date', FunctionTransformer(transform_date_in_columns), X_train.columns),
    ('weather onehot', OneHotEncoder(sparse=False, handle_unknown='ignore'), ['weather', 'season']),
    ('passthrough', 'passthrough', ['workingday', 'holiday' ]),
    ('scale', MinMaxScaler(), ['temp', 'atemp', 'humidity', 'windspeed']),
])

In [159]:
# pipeline for transforming the X_train
def train_and_fit_X(X, fit = False):
    if fit: 
        linear_transformer.fit(X)
    X_trans = linear_transformer.transform(X)
    X_trans = pd.DataFrame(X_trans)
    
    X_trans.columns = ['hour', 'day', 'month', '2012',
                       'weather1', 'weather2', 'weather3', 'weather4',
                       'season1', 'season2', 'season3', 'season4',
                       'workingday', 'holiday', 'temp', 'atemp', 'humidity', 'windspeed'
                      ]
    
   
    return X_trans
#data = X.join(pd.Series(y, name='prices'))

In [160]:
X_train_trans = train_and_fit_X(X_train, True)
X_train_trans.head(3)

Unnamed: 0,hour,day,month,2012,weather1,weather2,weather3,weather4,season1,season2,season3,season4,workingday,holiday,temp,atemp,humidity,windspeed
0,0.208333,6.0,0.583333,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.673469,0.694932,0.83,0.105325
1,0.666667,4.0,0.666667,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.897959,0.898311,0.39,0.350888
2,0.625,11.0,0.583333,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.795918,0.779617,0.41,0.298225


In [161]:
#from sklearn.linear_model import LinearRegression as OLS
from statsmodels.regression.linear_model import OLS

In [162]:
X_train_trans.index = X_train.index
linear_reg = OLS(y_train, X_train_trans)
#y_train.shape, X_train_trans.shape

In [163]:
result = linear_reg.fit()

In [164]:
result.summary()

0,1,2,3
Dep. Variable:,count,R-squared:,0.396
Model:,OLS,Adj. R-squared:,0.395
Method:,Least Squares,F-statistic:,356.6
Date:,"Thu, 15 Oct 2020",Prob (F-statistic):,0.0
Time:,16:30:14,Log-Likelihood:,-55427.0
No. Observations:,8708,AIC:,110900.0
Df Residuals:,8691,BIC:,111000.0
Df Model:,16,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
hour,185.8777,5.593,33.232,0.000,174.914,196.842
day,0.1463,0.276,0.529,0.597,-0.395,0.688
month,113.3858,22.553,5.028,0.000,69.177,157.594
2012,80.7323,3.047,26.492,0.000,74.759,86.706
weather1,-30.6095,19.352,-1.582,0.114,-68.544,7.325
weather2,-24.8398,19.650,-1.264,0.206,-63.358,13.678
weather3,-60.1826,20.286,-2.967,0.003,-99.948,-20.418
weather4,30.6308,123.498,0.248,0.804,-211.455,272.716
season1,-2.8292,18.681,-0.151,0.880,-39.448,33.790

0,1,2,3
Omnibus:,1724.706,Durbin-Watson:,2.002
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3351.685
Skew:,1.204,Prob(JB):,0.0
Kurtosis:,4.853,Cond. No.,8.87e+16


### Through away:

- day
- season 1 - 4
- atemp 


In [165]:
X_train_trans_1 = X_train_trans.drop(['day', 'season1', 'season2', 'season3', 'season4', 'atemp'], axis = 1)
X_train_trans_1.head()

Unnamed: 0_level_0,hour,month,2012,weather1,weather2,weather3,weather4,workingday,holiday,temp,humidity,windspeed
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2011-07-06 05:00:00,0.208333,0.583333,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.673469,0.83,0.105325
2012-08-04 16:00:00,0.666667,0.666667,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.897959,0.39,0.350888
2012-07-11 15:00:00,0.625,0.583333,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.795918,0.41,0.298225
2011-04-10 04:00:00,0.166667,0.333333,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.346939,0.93,0.12284
2011-11-19 10:00:00,0.416667,0.916667,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.306122,0.45,0.298225


In [166]:
linear_reg_1 = OLS(y_train, X_train_trans_1)
result_1 = linear_reg_1.fit()

result_1.summary()

0,1,2,3
Dep. Variable:,count,R-squared:,0.388
Model:,OLS,Adj. R-squared:,0.387
Method:,Least Squares,F-statistic:,501.1
Date:,"Thu, 15 Oct 2020",Prob (F-statistic):,0.0
Time:,16:30:16,Log-Likelihood:,-55487.0
No. Observations:,8708,AIC:,111000.0
Df Residuals:,8696,BIC:,111100.0
Df Model:,11,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
hour,190.6797,5.578,34.185,0.000,179.746,201.614
month,91.5339,5.672,16.138,0.000,80.415,102.652
2012,82.0584,3.058,26.835,0.000,76.064,88.053
weather1,-0.2647,9.317,-0.028,0.977,-18.529,17.999
weather2,8.0890,10.349,0.782,0.434,-12.197,28.375
weather3,-28.9916,12.364,-2.345,0.019,-53.228,-4.755
weather4,51.4521,142.194,0.362,0.717,-227.281,330.185
workingday,0.4783,3.372,0.142,0.887,-6.132,7.088
holiday,-12.8104,9.331,-1.373,0.170,-31.101,5.480

0,1,2,3
Omnibus:,1696.914,Durbin-Watson:,2.002
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3249.221
Skew:,1.194,Prob(JB):,0.0
Kurtosis:,4.803,Cond. No.,149.0


#### thinking about data:
- when the baseline model is weekend -> working day is not significant

## interaction terms 

In [167]:
X_train_trans_2 = X_train_trans_1[:]

In [168]:
X_train_trans_2['hour_on_a_workingday'] = X_train_trans_2.hour * X_train_trans_2.workingday

X_train_trans_2['hour_on_a_not_workingday'] = X_train_trans_2.hour[X_train_trans_2.workingday == 0]
X_train_trans_2['hour_on_a_not_workingday'] = X_train_trans_2['hour_on_a_not_workingday'].fillna(0)
X_train_trans_2['hour**2'] = X_train_trans_2.hour**2
X_train_trans_2['hour**3'] = X_train_trans_2.hour**3
X_train
X_train_trans_2 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train_trans_2['hour_on_a_workingday'] = X_train_trans_2.hour * X_train_trans_2.workingday
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train_trans_2['hour_on_a_not_workingday'] = X_train_trans_2.hour[X_train_trans_2.workingday == 0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train_trans

Unnamed: 0_level_0,hour,month,2012,weather1,weather2,weather3,weather4,workingday,holiday,temp,humidity,windspeed,hour_on_a_workingday,hour_on_a_not_workingday,hour**2,hour**3
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2011-07-06 05:00:00,0.208333,0.583333,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.673469,0.83,0.105325,0.208333,0.000000,0.043403,0.009042
2012-08-04 16:00:00,0.666667,0.666667,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.897959,0.39,0.350888,0.000000,0.666667,0.444444,0.296296
2012-07-11 15:00:00,0.625000,0.583333,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.795918,0.41,0.298225,0.625000,0.000000,0.390625,0.244141
2011-04-10 04:00:00,0.166667,0.333333,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.346939,0.93,0.122840,0.000000,0.166667,0.027778,0.004630
2011-11-19 10:00:00,0.416667,0.916667,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.306122,0.45,0.298225,0.000000,0.416667,0.173611,0.072338
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2012-01-14 02:00:00,0.083333,0.083333,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.47,0.193018,0.000000,0.083333,0.006944,0.000579
2011-12-10 09:00:00,0.375000,1.000000,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.265306,0.61,0.333373,0.000000,0.375000,0.140625,0.052734
2011-12-18 16:00:00,0.666667,1.000000,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.265306,0.48,0.298225,0.000000,0.666667,0.444444,0.296296
2011-02-19 07:00:00,0.291667,0.166667,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.367347,0.17,0.614083,0.000000,0.291667,0.085069,0.024812


In [169]:
linear_reg_2 = OLS(y_train, X_train_trans_2)
result_2 = linear_reg_2.fit()

result_2.summary()

0,1,2,3
Dep. Variable:,count,R-squared:,0.534
Model:,OLS,Adj. R-squared:,0.533
Method:,Least Squares,F-statistic:,711.6
Date:,"Thu, 15 Oct 2020",Prob (F-statistic):,0.0
Time:,16:30:18,Log-Likelihood:,-54300.0
No. Observations:,8708,AIC:,108600.0
Df Residuals:,8693,BIC:,108700.0
Df Model:,14,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
hour,-91.5707,28.565,-3.206,0.001,-147.566,-35.576
month,77.6997,4.958,15.673,0.000,67.982,87.418
2012,87.5304,2.671,32.770,0.000,82.294,92.766
weather1,-104.4780,9.440,-11.068,0.000,-122.983,-85.973
weather2,-120.9045,10.329,-11.705,0.000,-141.152,-100.657
weather3,-175.2545,11.936,-14.682,0.000,-198.653,-151.856
weather4,-205.6573,124.221,-1.656,0.098,-449.161,37.846
workingday,-7.7465,5.575,-1.390,0.165,-18.675,3.182
holiday,-11.6612,8.144,-1.432,0.152,-27.624,4.302

0,1,2,3
Omnibus:,2094.443,Durbin-Watson:,2.017
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4994.477
Skew:,1.339,Prob(JB):,0.0
Kurtosis:,5.568,Cond. No.,1.73e+16


# testing with test_data

In [170]:
X_train_trans_2.head()

Unnamed: 0_level_0,hour,month,2012,weather1,weather2,weather3,weather4,workingday,holiday,temp,humidity,windspeed,hour_on_a_workingday,hour_on_a_not_workingday,hour**2,hour**3
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2011-07-06 05:00:00,0.208333,0.583333,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.673469,0.83,0.105325,0.208333,0.0,0.043403,0.009042
2012-08-04 16:00:00,0.666667,0.666667,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.897959,0.39,0.350888,0.0,0.666667,0.444444,0.296296
2012-07-11 15:00:00,0.625,0.583333,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.795918,0.41,0.298225,0.625,0.0,0.390625,0.244141
2011-04-10 04:00:00,0.166667,0.333333,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.346939,0.93,0.12284,0.0,0.166667,0.027778,0.00463
2011-11-19 10:00:00,0.416667,0.916667,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.306122,0.45,0.298225,0.0,0.416667,0.173611,0.072338


In [171]:
X_test.head()

Unnamed: 0_level_0,season,holiday,workingday,weather,temp,atemp,humidity,windspeed
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2011-07-19 11:00:00,3,0,1,1,33.62,40.15,59,0.0
2012-01-16 06:00:00,1,1,0,1,4.1,6.82,54,6.0032
2011-12-11 18:00:00,4,0,0,1,9.84,11.365,48,12.998
2012-08-15 10:00:00,3,0,1,2,29.52,34.09,62,12.998
2012-06-15 23:00:00,2,0,1,1,25.42,31.06,53,16.9979


# TODO
- workingday and holiday interation -> not relevant
- reg on boxcox

In [172]:
X_train[(X_train.holiday == 1) & (X_train.workingday == 1)]

Unnamed: 0_level_0,season,holiday,workingday,weather,temp,atemp,humidity,windspeed
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1


In [173]:
from sklearn.preprocessing import PowerTransformer

In [174]:
pt = PowerTransformer(method='box-cox', standardize=False)

In [175]:
y_boxcox = y_train

In [176]:
linear_reg_3 = OLS(y_boxcox, X_train_trans_2)
result_3 = linear_reg_3.fit()

result_3.summary()

0,1,2,3
Dep. Variable:,count,R-squared:,0.534
Model:,OLS,Adj. R-squared:,0.533
Method:,Least Squares,F-statistic:,711.6
Date:,"Thu, 15 Oct 2020",Prob (F-statistic):,0.0
Time:,16:30:23,Log-Likelihood:,-54300.0
No. Observations:,8708,AIC:,108600.0
Df Residuals:,8693,BIC:,108700.0
Df Model:,14,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
hour,-91.5707,28.565,-3.206,0.001,-147.566,-35.576
month,77.6997,4.958,15.673,0.000,67.982,87.418
2012,87.5304,2.671,32.770,0.000,82.294,92.766
weather1,-104.4780,9.440,-11.068,0.000,-122.983,-85.973
weather2,-120.9045,10.329,-11.705,0.000,-141.152,-100.657
weather3,-175.2545,11.936,-14.682,0.000,-198.653,-151.856
weather4,-205.6573,124.221,-1.656,0.098,-449.161,37.846
workingday,-7.7465,5.575,-1.390,0.165,-18.675,3.182
holiday,-11.6612,8.144,-1.432,0.152,-27.624,4.302

0,1,2,3
Omnibus:,2094.443,Durbin-Watson:,2.017
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4994.477
Skew:,1.339,Prob(JB):,0.0
Kurtosis:,5.568,Cond. No.,1.73e+16


In [337]:
# transform the datetime into hour, day, month and a one-hot-encoded year 
def transform_date_in_columns2(X):
    X_copy = X.iloc[:,:]
    result = pd.DataFrame()

    result['hour'] = X.index.hour / 24
    result['month'] = X.index.month / 12
    result['2012'] = X.index.year - 2011
    result['hour**2'] = result.hour ** 2
    result['hour**3'] = result.hour ** 3
    
    result['hour_working_day'] = (X.workingday * X.index.hour/24).values
    return result

In [338]:
def square(df):
    result = df[df.columns[0]] ** 2
    return result 
    
    '''def square_column(df):
    df['new'] = df[df.columns[0]] ** 2
    return df[['new']]
    '''
    

In [339]:
# Column Transformer 
linear_transformer2 = ColumnTransformer([
    ('pass the date', FunctionTransformer(transform_date_in_columns2), X_train.columns),
    #('weather onehot', OneHotEncoder(sparse=False, handle_unknown='ignore'), ['weather']),
    #('passthrough', 'passthrough', ['workingday', 'holiday' ]),
    #('scale', MinMaxScaler(), ['temp',  'humidity', 'windspeed']),
])

In [340]:
# pipeline for transforming the X_train
def train_and_fit_X_2(X, fit = False):
    if fit: 
        linear_transformer2.fit(X)
    X_trans = linear_transformer2.transform(X)
    X_trans = pd.DataFrame(X_trans)
    
    X_trans.columns = ['hour', 'month', '2012', 'hour**2', 'hour**3', 
                       'workhour',
                      # 'weather1', 'weather2', 'weather3', 'weather4',
                      # 'workingday', 'holiday', 'temp', 'humidity', 'windspeed'
                      ]
    
   
    return X_trans

In [341]:
X_train_trans_2.columns, X_test.columns

(Index(['hour', 'month', '2012', 'weather1', 'weather2', 'weather3', 'weather4',
        'workingday', 'holiday', 'temp', 'humidity', 'windspeed',
        'hour_on_a_workingday', 'hour_on_a_not_workingday', 'hour**2',
        'hour**3'],
       dtype='object'),
 Index(['season', 'holiday', 'workingday', 'weather', 'temp', 'atemp',
        'humidity', 'windspeed'],
       dtype='object'))

In [342]:
X_train_trans_3 = train_and_fit_X_2(X_train, True)

In [343]:
X_train_trans_3

Unnamed: 0,hour,month,2012,hour**2,hour**3,workhour
0,0.208333,0.583333,0.0,0.043403,0.009042,0.208333
1,0.666667,0.666667,1.0,0.444444,0.296296,0.000000
2,0.625000,0.583333,1.0,0.390625,0.244141,0.625000
3,0.166667,0.333333,0.0,0.027778,0.004630,0.000000
4,0.416667,0.916667,0.0,0.173611,0.072338,0.000000
...,...,...,...,...,...,...
8703,0.083333,0.083333,1.0,0.006944,0.000579,0.000000
8704,0.375000,1.000000,0.0,0.140625,0.052734,0.000000
8705,0.666667,1.000000,0.0,0.444444,0.296296,0.000000
8706,0.291667,0.166667,0.0,0.085069,0.024812,0.000000


In [334]:
X_train_trans_2.head(1)

Unnamed: 0_level_0,hour,month,2012,weather1,weather2,weather3,weather4,workingday,holiday,temp,humidity,windspeed,hour_on_a_workingday,hour_on_a_not_workingday,hour**2,hour**3
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2011-07-06 05:00:00,0.208333,0.583333,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.673469,0.83,0.105325,0.208333,0.0,0.043403,0.009042


In [335]:
X_train.head()

Unnamed: 0_level_0,season,holiday,workingday,weather,temp,atemp,humidity,windspeed
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2011-07-06 05:00:00,3,0,1,1,27.88,31.82,83,6.0032
2012-08-04 16:00:00,3,0,0,1,36.9,40.91,39,19.9995
2012-07-11 15:00:00,3,0,1,1,32.8,35.605,41,16.9979
2011-04-10 04:00:00,2,0,0,2,14.76,18.18,93,7.0015
2011-11-19 10:00:00,4,0,0,1,13.12,15.15,45,16.9979


In [336]:
X_train.workingday * X_train.index.hour / 24

datetime
2011-07-06 05:00:00    0.208333
2012-08-04 16:00:00    0.000000
2012-07-11 15:00:00    0.625000
2011-04-10 04:00:00    0.000000
2011-11-19 10:00:00    0.000000
                         ...   
2012-01-14 02:00:00    0.000000
2011-12-10 09:00:00    0.000000
2011-12-18 16:00:00    0.000000
2011-02-19 07:00:00    0.000000
2012-05-02 07:00:00    0.291667
Length: 8708, dtype: float64

In [326]:
X_train.index.hour / 24

Float64Index([0.20833333333333334,  0.6666666666666666,               0.625,
              0.16666666666666666,  0.4166666666666667,  0.7916666666666666,
               0.8333333333333334,                 0.5,  0.9166666666666666,
              0.16666666666666666,
              ...
                            0.125,  0.5416666666666666,  0.4583333333333333,
                              0.5,  0.2916666666666667, 0.08333333333333333,
                            0.375,  0.6666666666666666,  0.2916666666666667,
               0.2916666666666667],
             dtype='float64', name='datetime', length=8708)

In [344]:
(X.workingday * X.index.hour/24).values


array([0.        , 0.        , 0.        , ..., 0.875     , 0.91666667,
       0.95833333])

In [345]:
X.index.hour

Int64Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
            ...
            14, 15, 16, 17, 18, 19, 20, 21, 22, 23],
           dtype='int64', name='datetime', length=10886)

In [346]:
X.workingday

datetime
2011-01-01 00:00:00    0
2011-01-01 01:00:00    0
2011-01-01 02:00:00    0
2011-01-01 03:00:00    0
2011-01-01 04:00:00    0
                      ..
2012-12-19 19:00:00    1
2012-12-19 20:00:00    1
2012-12-19 21:00:00    1
2012-12-19 22:00:00    1
2012-12-19 23:00:00    1
Name: workingday, Length: 10886, dtype: int64