# bikeレンタルの特徴量の解釈
https://christophm.github.io/interpretable-ml-book/limo.html

In [None]:
import os
import copy
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import sklearn
from sklearn import linear_model
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import OneHotEncoder
from pandas import Series, DataFrame
import pandas as pd
%matplotlib inline
os.getcwd()

In [None]:
# Directory that holds the bike-sharing CSV files (relative path).
data_dir = '../../data/bike'
# Names of the CSV files expected inside data_dir.
day_file, hour_file = 'day.csv', 'hour.csv'

## データの意味

- cnt:casualとregisteredのユーザーの数。回帰のターゲット
- season:spring, summer, fall or winter.
- holiday
- year: 2011 or 2012.
- dateday:2011/01/01からの日数。この特徴量は時間経過によるトレンドを導くのに使用する。
- workingday: working day or weekend.
- 天気:
    - 1 clear, few clouds, partly cloudy, cloudy
    - 2 mist + clouds, mist + broken clouds, mist + few clouds, mist
    - 3 light snow, light rain + thunderstorm + scattered clouds, light rain + scattered clouds
    - 4 heavy rain + ice pellets + thunderstorm + mist, snow + mist
- temp：Temperature in degrees Celsius.41で割り、正規化されている。
- atemp：体感温度。50で割り、正規化されている。
- hum:湿度(0 to 100). 100で割り正規化されている。
- Wind speed: km/hour.　67で割り、正規化されている。


In [None]:
# Build the path to the per-day CSV and load it into a DataFrame.
day_csv_path = os.path.join(data_dir, day_file)
df_day = pd.read_csv(day_csv_path)
# Last expression of the cell: the notebook renders the loaded table.
df_day


In [None]:
# Build the regression target and one column vector per feature from df_day.
# Every array below is shaped (n_rows, 1) so it can be passed directly to
# LinearRegression.fit as a single-feature design matrix.

# Target: total rental count.
Y = np.c_[df_day['cnt']]

# -----------------------------------------------------------
# Season: one indicator column per season code.
X_season = np.c_[df_day['season']]

enc = LabelBinarizer()
onehot_X = enc.fit_transform(X_season)

# LabelBinarizer orders its output columns by sorted class label.
# NOTE(review): assumes all four season codes occur in the data and that they
# map to spring, summer, fall, winter in ascending order -- confirm against
# the dataset documentation.
X_spring = onehot_X[:, 0].reshape(-1, 1)
X_summer = onehot_X[:, 1].reshape(-1, 1)
X_fall = onehot_X[:, 2].reshape(-1, 1)
X_winter = onehot_X[:, 3].reshape(-1, 1)

# -----------------------------------------------------------
# Holiday and working-day flags.
X_holiday = np.c_[df_day['holiday']]

# Read the dataset's own working-day flag. The previous version took column 0
# of a one-hot encoding of `holiday`, which is just (1 - holiday): it marked
# weekends as working days and carried no information beyond X_holiday.
X_workingday = np.c_[df_day['workingday']]

# -----------------------------------------------------------
# Weather situation: indicator columns for code 2 and for codes 3 or 4.
X_weather = np.c_[df_day['weathersit']]

# 1 where the weather code is 2 (mist), 0 otherwise.
X_weathersitMISTY = (X_weather == 2).astype(X_weather.dtype)

# 1 where the weather code is 3 or 4 (rain / snow / storm), 0 otherwise.
is_wheater_RAIN_SNOW_STORM = np.logical_or(X_weather == 3, X_weather == 4)
weathersitRAIN_SNOW_STORM = is_wheater_RAIN_SNOW_STORM.astype(X_weather.dtype)

# -----------------------------------------------------------
# Temperature, humidity, wind speed: undo the normalisation described in the
# data notes (temp / 41, hum / 100, windspeed / 67) to recover original units.
# The wind-speed factor was previously 47, which contradicted those notes.
X_temp = np.c_[df_day['temp'] * 41]
X_hum = np.c_[df_day['hum'] * 100]
X_windspeed = np.c_[df_day['windspeed'] * 67]

# Elapsed days: 0, 1, 2, ... one per row, used as a time-trend feature.
# NOTE(review): assumes rows are in chronological order with one row per day.
X_day = np.arange(X_temp.shape[0]).reshape(-1, 1)


In [None]:
# t-test based on the standard errors of the regression coefficients
# (placeholder: nothing is implemented in this cell yet)
#def 

In [None]:
# Feature columns in processing order; one single-feature linear regression
# is fitted against Y for each entry.
all_X = [
    X_spring,
    X_summer,
    X_fall,
    X_winter,
    X_workingday,
    X_holiday,
    X_weathersitMISTY,
    weathersitRAIN_SNOW_STORM,
    X_temp,
    X_hum,
    X_windspeed,
    X_day,
]

# Parallel lists: all_models[i] / all_weights[i] belong to all_X[i].
all_models = []
all_weights = []

for X in all_X:
    # fit() returns the estimator itself, so construction and fitting chain.
    model = linear_model.LinearRegression().fit(X, Y)
    all_weights.append(model.coef_)
    all_models.append(copy.deepcopy(model))

In [None]:
# Visual check of the data (scatter plot; currently commented out)
# fig = plt.figure()
#ax = fig.add_subplot(1,1,1)
#ax.scatter(X,Y)

#ax.set_title('first scatter plot')
#ax.set_xlabel('x')
#ax.set_ylabel('y')

#fig.show()

In [None]:
# Display the fitted coefficient of every single-feature model, in all_X order.
all_weights

In [None]:
# Linear regression
from sklearn import linear_model
model = linear_model.LinearRegression()

# Fit the target Y on the single summer indicator column; as the last
# expression of the cell, the fitted estimator is also displayed.
model.fit(X_summer, Y)

In [None]:
# Predict the target for a single sample whose only feature value is 0.5,
# using whatever estimator `model` currently refers to.
# NOTE(review): if `model` is the one fitted on X_summer (a one-hot column),
# 0.5 is not a value seen in training -- confirm this input is intended.
model.predict([[0.5]])

In [None]:
# Display the fitted coefficient(s) of the current `model`.
model.coef_