In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input tree and print the full path of every file in it.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(root, name) for name in names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

### 各个数据字段的解释 Explanation of each data field
```
Metadata:
"timestamp" - timestamp field for grouping the data
"cnt" - the count of new bike shares
"t1" - real temperature in °C
"t2" - "feels like" temperature in °C
"hum" - humidity in percent
"wind_speed" - wind speed in km/h
"weather_code" - category of the weather
"is_holiday" - boolean field: 1 holiday / 0 non-holiday
"is_weekend" - boolean field: 1 if the day is a weekend
"season" - category field, meteorological seasons: 0-spring; 1-summer; 2-fall; 3-winter.

"weather_code" category description:
1 = Clear ; mostly clear but have some values with haze/fog/patches of fog/ fog in vicinity  
2 = scattered clouds / few clouds   
3 = Broken clouds   
4 = Cloudy    
7 = Rain/ light Rain shower/ Light rain    
10 = rain with thunderstorm    
26 = snowfall    
94 = Freezing Fog
```

### 导入package，设定参数
### Import Package,Setting parameters

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
import matplotlib as mpl
from sklearn.model_selection import train_test_split
from pandas.plotting import register_matplotlib_converters
from matplotlib.font_manager import FontProperties

%matplotlib inline
%config InlineBackend.figure_format='retina'

register_matplotlib_converters()

# Install the WenQuanYi Zen Hei font (needed for Chinese plot titles), then list the
# wqy font directory / installed zh fonts to check that wqy-zenhei.ttc is present.
!apt-get install ttf-wqy-zenhei -y
!ls /usr/share/fonts/truetype/wqy
!fc-list :lang=zh

# Font handle passed to titles so Chinese (or Japanese) text renders correctly.
font_set = FontProperties(fname="/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc", size=20) 

# Global seaborn theme (positional args are context, style, palette).
sns.set('poster', 'whitegrid','colorblind', font_scale=0.6, 
        rc={"lines.linewidth": 1, 'grid.linestyle': '--'})
# sns.set_palette("winter", 8, 0.5)  # optional: switch the output colour palette

# plt.style.use('fivethirtyeight')

# Default figure size for every plot that does not set its own.
rcParams['figure.figsize'] = 15, 8

# Seed NumPy and TensorFlow so runs are repeatable.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

### 导入数据
### import data

In [None]:
# Use the "timestamp" column as the index; parse_dates=True parses that index into datetimes.
DATA_PATH = "/kaggle/input/london-bike-sharing-dataset/london_merged.csv"
df = pd.read_csv(DATA_PATH, index_col="timestamp", parse_dates=True)

### EDA（Exploratory Data Analysis）

In [None]:
# Summarize the frame: index range, column dtypes and non-null counts.
df.info()

In [None]:
# Null check: print the per-column null counts, then draw the null mask as a heatmap.
null_mask = df.isnull()
plt.figure(figsize=(16, 6))
print(null_mask.sum())
sns.heatmap(null_mask, cmap="viridis")
plt.suptitle(
    "各个字段空值检测(Null value detection in each fields)",
    fontproperties=font_set,
)
plt.show()

In [None]:
# Peek at the first rows of the loaded data.
df.head()

In [None]:
# Derive calendar features from the datetime index.
df = df.assign(
    hour=df.index.hour,
    day_of_month=df.index.day,
    day_of_week=df.index.dayofweek,
    month=df.index.month,
)

In [None]:
# Raw cnt series over the whole period.
sns.lineplot(data=df, x=df.index, y="cnt")
plt.grid(True)
plt.xticks(rotation=270)
plt.show()

In [None]:
# Preview again, now with the hour / day_of_month / day_of_week / month columns.
df.head()

In [None]:
# Aggregate the rows into daily totals (every column is summed).
df_by_day1=df.resample('D').sum()
df_by_day1.head()

In [None]:
# Daily total cnt over time.
sns.lineplot(data=df_by_day1, x=df_by_day1.index, y='cnt')
plt.show()

In [None]:
# The daily view is too dense to read, so aggregate by month instead.
# The monthly totals show a clear seasonal cycle: usage is highest around July-August
# and lowest in winter, i.e. it is strongly tied to weather and season.
df_by_month = df.resample('M').sum()
sns.lineplot(x=df_by_month.index, y="cnt", data=df_by_month);

In [None]:
# Hourly usage profile: overall, split by holiday (point + line), by weekend and by season.
fig, (ax1, ax2, ax3, ax4, ax5) = plt.subplots(nrows=5, figsize=(18, 28))

sns.pointplot(x='hour', y='cnt', data=df, ax=ax1)
sns.pointplot(x='hour', y='cnt', hue='is_holiday', data=df, ax=ax2)
sns.lineplot(x='hour', y='cnt', hue='is_holiday', color="r", data=df, ax=ax3)
sns.pointplot(x='hour', y='cnt', hue='is_weekend', data=df, ax=ax4)
sns.pointplot(x='hour', y='cnt', hue='season', data=df, ax=ax5)
plt.show()

In [None]:
# Open a fresh 10x8 figure for this chart. The previous code resized `fig`, which is the
# figure left over from the preceding cell, so the size never applied to this plot.
plt.figure(figsize=(10, 8))
# Number of rows per season, split by the holiday flag.
sns.countplot(data=df, x="season", hue="is_holiday")

In [None]:
# Mean cnt for each distinct humidity value.
plt.figure(figsize=(20, 6))
sns.pointplot(x="hum", y="cnt", color="black", data=df)
plt.xticks(rotation=90)
plt.title(
    "湿度和单车使用量的关系（hum to bicycle share usage relation）",
    fontproperties=font_set,
)
plt.show()

In [None]:
# Mean cnt for each distinct wind-speed value.
plt.figure(figsize=(16, 6))
sns.pointplot(x="wind_speed", y="cnt", data=df)
plt.xticks(rotation=90)
plt.title(
    "风速和单车使用量的关系(wind speed to bicycle share usage relation)",
    fontproperties=font_set,
)
plt.show()

In [None]:
# Mean cnt for each weather code.
plt.figure(figsize=(16, 6))
sns.pointplot(x="weather_code", y="cnt", data=df)
plt.xticks(rotation=90)
plt.title(
    "天气和单车使用量的关系(weather to bicycle share usage relation)",
    fontproperties=font_set,
)
plt.show()

In [None]:
# Hourly usage profile, one line per weather code.
plt.figure(figsize=(16, 6))
sns.pointplot(data=df, x="hour", y="cnt", hue="weather_code")
plt.title(
    "不同天气状况下单车使用量的关系（bicycle share usage volume in different weather）",
    fontproperties=font_set,
)
plt.show()

In [None]:
# Recode humidity into 10-point bands (1 = 20-30, 2 = 30-40, ..., 7 = 80-90, 8 = above 90)
# so it can be used as a categorical hue. This is a single vectorized pass; the earlier
# per-row .loc loop did several label lookups per row and was needlessly slow.
df_hum = df.copy()
hum_raw = df_hum['hum']
# Inclusive upper edges of bands 1..7. With right=True, np.digitize returns k such that
# edges[k-1] < value <= edges[k], so adding 1 yields the band codes 1..8.
hum_band = np.digitize(hum_raw, [30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0], right=True) + 1
# As before, only readings >= 20 are recoded; anything else (including NaN) keeps its raw value.
df_hum['hum'] = hum_raw.mask(hum_raw >= 20.0, hum_band)

plt.figure(figsize=(16,6))
sns.pointplot(x=df_hum.hour,y=df_hum.cnt,data=df_hum,hue=df_hum.hum)
plt.title("不同湿度状况下单车使用量的关系(bicycle share usage volume in different hum)",fontproperties=font_set)
plt.show()

In [None]:
# Recode wind speed into 5 km/h bands (1 = 0-5, 2 = 5-10, ..., 10 = 45-50, 11 = above 50)
# so it can be used as a categorical hue. This is a single vectorized pass; the earlier
# per-row .loc loop did several label lookups per row and was needlessly slow.
df_wind = df.copy()
wind_raw = df_wind['wind_speed']
# Inclusive upper edges of bands 1..10. With right=True, np.digitize returns k such that
# edges[k-1] < value <= edges[k], so adding 1 yields the band codes 1..11.
wind_band = np.digitize(
    wind_raw,
    [5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0],
    right=True,
) + 1
# As before, only readings >= 0 are recoded; anything else (including NaN) keeps its raw value.
df_wind['wind_speed'] = wind_raw.mask(wind_raw >= 0.0, wind_band)

plt.figure(figsize=(16,6))
sns.pointplot(x=df_wind.hour,y=df_wind.cnt,data=df_wind,hue=df_wind.wind_speed)
plt.title("不同风速状况下单车使用量的关系(bicycle share usage volume in different wind speed)",fontproperties=font_set)
plt.show()

In [None]:
# Number of rows per day of week, split by weather code.
plt.figure(figsize=(16, 6))
sns.countplot(x="day_of_week", hue="weather_code", palette="viridis", data=df)
plt.legend(loc="best")
plt.title(
    "一周每天，不同天气状况下单车使用量的关系(In day of week, bicycle share usage volume in different weather)",
    fontproperties=font_set,
)
plt.show()

In [None]:
# Distribution of cnt for each hour of the day.
plt.figure(figsize=(16, 6))
sns.boxplot(x="hour", y="cnt", data=df)
plt.title(
    "通过箱图数据分布状态可以清晰表现出来，中位数，异常值，最大值，最小值，分位值(data analysis status could be exact show by boxplot, include middle value,max value,min value,outlier value)",
    fontproperties=font_set,
)
plt.show()

In [None]:
# Distribution of cnt for each day of the week.
plt.figure(figsize=(16, 6))
sns.boxplot(x="day_of_week", y="cnt", data=df)
plt.show()

In [None]:
# Distribution of cnt for each day of the month.
plt.figure(figsize=(16, 6))
sns.boxplot(x="day_of_month", y="cnt", data=df)
plt.show()

In [None]:
# Distribution of cnt for each calendar month.
plt.figure(figsize=(16, 6))
sns.boxplot(x="month", y="cnt", data=df)
plt.title(
    "从图中可以看出4-10月份单车使用量多一些，冬季单车使用量低一些(from chart known that share bicycle more used in April until October, but small used in winter)",
    fontproperties=font_set,
)
plt.show()

In [None]:
# Distribution of cnt per day of month, split by the holiday flag.
plt.figure(figsize=(16, 6))
sns.boxplot(x="day_of_month", y="cnt", hue="is_holiday", data=df)
plt.title(
    "假日和非假日的单车使用情况（share bicycle usage in holiday and not holiday）",
    fontproperties=font_set,
)
plt.show()

### 查看一下温度和单车使用数量的关系
### view temperature with share bicycle usage's relation

#### 1.查看一下温度在6.5度的时候前100个数据的t1，t2和单车使用数量的关系图
#### 1. View how t1, t2 and bike usage relate in the first 100 rows (rows where the temperature is 6.5)

In [None]:
# Rows among the first 100 whose real temperature t1 equals 6.5.
df.iloc[:100].loc[lambda head: head['t1'] == 6.5]

In [None]:
# Rows among the first 100 whose "feels like" temperature t2 equals 2.5.
df.iloc[:100].loc[lambda head: head['t2'] == 2.5]

In [None]:
# cnt for the rows among the first 100 whose "feels like" temperature t2 equals 2.5
# (the filter is on t2, not t1).
# Author's reading of the chart: usage jumps at around 7 o'clock and ranges roughly from 150 to 1100.
df[:100][(df[:100]['t2']==2.5)]['cnt'].plot()
plt.grid(True)
plt.show()

In [None]:
sns.set(style="whitegrid")
# Create the two-panel figure at 150 dpi directly. The earlier separate plt.figure(dpi=150)
# call only produced an extra, empty figure and its dpi never reached the subplots.
fig, (ax1, ax2) = plt.subplots(nrows=2, dpi=150)
# Mean cnt per distinct t1 (top) and t2 (bottom) value within the first 100 rows.
sns.pointplot(data=df[:100], x='t1', y='cnt', ax=ax1)
sns.pointplot(data=df[:100], x='t2', y='cnt', ax=ax2)
plt.show()

#### 2.将t1所有的温度分组然后显示在这个温度时候单车使用数量的图表
#### 2. Group the data by every t1 temperature value and chart the bike usage at each temperature

In [None]:
# Sum every column per distinct t1 (real temperature) value and chart the cnt totals.
df_t1 = df.groupby(['t1']).sum()
print(df_t1.head(50))
sns.barplot(data=df_t1, x=df_t1.index, y='cnt')
plt.xticks(rotation=270)
# The keyword must be the lowercase `fontproperties`: Matplotlib 3.5 removed support for
# uppercase property names, so `FontProperties=` raises AttributeError there.
plt.title("可以看出T1温度的时候，在6度到22度之间的时候单车使用量非常高(could be seen in T1 temperature, share bicycle usage volume more in 6 until 22 temperature)",fontproperties=font_set)
plt.show()

#### 3.将t2所有的温度分组然后显示在这个温度时候单车使用数量的图表
#### 3. Group the data by every t2 temperature value and chart the bike usage at each temperature

In [None]:
# Sum every column per distinct t2 ("feels like" temperature) value and chart the cnt totals.
df_t2 = df.groupby(['t2']).sum()
print(df_t2.head(50))
sns.barplot(data=df_t2, x=df_t2.index, y='cnt')
plt.xticks(rotation=270)
# The keyword must be the lowercase `fontproperties`: Matplotlib 3.5 removed support for
# uppercase property names, so `FontProperties=` raises AttributeError there.
plt.title("可以看出T2温度的时候，在11度到21度之间的时候单车使用量非常高(could be seen in T2 temperature, share bicycle usage volume more in 11 until 21 temperature)",fontproperties=font_set)
plt.show()

In [None]:
# Mean cnt per day of week: overall (top) and split by season (bottom).
fig, (ax1, ax2) = plt.subplots(nrows=2)
fig.set_size_inches(18, 14)
sns.pointplot(data=df, x='day_of_week', y='cnt', ax=ax1)
sns.pointplot(data=df, x='day_of_week', y='cnt', hue='season', ax=ax2);
# The keyword must be the lowercase `fontproperties`: Matplotlib 3.5 removed support for
# uppercase property names, so `FontProperties=` raises AttributeError there.
plt.suptitle("一周中周日，周1，周2，周3，周4单车的使用量都比较多，周五和周六明显下降。按照季节划分夏季，秋季，春季使用的量很大，冬季使用量降低。(in sunday,monday,tuesday,wednesday,thursday that share bicycle more usage, obvious decrease in friday,saturday)",fontproperties=font_set)
plt.show()

In [None]:
# Open a fresh 18x14 figure for this chart. The previous code resized `fig`, which is the
# figure left over from the preceding cell, so the size never applied to this plot.
plt.figure(figsize=(18, 14))
# Sum every column per calendar month and chart the cnt totals.
df_month = df.groupby(['month']).sum()
sns.barplot(data=df_month, x=df_month.index, y='cnt')
# The title literal was previously split across two source lines, which is a SyntaxError
# for a normal quoted string. It is now a single line.
plt.title("每个月的单车使用总量表示，可以看到4-10月使用量高，11月到2月使用量低（share bicycle usage total volume of each month show, could you seen April until October usage volume high, november until Feb usage volume down）",fontproperties=font_set)
plt.show()

### 4.weather code categorical 查看天气因素和单车使用量的关系
### 4.weather code categorical view weather factor with share bicycle usage volume's relation
1 = Clear ; mostly clear but have some values with haze/fog/patches of fog/ fog in vicinity  
2 = scattered clouds / few clouds   
3 = Broken clouds   
4 = Cloudy    
7 = Rain/ light Rain shower/ Light rain    
10 = rain with thunderstorm    
26 = snowfall    
94 = Freezing Fog

In [None]:
# Readable label for each weather_code value.
index_str = {
    1.0: 'Clear',
    2.0: 'Few Clouds',
    3.0: 'Broken Clouds',
    4.0: 'Cloudy',
    7.0: 'Light rain',
    10.0: 'Rain With Thunderstorm',
    26.0: 'Snowfall',
    94.0: 'Freezing Fog',
}
# Sum every column per weather code, order by total cnt (largest first), then relabel the index.
df_weather = (
    df.groupby(['weather_code'])
    .sum()
    .sort_values('cnt', ascending=False)
    .rename(index=index_str)
)
df_weather

In [None]:
# Open a fresh 10x8 figure for this chart. The previous code resized `fig`, which is the
# figure left over from an earlier cell, so the size never applied to this plot.
plt.figure(figsize=(10, 8))
# Total cnt per weather category.
sns.barplot(data=df_weather, x=df_weather.index, y='cnt')
plt.suptitle("可以看出Clear，Few Clouds，Borken Clouds的时候单车使用量很大，Light Rain，Cloudy的时候单车使用量降低",fontproperties=font_set)
plt.show()

### 5.查看风速和单车使用量的关系
### 5.view wind speed with share bicycle usage volume's relation

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the raw wind speed.
df['wind_speed'].describe()

风速分为4类  
Wind speed is divided into four categories:
```
1类 0-8.0
2类 8.0-24.0
3类 24.0-32.0
4类 32.0-Max
```

In [None]:
# Recode wind speed into the four categories listed above:
# 1 = 0-8, 2 = 8-24, 3 = 24-32, 4 = above 32 (upper edges inclusive).
# This is a single vectorized pass; the earlier per-row .loc loop was needlessly slow.
df_wind = df.copy()
wind_raw = df_wind['wind_speed']
# With right=True, np.digitize returns k such that edges[k-1] < value <= edges[k],
# so adding 1 yields the category codes 1..4.
wind_cat = np.digitize(wind_raw, [8.0, 24.0, 32.0], right=True) + 1
# As before, only readings >= 0 are recoded; anything else (including NaN) keeps its raw value.
df_wind['wind_speed'] = wind_raw.mask(wind_raw >= 0, wind_cat)

In [None]:
# Add the recoded wind_speed values to df as a new column (raw wind_speed is kept for now).
df['wind_speed_1']=df_wind['wind_speed']

In [None]:
# Readable label for each wind-speed category code.
index_str = {
    1: '0<=wind_speed<=8',
    2: '8<wind_speed<=24',
    3: '24<wind_speed<=32',
    4: '32<wind_speed',
}
# Sum every column per category, order by total cnt (largest first), then relabel the index.
df_wind = (
    df_wind.groupby(['wind_speed'])
    .sum()
    .sort_values('cnt', ascending=False)
    .rename(index=index_str)
)
df_wind

In [None]:
# Open a fresh 10x8 figure for this chart. The previous code resized `fig`, which is the
# figure left over from an earlier cell, so the size never applied to this plot.
plt.figure(figsize=(10, 8))
# Total cnt per wind-speed category.
sns.barplot(data=df_wind, x=df_wind.index, y='cnt')
plt.suptitle("可以看出风速在8-24的时候单车使用量很大，在其他的时候单车使用量降低",fontproperties=font_set)
plt.show()

### 6.查看湿度和单车使用量的关系
### 6.view hum with share bicycle usage volume's relation

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the raw humidity.
df['hum'].describe()

湿度分为4类   
Humidity is divided into four categories:
```
1类 20.0-44.0
2类 44.0-58.0
3类 58.0-86.0
4类 86.0-100.0
```

In [None]:
# Recode humidity into the four categories listed above:
# 1 = 20-44, 2 = 44-58, 3 = 58-86, 4 = above 86 (upper edges inclusive).
# This is a single vectorized pass; the earlier per-row .loc loop was needlessly slow.
df_hum = df.copy()
hum_raw = df_hum['hum']
# With right=True, np.digitize returns k such that edges[k-1] < value <= edges[k],
# so adding 1 yields the category codes 1..4.
hum_cat = np.digitize(hum_raw, [44.0, 58.0, 86.0], right=True) + 1
# As before, only readings >= 20 are recoded; anything else (including NaN) keeps its raw value.
df_hum['hum'] = hum_raw.mask(hum_raw >= 20.0, hum_cat)

In [None]:
# Add the recoded hum values to df as a new column (raw hum is kept for now).
df['hum_1']=df_hum['hum']

In [None]:
# Readable label for each humidity category code.
index_str = {
    1: '20<=hum<=44',
    2: '44<hum<=58',
    3: '58<hum<=86',
    4: '86<hum',
}
# Sum every column per category, order by total cnt (largest first), then relabel the index.
df_hum = (
    df_hum.groupby(['hum'])
    .sum()
    .sort_values('cnt', ascending=False)
    .rename(index=index_str)
)
df_hum

In [None]:
# Open a fresh 10x8 figure for this chart. The previous code resized `fig`, which is the
# figure left over from an earlier cell, so the size never applied to this plot.
plt.figure(figsize=(10, 8))
# Total cnt per humidity category.
sns.barplot(data=df_hum, x=df_hum.index, y='cnt')
plt.suptitle("可以看出湿度在58-86的时候单车使用量很大，在其他的时候单车使用量降低(could be seen that share bicycle usage volume more in hum 58-86, usage volume decrease in other hum)",fontproperties=font_set)
plt.show()

### 通过数据分析总结内容
### summarized by data analysis
```
单车使用量和一天中的时间有关
	早上7点8点9点的时候和晚上16-19点的时候使用量高
单车使用量在周末和假日没有明显高峰期间
	10-14点逐渐上升，14-18点逐渐下降
单车使用量在季节和时间区间也有明显变化
	早高峰7-9点和16-19点使用量高，按照季节划分 夏季，秋季，春季使用量高，冬季使用量低
单车使用量和实际温度有关
	在6到22度实际温度的时候单车使用量很高
单车使用量和身体感知温度有关
	在11到21度之间的体感温度时候单车使用量很高
单车使用量和一周中的日期有关
	一周中周日，周1，周2，周3，周4单车的使用量都比较多，周五和周六明显下降。
单车使用量和季节有关	
	按照季节划分夏季，秋季，春季使用的量很大，冬季使用量降低。

Bicycle usage is related to the time of day
    Usage is high at 7, 8 and 9 in the morning and from 16:00 to 19:00 in the evening
There is no obvious peak in bicycle usage on weekends and holidays
    Usage rises gradually from 10:00 to 14:00 and falls gradually from 14:00 to 18:00
Bicycle usage also varies clearly by season and time of day
    Usage is high in the 7-9 o'clock morning peak and from 16:00 to 19:00; by season, usage is high in summer, autumn and spring and low in winter
Bicycle usage is related to the actual temperature
    Bicycle usage is high when the actual temperature is between 6 and 22 degrees
Bicycle usage is related to the "feels like" temperature
    Bicycle usage is high when the "feels like" temperature is between 11 and 21 degrees
Bicycle usage is related to the day of the week
    Usage is relatively high on Sunday, Monday, Tuesday, Wednesday and Thursday, and drops noticeably on Friday and Saturday.
Bicycle usage depends on the season
    By season, usage is very high in summer, autumn and spring, and lower in winter.
```

### 准备训练数据集和测试数据集
### ready training dataset and testing dataset

In [None]:
# Replace the raw hum / wind_speed columns with their recoded versions (hum_1, wind_speed_1).
df = (
    df
    .drop(columns=['hum', 'wind_speed'])
    .rename(columns={'wind_speed_1': 'wind_speed', 'hum_1': 'hum'})
)
df

In [None]:
# Chronological 90/10 split: the first 90% of rows train the model, the rest is the test set.
train_size = int(len(df) * 0.9)
test_size = len(df) - train_size
# Take explicit copies. The scaling step later assigns into `train` and `test`; on plain
# iloc slices of `df` that triggers SettingWithCopyWarning and may or may not write through.
train = df.iloc[:train_size].copy()
test = df.iloc[train_size:].copy()
print(len(train), len(test))

### 前处理Preprocessing

In [None]:
# Preview df after hum / wind_speed were replaced by their recoded versions.
df.head()

In [None]:
# Pairwise correlation of all columns, annotated.
corr_matrix = df.corr()
plt.figure(figsize=(20, 10))
sns.heatmap(
    corr_matrix,
    annot=True,
    square=True,
    center=0,
    cmap="YlGnBu",
    linewidths=.5,
    linecolor="red",
)
plt.show()

In [None]:
from sklearn.preprocessing import RobustScaler

# Only the two temperature columns are scaled as features (hum / wind_speed are already category codes).
f_columns = ['t1', 't2']

# Fit both scalers on the training split only; the test split is transformed with the same parameters.
f_transformer = RobustScaler().fit(train[f_columns].to_numpy())
cnt_transformer = RobustScaler().fit(train[['cnt']])

for split in (train, test):
    split.loc[:, f_columns] = f_transformer.transform(split[f_columns].to_numpy())
    split['cnt'] = cnt_transformer.transform(split[['cnt']])

In [None]:
# Scatter of each scaled column (x axis) against the timestamp index (y axis).
fig, (ax1, ax2, ax3) = plt.subplots(nrows=3)
fig.set_size_inches(18, 14)
# x / y must be passed by keyword: seaborn >= 0.12 rejects them as positional arguments.
sns.scatterplot(x=train['cnt'], y=train.index, ax=ax1)
sns.scatterplot(x=train['t1'], y=train.index, ax=ax2)
sns.scatterplot(x=train['t2'], y=train.index, ax=ax3)
plt.suptitle("RobustScale转换后的数据分布图",fontproperties=font_set)
plt.show()

In [None]:
def create_dataset(X, y, time_steps=1):
    """Turn a feature table and a target series into sliding-window samples.

    Sample ``i`` consists of rows ``i .. i + time_steps - 1`` of ``X``; its
    target is ``y[i + time_steps]``, i.e. the value right after the window.

    Args:
        X: 2-D features, one row per time step (DataFrame or array-like).
        y: 1-D targets aligned with the rows of ``X`` (Series or array-like).
        time_steps: number of consecutive rows in each window.

    Returns:
        Tuple ``(Xs, ys)`` of NumPy arrays. ``Xs`` has shape
        ``(len(X) - time_steps, time_steps, n_features)`` and ``ys`` has shape
        ``(len(X) - time_steps,)``. Both are empty when ``X`` has no more than
        ``time_steps`` rows.
    """
    # Convert once up front: this accepts plain arrays as well as pandas objects
    # and avoids a pandas .iloc slice on every iteration.
    features = np.asarray(X)
    targets = np.asarray(y)
    n_windows = len(features) - time_steps
    Xs = [features[start:start + time_steps] for start in range(n_windows)]
    ys = [targets[start + time_steps] for start in range(n_windows)]
    return np.array(Xs), np.array(ys)

In [None]:
time_steps = 10  # number of consecutive rows in each input window

# create_dataset returns X shaped [samples, time_steps, n_features]
# and y holding the cnt value that follows each window.

X_train, y_train = create_dataset(train, train.cnt, time_steps)
X_test, y_test = create_dataset(test, test.cnt, time_steps)

print(X_train.shape, y_train.shape)

In [None]:
# Shape of the training frame and of its cnt column, for comparison with the windowed arrays.
train.shape,train.cnt.shape

### 模型构筑Modeling

In [None]:
# Bidirectional LSTM regressor: one recurrent layer over (time_steps, n_features) windows,
# dropout, then a single linear output unit. Trained with MSE loss and the Adam optimizer.
model = keras.Sequential([
    keras.layers.Bidirectional(
        keras.layers.LSTM(
            units=128,
            input_shape=(X_train.shape[1], X_train.shape[2]),
        )
    ),
    keras.layers.Dropout(rate=0.2),
    keras.layers.Dense(1),
])
model.compile(loss='mean_squared_error', optimizer='adam')

In [None]:
# Train for 100 epochs in batches of 32. shuffle=False keeps the windows in the order
# create_dataset produced them; validation_split=0.1 holds out 10% of the training
# samples for validation. `history` records the per-epoch losses.
history = model.fit(
    X_train, y_train, 
    epochs=100, 
    batch_size=32, 
    validation_split=0.1,
    shuffle=False
)

In [None]:
# Per-epoch training loss versus validation loss.
plt.plot(history.history['loss'], label='train')
# val_loss comes from the validation split held out by model.fit, not from the test set,
# so the curve is labelled "validation" rather than "test".
plt.plot(history.history['val_loss'], label='validation')
plt.xlabel('Epoch')
plt.ylabel('Loss (MSE)')
plt.legend();

### 模型预测Predicted

In [None]:
# Predict the (still scaled) cnt for every test window.
y_pred = model.predict(X_test)

In [None]:
# Undo the cnt scaling so the values are bike counts again.
# The scaler was fitted on a single column, so it expects input shaped (n_samples, 1).
# reshape(1, -1) passed the samples as if they were features and only worked because
# NumPy broadcasting happened to apply the one scale/centre value to every entry.
y_train_inv = cnt_transformer.inverse_transform(y_train.reshape(-1, 1))
y_test_inv = cnt_transformer.inverse_transform(y_test.reshape(-1, 1))
y_pred_inv = cnt_transformer.inverse_transform(y_pred)

In [None]:
# One-column frames holding the true and the predicted counts for the test windows.
actual = pd.DataFrame({"actual": y_test_inv.flatten()})
predicted = pd.DataFrame({"predicted": y_pred_inv.flatten()})

In [None]:
# Put true and predicted counts side by side.
final = pd.concat((actual, predicted), axis="columns")
final.head()

### 通过使用RMSE和R2来评测结果值
### Evaluating the results with RMSE and R2

In [None]:
from sklearn.metrics import mean_squared_error, r2_score
# RMSE: root of the mean squared error between true and predicted counts (lower is better).
rmse = np.sqrt(mean_squared_error(final.actual,final.predicted)) 
# R2: coefficient of determination (closer to 1 is better).
r2 = r2_score(final.actual,final.predicted) 
# Printed note (Chinese): "The lower the RMSE and the closer to 0 the better; the closer R2 is to 1 the better."
print("RMSE是越低越好，越接近于0越好。R2是越接近于1越好。")
print("rmse is : {}\nr2 is : {}".format(rmse,r2))

### 结果值可视化，可以发现预测值和实际值很接近
### Visualizing the results: the predicted values are very close to the actual values

In [None]:
# Training targets followed by the test period, with true and predicted counts overlaid.
n_history, n_future = len(y_train), len(y_test)
history_steps = np.arange(0, n_history)
future_steps = np.arange(n_history, n_history + n_future)

plt.plot(history_steps, y_train_inv.flatten(), 'g', label="history")
plt.plot(future_steps, y_test_inv.flatten(), marker='.', label="true")
plt.plot(future_steps, y_pred_inv.flatten(), 'r', label="prediction")
plt.xlabel('Time Step')
plt.ylabel('Bike Count')
plt.legend()
plt.show();

In [None]:
# Zoom in on the test period only: true versus predicted counts.
true_counts = y_test_inv.flatten()
predicted_counts = y_pred_inv.flatten()
plt.plot(true_counts, marker='.', label="true")
plt.plot(predicted_counts, 'r', label="prediction")
plt.xlabel('Time Step')
plt.ylabel('Bike Count')
plt.legend()
plt.show();

### 结论(conclusion)

```
Conclusion :
可以看出R2的结果还是不错的，接近于1。RMSE也是不错的，结果值比较小。
可以尝试继续增加新的特征抽出，或许会继续提高精确度。

It can be seen that the result of R2 is still good, close to 1. RMSE is also
good, and the resulting value is relatively small. You can try to continue to
add new feature extractions, and perhaps continue to improve the accuracy.

```

### If you like this notebook Please Do give an Upvote.  
### Thank you very much!