In [None]:
# import the necessary package

import numpy as np
import pandas as pd
import tensorflow as tf
import zipfile
import os

# used to build the model and to preprocess the data
from keras.models import Sequential
from keras.layers import Dense, Input, Dropout, LSTM, Activation, Embedding, Bidirectional
from keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler


# used to display plots and images
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline

In [None]:
# Print the full path of every file available under the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

In [None]:
# Load the Seoul bike-sharing CSV into a DataFrame.
# NOTE(review): some copies of this file contain non-UTF-8 characters in the
# column headers; if read_csv raises UnicodeDecodeError, pass an explicit
# `encoding=` argument — TODO confirm against the actual file.
dataset = pd.read_csv ('/kaggle/input/seoul-bike-sharing-demand-prediction/SeoulBikeData.csv')

In [None]:
# Preview the first rows to check that the columns were read as expected.
dataset.head()

In [None]:
# Quick structural overview of the data: dimensions, dtypes and missing values.
print ('Shape dataset')
print (dataset.shape)
print ('\n')
print ('Info Dataset')
# DataFrame.info() writes its report to stdout and returns None, so it must
# not be wrapped in print() (that would append a stray "None" line).
dataset.info()
print ('\n')
print ('See if any missing value of Dataset')
print (dataset.isna().sum())

In [None]:
# Plotting setup for the exploratory charts below.
# (matplotlib.pyplot is already imported in the first cell; the repeat is harmless.)
import seaborn as sns
import matplotlib.pyplot as plt
# NOTE(review): register_matplotlib_converters and rcParams are not used in the
# cells visible here — confirm whether they are still needed.
from pandas.plotting import register_matplotlib_converters
from pylab import rcParams
# Apply seaborn's "darkgrid" style to the figures created after this point.
sns.set_style("darkgrid")

# NOTE(review): this suppresses every warning for the rest of the session,
# including deprecation warnings from pandas/seaborn/TensorFlow.
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Number of rows recorded for each value of the "Seasons" column.
sns.countplot(x="Seasons",data=dataset)

In [None]:
# Number of rows recorded for each value of the "Holiday" column.
sns.countplot(x="Holiday", data=dataset)

In [None]:
# The raw 'Date' strings are day-first (dd/mm/yyyy). Parsing them with an
# explicit format prevents pandas from guessing month-first, which would swap
# day and month for every date whose day is <= 12 (or raise on mixed formats).
dataset['Date'] = pd.to_datetime(dataset['Date'], format='%d/%m/%Y')

**Let's extract new calendar columns (year, month and day of the month) from the parsed `Date` column.**

In [None]:
# Derive calendar features from the parsed 'Date' column.
date_accessor = dataset['Date'].dt
dataset['Year'] = date_accessor.year
dataset['Month'] = date_accessor.month
dataset['Day'] = date_accessor.day
# Show the last rows to confirm the three new columns were added.
dataset.tail()

In [None]:
# Number of rows recorded for each value of the "Year" column.
sns.countplot(x="Year", data=dataset)

In [None]:
# Number of rows recorded for each value of the "Month" column.
sns.countplot(x="Month", data=dataset)

Plot the total number of rented bikes by year, month, day of the month and season to see how demand varies with each of them.

In [None]:
# Total rentals per year.
# numeric_only=True keeps the aggregation away from the datetime 'Date' column
# and the string columns, which pandas >= 2.0 refuses to sum (TypeError).
Years = dataset.groupby("Year").sum(numeric_only=True).reset_index()
plt.figure(figsize=(16,4), dpi=150)
sns.barplot(x="Year", y="Rented Bike Count", data=Years)
plt.xticks(rotation=90);

In [None]:
# Total rentals per calendar month.
# numeric_only=True keeps the aggregation away from the datetime 'Date' column
# and the string columns, which pandas >= 2.0 refuses to sum (TypeError).
Month = dataset.groupby("Month").sum(numeric_only=True).reset_index()
plt.figure(figsize=(16,4), dpi=150)
sns.barplot(x="Month", y="Rented Bike Count", data=Month)
plt.xticks(rotation=90);

In [None]:
# Total rentals per day of the month.
# numeric_only=True keeps the aggregation away from the datetime 'Date' column
# and the string columns, which pandas >= 2.0 refuses to sum (TypeError).
Day = dataset.groupby("Day").sum(numeric_only=True).reset_index()
plt.figure(figsize=(16,4), dpi=150)
sns.barplot(x="Day", y="Rented Bike Count", data=Day)
plt.xticks(rotation=90);

In [None]:
# Total rentals per season.
# numeric_only=True keeps the aggregation away from the datetime 'Date' column
# and the remaining string columns, which pandas >= 2.0 refuses to sum (TypeError).
Season = dataset.groupby("Seasons").sum(numeric_only=True).reset_index()
plt.figure(figsize=(16,4), dpi=150)
sns.barplot(x="Seasons", y="Rented Bike Count", data=Season)
plt.xticks(rotation=90);

In [None]:
# Rentals by hour of day, one line per season.
fig, ax = plt.subplots(figsize=(12, 4), dpi=100)
sns.lineplot(x="Hour", y="Rented Bike Count", hue="Seasons", data=dataset, ax=ax)

In [None]:
# Pairwise correlation between the numeric columns.
# The frame also holds string and datetime columns; DataFrame.corr() raises on
# those with pandas >= 2.0, so restrict the input to numeric dtypes first.
plt.figure(figsize=(12,8), dpi=150)
sns.heatmap(dataset.select_dtypes(include="number").corr(), annot=True)

In [None]:
# Correlation of every numeric column with the target, strongest first.
# Non-numeric columns are excluded up front because DataFrame.corr() raises on
# string/datetime columns with pandas >= 2.0.
plt.figure(figsize=(2,4), dpi=150)
target_corr = (
    dataset.select_dtypes(include="number")
    .corr()[["Rented Bike Count"]]
    # Drop the target's correlation with itself by label rather than by position.
    .drop(index="Rented Bike Count")
    .sort_values(by="Rented Bike Count", ascending=False)
)
sns.heatmap(target_corr, annot=True)

In [None]:
# Hourly rental profile: overall (no hue), then split by holiday flag,
# functioning-day flag and season — one panel per breakdown.
fig, axs = plt.subplots(nrows=4,ncols=1,figsize=(12,10), dpi=100)
hue_per_panel = (None, "Holiday", "Functioning Day", "Seasons")
for panel, hue_column in zip(axs, hue_per_panel):
    sns.pointplot(data=dataset, x="Hour", y="Rented Bike Count", ax=panel,
                  hue=hue_column)
plt.tight_layout()

**Predicting the data using LSTM**

In [None]:
# Index the frame by the actual observation timestamp (calendar date + hour).
# Converting the existing integer index with pd.to_datetime would interpret the
# row numbers as nanoseconds since 1970-01-01, which is not a usable time axis.
# 'Date' has already been converted to datetime64 in an earlier cell.
dataset.index = pd.DatetimeIndex(
    dataset['Date'] + pd.to_timedelta(dataset['Hour'], unit='h'))

Normalize the target column (`Rented Bike Count`) to the [0, 1] range using `MinMaxScaler`.

In [None]:
# Rescale the target column to the [0, 1] range, overwriting it in place.
# NOTE(review): the scaler is fitted on the full column before the train/test
# split, so test-set statistics influence the scaling — consider fitting on the
# training portion only.
MinMax_scaler = MinMaxScaler(feature_range=(0, 1))
rented_as_column = dataset['Rented Bike Count'].values.reshape(-1, 1)
dataset['Rented Bike Count'] = MinMax_scaler.fit_transform(rented_as_column)

In [None]:
Rented = dataset['Rented Bike Count'].to_numpy()
dates = dataset.index

# Chronological split: the first 80% of the rows train the model and the last
# 20% validate it. shuffle=False is essential here — the default random shuffle
# would destroy the temporal order that the sliding windows built from these
# arrays rely on (and would mix future observations into the training set).
date_train, date_test, rented_train, rented_test = train_test_split(
    dates, Rented, test_size=0.2, shuffle=False)

In [None]:
def windowed_dataset(series, window_size, batch_size, shuffle_buffer):
    """Turn a series into a batched tf.data pipeline of sliding windows.

    Args:
        series: Values to window; a trailing axis of size 1 is appended.
        window_size: Number of consecutive steps used as model input.
        batch_size: Number of (input, label) pairs per batch.
        shuffle_buffer: Buffer size used when shuffling the windows.

    Returns:
        A tf.data.Dataset yielding batches of ``(inputs, label)`` where
        ``inputs`` holds the first ``window_size`` steps of each window and
        ``label`` holds the single step that follows them.
    """
    # Each window carries the inputs plus one extra step used as the label.
    span = window_size + 1
    values = tf.expand_dims(series, axis=-1)
    windows = (
        tf.data.Dataset.from_tensor_slices(values)
        .window(span, shift=1, drop_remainder=True)
        # window() yields nested datasets; batch each into a single tensor.
        .flat_map(lambda window: window.batch(span))
        .shuffle(shuffle_buffer)
        .map(lambda window: (window[:-1], window[-1:]))
    )
    return windows.batch(batch_size).prefetch(1)

In [None]:
# Build the training and validation pipelines with identical window settings.
window_options = dict(window_size=60, batch_size=100, shuffle_buffer=1000)
train_set = windowed_dataset(rented_train, **window_options)
val_set = windowed_dataset(rented_test, **window_options)

In [None]:
class myCallback(tf.keras.callbacks.Callback):
  """Stop training once both training and validation MAE fall below a threshold.

  Args:
    threshold: MAE value (on the scaled target) that both 'mae' and 'val_mae'
      must be strictly below for training to stop. Defaults to 0.01.
  """

  def __init__(self, threshold=0.01):
    super().__init__()
    self.threshold = threshold

  def on_epoch_end(self, epoch, logs=None):
    # Avoid a shared mutable default and tolerate Keras passing logs=None.
    logs = logs or {}
    mae = logs.get('mae')
    val_mae = logs.get('val_mae')
    # A metric can be absent (e.g. no validation data); comparing None with a
    # float would raise TypeError, so skip the check for this epoch instead.
    if mae is None or val_mae is None:
      return
    if mae < self.threshold and val_mae < self.threshold:
      # Report the threshold actually in use (the previous text said 10% while
      # the comparison used 0.01, i.e. 1%).
      print(f"\n Mean Absolute Error <{self.threshold:.0%}!")
      self.model.stop_training = True
callbacks = myCallback()

In [None]:
# Two stacked LSTM layers followed by a small dense head with a single output.
# The first LSTM returns the full sequence so the second LSTM can consume it;
# the second returns only its final state for the dense layers.
lstm_regressor_layers = [
  tf.keras.layers.LSTM(10, return_sequences=True),
  tf.keras.layers.Dropout(rate=0.1),
  tf.keras.layers.LSTM(10, return_sequences=False),
  tf.keras.layers.Dense(30, activation="relu"),
  tf.keras.layers.Dropout(rate=0.1),
  tf.keras.layers.Dense(10, activation="relu"),
  tf.keras.layers.Dense(1),
]
model = tf.keras.models.Sequential(lstm_regressor_layers)

In [None]:
# SGD with momentum, Huber loss, and MAE tracked as the reporting metric.
optimizer = tf.keras.optimizers.SGD(momentum=0.9, learning_rate=1.00e-05)
model.compile(
    optimizer=optimizer,
    loss=tf.keras.losses.Huber(),
    metrics=["mae"],
)

In [None]:
# Train on the windowed dataset and keep the History object for plotting.
# - batch_size is not passed: train_set is a tf.data.Dataset that is already
#   batched, and Keras documents that batch_size must not be given for datasets.
# - steps_per_epoch is not passed: train_set is finite and not repeated, so a
#   fixed step count would exhaust it after a few epochs and interrupt training
#   ("input ran out of data"). Without it, each epoch is one full pass.
Training = model.fit(train_set, epochs=25, validation_data=val_set,
                    verbose=2, callbacks=[callbacks])

In [None]:
# Plot the MAE and loss curves as two stacked panels of ONE figure.
# Calling plt.show() between the two subplot() calls rendered the first panel
# alone and then started a new figure for the second, leaving each figure
# half empty; explicit axes and a single show() keep them together.
fig, (ax_mae, ax_loss) = plt.subplots(nrows=2, ncols=1)

ax_mae.plot(Training.history['mae'])
ax_mae.plot(Training.history['val_mae'])
ax_mae.set_title('Model Mean Absolute Error (MAE)')
ax_mae.set_ylabel('MAE')
ax_mae.set_xlabel('epoch')
ax_mae.legend(['Train', 'Validation'], loc='upper left')

ax_loss.plot(Training.history['loss'])
ax_loss.plot(Training.history['val_loss'])
ax_loss.set_title('Model Loss')
ax_loss.set_ylabel('Loss')
ax_loss.set_xlabel('epoch')
ax_loss.legend(['Train', 'Validation'], loc='upper left')

# Keep the upper panel's x-label from overlapping the lower panel's title.
fig.tight_layout()
plt.show()