In [None]:
import pickle
import pandas as pd
import tensorflow as tf

### 2.1 Pobieranie danych

In [None]:
# Fetch the UCI Bike Sharing archive and unpack it (extract=True) under the
# current directory (cache_dir="."). The call is the cell's last expression,
# so its return value is displayed as the cell output.
tf.keras.utils.get_file(
    fname="bike_sharing_dataset.zip",
    origin="https://archive.ics.uci.edu/static/public/275/bike+sharing+dataset.zip",
    cache_dir=".",
    extract=True,
)

### 2.2 Przygotowanie danych

In [None]:
# Load the hourly records and index them by a proper timestamp.
# The date ('dteday') and hour ('hr') columns are combined explicitly with
# pd.to_datetime instead of read_csv's `parse_dates={...}` column-combining
# form, which pandas has deprecated. The two source columns are dropped
# afterwards, so the resulting frame has the same columns and the same
# 'datetime' index as before.
df = pd.read_csv('datasets/hour.csv')
df['datetime'] = pd.to_datetime(
    df['dteday'] + ' ' + df['hr'].astype(str),
    format='%Y-%m-%d %H',
)
df = df.drop(columns=['dteday', 'hr']).set_index('datetime')

In [None]:
# Restrict the frame to the count columns and the weather / calendar columns
# that the rest of the notebook works with.
kept_columns = [
    'casual', 'registered', 'cnt',
    'temp', 'atemp', 'hum', 'windspeed',
    'holiday', 'weekday', 'workingday', 'weathersit',
]
df = df[kept_columns]

In [None]:
# Conform the index to a fixed hourly frequency; the following cell fills the
# gaps this introduces.
df = df.asfreq(freq='h')

In [None]:
count_cols = ['casual', 'registered', 'cnt']
measured_cols = ['temp', 'atemp', 'hum', 'windspeed']
carried_cols = ['holiday', 'weekday', 'workingday', 'weathersit']

# Missing counts become 0.
df[count_cols] = df[count_cols].fillna(0)
# Missing measurements are interpolated (pandas default method).
df[measured_cols] = df[measured_cols].interpolate()
# Missing flags / codes are carried forward from the previous row.
df[carried_cols] = df[carried_cols].ffill()

In [None]:
# Number of non-missing values per column (sanity check after gap filling).
df.count()

In [None]:
# Summary statistics for the columns that get rescaled in the next cell.
summary_cols = ['casual', 'registered', 'cnt', 'weathersit']
df[summary_cols].describe()

In [None]:
# Rescale the three count columns by 1/1000 and 'weathersit' by 1/4.
# NOTE: this cell is not idempotent - re-running it divides the values again.
for count_col in ('casual', 'registered', 'cnt'):
    df[count_col] = df[count_col] / 1e3
df['weathersit'] = df['weathersit'] / 4

In [None]:
# First two weeks of hourly rows (24 hours * 7 days * 2 weeks), plotted for a
# quick visual check.
two_weeks_of_hours = 24 * 7 * 2
df_2weeks = df.iloc[:two_weeks_of_hours]
df_2weeks[['casual', 'registered', 'cnt', 'temp']].plot(figsize=(10, 3))

In [None]:
# Mean of every column per resampling bin. NOTE: despite the name `df_daily`,
# the rule 'W' produces weekly bins, not daily ones.
plotted_cols = ['casual', 'registered', 'cnt', 'temp']
df_daily = df.resample(rule='W').mean()
df_daily[plotted_cols].plot(figsize=(10, 3))


### 2.3 Wskaźniki bazowe

In [None]:
# Naive forecasts: the 'cnt' series shifted by 24 rows and by 24*7 rows
# (one day / one week back on the hourly index).
day_lag = 24
week_lag = day_lag * 7
df_sh_d = df['cnt'].shift(periods=day_lag)
df_sh_w = df['cnt'].shift(periods=week_lag)

In [None]:
# Mean absolute error of each naive forecast against the actual 'cnt' values.
# The factor 1e3 reverses the earlier division of 'cnt' by 1e3.
abs_err_daily = df['cnt'].sub(df_sh_d).abs()
abs_err_weekly = df['cnt'].sub(df_sh_w).abs()
mae_daily = abs_err_daily.mean() * 1e3
mae_weekly = abs_err_weekly.mean() * 1e3

In [None]:
# Persist both baseline errors as a (daily, weekly) tuple.
baseline_scores = (mae_daily, mae_weekly)
with open('mae_baseline.pkl', 'wb') as f:
    pickle.dump(baseline_scores, f)

### 2.4 Predykcja przy pomocy sieci gęstej

In [None]:
# Chronological split of the 'cnt' series: training data up to and including
# 2012-06-30 23:00, validation data from 2012-07-01 00:00 onwards.
cnt_train = df.loc['2011-01-01 00:00':'2012-06-30 23:00', 'cnt']
cnt_valid = df.loc['2012-07-01 00:00':, 'cnt']

In [None]:
# Window length: one day of hourly samples.
seq_len = 1 * 24

# Settings shared by the training and validation window datasets.
window_kwargs = dict(sequence_length=seq_len, batch_size=32)

# Each input is a window of `seq_len` consecutive values; its target is the
# value that immediately follows the window (targets start at offset seq_len).
train_ds = tf.keras.utils.timeseries_dataset_from_array(
    cnt_train.to_numpy(),
    targets=cnt_train[seq_len:],
    shuffle=True,
    seed=42,
    **window_kwargs,
)

# The validation windows are built the same way but without shuffling.
valid_ds = tf.keras.utils.timeseries_dataset_from_array(
    cnt_valid.to_numpy(),
    targets=cnt_valid[seq_len:],
    **window_kwargs,
)

In [None]:
# Linear model: a single Dense unit fed with the `seq_len` values of a window.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(1, input_shape=[seq_len]))


In [None]:
# Training setup for the linear model: Huber loss, SGD with momentum.
loss = tf.keras.losses.Huber(delta=1.0)
optimizer = tf.keras.optimizers.SGD(
    learning_rate=0.1,
    momentum=0.9,
)

In [None]:
# Attach the loss and optimizer defined above; MAE is tracked as a metric.
model.compile(
    loss=loss,
    optimizer=optimizer,
    metrics=['mae'],
)

In [None]:
# Train the linear model for 20 epochs, validating after each epoch.
# train_ds / valid_ds are already batched (batch_size=32 was set when the
# datasets were built), and Keras documents that `batch_size` should not be
# specified for dataset inputs, so it is not passed to fit().
history = model.fit(
    train_ds,
    validation_data=valid_ds,
    epochs=20,
)

In [None]:
# Evaluate on the validation windows; the result unpacks into (loss, MAE)
# because 'mae' is the only compiled metric.
linear_scores = model.evaluate(valid_ds)
val_loss, val_mae = linear_scores

In [None]:
# Store the validation MAE as a one-element tuple, then save the model itself.
linear_result = (val_mae,)
with open('mae_linear.pkl', 'wb') as f:
    pickle.dump(linear_result, f)

model.save('model_linear.h5')

### 2.5 Prosta sieć rekurencyjna

In [None]:
# Minimal recurrent model: one SimpleRNN layer with a single unit, accepting
# sequences of any length with one feature per step.
model2 = tf.keras.Sequential()
model2.add(tf.keras.layers.SimpleRNN(1, input_shape=[None, 1]))

In [None]:
# Adam optimizer with an explicit learning rate for the single-unit RNN.
optimizer2 = tf.keras.optimizers.Adam(
    learning_rate=0.005,
)

In [None]:
# MSE loss with the Adam optimizer defined above; MAE is tracked as a metric.
model2.compile(
    loss='mean_squared_error',
    optimizer=optimizer2,
    metrics=['mae'],
)

In [None]:
# Train the single-unit RNN for 20 epochs, validating after each epoch.
# The datasets are already batched (batch_size=32 at construction), and Keras
# documents that `batch_size` should not be specified for dataset inputs, so
# it is not passed to fit().
history2 = model2.fit(
    train_ds,
    validation_data=valid_ds,
    epochs=20,
)

In [None]:
# Validation (loss, MAE) of the single-unit RNN.
rnn1_scores = model2.evaluate(valid_ds)
val_loss2, val_mae2 = rnn1_scores

In [None]:
# Store the validation MAE as a one-element tuple, then save the model itself.
rnn1_result = (val_mae2,)
with open('mae_rnn1.pkl', 'wb') as f:
    pickle.dump(rnn1_result, f)

model2.save('model_rnn1.h5')

In [None]:
# Wider recurrent model: a 32-unit SimpleRNN followed by a single-unit Dense
# layer that produces the forecast.
model3 = tf.keras.Sequential()
model3.add(tf.keras.layers.SimpleRNN(32, input_shape=[None, 1]))
model3.add(tf.keras.layers.Dense(1))

In [None]:
# MSE loss, Adam optimizer selected by name; MAE is tracked as a metric.
model3.compile(
    loss='mean_squared_error',
    optimizer='adam',
    metrics=['mae'],
)

In [None]:
# Train the 32-unit RNN for 20 epochs, validating after each epoch.
# The datasets are already batched (batch_size=32 at construction), and Keras
# documents that `batch_size` should not be specified for dataset inputs, so
# it is not passed to fit().
history3 = model3.fit(
    train_ds,
    validation_data=valid_ds,
    epochs=20,
)

In [None]:
# Validation (loss, MAE) of the 32-unit RNN.
rnn32_scores = model3.evaluate(valid_ds)
val_loss3, val_mae3 = rnn32_scores

In [None]:
# Store the validation MAE as a one-element tuple, then save the model itself.
rnn32_result = (val_mae3,)
with open('mae_rnn32.pkl', 'wb') as f:
    pickle.dump(rnn32_result, f)

model3.save('model_rnn32.h5')

### 2.6 Głęboka RNN

In [None]:
# Deep recurrent model: three stacked 32-unit SimpleRNN layers and a
# single-unit Dense head. The first two RNN layers return full sequences so
# the next recurrent layer receives one vector per time step.
# `input_shape` is declared on the first layer only: the later layers take
# their input shape from the preceding layer's output (32 features per step),
# so repeating `input_shape=[None, 1]` there was redundant and misleading.
model4 = tf.keras.Sequential([
    tf.keras.layers.SimpleRNN(32, input_shape=[None, 1], return_sequences=True),
    tf.keras.layers.SimpleRNN(32, return_sequences=True),
    tf.keras.layers.SimpleRNN(32),
    tf.keras.layers.Dense(1)
])

In [None]:
# MSE loss, Adam optimizer selected by name; MAE is tracked as a metric.
model4.compile(
    loss='mean_squared_error',
    optimizer='adam',
    metrics=['mae'],
)

In [None]:
# Train the deep RNN for 20 epochs, validating after each epoch.
# The datasets are already batched (batch_size=32 at construction), and Keras
# documents that `batch_size` should not be specified for dataset inputs, so
# it is not passed to fit().
history4 = model4.fit(
    train_ds,
    validation_data=valid_ds,
    epochs=20,
)

In [None]:
# Validation (loss, MAE) of the deep RNN.
rnn_deep_scores = model4.evaluate(valid_ds)
val_loss4, val_mae4 = rnn_deep_scores

In [None]:
# Store the validation MAE as a one-element tuple, then save the model itself.
rnn_deep_result = (val_mae4,)
with open('mae_rnn_deep.pkl', 'wb') as f:
    pickle.dump(rnn_deep_result, f)

model4.save('model_rnn_deep.h5')

### 2.7 Model wielowymiarowy

In [None]:
# Multivariate inputs: the target 'cnt' plus three extra feature columns,
# split at the same dates as the univariate series.
mv_columns = ['cnt', 'weathersit', 'atemp', 'workingday']
cnt_train2 = df.loc['2011-01-01 00:00':'2012-06-30 23:00', mv_columns]
cnt_valid2 = df.loc['2012-07-01 00:00':, mv_columns]

In [None]:
# Window length: one day of hourly samples.
seq_len = 1 * 24

# Settings shared by the training and validation window datasets.
mv_window_kwargs = dict(sequence_length=seq_len, batch_size=32)

# Inputs are windows over all four columns; the target is the 'cnt' value
# that immediately follows each window (targets start at offset seq_len).
train_ds2 = tf.keras.utils.timeseries_dataset_from_array(
    cnt_train2.to_numpy(),
    targets=cnt_train2['cnt'][seq_len:],
    shuffle=True,
    seed=42,
    **mv_window_kwargs,
)

# The validation windows are built the same way but without shuffling.
valid_ds2 = tf.keras.utils.timeseries_dataset_from_array(
    cnt_valid2.to_numpy(),
    targets=cnt_valid2['cnt'][seq_len:],
    **mv_window_kwargs,
)

In [None]:
# Multivariate recurrent model: a 32-unit SimpleRNN reading four features per
# time step, followed by a single-unit Dense layer.
model5 = tf.keras.Sequential()
model5.add(tf.keras.layers.SimpleRNN(32, input_shape=[None, 4]))
model5.add(tf.keras.layers.Dense(1))

In [None]:
# MSE loss, Adam optimizer selected by name; MAE is tracked as a metric.
model5.compile(
    loss='mean_squared_error',
    optimizer='adam',
    metrics=['mae'],
)

In [None]:
# Train the multivariate RNN for 20 epochs, validating after each epoch.
# train_ds2 / valid_ds2 are already batched (batch_size=32 at construction),
# and Keras documents that `batch_size` should not be specified for dataset
# inputs, so it is not passed to fit().
history5 = model5.fit(
    train_ds2,
    validation_data=valid_ds2,
    epochs=20,
)

In [None]:
# Validation (loss, MAE) of the multivariate RNN.
rnn_mv_scores = model5.evaluate(valid_ds2)
val_loss5, val_mae5 = rnn_mv_scores

In [None]:
# Store the validation MAE as a one-element tuple, then save the model itself.
rnn_mv_result = (val_mae5,)
with open('mae_rnn_mv.pkl', 'wb') as f:
    pickle.dump(rnn_mv_result, f)

model5.save('model_rnn_mv.h5')