In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import date

In [None]:
# Render matplotlib figures inline, in the output area of the cell that draws them.
%matplotlib inline

In [None]:
# Load the hourly bicycle-count CSV; the path is relative to the notebook's working directory.
df = pd.read_csv('../input/fremont-bridge-hourly-bicycle-counts-by-month-october-2012-to-present.csv')

# Clean data

In [None]:
# Preview the first rows to check the column names and value formats.
df.head()

Extract data from Datetime:

In [None]:
# Parse the 'Date' strings into timestamps so the .dt accessor can be used on them.
# NOTE(review): no explicit `format=` is given, so pandas infers it — confirm the
# inferred format matches the file (month/day order in particular).
df['Datetime'] = pd.to_datetime(df['Date'])

In [None]:
# 'Date' has been superseded by the parsed 'Datetime' column; the total column is discarded too.
df = df.drop(columns=['Date', 'Fremont Bridge Total'])

In [None]:
# Split the timestamp into its calendar components, one new column per component.
df = df.assign(
    Hour=df['Datetime'].dt.hour,
    Day=df['Datetime'].dt.day,
    Month=df['Datetime'].dt.month,
    Year=df['Datetime'].dt.year,
)

In [None]:
# Weekday name of each timestamp (e.g. 'Monday'); used below for grouping and for plot ordering.
df['Dayname'] = df['Datetime'].dt.day_name()

Fill null values with the mean count for the same weekday and hour:

In [None]:
# Show every row that has a missing value in at least one column.
df.loc[df.isna().any(axis='columns')]

In [None]:
# Impute each sidewalk's missing counts with that sidewalk's mean for the same
# weekday and hour.
#
# The filled Series is assigned back to the column instead of calling
# `fillna(..., inplace=True)` on `df[col]`: that chained in-place form acts on an
# intermediate object, emits a warning on recent pandas, and leaves `df`
# unchanged when Copy-on-Write is enabled. The aggregation is named with the
# string 'mean' because passing `np.mean` to `transform` is deprecated.
for sidewalk in ('Fremont Bridge West Sidewalk', 'Fremont Bridge East Sidewalk'):
    typical_count = df.groupby(['Dayname', 'Hour'])[sidewalk].transform('mean')
    df[sidewalk] = df[sidewalk].fillna(typical_count)

Rename columns:

In [None]:
# Shorten the two sidewalk column names to 'East' and 'West'.
df = df.rename(columns={
    'Fremont Bridge East Sidewalk': 'East',
    'Fremont Bridge West Sidewalk': 'West',
})

# Data visualization

Compute the mean for each group:

In [None]:
# Mean East count for each hour of the day.
# 'East' is selected before aggregating: averaging the whole frame would also try
# to average the text column 'Dayname', which raises TypeError on pandas >= 2.0.
df.groupby('Hour')['East'].mean().plot()

We can see the difference between the peak hours and the rest of the day.

In [None]:
# Mean East count for each day of the month.
# 'East' is selected before aggregating: averaging the whole frame would also try
# to average the text column 'Dayname', which raises TypeError on pandas >= 2.0.
df.groupby('Day')['East'].mean().plot()

In [None]:
# Mean East count for each calendar month.
# 'East' is selected before aggregating: averaging the whole frame would also try
# to average the text column 'Dayname', which raises TypeError on pandas >= 2.0.
df.groupby('Month')['East'].mean().plot()

In [None]:
# Mean East count for each year.
# 'East' is selected before aggregating: averaging the whole frame would also try
# to average the text column 'Dayname', which raises TypeError on pandas >= 2.0.
df.groupby('Year')['East'].mean().plot()

Plot the number of bikes over time:

In [None]:
# Draw the first two columns of df as stacked line plots, one panel per column,
# plotted against row position.
values = df.values
groups = [0, 1]  # positional indices of the columns to draw
plt.figure(figsize=(10, 4))
for panel, column_pos in enumerate(groups, start=1):
    # subplot panels are numbered from 1
    plt.subplot(len(groups), 1, panel)
    plt.plot(values[:, column_pos])
    # the column name serves as the panel label, placed inside the axes on the right
    plt.title(df.columns[column_pos], y=0.5, loc='right')
plt.show()


Number of bikes during one specific hour (17:00), for each day of the week:

In [None]:
# Keep only the rows recorded at hour 17, with just the West count and the weekday name.
dayname_and_hour_data = df.loc[df['Hour'] == 17, ['West', 'Dayname']]

In [None]:
# Weekday labels in calendar order (Monday first), used to order the x-axis
# categories of the per-weekday plots.
order_day_in_week = [
    'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
    'Saturday', 'Sunday',
]

In [None]:
# One point per observation: West counts in the selected hour, grouped by weekday.
fig, ax = plt.subplots(figsize=(10, 4))
sns.stripplot(
    data=dayname_and_hour_data,
    x='Dayname',
    y='West',
    order=order_day_in_week,
    ax=ax,
)

In [None]:
# Distribution summary (box per weekday) of West counts in the selected hour.
fig, ax = plt.subplots(figsize=(10, 4))
sns.boxplot(
    data=dayname_and_hour_data,
    x='Dayname',
    y='West',
    order=order_day_in_week,
    ax=ax,
)

Create a column holding the value from a prior time step (168 rows earlier, i.e. one week of hourly records):

In [None]:
# Lag feature: the East count from 168 rows earlier.
# NOTE(review): 168 rows equal one week only if the rows are consecutive hourly
# records in chronological order — confirm the file is sorted and has no gaps.
lagged_east = df['East'].shift(168)
# The first 168 rows have no lagged value; dropna() removes them together with
# any other row that still contains a missing value.
df = df.assign(**{'East-168': lagged_east}).dropna()

In [None]:
# Pairwise correlation between the numeric columns.
# The frame still holds the text column 'Dayname' (and 'Datetime') at this point;
# on pandas >= 2.0 a bare df.corr() raises on non-numeric data, so the numeric
# columns are selected explicitly first.
df.select_dtypes(include='number').corr()

# Data preprocessing and model creation


In [None]:
# Remove the timestamp and the weekday-name columns before building the model inputs.
df = df.drop(columns=['Datetime', 'Dayname'])

In [None]:
# Target: the East count. Features: every other remaining column.
# NOTE(review): the features include 'West' from the same row as the target —
# confirm that value would actually be available at prediction time.
y = df['East'].to_numpy()
X = df.drop(columns='East').to_numpy()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Hold out 30% of the rows for testing; random_state=42 makes the split repeatable.
# NOTE(review): train_test_split shuffles rows by default, so test rows are
# interleaved in time with training rows — confirm this is intended for time-series data.
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.3, random_state=42)

In [None]:
# Min-max scaler with default settings; it is fitted on the training split in the next cell.
scaler = MinMaxScaler()

In [None]:
# Fit the scaling on the training split only, then apply that same scaling to the test split.
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Insert a length-1 middle axis: (samples, features) -> (samples, 1, features).
# The last two dimensions are what the LSTM layer below receives as its input_shape.
# Note: this cell overwrites its inputs, so it must be run exactly once per scaling step.
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

In [None]:
from tensorflow.keras.models import Sequential
# LSTM comes from the public `tensorflow.keras.layers` namespace, like Dense.
# The private `tensorflow.python.keras.layers.recurrent` path is not part of the
# supported API and is not importable on current TensorFlow releases.
from tensorflow.keras.layers import Dense, LSTM

In [None]:
# Sanity check of the training array shapes before building the model.
print(X_train.shape, y_train.shape)

In [None]:
# Small recurrent regressor: one LSTM layer with 6 units followed by a single
# linear output unit. The input shape is taken from the last two dimensions of
# the training array.
model = Sequential([
    LSTM(6, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dense(1),
])
# Trained with the Adam optimizer against mean absolute error.
model.compile(optimizer='adam', loss='mae')

In [None]:
# Train for 60 epochs in batches of 72, without shuffling the training samples.
# The test split is passed as validation data, so its loss is recorded after
# every epoch; verbose=2 prints one line per epoch.
history = model.fit(X_train, y_train, epochs=60, batch_size=72, validation_data=(X_test, y_test), verbose=2, shuffle=False)

In [None]:
# Per-epoch metrics recorded by fit(), as a DataFrame with one row per epoch.
losses = pd.DataFrame(history.history)

In [None]:
# Plot the recorded training and validation loss curves against the epoch index.
losses.plot()

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [None]:
# Model outputs for the scaled, reshaped test features.
predictions = model.predict(X_test)

In [None]:
# Mean absolute error of the predictions on the test split.
mean_absolute_error(y_test, predictions)

In [None]:
# Root mean squared error: the mean squared error on the test split raised to the power 1/2.
mean_squared_error(y_test, predictions)**(1/2)

In [None]:
# Summary statistics of the target column, for putting the error values into context.
df['East'].describe()

In [None]:
# Take the feature row at position 20; the slice 20:21 keeps it as a one-row
# DataFrame rather than a Series. The target column is left out.
single_hour = df.drop(columns='East').iloc[20:21]
single_hour

In [None]:
# Scale the selected row with the already-fitted scaler, add the length-1 middle
# axis the model expects, and predict.
#
# The intermediate results get their own names so that `single_hour` is not
# overwritten and the cell can be re-run. The row is converted to a plain array
# because the scaler was fitted on an array, not on a DataFrame with column names.
single_hour_scaled = scaler.transform(np.asarray(single_hour))
single_hour_seq = single_hour_scaled.reshape(
    (single_hour_scaled.shape[0], 1, single_hour_scaled.shape[1])
)
model.predict(single_hour_seq)

In [None]:
# Show the same row including the actual 'East' value, for comparison with the prediction.
df.iloc[20:21]

Do the same with West.