In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [4]:
parking = pd.read_csv('aarhus_parking.csv')

In [5]:
df_raw = pd.DataFrame(parking)

In [25]:
df_raw.dtypes


_id                      int64
updatetime              object
streamtime              object
Datetime        datetime64[ns]
date                    object
time                    object
vehiclecount             int64
totalspaces              int64
garagecode              object
dtype: object

In [14]:
df_raw['Datetime'] = pd.to_datetime(df_raw['updatetime'])
df_raw['date'] = df_raw['Datetime'].dt.date
df_raw['time'] = df_raw['Datetime'].dt.time

In [16]:
df_raw.columns

Index(['vehiclecount', 'updatetime', '_id', 'totalspaces', 'garagecode',
       'streamtime', 'Datetime', 'date', 'time'],
      dtype='object')

In [24]:
order = ['_id','updatetime', 'streamtime', 'Datetime', 'date', 'time','vehiclecount', 'totalspaces', 'garagecode']
df_raw = df_raw[order]
df_raw.head()

Unnamed: 0,_id,updatetime,streamtime,Datetime,date,time,vehiclecount,totalspaces,garagecode
0,1,2014-05-22 09:09:04.145,2014-11-03 16:18:44,2014-05-22 09:09:04.145,2014-05-22,09:09:04.145000,0,65,NORREPORT
1,2,2014-05-22 09:09:04.145,2014-11-03 16:18:44,2014-05-22 09:09:04.145,2014-05-22,09:09:04.145000,0,512,SKOLEBAKKEN
2,3,2014-05-22 09:09:04.145,2014-11-03 16:18:44,2014-05-22 09:09:04.145,2014-05-22,09:09:04.145000,869,1240,SCANDCENTER
3,4,2014-05-22 09:09:04.145,2014-11-03 16:18:44,2014-05-22 09:09:04.145,2014-05-22,09:09:04.145000,22,953,BRUUNS
4,5,2014-05-22 09:09:04.145,2014-11-03 16:18:44,2014-05-22 09:09:04.145,2014-05-22,09:09:04.145000,124,130,BUSGADEHUSET


In [27]:
df_raw['date'] = pd.to_datetime(df_raw['date'])
df_raw['weekday'] = df_raw['date'].dt.dayofweek
weekday_names = {0: 'Monday', 1: 'Tuesday', 2: 'Wednesday', 3: 'Thursday',
                 4: 'Friday', 5: 'Saturday', 6: 'Sunday'}
df_raw['weekday_name'] = df_raw['weekday'].map(weekday_names)

In [29]:
#df_raw.to_csv('parking_data.csv', index=False)

# LSTM

In [41]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import tensorflow.keras.optimizers as optimizers




In [42]:
data = pd.read_csv('parking_data.csv')
df = pd.DataFrame(data)


In [43]:
df['date'] = pd.to_datetime(df['date'])
df['weekday'] = df['date'].dt.dayofweek
df['hour'] = df['date'].dt.hour

# One-hot encode 'garagecode'
df = pd.get_dummies(df, columns=['garagecode'])
values = df.values

In [49]:
df.dtypes

_id                                  int64
updatetime                          object
streamtime                          object
Datetime                            object
date                        datetime64[ns]
time                                object
vehiclecount                       float64
totalspaces                          int64
weekday                              int64
weekday_name                        object
hour                                 int64
garagecode_BRUUNS                    uint8
garagecode_BUSGADEHUSET              uint8
garagecode_KALKVAERKSVEJ             uint8
garagecode_MAGASIN                   uint8
garagecode_NORREPORT                 uint8
garagecode_SALLING                   uint8
garagecode_SCANDCENTER               uint8
garagecode_SKOLEBAKKEN               uint8
dtype: object

In [44]:
scaler = MinMaxScaler()
df['vehiclecount'] = scaler.fit_transform(df[['vehiclecount']])

In [45]:
def create_sequences(data, n_steps):
    X, y = [], []
    for i in range(len(data)):
        # Find the end of the current sequence
        end_ix = i + n_steps
        # Check if we are beyond the dataset
        if end_ix > len(data)-1:
            break
        # Gather input and output parts of the pattern
        seq_x, seq_y = data[i:end_ix, :-1], data[end_ix, -1]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)

# Assuming df is ready and sorted by datetime
values = df.values
n_features = df.shape[1]  # Number of features
n_steps = 48  # Number of time steps you're looking back

# Prepare the sequences
X, y = create_sequences(values, n_steps)

In [46]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [48]:
seq_length = X_train.shape[1]  # Example: 3 if you're using 3 time steps
nb_features = X_train.shape[2]  # Number of features in your input
nb_out = 1  # For regression tasks, typically 1 output node

model = Sequential()

# Add first LSTM layer
model.add(LSTM(input_shape=(seq_length, nb_features),
               units=5, 
               return_sequences=True))
model.add(Dropout(0.2)) 

# Add second LSTM layer
model.add(LSTM(units=3, return_sequences=False))
model.add(Dropout(0.2))

# Add output layer
model.add(Dense(units=nb_out, activation='linear'))  # Use 'linear' for regression tasks

# Compile the model
optimizer = optimizers.Adam(learning_rate=0.01)
model.compile(loss='mean_squared_error', optimizer=optimizer, metrics=['mse'])

# Model summary
print(model.summary())

# Ensure the data is of type float32
X_train = X_train.astype('float32')
y_train = y_train.astype('float32')

# Define callbacks
callbacks = [
    EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=0, mode='min'),
    ModelCheckpoint('model.h5', monitor='val_loss', save_best_only=True, mode='min', verbose=0)
]

# Fit the network
history = model.fit(X_train, y_train, epochs=100, batch_size=500, validation_split=0.05, verbose=2, callbacks=callbacks)
# List all data in history
print(history.history.keys())

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 48, 5)             480       
                                                                 
 dropout_2 (Dropout)         (None, 48, 5)             0         
                                                                 
 lstm_3 (LSTM)               (None, 3)                 108       
                                                                 
 dropout_3 (Dropout)         (None, 3)                 0         
                                                                 
 dense_1 (Dense)             (None, 1)                 4         
                                                                 
Total params: 592 (2.31 KB)
Trainable params: 592 (2.31 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


ValueError: could not convert string to float: '2014-06-01 00:12:54.786'