In [None]:
import torch
import numpy as np
from torch.utils.data import Dataset
import torch.nn as nn
import torch.nn.functional as F
import math
import pandas as pd
from prophet import Prophet
import seaborn as sns
import os

# from google.colab import drive
# drive.mount('/content/drive') 
import matplotlib.pyplot as plt

from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, root_mean_squared_error

import warnings

# Suppress every warning for the rest of the session (library deprecation
# notices included), so cell output stays compact.
warnings.filterwarnings(action="ignore")

# Style sheets are applied left to right: 'ggplot' first, then
# 'fivethirtyeight' overrides any settings the two have in common.
plt.style.use(['ggplot', 'fivethirtyeight'])

In [None]:
def get_season(date):
    """Map a date to the name of the season its month falls in.

    Parameters
    ----------
    date : object with a ``month`` attribute
        Only ``date.month`` is read.

    Returns
    -------
    str
        'Spring' for months 3-5, 'Summer' for 6-8, 'Fall' for 9-11,
        and 'Winter' for every other month value.
    """
    season_by_month = {
        3: 'Spring', 4: 'Spring', 5: 'Spring',
        6: 'Summer', 7: 'Summer', 8: 'Summer',
        9: 'Fall', 10: 'Fall', 11: 'Fall',
    }
    # Anything not listed above (12, 1, 2, ...) falls through to winter.
    return season_by_month.get(date.month, 'Winter')

In [None]:
# Resolve the CSV locations: both files live in the "data" folder that sits
# next to the current working directory.
current_directory = os.getcwd()
csv_folder = os.path.join(current_directory, '..', 'data')
train_data_file_path, test_data_file_path = (
    os.path.join(csv_folder, file_name)
    for file_name in ('train_data.csv', 'test_data.csv')
)


In [None]:
# Load the train and test data as dataframes.
# Column 0 is parsed as datetimes and used as the index; only the first eight
# columns are read (presumably the timestamp plus the seven parkades below --
# verify against the CSV header).
read_options = dict(parse_dates=[0], index_col=[0], usecols=range(0, 8))
test_data = pd.read_csv(test_data_file_path, **read_options)
train_data = pd.read_csv(train_data_file_path, **read_options)

# Parkade names; used as column keys into train_data / test_data.
parks = ["North","West","Rose","Health Sciences","Fraser","Thunderbird","University Lot Blvd"]

# Preview the training data. This must be the last expression in the cell:
# a bare expression in the middle of a cell is evaluated but never displayed.
train_data.head()


In [None]:
from pandas.api.types import CategoricalDtype

# Ordered categorical dtype for weekday names, so that sorting and comparisons
# follow calendar order (Monday first) instead of alphabetical order.
weekday_names = [
    'Monday', 'Tuesday', 'Wednesday', 'Thursday',
    'Friday', 'Saturday', 'Sunday',
]
cat_type = CategoricalDtype(categories=weekday_names, ordered=True)

def create_features(df, label=None):
    """
    Build per-parkade time features from a datetime-indexed dataframe.

    Side effect: the columns 'date', 'hour', 'dayofweek', 'weekday' and
    'season' are written onto ``df`` itself (the caller's frame is modified).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index holds the timestamps; the index must support the
        ``.dt`` accessor once copied into a column.
    label : optional
        Accepted for call compatibility; not used by this function.

    Returns
    -------
    pandas.DataFrame
        Indexed by ('Parkade', 'date') with the columns 'hour', 'weekday',
        'dayofweek' and 'season'. The time rows are repeated once for every
        name in the module-level ``parks`` list, in that order.
    """
    timestamps = df.index

    # Derive the calendar features from the index and store them on df.
    df['date'] = timestamps
    date_parts = df['date'].dt
    df['hour'] = date_parts.hour
    df['dayofweek'] = date_parts.dayofweek
    df['weekday'] = date_parts.day_name().astype(cat_type)
    df['season'] = [get_season(stamp) for stamp in timestamps]

    time_columns = ['date', 'hour', 'weekday', "dayofweek", 'season']

    # Add the parkade as a feature: replicate the time features once per
    # parkade, tagging each replica with that parkade's name.
    per_park_frames = [df[time_columns].assign(Parkade=park) for park in parks]

    # Stack the replicas (parkade-major order) and key rows by (Parkade, date).
    stacked = pd.concat(per_park_frames, ignore_index=True)
    features_df = stacked.set_index(['Parkade', 'date'])

    return features_df

In [None]:
features_df_train = create_features(train_data) 
features_df_test = create_features(test_data) 


In [None]:

# Hourly time ranges covered by the train and test sets.
start_timestamp_train = pd.Timestamp('2018-09-06 10:00:00')
end_timestamp_train = pd.Timestamp('2023-01-20 13:00:00')

# Calendar year 2020 is excluded from training (see the mask below).
_2020_start_date = pd.Timestamp('2020-01-01 00:00:00')
_2020_end_date = pd.Timestamp('2020-12-31 23:00:00')

start_timestamp_test = pd.Timestamp('2023-01-20 14:00:00')
end_timestamp_test = pd.Timestamp('2024-03-05 23:00:00')

# NOTE(review): pandas 2.2+ deprecates the 'H' alias in favour of 'h'; kept
# as-is here for compatibility with older pandas versions.
timestamps_train = pd.date_range(start=start_timestamp_train, end=end_timestamp_train, freq='H')
timestamps_test = pd.date_range(start=start_timestamp_test, end=end_timestamp_test, freq='H')

# Boolean mask over the training timestamps. Despite its name, it is True for
# timestamps OUTSIDE 2020, i.e. the ones that are kept.
covid_window = (timestamps_train < _2020_start_date) | (timestamps_train > _2020_end_date)
# Apply the mask to drop the 2020 timestamps
filtered_timestamps_train = timestamps_train[covid_window]

num_cars_train = []
num_cars_test = []

# Collect the occupancy of every parkade, parkade by parkade.
# One vectorised label lookup per parkade replaces a Python-level
# `.loc[timestamp][park]` call per (timestamp, parkade) pair, which built a
# full row Series for every single value. A missing timestamp still raises
# KeyError, and values come back in the order of the requested timestamps.
for park in parks:
    num_cars_train.extend(train_data.loc[filtered_timestamps_train, park].tolist())
    num_cars_test.extend(test_data.loc[timestamps_test, park].tolist())

# Positional assignment: this relies on the feature frames being laid out
# parkade-major (in `parks` order) with one row per timestamp above, in the
# same order. A length mismatch raises ValueError.
features_df_train["num_cars"] = num_cars_train
features_df_test["num_cars"] = num_cars_test



In [None]:
# Format data for the Prophet model: a frame with the timestamp column named
# "ds" and the target column named "y".
prophet_columns = {'date': 'ds', 'num_cars': 'y'}

north_train_occupancy = features_df_train.loc["North"]["num_cars"]
train_prophet_north = north_train_occupancy.reset_index().rename(columns=prophet_columns)

north_test_occupancy = features_df_test.loc["North"]["num_cars"]
test_prophet_north = north_test_occupancy.reset_index().rename(columns=prophet_columns)


In [None]:
# Hyperparameters for the Prophet model.
changepoint_prior_scale = 0.0001
seasonality_prior_scale = 0.25
yearly_seasonality = 10
weekly_seasonality = 22
daily_seasonality = 39
hourly_seasonality = 9

prophet_kwargs = {
    'changepoint_prior_scale': changepoint_prior_scale,
    'seasonality_prior_scale': seasonality_prior_scale,
    'yearly_seasonality': yearly_seasonality,
    'weekly_seasonality': weekly_seasonality,
    'daily_seasonality': daily_seasonality,
}
model = Prophet(**prophet_kwargs)

# Extra regressors: Canadian public holidays plus a custom "hourly" component.
model.add_country_holidays(country_name='Canada')
# NOTE(review): Prophet documents `period` in days, so 0.0147 is roughly
# 21 minutes rather than one hour (1/24 ~= 0.0417) -- confirm this value is
# intended for hourly data.
model.add_seasonality(name='hourly', period=0.0147, fourier_order=hourly_seasonality)



#---------------------------------------------------
# Fit on the North training frame, forecast the test timestamps and score the
# forecast ("yhat") against the observed North occupancy.
model.fit(train_prophet_north)
predictions = model.predict(test_prophet_north)
r2 = r2_score(test_data['North'], predictions["yhat"])
print(f"r2 = {r2}")
#---------------------------------------------------
# Actual vs. predicted occupancy over the test period. Title, axis labels and
# a legend are set so the figure can be read on its own.
f, ax = plt.subplots(figsize=(20, 5))
ax.scatter(test_data.index, test_data['North'], color='r', label='Actual')
ax.scatter(test_data.index, predictions["yhat"], color='b', label='Predicted (yhat)')
ax.set_title(f"North parkade: actual vs. predicted occupancy (r2 = {r2:.3f})")
ax.set_xlabel('Date')
ax.set_ylabel('Number of cars')
ax.legend();


