In [10]:
import pandas as pd

# Load your CSV file into a DataFrame
df = pd.read_csv("sample-data/combined_weather_parking_WO2020_no_dups.csv")

# Load term dates from another CSV file
term_dates_df = pd.read_csv("sample-data/term dates.csv", parse_dates=['Timestamp'])

# Convert 'Timestamp' column to datetime format
df['Timestamp'] = pd.to_datetime(df['Timestamp'])

# Check if each timestamp in the DataFrame matches any of the additional timestamps
df['term_date'] = df['Timestamp'].isin(term_dates_df['Timestamp'])



In [11]:
print(term_dates_df)

    Timestamp
0  2018-09-04
1  2018-12-20
2  2019-01-02
3  2019-04-27
4  2019-05-06
5  2019-06-29
6  2019-07-02
7  2019-08-18
8  2019-09-03
9  2019-12-18
10 2020-01-06
11 2020-04-29
12 2020-05-11
13 2020-06-26
14 2020-07-06
15 2020-08-20
16 2020-09-08
17 2020-12-22
18 2021-01-11
19 2021-04-29
20 2021-05-10
21 2021-06-25
22 2021-07-05
23 2021-08-20
24 2021-09-07
25 2021-12-22
26 2022-01-10
27 2022-04-27
28 2022-05-16
29 2022-06-30
30 2022-07-04
31 2022-08-19
32 2022-09-06
33 2022-12-22
34 2023-01-09
35 2023-04-28
36 2023-05-15
37 2023-06-30
38 2023-07-04
39 2023-08-19
40 2023-09-05
41 2023-12-21
42 2024-01-08
43 2024-04-27


In [12]:
import pandas as pd
import datetime

# Create an empty list to store the expanded data
expanded_data = []

# Create a set to store processed dates
processed_dates = set()

# Iterate over each row in the original DataFrame
for index, row in term_dates_df.iterrows():
    # Extract the date from the Timestamp column
    date = row['Timestamp'].date()
    print(date)
    
    # Check if the date has been processed before
    if date not in processed_dates:
        # Generate datetime objects for each hour of the day and append them to the list
        for hour in range(0, 24):
            expanded_data.append({'Timestamp': datetime.datetime.combine(date, datetime.time(hour))})
        
        # Add the date to the set of processed dates
        processed_dates.add(date)

# Create a DataFrame from the list of dictionaries
expanded_df = pd.DataFrame(expanded_data)

# Display the expanded DataFrame
print(expanded_df)


2018-09-04
2018-12-20
2019-01-02
2019-04-27
2019-05-06
2019-06-29
2019-07-02
2019-08-18
2019-09-03
2019-12-18
2020-01-06
2020-04-29
2020-05-11
2020-06-26
2020-07-06
2020-08-20
2020-09-08
2020-12-22
2021-01-11
2021-04-29
2021-05-10
2021-06-25
2021-07-05
2021-08-20
2021-09-07
2021-12-22
2022-01-10
2022-04-27
2022-05-16
2022-06-30
2022-07-04
2022-08-19
2022-09-06
2022-12-22
2023-01-09
2023-04-28
2023-05-15
2023-06-30
2023-07-04
2023-08-19
2023-09-05
2023-12-21
2024-01-08
2024-04-27
               Timestamp
0    2018-09-04 00:00:00
1    2018-09-04 01:00:00
2    2018-09-04 02:00:00
3    2018-09-04 03:00:00
4    2018-09-04 04:00:00
...                  ...
1051 2024-04-27 19:00:00
1052 2024-04-27 20:00:00
1053 2024-04-27 21:00:00
1054 2024-04-27 22:00:00
1055 2024-04-27 23:00:00

[1056 rows x 1 columns]


In [13]:
# Filter expanded_df to include only dates present in term_dates_df
expanded_df = expanded_df[expanded_df['Timestamp'].dt.date.isin(term_dates_df['Timestamp'].dt.date)]

# Reset index of the filtered DataFrame
expanded_df.reset_index(drop=True, inplace=True)

# Display the filtered DataFrame
print(expanded_df)


               Timestamp
0    2018-09-04 00:00:00
1    2018-09-04 01:00:00
2    2018-09-04 02:00:00
3    2018-09-04 03:00:00
4    2018-09-04 04:00:00
...                  ...
1051 2024-04-27 19:00:00
1052 2024-04-27 20:00:00
1053 2024-04-27 21:00:00
1054 2024-04-27 22:00:00
1055 2024-04-27 23:00:00

[1056 rows x 1 columns]


In [14]:
# Check if each timestamp in the DataFrame matches any of the additional timestamps
df['term_date'] = df['Timestamp'].isin(expanded_df['Timestamp'])


In [15]:
# -*- coding: utf-8 -*-
"""
Define holidays
"""

from pandas.tseries.holiday import (
    AbstractHolidayCalendar, DateOffset, EasterMonday, GoodFriday, Holiday, MO,
    next_monday, next_monday_or_tuesday
    )

class AlbertaHolidayCalendar(AbstractHolidayCalendar):
    """
    Uses the pandas AbstractHolidayCalendar class to create a class for
    Alberta holidays:
    Adjusts for observance date if set holidays fall on weekends
    New years - Jan 1
    Family Day - 3rd Monday in February
    Good Friday
    Easter Monday
    Victoria Day - Last Monday before May 25
    Canada Day - July 1
    Heritage Day - August 1
    Labour Day - September 1
    Thanksgiving - Second Monday in October
    Remembrance Day - November 11
    Christmas Day - December 25
    Boxing Day - December 26
    Additional holidays from December 25 to January 1
    See Pandas documentation for more information on holiday calendars
    http://pandas.pydata.org/pandas-docs/stable/timeseries.html#holidays-holiday-calendars
    Some sample code is here:
        http://mapleoin.github.io/perma/python-uk-business-days
        http://stackoverflow.com/documentation/pandas/7976/holiday-calendars#t=201703131711384942824
    """
    rules = [
        Holiday('New Years Day', month=1, day=1, observance=next_monday),
        Holiday('Family Day',
                month=2, day=1, offset=DateOffset(weekday=MO(3))),
        GoodFriday,
        EasterMonday,
        Holiday('Victoria Day',
                month=5, day=25, offset=DateOffset(weekday=MO(-1))),
        Holiday('Canada Day', month=7, day=1, observance=next_monday),
        Holiday('Heritage Day',
                month=8, day=1, offset=DateOffset(weekday=MO(1))),
        Holiday('Labour Day',
                month=9, day=1, offset=DateOffset(weekday=MO(1))),
        Holiday('Thanksgiving',
                month=10, day=1, offset=DateOffset(weekday=MO(2))),
        Holiday('Remembrance Day',
                month=11, day=11, observance=next_monday),
        Holiday('Christmas Day', month=12, day=25, observance=next_monday),
        Holiday('Boxing Day',
                month=12, day=26, observance=next_monday_or_tuesday),
        Holiday('Additional Holiday 1', month=12, day=27, observance=next_monday_or_tuesday),
        Holiday('Additional Holiday 2', month=12, day=28, observance=next_monday_or_tuesday),
        Holiday('Additional Holiday 3', month=12, day=29, observance=next_monday_or_tuesday),
        Holiday('Additional Holiday 4', month=12, day=30, observance=next_monday_or_tuesday),
        Holiday('New Years Eve', month=12, day=31, observance=next_monday)
    ]


In [16]:
import pandas as pd
import datetime

# Create an empty list to store the expanded data
expanded_data = []

# Create a set to store processed dates
processed_dates = set()

cal = AlbertaHolidayCalendar()
holidays = cal.holidays(start=df['Timestamp'].min(), end=df['Timestamp'].max())

# Iterate over each row in the original DataFrame
for holiday_date  in holidays:
    # Extract the date from the Timestamp column
    date = holiday_date.date()
    print(date)
    
    # Check if the date has been processed before
    if date not in processed_dates:
        # Generate datetime objects for each hour of the day and append them to the list
        for hour in range(0, 24):
            expanded_data.append({'Timestamp': datetime.datetime.combine(date, datetime.time(hour))})
        
        # Add the date to the set of processed dates
        processed_dates.add(date)

# Create a DataFrame from the list of dictionaries
expanded_df = pd.DataFrame(expanded_data)

# Display the expanded DataFrame
print(expanded_df)


2018-10-08
2018-11-12
2018-12-25
2018-12-26
2018-12-27
2018-12-28
2018-12-31
2018-12-31
2019-01-01
2019-01-01
2019-02-18
2019-04-19
2019-04-22
2019-05-20
2019-07-01
2019-08-05
2019-09-02
2019-10-14
2019-11-11
2019-12-25
2019-12-26
2019-12-27
2019-12-30
2019-12-31
2019-12-31
2019-12-31
2020-01-01
2020-02-17
2020-04-10
2020-04-13
2020-05-25
2020-07-01
2020-08-03
2020-09-07
2020-10-12
2020-11-11
2020-12-25
2020-12-28
2020-12-29
2020-12-29
2020-12-29
2020-12-30
2020-12-31
2021-01-01
2021-02-15
2021-04-02
2021-04-05
2021-05-24
2021-07-01
2021-08-02
2021-09-06
2021-10-11
2021-11-11
2021-12-27
2021-12-28
2021-12-28
2021-12-28
2021-12-29
2021-12-30
2021-12-31
2022-01-03
2022-02-21
2022-04-15
2022-04-18
2022-05-23
2022-07-01
2022-08-01
2022-09-05
2022-10-10
2022-11-11
2022-12-26
2022-12-27
2022-12-27
2022-12-28
2022-12-29
2022-12-30
2023-01-02
2023-01-02
2023-02-20
2023-04-07
2023-04-10
2023-05-22
2023-07-03
2023-08-07
2023-09-04
2023-10-09
2023-11-13
2023-12-25
2023-12-26
2023-12-27
2023-12-28

In [17]:
# Filter expanded_df to include only dates present in holidays
expanded_df = expanded_df[expanded_df['Timestamp'].dt.date.isin(holidays.date)]

# Reset index of the filtered DataFrame
expanded_df.reset_index(drop=True, inplace=True)

# Display the filtered DataFrame
print(expanded_df)
# Save the modified DataFrame back to a CSV file
expanded_df.to_csv("test.csv", index=False)

# Check if each timestamp in the DataFrame matches any of the additional timestamps
df['is_holiday'] = df['Timestamp'].isin(expanded_df['Timestamp'])


               Timestamp
0    2018-10-08 00:00:00
1    2018-10-08 01:00:00
2    2018-10-08 02:00:00
3    2018-10-08 03:00:00
4    2018-10-08 04:00:00
...                  ...
2011 2024-02-19 19:00:00
2012 2024-02-19 20:00:00
2013 2024-02-19 21:00:00
2014 2024-02-19 22:00:00
2015 2024-02-19 23:00:00

[2016 rows x 1 columns]


In [18]:
# Save the modified DataFrame back to a CSV file
df.to_csv("sample-data/term_dates_parking.csv", index=False)