# Manual Updating holidays code

In [15]:
import pandas as pd
import numpy as np

# Define the festival dates (holidays) for each year
festivals_2020 = [
    "2020-01-01", "2020-02-03", "2020-03-16", "2020-04-09", "2020-04-10", "2020-05-01", "2020-05-05",
    "2020-09-16", "2020-10-12", "2020-11-02", "2020-11-16", "2020-12-12", "2020-12-25",
    "2020-10-31", "2020-11-01"  
]
festivals_2021 = [
    "2021-01-01", "2021-02-01", "2021-03-15", "2021-04-01", "2021-04-02", "2021-05-01", "2021-05-05",
    "2021-09-16", "2021-10-12", "2021-11-02", "2021-11-15", "2021-12-12", "2021-12-25",
    "2021-10-31", "2021-11-01"  
]
festivals_2022 = [
    "2022-01-01", "2022-02-07", "2022-03-21","2022-04-14", "2022-04-15", "2022-05-01", 
    "2022-09-16", "2022-11-21", "2022-12-25",
    "2022-10-31", "2022-11-01"  
]
festivals_2023 = [
    "2023-01-01", "2023-02-06", "2023-03-20", "2023-04-06", "2023-04-07", "2023-05-01", "2023-05-05",
    "2023-09-16", "2023-10-12", "2023-11-02", "2023-11-20", "2023-12-12", "2023-12-25",
    "2023-10-31", "2023-11-01"  
]
festivals_2024 = [
    "2024-01-01", "2024-02-05", "2024-03-18", "2024-03-28", "2024-03-29", "2024-05-01", "2024-05-05","2024-06-02",
    "2024-09-16", "2024-10-12", "2024-11-02", "2024-11-18", "2024-12-12", "2024-12-25",
    "2024-10-31", "2024-11-01"  
]

# Additional festivals not fixed to a date, but with approximate ranges or exact days
additional_festivals = {
    "Fiesta de la Candelaria": ["2020-02-02", "2021-02-02", "2022-02-02", "2023-02-02", "2024-02-02"],  # Fixed date
    "Carnaval": ["2020-02-25", "2021-02-16", "2022-03-01", "2023-02-21", "2024-02-13"],  # Approximate dates
    "Festival del Centro Histórico": ["2020-03-01", "2021-03-01", "2022-03-01", "2023-03-01", "2024-03-01"],  # Approximate date
    "Semana Santa": ["2020-04-05", "2021-04-04", "2022-04-10", "2023-04-09", "2024-03-31"],  # Easter Week
    "Festival Internacional de Cine de la Ciudad de México": ["2020-10-01", "2021-10-01", "2022-10-01", "2023-10-01", "2024-10-01"],  # Approximate date
    "Festival Internacional Cervantino": ["2020-10-01", "2021-10-01", "2022-10-01", "2023-10-01", "2024-10-01"],  # Approximate date
}

# Combine all festival lists into a dictionary by year
festivals = {
    2020: festivals_2020,
    2021: festivals_2021,
    2022: festivals_2022,
    2023: festivals_2023,
    2024: festivals_2024
}

# Add the additional festivals
for festival in additional_festivals.values():
    for year in range(2020, 2025):
        festivals[year].extend(festival)

# Generate hourly data for each year
def generate_data_for_year(year):
    # Create a date range from January 1 to December 31 with hourly frequency
    date_range = pd.date_range(f"{year}-01-01", f"{year}-12-31", freq="H")

    # Initialize dataframe with the date range
    df = pd.DataFrame(date_range, columns=["timestamp"])

    # Add is_festival column: 1 if the date is a festival, 0 otherwise
    df['is_festival'] = df['timestamp'].dt.date.astype(str).isin(festivals[year])
    df['is_festival'] = df['is_festival'].replace({True: 1, False: 0}) 



    # Add is_weekend column: 1 if the day is Saturday or Sunday, 0 otherwise
    df['is_weekend'] = df['timestamp'].dt.dayofweek.isin([5, 6]).astype(int)

    return df

# Generate data for all years
all_data = pd.concat([generate_data_for_year(year) for year in range(2020, 2025)])

# Show the first few rows of the dataset
all_data.head()

Unnamed: 0,timestamp,is_festival,is_weekend
0,2020-01-01 00:00:00,1,0
1,2020-01-01 01:00:00,1,0
2,2020-01-01 02:00:00,1,0
3,2020-01-01 03:00:00,1,0
4,2020-01-01 04:00:00,1,0


In [16]:
all_data['is_festival'].value_counts()

is_festival
0    41429
1     2304
Name: count, dtype: int64

In [17]:
all_data['is_weekend'].value_counts()

is_weekend
0    31251
1    12482
Name: count, dtype: int64

In [18]:
# Save the data to a CSV file
all_data.to_csv("Mexico_city_data_with_festivals_Manual.csv", index=False)