In [1]:
import pandas as pd
from astral import LocationInfo
from astral.sun import sunrise as astral_sunrise, sunset as astral_sunset
import pytz
from pathlib import Path
import numpy as np
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

# Load the area mapping CSV and add coordinates to each location

In [None]:

# Raw area/region mapping; the "location_name" column is geocoded below.
area_df = pd.read_csv(
    "../Raw_Data/Aeso_AreaRegionMapping_Urban_Rural_Raw/area_region_mapping.csv"
)

# Nominatim client wrapped in a rate limiter so that consecutive lookups
# are spaced at least 5 seconds apart.
geolocator = Nominatim(user_agent="aeso-load-project")
geocode = RateLimiter(
    geolocator.geocode,
    min_delay_seconds=5,
)

def get_lat_lon(name):
    """Geocode a place name and return its ``(latitude, longitude)``.

    The name is suffixed with ", Alberta, Canada" before being passed to the
    module-level ``geocode`` callable, to narrow the search.

    Returns ``(None, None)`` when the geocoder yields no result.
    """
    match = geocode(f"{name}, Alberta, Canada")
    return (None, None) if match is None else (match.latitude, match.longitude)

# Geocode every location in row order, echoing each result as it arrives.
lats, lons = [], []
for place in area_df["location_name"]:
    point = get_lat_lon(place)
    lats.append(point[0])
    lons.append(point[1])
    print(f"{place}: {point[0]}, {point[1]}")

# Attach the coordinates (None where the lookup found nothing) and persist.
area_df = area_df.assign(latitude=lats, longitude=lons)

coords_csv = "../Cleaned_And_Combined_Data_From_Our_Notebooks/area_region_mapping_with_coordinates.csv"
area_df.to_csv(coords_csv, index=False)
print(coords_csv)



In [4]:
# Time zone used for the sunrise/sunset calculations below.
TIMEZONE = "America/Edmonton"
tz = pytz.timezone(TIMEZONE)

# Inputs: the area mapping with coordinates, and the hourly weather table.
area_df_with_coords = pd.read_csv(
    "../Cleaned_And_Combined_Data_From_Our_Notebooks/area_region_mapping_with_coordinates.csv"
)
weather_df = pd.read_csv(
    "../Cleaned_And_Combined_Data_From_Our_Notebooks/weather_all_areas_hourly.csv"
)

# Parse timestamps, derive the calendar date of each reading, and make the
# join key an integer on both sides so the merge matches reliably.
weather_df = weather_df.assign(
    timestamp=lambda frame: pd.to_datetime(frame["timestamp"]),
    date=lambda frame: frame["timestamp"].dt.date,
    area_code=lambda frame: frame["area_code"].astype(int),
)
area_df_with_coords = area_df_with_coords.assign(
    area_code=lambda frame: frame["area_code"].astype(int),
)

# Attach location metadata and coordinates to every weather row; only the
# mapping columns needed downstream are carried over.
merged = pd.merge(
    weather_df,
    area_df_with_coords[["area_code", "region_type", "location_name", "latitude", "longitude"]],
    how="left",
    on="area_code",
)
def compute_daylight_for_group(group: pd.DataFrame) -> pd.DataFrame:
    """Flag each row of one (area_code, date) group as daylight (1) or not (0).

    Sunrise and sunset are computed once, from the first row's coordinates
    and date, and then compared against every timestamp in the group. All
    rows are therefore expected to share one location and one calendar date.

    Parameters
    ----------
    group : pd.DataFrame
        Rows providing the columns ``timestamp``, ``area_code``, ``date``,
        ``location_name``, ``region_type``, ``latitude`` and ``longitude``.
        The frame is not modified.

    Returns
    -------
    pd.DataFrame
        A new frame with the columns ``timestamp``, ``area_code`` and
        ``is_daylight`` (integer 0/1), carrying the same index as ``group``.

    Raises
    ------
    ValueError
        If the first row has no latitude or longitude, so sunrise and sunset
        cannot be computed for this group.
    """
    row0 = group.iloc[0]

    latitude = row0["latitude"]
    longitude = row0["longitude"]
    # Fail early with a message that names the offending area instead of
    # letting the sunrise calculation choke on NaN coordinates.
    if pd.isna(latitude) or pd.isna(longitude):
        raise ValueError(
            f"Missing coordinates for area_code={row0['area_code']} "
            f"(location_name={row0['location_name']}); "
            "cannot compute sunrise/sunset."
        )

    loc = LocationInfo(
        name=row0["location_name"],
        region=row0["region_type"],
        timezone=TIMEZONE,
        latitude=latitude,
        longitude=longitude,
    )

    date_val = row0["date"]

    # only sunrise and sunset - avoids the dusk error
    sunrise = astral_sunrise(loc.observer, date=date_val, tzinfo=tz)
    sunset = astral_sunset(loc.observer, date=date_val, tzinfo=tz)

    ts = group["timestamp"]
    if ts.dt.tz is None:
        # Naive timestamps are treated as local wall-clock time. Times skipped
        # by the spring-forward change are shifted forward; times repeated by
        # the fall-back change become NaT, which compares False below and is
        # therefore flagged 0.
        ts_local = ts.dt.tz_localize(
            tz,
            nonexistent="shift_forward",
            ambiguous="NaT",
        )
    else:
        ts_local = ts.dt.tz_convert(tz)

    is_daylight = ((ts_local >= sunrise) & (ts_local <= sunset)).astype(int)

    # Build a fresh frame rather than writing a new column onto the caller's
    # group, which may be a slice of a larger frame.
    return group[["timestamp", "area_code"]].assign(is_daylight=is_daylight)


In [5]:
# Compute the daylight flag one (area_code, date) group at a time.
# The groups are iterated explicitly instead of using groupby(...).apply(...):
# compute_daylight_for_group reads the grouping columns ("date", "area_code"),
# and apply() operating on grouping columns is deprecated in pandas.
# Iteration always yields the full sub-frame, key columns included. Each group
# is passed as a copy so the helper never touches `merged` itself.
daylight_parts = [
    compute_daylight_for_group(day_rows.copy())
    for _, day_rows in merged.groupby(["area_code", "date"])
]

# pd.concat refuses an empty list, so fall back to an empty frame with the
# expected columns when there is nothing to process.
if daylight_parts:
    daylight_df = pd.concat(daylight_parts)
else:
    daylight_df = pd.DataFrame(columns=["timestamp", "area_code", "is_daylight"])

daylight_df = daylight_df.sort_values(["area_code", "timestamp"]).reset_index(drop=True)

output_path = Path("../Cleaned_And_Combined_Data_From_Our_Notebooks/daylightToAreaCodeAddition.csv")
daylight_df.to_csv(output_path, index=False)
print("Saved:", output_path.resolve())


  .apply(compute_daylight_for_group)


Saved: /Users/marleycheema/Desktop/612FinalProjectWorkspace/612FinalProjectGroup3/Cleaned_And_Combined_Data_From_Our_Notebooks/daylightToAreaCodeAddition.csv
