# Daylight calculation 

Imports and Paths

In [44]:
import pandas as pd
from astral import LocationInfo
from astral.sun import sun
import pytz
from pathlib import Path
import numpy as np
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

# Base paths 
DATA_DIR = Path("../data")
RAW_AREA_PATH = DATA_DIR / "area_region_mapping.csv"
AREA_MAPPING_PATH = DATA_DIR / "area_region_mapping_with_coords.csv"
WEATHER_PATH = DATA_DIR / "weather_all_areas_hourly.csv"
OUTPUT_PATH = DATA_DIR / "weather_all_areas_hourly_with_daylight.csv"


TIMEZONE = "America/Edmonton"
tz = pytz.timezone(TIMEZONE)

Load area mapping and add coordinates to each location

In [45]:


area_df = pd.read_csv(RAW_AREA_PATH)

geolocator = Nominatim(user_agent="aeso-load-project")
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=5)  

def get_lat_lon(name):
    # Add province + country to improve accuracy
    query = f"{name}, Alberta, Canada"
    loc = geocode(query)
    if loc is None:
        return None, None
    return loc.latitude, loc.longitude

lats = []
lons = []

for name in area_df["location_name"]:
    lat, lon = get_lat_lon(name)
    lats.append(lat)
    lons.append(lon)
    print(f"{name}: {lat}, {lon}")

area_df["latitude"] = lats
area_df["longitude"] = lons

area_df.to_csv(AREA_MAPPING_PATH, index=False)
print("Saved to area_region_mapping_with_coords.csv")

Medicine Hat: 50.04303, -110.679016
Calgary: 51.0456064, -114.057541
Lloydminster: 53.279995, -110.00885
Rainbow Lake: 58.502505, -119.400279
High Level: 58.5115361, -117.1438756
Peace River: 56.2337111, -117.2910839
Grande Prairie: 55.17108, -118.7949873
High Prairie: 55.4325044, -116.4836198
Grande Cache: 53.8888135, -119.1185658
Valleyview: 55.066667, -117.283333
Fox Creek: 54.4061853, -116.8004913
Fort McMurray: 56.7291997, -111.3885221
Swan Hills: 54.7180657, -115.4008258
Athabasca/Lac La Biche: None, None
Cold Lake: 54.46046, -110.192871
Hinton/Edson: None, None
Drayton Valley: 53.2205548, -114.9832127
Wetaskiwin: 52.968492, -113.36792
Wainwright: 52.8402725, -110.8514344
Fort Saskatchewan: 53.7128571, -113.214894
Abraham Lake: 52.2063903, -116.4525891
Red Deer: 52.2690628, -113.8141464
Alliance/Battle River: None, None
Provost: 52.4137383, -110.639576
Caroline: 52.0943034, -114.7391684
Didsbury: 51.6567661, -114.1375682
Wabamun: 53.5592733, -114.4783142
Hanna: 51.6445566, -111.9

Load hourly weather data and parse timestamps

In [46]:
weather_df = pd.read_csv(WEATHER_PATH)

print(weather_df.head())
print(weather_df.columns)

# Parse timestamp; format like "2023-11-01 00:00:00"
weather_df["timestamp"] = pd.to_datetime(
    weather_df["timestamp"],
    format="%Y-%m-%d %H:%M:%S",
    errors="coerce"
)


# Extract date for joining with sunrise/sunset
weather_df["date"] = weather_df["timestamp"].dt.date


   area_code            timestamp  temp_c  precip_mm
0          4  2023-11-01 00:00:00    -4.6        0.0
1          4  2023-11-01 01:00:00    -5.9        0.0
2          4  2023-11-01 02:00:00    -6.5        0.0
3          4  2023-11-01 03:00:00    -6.9        0.0
4          4  2023-11-01 04:00:00    -6.8        0.0
Index(['area_code', 'timestamp', 'temp_c', 'precip_mm'], dtype='object')


Attach coordinates to each row

In [47]:
# Keep only needed columns from mapping
coords_cols = ["area_code", "latitude", "longitude"]
area_coords = area_df[coords_cols].drop_duplicates()

# Merge coordinates into weather data
weather_with_coords = weather_df.merge(
    area_coords,
    on="area_code",
    how="left"
)

# Check
weather_with_coords[["area_code", "timestamp", "latitude", "longitude"]].head()


Unnamed: 0,area_code,timestamp,latitude,longitude
0,4,2023-11-01 00:00:00,50.04303,-110.679016
1,4,2023-11-01 01:00:00,50.04303,-110.679016
2,4,2023-11-01 02:00:00,50.04303,-110.679016
3,4,2023-11-01 03:00:00,50.04303,-110.679016
4,4,2023-11-01 04:00:00,50.04303,-110.679016


Compute Sunrise/sunset 

In [49]:

# Build unique (area_code, date, lat, lon) combos
unique_days = (
    weather_with_coords[["area_code", "date", "latitude", "longitude"]]
    .drop_duplicates()
    .reset_index(drop=True)
)

print("Number of (area_code, date) combos:", len(unique_days))

# Ensure types
unique_days["latitude"] = unique_days["latitude"].astype(float)
unique_days["longitude"] = unique_days["longitude"].astype(float)
unique_days["date"] = pd.to_datetime(unique_days["date"]).dt.date


def compute_sun_times(row):
    """
    Given a row with latitude, longitude, and date,
    return sunrise and sunset in local Alberta time.
    If Astral cannot compute (e.g., polar-ish edge cases),
    return NaT for both.
    """
    lat = row["latitude"]
    lon = row["longitude"]

    if pd.isna(lat) or pd.isna(lon):
        return pd.Series({"sunrise": pd.NaT, "sunset": pd.NaT})

    d = row["date"]
    if isinstance(d, (pd.Timestamp, np.datetime64)):
        d = pd.to_datetime(d).date()
    elif not isinstance(d, Date):
        d = pd.to_datetime(d).date()

    loc = LocationInfo(
        name=str(row["area_code"]),
        region="Alberta",
        timezone=TIMEZONE,  #  "America/Edmonton"
        latitude=lat,
        longitude=lon,
    )

    try:
        s = sun(loc.observer, date=d, tzinfo=tz)
        return pd.Series({
            "sunrise": s["sunrise"],
            "sunset": s["sunset"],
        })
    except ValueError as e:
 
        return pd.Series({"sunrise": pd.NaT, "sunset": pd.NaT})


# Apply function row-wise
sun_times = unique_days.apply(compute_sun_times, axis=1)

# Attach sunrise/sunset back onto unique_days
unique_days = pd.concat([unique_days, sun_times], axis=1)

unique_days.head()


Number of (area_code, date) combos: 17785


Unnamed: 0,area_code,date,latitude,longitude,sunrise,sunset
0,4,2023-11-01,50.04303,-110.679016,2023-11-01 08:12:39.258799-06:00,2023-11-01 17:59:09.120795-06:00
1,4,2023-11-02,50.04303,-110.679016,2023-11-02 08:14:19.259748-06:00,2023-11-02 17:57:27.501948-06:00
2,4,2023-11-03,50.04303,-110.679016,2023-11-03 08:15:59.319370-06:00,2023-11-03 17:55:47.481679-06:00
3,4,2023-11-04,50.04303,-110.679016,2023-11-04 08:17:39.405875-06:00,2023-11-04 17:54:09.107819-06:00
4,4,2023-11-05,50.04303,-110.679016,2023-11-05 07:19:19.485238-07:00,2023-11-05 16:52:32.428658-07:00


Merge sunrise/sunset with hourly data

In [50]:
weather_sun = weather_with_coords.merge(
    unique_days[["area_code", "date", "sunrise", "sunset"]],
    on=["area_code", "date"],
    how="left"
)

weather_sun[["area_code", "timestamp", "sunrise", "sunset"]].head()


Unnamed: 0,area_code,timestamp,sunrise,sunset
0,4,2023-11-01 00:00:00,2023-11-01 08:12:39.258799-06:00,2023-11-01 17:59:09.120795-06:00
1,4,2023-11-01 01:00:00,2023-11-01 08:12:39.258799-06:00,2023-11-01 17:59:09.120795-06:00
2,4,2023-11-01 02:00:00,2023-11-01 08:12:39.258799-06:00,2023-11-01 17:59:09.120795-06:00
3,4,2023-11-01 03:00:00,2023-11-01 08:12:39.258799-06:00,2023-11-01 17:59:09.120795-06:00
4,4,2023-11-01 04:00:00,2023-11-01 08:12:39.258799-06:00,2023-11-01 17:59:09.120795-06:00


Create daylight binary column 

In [51]:
# Convert sunrise/sunset to naive local wall time (drop timezone info)
# They are already in the correct timezone from Astral, so we just strip tz.
sunrise_local = weather_sun["sunrise"].dt.tz_localize(None)
sunset_local = weather_sun["sunset"].dt.tz_localize(None)

# Your timestamp column is already naive local wall time (from CSV)
ts = weather_sun["timestamp"]

# Valid rows: we have sunrise, sunset, and timestamp
mask_valid = sunrise_local.notna() & sunset_local.notna() & ts.notna()

# Default to night (0)
weather_sun["daylight"] = 0

# Daylight where timestamp is between sunrise and sunset (inclusive)
weather_sun.loc[
    mask_valid & (ts >= sunrise_local) & (ts <= sunset_local),
    "daylight"
] = 1

weather_sun[["area_code", "timestamp", "daylight"]].head()


Unnamed: 0,area_code,timestamp,daylight
0,4,2023-11-01 00:00:00,0
1,4,2023-11-01 01:00:00,0
2,4,2023-11-01 02:00:00,0
3,4,2023-11-01 03:00:00,0
4,4,2023-11-01 04:00:00,0


Save data to weather data file

In [52]:
base_cols = ["area_code", "timestamp", "temp_c", "precip_mm"]

# Keep only original columns + daylight
output_df = weather_sun[base_cols + ["daylight"]].copy()

# Save to a new file (recommended) or overwrite original
# OUTPUT_PATH = DATA_DIR / "weather_all_areas_hourly_with_daylight.csv"
output_df.to_csv(OUTPUT_PATH, index=False)
print("Saved with daylight column to:", OUTPUT_PATH)


Saved with daylight column to: ../data/weather_all_areas_hourly_with_daylight.csv
