In [5]:
import sys
from pathlib import Path

# Assumes the notebook is run from ROOT/notebooks/, so the project root is the
# parent of the current working directory.
project_root = Path.cwd().parent

# Put the project root at the front of sys.path so modules under it can be imported.
# Any existing copies of the entry are dropped first, so re-running this
# cell does not stack duplicate entries; the list is mutated in place.
_root_entry = str(project_root)
sys.path[:] = [_root_entry] + [entry for entry in sys.path if entry != _root_entry]


In [6]:
import geopandas as gpd
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Import the new weather station functions
from alviaorange import (
    fetch_climate_stations,
    find_nearest_stations,
    get_multi_station_data,
    get_station_weather_data,
    weather_station_workflow,
)

# Vancouver coordinates (lon, lat)
vancouver_coords = (-123.1207, 49.2827)

# Ask for the 5 nearest stations in province 'BC', no farther than 50 km away
nearest_stations = find_nearest_stations(
    point_coords=vancouver_coords, k=5, max_distance_km=50, province='BC'
)

# Report the count, then list name, distance and climate identifier per station
print(f"Found {len(nearest_stations)} weather stations near Vancouver:")
if not nearest_stations.empty:
    station_summary = nearest_stations[['STATION_NAME', 'distance_km', 'CLIMATE_IDENTIFIER']]
    print(station_summary.to_string())


Found 5 weather stations near Vancouver:
              STATION_NAME  distance_km CLIMATE_IDENTIFIER
883   VANCOUVER HARBOUR CS     1.409752            1108446
1761     VANCOUVER HARBOUR     1.471355            110Q44V
828    N VANCOUVER WHARVES     3.589893            1105669
1533    WEST VANCOUVER AUT     8.892511            1108824
1715      VANCOUVER INTL A    10.812409            1108395


In [7]:
# Get weather data from the nearest station for July 2023

# Bind defaults first: later cells test `weather_data.empty` and read
# `station_name`, and would raise NameError if no station had been found.
weather_data = pd.DataFrame()
station_name = None

if not nearest_stations.empty:
    # Use the first row of nearest_stations as the station to query
    nearest_station_id = nearest_stations.iloc[0]['CLIMATE_IDENTIFIER']
    station_name = nearest_stations.iloc[0]['STATION_NAME']

    print(f"Getting weather data for: {station_name} ({nearest_station_id})")

    weather_data = get_station_weather_data(
        station_id=nearest_station_id,
        start_date='2023-07-01',
        end_date='2023-07-31',
        data_type='daily'
    )

    if not weather_data.empty:
        print(f"\nRetrieved {len(weather_data)} days of weather data")
        print("\nAvailable columns:")
        print(list(weather_data.columns))
        print("\nSample data:")
        # Always show the date; add the common measurements only if present
        display_cols = ['LOCAL_DATE'] + [
            col
            for col in ['MEAN_TEMPERATURE', 'MAX_TEMPERATURE', 'MIN_TEMPERATURE', 'TOTAL_PRECIPITATION']
            if col in weather_data.columns
        ]
        print(weather_data[display_cols].head(10))
    else:
        print("No weather data available for this period")


Getting weather data for: VANCOUVER HARBOUR CS (1108446)

Retrieved 31 days of weather data

Available columns:
['STATION_NAME', 'CLIMATE_IDENTIFIER', 'ID', 'LOCAL_DATE', 'PROVINCE_CODE', 'LOCAL_YEAR', 'LOCAL_MONTH', 'LOCAL_DAY', 'MEAN_TEMPERATURE', 'MEAN_TEMPERATURE_FLAG', 'MIN_TEMPERATURE', 'MIN_TEMPERATURE_FLAG', 'MAX_TEMPERATURE', 'MAX_TEMPERATURE_FLAG', 'TOTAL_PRECIPITATION', 'TOTAL_PRECIPITATION_FLAG', 'TOTAL_RAIN', 'TOTAL_RAIN_FLAG', 'TOTAL_SNOW', 'TOTAL_SNOW_FLAG', 'SNOW_ON_GROUND', 'SNOW_ON_GROUND_FLAG', 'DIRECTION_MAX_GUST', 'DIRECTION_MAX_GUST_FLAG', 'SPEED_MAX_GUST', 'SPEED_MAX_GUST_FLAG', 'COOLING_DEGREE_DAYS', 'COOLING_DEGREE_DAYS_FLAG', 'HEATING_DEGREE_DAYS', 'HEATING_DEGREE_DAYS_FLAG', 'MIN_REL_HUMIDITY', 'MIN_REL_HUMIDITY_FLAG', 'MAX_REL_HUMIDITY', 'MAX_REL_HUMIDITY_FLAG']

Sample data:
   LOCAL_DATE  MEAN_TEMPERATURE  MAX_TEMPERATURE  MIN_TEMPERATURE  \
1  2023-07-01              19.6             24.0             15.2   
0  2023-07-02              18.7             22.

In [8]:
# Daily temperature chart for the selected station: one line per available
# series (max, min, mean), skipped entirely when there is no data.
if not weather_data.empty:
    fig = go.Figure()

    # (column, legend label, line colour) in the order the traces are added
    temperature_series = (
        ('MAX_TEMPERATURE', 'Max Temperature', 'red'),
        ('MIN_TEMPERATURE', 'Min Temperature', 'blue'),
        ('MEAN_TEMPERATURE', 'Mean Temperature', 'green'),
    )
    for column, label, colour in temperature_series:
        if column not in weather_data.columns:
            continue  # this series was not returned; leave it off the chart
        trace = go.Scatter(
            x=weather_data['LOCAL_DATE'],
            y=weather_data[column],
            mode='lines+markers',
            name=label,
            line=dict(color=colour),
        )
        fig.add_trace(trace)

    fig.update_layout(
        title=f'Daily Temperature - {station_name}',
        xaxis_title='Date',
        yaxis_title='Temperature (°C)',
        hovermode='x unified',
    )
    fig.show()


In [9]:
# Daily precipitation bar chart; drawn only when data exists and the
# TOTAL_PRECIPITATION column is present.
if not weather_data.empty:
    if 'TOTAL_PRECIPITATION' in weather_data.columns:
        precip_labels = {'TOTAL_PRECIPITATION': 'Precipitation (mm)', 'LOCAL_DATE': 'Date'}
        # update_layout returns the figure, so the legend can be hidden in the same expression
        fig = px.bar(
            weather_data,
            x='LOCAL_DATE',
            y='TOTAL_PRECIPITATION',
            title=f'Daily Precipitation - {station_name}',
            labels=precip_labels,
        ).update_layout(showlegend=False)
        fig.show()


In [10]:
# Get data from multiple stations for comparison.
# get_multi_station_data is imported with the other alviaorange functions in
# the notebook's import cell, so this cell carries no import of its own.

if len(nearest_stations) >= 3:
    # Get the top 3 stations
    top_3_stations = nearest_stations.head(3)
    station_ids = top_3_stations['CLIMATE_IDENTIFIER'].tolist()
    station_names = top_3_stations['STATION_NAME'].tolist()

    print("Getting data from multiple stations:")
    for name, sid in zip(station_names, station_ids):
        print(f"  - {name} ({sid})")

    # Get data without aggregation to compare stations
    multi_station_data = get_multi_station_data(
        station_ids=station_ids,
        start_date='2023-07-01',
        end_date='2023-07-31',
        data_type='daily',
        aggregate_method='none'  # Don't aggregate, keep separate
    )

    if not multi_station_data.empty:
        print(f"\nRetrieved data from {multi_station_data['STATION_ID'].nunique()} stations")
        print(f"Total records: {len(multi_station_data)}")

        # Create a mapping of station IDs to names for better visualization
        id_to_name = dict(zip(station_ids, station_names))
        # Label each row with its station name; an ID that is missing from the
        # mapping keeps the raw ID as its label instead of becoming NaN.
        multi_station_data['STATION_NAME'] = multi_station_data['STATION_ID'].map(
            lambda sid: id_to_name.get(sid, sid)
        )

        # Plot temperature comparison if available
        if 'MEAN_TEMPERATURE' in multi_station_data.columns:
            fig = px.line(
                multi_station_data,
                x='LOCAL_DATE',
                y='MEAN_TEMPERATURE',
                color='STATION_NAME',
                title='Temperature Comparison - Multiple Stations',
                labels={'MEAN_TEMPERATURE': 'Mean Temperature (°C)', 'LOCAL_DATE': 'Date'}
            )
            fig.show()
    else:
        print("No data available for the specified period")
else:
    print(f"Only {len(nearest_stations)} stations found, need at least 3 for comparison")


Getting data from multiple stations:
  - VANCOUVER HARBOUR CS (1108446)
  - VANCOUVER HARBOUR (110Q44V)
  - N VANCOUVER WHARVES (1105669)

Retrieved data from 3 stations
Total records: 93



The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



In [11]:
# Use the complete workflow to get aggregated data from 3 nearest stations
print("Running complete workflow...")
workflow_stations, aggregated_data = weather_station_workflow(
    point_coords=vancouver_coords,
    k=3,
    start_date='2023-07-01',
    end_date='2023-07-31',
    data_type='daily',
    aggregate_method='mean',  # Average across all stations
    max_distance_km=50,
    province='BC'
)

print(f"\nWorkflow found {len(workflow_stations)} stations:")
if not workflow_stations.empty:
    # Walk the two needed columns directly; the row index is not used
    for name, distance in zip(workflow_stations['STATION_NAME'], workflow_stations['distance_km']):
        print(f"  - {name} ({distance:.1f} km away)")

if not aggregated_data.empty:
    print(f"\nAggregated weather data shape: {aggregated_data.shape}")
    print("\nSample aggregated data:")
    # Always show the date; add the common measurements only if present
    display_cols = ['LOCAL_DATE'] + [
        col
        for col in ['MEAN_TEMPERATURE', 'MAX_TEMPERATURE', 'MIN_TEMPERATURE', 'TOTAL_PRECIPITATION']
        if col in aggregated_data.columns
    ]
    print(aggregated_data[display_cols].head())

    # Plot the aggregated temperature data
    if 'MEAN_TEMPERATURE' in aggregated_data.columns:
        # Label the chart with the number of stations the workflow actually
        # returned, not the requested k, so the title stays accurate when
        # fewer stations are found.
        station_count = len(workflow_stations)
        fig = px.line(
            aggregated_data,
            x='LOCAL_DATE',
            y='MEAN_TEMPERATURE',
            title=f'Aggregated Daily Mean Temperature ({station_count}-Station Average)',
            labels={'MEAN_TEMPERATURE': 'Temperature (°C)', 'LOCAL_DATE': 'Date'}
        )
        fig.update_traces(line=dict(width=3))
        fig.show()
else:
    print("No aggregated data available")


Running complete workflow...

Workflow found 3 stations:
  - VANCOUVER HARBOUR CS (1.4 km away)
  - VANCOUVER HARBOUR (1.5 km away)
  - N VANCOUVER WHARVES (3.6 km away)

Aggregated weather data shape: (31, 13)

Sample aggregated data:
  LOCAL_DATE  MEAN_TEMPERATURE  MAX_TEMPERATURE  MIN_TEMPERATURE  \
0 2023-07-01             19.25            23.85            14.60   
1 2023-07-02             18.60            22.65            14.50   
2 2023-07-03             19.55            24.50            14.60   
3 2023-07-04             21.20            25.95            16.40   
4 2023-07-05             22.75            28.35            17.05   

   TOTAL_PRECIPITATION  
0                  0.0  
1                  0.0  
2                  0.0  
3                  0.0  
4                  0.0  



The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.

