In [1]:
import sys

# Add the weatherpy_class directory to the path
sys.path.insert(0, r'C:\Users\Administrator\Documents\weatherpy_class')
import logging
import pandas as pd
from pathlib import Path

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Import weatherpy classes
from weatherpy.data.wd_importer import BOMWeatherDataImporter, NOAAWeatherDataImporter
from weatherpy.data.wd_unifier import BOMWeatherDataUnifier, NOAAWeatherDataUnifier
from weatherpy.data.wd_cleaner import BOMDataCleaner, NOAADataCleaner
from weatherpy.data.wd_base import WeatherData


def process_bom_data():
    """
    Example of processing BOM weather data using explicit classes.
    """
    logger.info("=== Processing BOM Weather Data ===")
    
    # Step 1: Import data using BOMWeatherDataImporter
    station_id = '066037'  # Example BOM station ID (Wyndham Aero)
    
    logger.info(f"Importing BOM data for station {station_id}")
    importer = BOMWeatherDataImporter(
        station_id=station_id,
        year_start=2019,
        year_end=2020,
        time_zone='LocalTime',
        interval=60  # 60-minute data
    )
    
    # Import the data
    weather_data = importer.import_data(save_raw=True)
    logger.info(f"Imported data shape: {weather_data.data.shape}")
    logger.info(f"Imported data columns: {weather_data.data.columns.tolist()}")
    
    # Step 2: Unify data using BOMWeatherDataUnifier
    logger.info("Unifying BOM data")
    unifier = BOMWeatherDataUnifier()
    unified_data = unifier.unify_data(weather_data)
    logger.info(f"Unified data shape: {unified_data.data.shape}")
    logger.info(f"Unified data columns: {unified_data.data.columns.tolist()}")
    
    # Step 3: Clean data using BOMDataCleaner
    # FIXED: Use BOMDataCleaner instead of WeatherDataCleaner, and pass unified_data to constructor
    logger.info("Cleaning BOM data")
    cleaner = BOMDataCleaner(unified_data)  # Pass the WeatherData object
    cleaned_data = cleaner.clean_data(inplace=False)  # Call clean_data not clean
    logger.info(f"Cleaned data shape: {cleaned_data.data.shape}")
    
    # Display operations log
    logger.info("Operations performed on BOM data:")
    for op in cleaned_data.operations_log:
        logger.info(f"  - {op}")
    
    return cleaned_data


def process_noaa_data():
    """
    Example of processing NOAA weather data using explicit classes.
    """
    logger.info("\n=== Processing NOAA Weather Data ===")
    
    # Step 1: Import data using NOAAWeatherDataImporter
    station_id = '72509014739'  # Example NOAA station ID
    
    logger.info(f"Importing NOAA data for station {station_id}")
    importer = NOAAWeatherDataImporter(
        station_id=station_id,
        year_start=2019,
        year_end=2020,
        time_zone='UTC'
    )
    
    # Import the data
    weather_data = importer.import_data(save_raw=True)
    logger.info(f"Imported data shape: {weather_data.data.shape}")
    logger.info(f"Imported data columns: {weather_data.data.columns.tolist()}")
    
    # Step 2: Unify data using NOAAWeatherDataUnifier
    logger.info("Unifying NOAA data")
    unifier = NOAAWeatherDataUnifier()
    unified_data = unifier.unify_data(weather_data)
    logger.info(f"Unified data shape: {unified_data.data.shape}")
    logger.info(f"Unified data columns: {unified_data.data.columns.tolist()}")
    
    # Step 3: Clean data using NOAADataCleaner
    # FIXED: Use NOAADataCleaner instead of WeatherDataCleaner, and pass unified_data to constructor
    logger.info("Cleaning NOAA data")
    cleaner = NOAADataCleaner(unified_data)  # Pass the WeatherData object
    cleaned_data = cleaner.clean_data(inplace=False)  # Call clean_data not clean
    logger.info(f"Cleaned data shape: {cleaned_data.data.shape}")
    
    # Display operations log
    logger.info("Operations performed on NOAA data:")
    for op in cleaned_data.operations_log:
        logger.info(f"  - {op}")
    
    return cleaned_data


def process_data_with_base_class():
    """
    Example of processing weather data using the WeatherData base class.
    This demonstrates the simplified API that chains operations.
    """
    logger.info("\n=== Processing Weather Data with Base Class ===")
    
     # Initialize separate WeatherData classes for each data type
    bom_wd = WeatherData()
    noaa_wd = WeatherData()
    
    # Import, unify, and clean BOM data in a chain
    logger.info("Processing BOM data with WeatherData class")
    bom_data = bom_wd.import_data(
        station_id='066037',
        year_start=2019,
        year_end=2020
    ).unify(
        inplace=True
    ).clean(
        inplace=True
    )
    
    logger.info(f"Processed BOM data shape: {bom_data.data.shape}")
    
    # Import, unify, and clean NOAA data in a chain
    logger.info("Processing NOAA data with WeatherData class")
    noaa_data = noaa_wd.import_data(
        station_id='72509014739',
        data_type='NOAA',
        year_start=2019,
        year_end=2020,
        time_zone='UTC'
    ).unify(
        inplace=True
    ).clean(
        inplace=True
    )
    
    logger.info(f"Processed NOAA data shape: {noaa_data.data.shape}")
    
    # Display operations log
    logger.info("Operations performed on NOAA data:")
    for op in noaa_data.operations_log:
        logger.info(f"  - {op}")
    
    return bom_data, noaa_data


def compare_methods(bom_explicit, bom_base, noaa_explicit, noaa_base):
    """
    Compare results from explicit class usage vs. base class usage.
    """
    logger.info("\n=== Comparing Methods ===")
    
    # Compare BOM data
    logger.info("Comparing BOM data processing methods:")
    bom_explicit_cols = set(bom_explicit.data.columns)
    bom_base_cols = set(bom_base.data.columns)
    
    logger.info(f"Explicit method columns: {len(bom_explicit_cols)}")
    logger.info(f"Base class method columns: {len(bom_base_cols)}")
    logger.info(f"Column differences: {bom_explicit_cols.symmetric_difference(bom_base_cols)}")
    
    # Compare NOAA data
    logger.info("Comparing NOAA data processing methods:")
    noaa_explicit_cols = set(noaa_explicit.data.columns)
    noaa_base_cols = set(noaa_base.data.columns)
    
    logger.info(f"Explicit method columns: {len(noaa_explicit_cols)}")
    logger.info(f"Base class method columns: {len(noaa_base_cols)}")
    logger.info(f"Column differences: {noaa_explicit_cols.symmetric_difference(noaa_base_cols)}")



# Process data using explicit classes
bom_explicit = process_bom_data()
noaa_explicit = process_noaa_data()

# Process data using base class
bom_base, noaa_base = process_data_with_base_class()

# Compare methods
compare_methods(bom_explicit, bom_base, noaa_explicit, noaa_base)

logger.info("\nAll processing completed successfully!")
        

2025-03-23 05:14:47,943 - INFO - === Processing BOM Weather Data ===
2025-03-23 05:14:47,945 - INFO - Importing BOM data for station 066037
2025-03-23 05:14:48,039 - INFO - Imported data shape: (17544, 19)
2025-03-23 05:14:48,040 - INFO - Imported data columns: ['UTC', 'Rain', 'RainIntensity', 'DryBulbTemperature', 'DryBulbTemperatureMax', 'DryBulbTemperatureMin', 'WetBulbTemperature', 'DewPointTemperature', 'RelativeHumidity', 'VapourPressure', 'SaturatedPressure', 'WindSpeed', 'WindSpeedSTD', 'WindSpeedCompleteness', 'WindDirection', 'WindDirectionSTD', 'WindGust', 'SeaLevelPressure', 'StationLevelPressure']
2025-03-23 05:14:48,041 - INFO - Unifying BOM data
2025-03-23 05:14:48,077 - INFO - Unified data shape: (17544, 16)
2025-03-23 05:14:48,078 - INFO - Unified data columns: ['UTC', 'WindDirection', 'WindSpeed', 'WindGust', 'SeaLevelPressure', 'DryBulbTemperature', 'WetBulbTemperature', 'DewPointTemperature', 'RelativeHumidity', 'Rain', 'RainIntensity', 'RainCumulative', 'CloudHeigh

Using cached data for station 066037


2025-03-23 05:14:48,443 - INFO - Imported data shape: (27835, 37)
2025-03-23 05:14:48,444 - INFO - Imported data columns: ['LocalTime', 'REPORT_TYPE', 'QUALITY_CONTROL', 'LONGITUDE', 'ELEVATION', 'SOURCE', 'LATITUDE', 'CALL_SIGN', 'WindDirection', 'QCWindDirection', 'WindType', 'WindSpeed', 'QCWindSpeed', 'CloudHgt', 'QCCloudHgt', 'CeilingDetCode', 'CavokCode', 'Visibility', 'QCVisibility', 'VisibilityVarCode', 'QCVisVar', 'DryBulbTemperature', 'QCTemperature', 'DewPointTemperature', 'QCDewPoint', 'SeaLevelPressure', 'QCSeaLevelPressure', 'CloudOktas', 'GA2', 'GA3', 'GA4', 'GA5', 'GA6', 'AA1', 'RainCumulative', 'AA3', 'AA4']
2025-03-23 05:14:48,444 - INFO - Unifying NOAA data
2025-03-23 05:14:48,554 - INFO - Unified data shape: (27835, 22)
2025-03-23 05:14:48,556 - INFO - Unified data columns: ['LocalTime', 'WindDirection', 'WindSpeed', 'WindGust', 'SeaLevelPressure', 'DryBulbTemperature', 'WetBulbTemperature', 'DewPointTemperature', 'RelativeHumidity', 'Rain', 'RainIntensity', 'RainCu

Using cached data for station 72509014739


2025-03-23 05:14:48,636 - INFO - Cleaned data shape: (26830, 22)
2025-03-23 05:14:48,637 - INFO - Operations performed on NOAA data:
2025-03-23 05:14:48,638 - INFO -   - {'timestamp': '2025-03-23T05:14:48.476463', 'class': 'System', 'method': 'Initialize', 'inputs': {'data_type': None, 'interval': None}, 'outputs': {}}
2025-03-23 05:14:48,639 - INFO -   - {'timestamp': '2025-03-23T05:14:48.477365', 'class': 'Unifier', 'method': 'unify_data', 'inputs': {'additional_columns': None, 'inplace': False}, 'outputs': {'columns_before': ['LocalTime', 'REPORT_TYPE', 'QUALITY_CONTROL', 'LONGITUDE', 'ELEVATION', 'SOURCE', 'LATITUDE', 'CALL_SIGN', 'WindDirection', 'QCWindDirection', 'WindType', 'WindSpeed', 'QCWindSpeed', 'CloudHgt', 'QCCloudHgt', 'CeilingDetCode', 'CavokCode', 'Visibility', 'QCVisibility', 'VisibilityVarCode', 'QCVisVar', 'DryBulbTemperature', 'QCTemperature', 'DewPointTemperature', 'QCDewPoint', 'SeaLevelPressure', 'QCSeaLevelPressure', 'CloudOktas', 'GA2', 'GA3', 'GA4', 'GA5', '

Using cached data for station 066037


2025-03-23 05:14:49,183 - INFO - Removed 1005 rows with invalid values in columns: ['WindSpeed', 'WindDirection']


Using cached data for station 72509014739


2025-03-23 05:14:49,247 - INFO - Processed NOAA data shape: (26830, 22)
2025-03-23 05:14:49,248 - INFO - Operations performed on NOAA data:
2025-03-23 05:14:49,249 - INFO -   - {'timestamp': '2025-03-23T05:14:48.660765', 'class': 'System', 'method': 'Initialize', 'inputs': {'data_type': None, 'interval': None}, 'outputs': {}}
2025-03-23 05:14:49,250 - INFO -   - {'timestamp': '2025-03-23T05:14:49.064940', 'class': 'UpdateStation', 'method': {'station_id': '72509014739'}, 'inputs': {}, 'outputs': {}}
2025-03-23 05:14:49,250 - INFO -   - {'timestamp': '2025-03-23T05:14:49.064940', 'class': 'UpdateDataType', 'method': {'data_type': 'NOAA'}, 'inputs': {}, 'outputs': {}}
2025-03-23 05:14:49,251 - INFO -   - {'timestamp': '2025-03-23T05:14:49.064940', 'class': 'UpdateInterval', 'method': {'interval': 30}, 'inputs': {}, 'outputs': {}}
2025-03-23 05:14:49,251 - INFO -   - {'timestamp': '2025-03-23T05:14:49.088409', 'class': 'Unifier', 'method': 'unify_data', 'inputs': {'additional_columns': No

In [2]:
bom_explicit_data = bom_explicit.data
noaa_explicit_data = noaa_explicit.data

bom_base_data = bom_base.data
noaa_base_data = noaa_base.data


In [11]:
# Extract operation method and dataChanged status
operation_info = [(entry['class'] + '.' + entry['method'], 
                  entry['outputs'].get('dataChanged', None)) 
                  for entry in bom_explicit.operations_log]

# Print in a readable format
for operation, changed in operation_info:
    print(f"Operation: {operation:<30} Data Changed: {changed}")

Operation: System.Initialize              Data Changed: None
Operation: Unifier.unify_data             Data Changed: None
Operation: BOMUnifier.adjust_wind_direction Data Changed: True
Operation: Unifier.round_wind_direction   Data Changed: True
Operation: Unifier.zero_calm_direction    Data Changed: True
Operation: Cleaner.clean_invalid          Data Changed: True
Operation: Cleaner.clean_threshold        Data Changed: True
Operation: Cleaner.clean_duplicates       Data Changed: False
