In [1]:
#!/usr/bin/env python3

import pandas as pd
import numpy as np

# Input/output CSV locations. Every file lives in the same weather data
# directory, so the directory is spelled out once and reused below.
_DATA_DIR = '/home/bkelley/capstone/data_collection/weather/data'

# Daily records: cleaned input and the enriched output written by this script.
DAILY_FILEPATH = f'{_DATA_DIR}/cleaned_daily_weather_data.csv'
NEW_DAILY_FILEPATH = f'{_DATA_DIR}/daily_weather_with_temp_avg.csv'

# Hourly records: cleaned input and the enriched output written by this script.
HOURLY_FILEPATH = f'{_DATA_DIR}/cleaned_hourly_weather_data.csv'
NEW_HOURLY_FILEPATH = f'{_DATA_DIR}/hourly_weather_with_temp_avg.csv'

# Constants
MO = 28.9644  # Molar mass of dry air (g/mol)
R = 8.31432   # Universal gas constant (J/(K·mol))
gamma = 1.40  # Adiabatic index for air (ratio of specific heats, dimensionless)


def process_weather_data(filepath, new_filepath):
    """Enrich a weather CSV with derived columns and write it to a new CSV.

    The input is read with its ``date`` column as the index, missing values
    are forward-filled, and the index is converted to pandas datetimes.
    Derived columns are then added depending on which inputs are present:

    * ``temp_avg`` -- mean of ``temperature_2m_max`` and
      ``temperature_2m_min`` (only when both columns exist).
    * When both ``surface_pressure`` (millibars) and ``temperature_2m``
      (degrees Celsius) exist:

      - ``temperature_2m_K``    -- temperature in Kelvin
      - ``surface_pressure_Pa`` -- pressure in Pascals
      - ``density``             -- dry-air density from the ideal gas law,
        in kg/m^3
      - ``speed_of_sound``      -- ideal-gas speed of sound, in m/s

      Otherwise a message naming the missing columns is printed and none of
      these four columns is added.

    The resulting table is always written to ``new_filepath``.

    Args:
        filepath: Path of the CSV file to read; it must contain a ``date``
            column.
        new_filepath: Path the enriched CSV is written to.

    Returns:
        None. Results are written to ``new_filepath`` and progress messages
        are printed to stdout.
    """
    # Load the data
    data = pd.read_csv(filepath, index_col='date')

    # Forward fill missing data
    data = data.ffill()

    # Convert index to pandas datetime
    data.index = pd.to_datetime(data.index)

    # Daily records carry only min/max temperatures; use their midpoint as
    # the average temperature.
    if 'temperature_2m_max' in data.columns and 'temperature_2m_min' in data.columns:
        data['temp_avg'] = (data['temperature_2m_max'] + data['temperature_2m_min']) / 2

    # Ensure 'surface_pressure' and 'temperature_2m' columns exist
    if 'surface_pressure' in data.columns and 'temperature_2m' in data.columns:
        # MO is tabulated in g/mol while R is in J/(K·mol), i.e. it is built
        # on kilograms. Mixing them directly yields density in g/m^3 and a
        # speed of sound that is too small by a factor of sqrt(1000), so the
        # molar mass is converted to kg/mol before it is combined with R.
        molar_mass_kg = MO / 1000.0
        # Specific gas constant of dry air, J/(kg·K) (about 287.05).
        specific_gas_constant = R / molar_mass_kg

        # Convert temperature from Celsius to Kelvin
        data['temperature_2m_K'] = data['temperature_2m'] + 273.15

        # Convert surface pressure from millibars to Pascals (1 millibar = 100 Pascals)
        data['surface_pressure_Pa'] = data['surface_pressure'] * 100

        # Ideal gas law: rho = P / (R_specific * T), in kg/m^3
        data['density'] = data['surface_pressure_Pa'] / (
            specific_gas_constant * data['temperature_2m_K']
        )
        print("Added 'density' column.")

        # Ideal-gas speed of sound: c = sqrt(gamma * R_specific * T), in m/s
        data['speed_of_sound'] = np.sqrt(
            gamma * specific_gas_constant * data['temperature_2m_K']
        )
        print("Added 'speed_of_sound' column.")
    else:
        print(f"Required columns ('surface_pressure' or 'temperature_2m') are missing in {filepath}.")

    # Save the enriched dataset to a new CSV file
    data.to_csv(new_filepath)
    print(f"New file saved at: {new_filepath}")

# Run the enrichment for each dataset: hourly first, then daily.
for source_path, output_path in (
    (HOURLY_FILEPATH, NEW_HOURLY_FILEPATH),
    (DAILY_FILEPATH, NEW_DAILY_FILEPATH),
):
    process_weather_data(source_path, output_path)


Added 'density' column.
Added 'speed_of_sound' column.


New file saved at: /home/bkelley/capstone/data_collection/weather/data/hourly_weather_with_temp_avg.csv
Required columns ('surface_pressure' or 'temperature_2m') are missing in /home/bkelley/capstone/data_collection/weather/data/cleaned_daily_weather_data.csv.


New file saved at: /home/bkelley/capstone/data_collection/weather/data/daily_weather_with_temp_avg.csv
