In [12]:
import pandas as pd
import xgboost as xgb
import numpy as np

# Load the dataset
df = pd.read_csv('/home/bkelley/capstone/data_collection/weather/data/hourly_weather_with_temp_avg.csv')

# Display the dataframe columns to confirm
print("DataFrame columns:", df.columns)

# Load the saved XGBoost model
xgb_model = xgb.Booster()
xgb_model.load_model('/home/bkelley/capstone/xgb_model_pres_temp.json')

# Define features based on the model (without 'date')
features = [
    'relative_humidity_2m', 'precipitation', 'rain', 
    'weather_code', 'surface_pressure', 'cloud_cover', 'wind_speed_10m',
    'wind_speed_100m', 'wind_direction_10m', 'wind_direction_100m', 
    'temperature_2m_K', 'surface_pressure_Pa', 'density', 'speed_of_sound'
]

# Prepare the data for prediction
X = df[features]

# One-hot encoding for 'weather_code' (if used during model training)
X = pd.get_dummies(X, columns=['weather_code'], drop_first=True)

# Ensure all columns used in training are present in the test data
# Add missing columns if necessary
missing_cols = set(xgb_model.feature_names) - set(X.columns)
for col in missing_cols:
    X[col] = 0  # Add missing columns as zeros

# Reorder columns to match the order used in training
X = X[xgb_model.feature_names]

# Create DMatrix for XGBoost prediction
dtest = xgb.DMatrix(X)

# Make predictions
predictions = xgb_model.predict(dtest)

# Assign predictions to the dataframe
df['predictions'] = predictions

# Create a date range for 15 days in the future (with hourly frequency)
future_dates = pd.date_range(start=df['date'].max(), periods=15*24, freq='h')

# Use the last 24 hours of data to extrapolate
future_data = df.iloc[-24:].copy()

# Repeat the last 24 hours data to cover 15 days
future_data = pd.concat([future_data] * 15, ignore_index=True)

# Assign future dates to the future data
future_data['date'] = future_dates

# Prepare future data for prediction (same process as earlier)
X_future = future_data[features]

# One-hot encoding for 'weather_code' in the future data
X_future = pd.get_dummies(X_future, columns=['weather_code'], drop_first=True)

# Ensure all columns used in training are present in the future data
missing_cols = set(X.columns) - set(X_future.columns)
for col in missing_cols:
    X_future[col] = 0  # Add missing columns as zeros

# Reorder columns to match the order used in training
X_future = X_future[X.columns]

# Create DMatrix for the future data
dtest_future = xgb.DMatrix(X_future)

# Make predictions for the next 15 days
future_predictions = xgb_model.predict(dtest_future)

# Add future predictions to the future_data dataframe
future_data['predictions'] = future_predictions

# Display the forecast for the next 15 days
# print(future_data[['date', 'predictions']])
print(future_data)
future_data.to_csv('/home/bkelley/capstone/data_collection/weather/data/model_forecast.csv')

DataFrame columns: Index(['date', 'temperature_2m', 'relative_humidity_2m', 'precipitation',
       'rain', 'weather_code', 'surface_pressure', 'cloud_cover',
       'wind_speed_10m', 'wind_speed_100m', 'wind_direction_10m',
       'wind_direction_100m', 'temperature_2m_K', 'surface_pressure_Pa',
       'density', 'speed_of_sound'],
      dtype='object')
                         date  temperature_2m  relative_humidity_2m  \
0   2024-10-12 23:00:00+00:00       14.535001             72.676550   
1   2024-10-13 00:00:00+00:00       14.485001             77.689730   
2   2024-10-13 01:00:00+00:00       14.285001             75.107925   
3   2024-10-13 02:00:00+00:00       13.785001             77.064150   
4   2024-10-13 03:00:00+00:00       13.385000             78.042060   
..                        ...             ...                   ...   
355 2024-10-27 18:00:00+00:00       24.734999             43.340990   
356 2024-10-27 19:00:00+00:00       25.035000             40.911613   
357 