# Import & installation

In [None]:
!pip install python-metar xgboost lightgbm scikit-learn pandas numpy matplotlib seaborn requests

Collecting python-metar
  Downloading python-metar-1.4.0.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: python-metar
  Building wheel for python-metar (setup.py) ... [?25l[?25hdone
  Created wheel for python-metar: filename=python_metar-1.4.0-py3-none-any.whl size=16926 sha256=c7efeb736c88c1d2bd99137f073274fa7d52cf1d0c479078a1f1d02ae4e84b92
  Stored in directory: /root/.cache/pip/wheels/1c/c7/33/370bed0725fd1aab6f731fd77dadfc7b66bdb6998909b7d8d0
Successfully built python-metar
Installing collected packages: python-metar
Successfully installed python-metar-1.4.0


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import requests
import warnings
warnings.filterwarnings('ignore')
from metar import Metar
from sklearn.model_selection import train_test_split, TimeSeriesSplit
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, classification_report
from sklearn.linear_model import LogisticRegression
import xgboost as xgb
import lightgbm as lgb
import joblib
import re


np.random.seed(42)

In [None]:
# url_str = "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?network=IN__ASOS&station=" + input("Enter ICAO code\n") +"&data=metar&year1=2025&month1=8&day1=2&year2=2025&month2=8&day2=2&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=null&trace=0.0001&direct=no&report_type=3&report_type=4"
# print(url_str)

In [None]:
# Download the VIAR METAR archive (2018-01-01 to 2023-12-12, UTC) from mesonet.agron.iastate.edu
# and store the raw CSV response on disk.
url = "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?network=IN__ASOS&station=VIAR&data=metar&year1=2018&month1=1&day1=1&year2=2023&month2=12&day2=12&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=null&trace=0.0001&direct=no&report_type=3&report_type=4"
filename = "metar_data.csv"
# (connect, read) timeouts keep the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=(10, 300))
# Fail loudly on an HTTP error instead of saving the error page as if it were CSV data.
response.raise_for_status()
with open(filename, "wb") as f:
    f.write(response.content)
print("Data downloaded and saved as", filename)

Data downloaded and saved as metar_data.csv


In [None]:
# Load the downloaded archive and take a first look at its size, schema and leading rows.
df = pd.read_csv('metar_data.csv')
print("Dataset shape:", df.shape)
print("\nColumn info:")
# DataFrame.info() writes its report itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
df.info()
print("\nFirst few rows:")
print(df.head())

Dataset shape: (94732, 3)

Column info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 94732 entries, 0 to 94731
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   station  94732 non-null  object
 1   valid    94732 non-null  object
 2   metar    94732 non-null  object
dtypes: object(3)
memory usage: 2.2+ MB
None

First few rows:
  station             valid                                              metar
0    VIAR  2018-01-01 00:00  VIAR 010000Z 00000KT 0300 R34/0700 FG NSC 06/0...
1    VIAR  2018-01-01 00:30  VIAR 010030Z 00000KT 0300 R34/1000 FG NSC 06/0...
2    VIAR  2018-01-01 01:00  VIAR 010100Z 00000KT 0300 R34/0325 FG NSC 07/0...
3    VIAR  2018-01-01 01:30  VIAR 010130Z 00000KT 0400 R34/0450 FG NSC 06/0...
4    VIAR  2018-01-01 02:00  VIAR 010200Z 00000KT 0400 R34/1200 FG NSC 06/0...


# General Functions

In [None]:
def clean_metar_string(raw_metar: str) -> str:
    """
    Normalise a raw METAR string before it is handed to the parser.

    Steps, in order:
      1. trim surrounding whitespace;
      2. drop a leading METAR/SPECI keyword and/or COR/AMD flag (case-insensitive);
      3. remove runway visual range groups such as ``R34/0700``, ``RMID/2000`` or
         ``RM34/P2000`` together with any attached variable part (``V0800``),
         ``FT`` unit and tendency letters (``U``/``D``/``N``), so no orphan
         fragment such as a lone ``U`` is left in the report;
      4. remove the ``VV///`` (vertical visibility unknown) group;
      5. collapse the runs of whitespace left behind by the removals.

    Args:
        raw_metar: the raw report text.

    Returns:
        The cleaned report text.

    Raises:
        AttributeError: if ``raw_metar`` is not a string (``.strip()`` is called on it).
    """
    metar = raw_metar.strip()

    # Leading report-type keyword and correction/amendment flag.
    metar = re.sub(r'^(METAR|SPECI)?\s*(COR|AMD)?\s*', '', metar, flags=re.IGNORECASE)

    # Runway visual range groups, well-formed or not. The optional tail consumes
    # the variable-range part, the FT unit and the tendency indicator so that
    # e.g. "R34/0700U" or "R34/0400V0800D" disappears completely.
    metar = re.sub(r'R[M]?[A-Z0-9]{2,4}/P?\d{3,4}(?:V[PM]?\d{3,4})?(?:FT)?[/NDU]*', '', metar)

    # Vertical visibility reported as unknown.
    metar = re.sub(r'VV///', '', metar)

    # The removals above leave double spaces behind; squeeze them to one.
    metar = re.sub(r'\s{2,}', ' ', metar)

    return metar.strip()

def _metar_value(report, attribute):
    """Return ``report.<attribute>.value()``, or None when the attribute is missing or falsy."""
    field = getattr(report, attribute, None)
    return field.value() if field else None


def _flight_category(vis_m, ceiling_ft):
    """
    Map visibility and ceiling onto 'LIFR', 'IFR', 'MVFR' or 'VFR'.

    A category applies as soon as EITHER the visibility OR the ceiling is below
    its limit; a value of None never triggers a category. The most restrictive
    category is tested first.
    """
    def below(value, limit):
        return value is not None and value < limit

    if below(vis_m, 1600) or below(ceiling_ft, 500):
        return 'LIFR'
    if below(vis_m, 4800) or below(ceiling_ft, 1000):
        return 'IFR'
    if below(vis_m, 8000) or below(ceiling_ft, 3000):
        return 'MVFR'
    return 'VFR'


def parse_metar_comprehensive(metar_string):
    """
    Parse one METAR report into a flat dict of features.

    The report is cleaned with ``clean_metar_string`` and parsed with
    ``Metar.Metar``. The returned dict contains, in this order: 'wind_gust',
    'wind_spd_kt', 'wind_dir_deg', 'vis_m', 'temp_c', 'dewpt_c', 'pressure_hpa',
    'sky', 'dewpoint_spread', 'has_fog', 'has_mist', 'has_haze', 'has_rain',
    'weather_phenomena', 'ceiling_height_ft' and 'flight_category'.

    NOTE(review): every ``.value()`` call is made without a unit argument, so the
    numbers are presumably in whatever unit the report itself used; the key names
    (``_m``, ``_kt``, ``_hpa``) assume metric reports. Confirm for other stations.

    Args:
        metar_string: raw METAR text.

    Returns:
        The feature dict, or None when cleaning or parsing raised. This is a
        deliberate best-effort: the failure is printed and the caller decides
        what to do with the missing row.
    """
    try:
        report = Metar.Metar(clean_metar_string(metar_string))

        parsed_data = {
            'wind_gust': _metar_value(report, 'wind_gust'),
            'wind_spd_kt': _metar_value(report, 'wind_speed'),
            'wind_dir_deg': _metar_value(report, 'wind_dir'),
            'vis_m': _metar_value(report, 'vis'),
            'temp_c': _metar_value(report, 'temp'),
            'dewpt_c': _metar_value(report, 'dewpt'),
            'pressure_hpa': _metar_value(report, 'press'),
            'sky': getattr(report, 'sky', None) or [],
        }

        # Dew point spread is only defined when both temperatures were reported.
        if parsed_data['temp_c'] is not None and parsed_data['dewpt_c'] is not None:
            parsed_data['dewpoint_spread'] = parsed_data['temp_c'] - parsed_data['dewpt_c']
        else:
            parsed_data['dewpoint_spread'] = None

        # The weather groups are flattened to their string form once; the flags
        # below are plain substring tests against that string.
        weather = getattr(report, 'weather', None)
        weather_phenomena = str(weather) if weather else ""
        for flag, code in (('has_fog', 'FG'), ('has_mist', 'BR'),
                           ('has_haze', 'HZ'), ('has_rain', 'RA')):
            parsed_data[flag] = 1 if code in weather_phenomena else 0
        parsed_data['weather_phenomena'] = weather_phenomena

        # Ceiling = height of the first BKN/OVC layer, if any.
        ceiling_height = None
        for sky_layer in parsed_data['sky']:
            if sky_layer[0] in ('OVC', 'BKN'):
                ceiling_height = sky_layer[1].value() if sky_layer[1] else None
                break
        parsed_data['ceiling_height_ft'] = ceiling_height

        parsed_data['flight_category'] = _flight_category(parsed_data['vis_m'], ceiling_height)

        return parsed_data

    except Exception as e:
        print(f"Error parsing METAR: {metar_string}")
        print(f"Error details: {e}")
        return None


In [None]:
def analyze_dataset(df):
    """
    Print a short missing-data report for a DataFrame.

    The report contains the frame's shape, the per-column count of null values
    and the same counts expressed as a percentage of the row count.

    Args:
        df: pandas DataFrame to summarise.

    Returns:
        None; the report is written to standard output.
    """
    row_count = len(df)
    null_counts = df.isnull().sum()

    print("Dataset Shape:")
    print(df.shape)

    print("\nMissing Values Count out of: "+str(row_count))
    print(null_counts)

    print("\nPercentage of Missing Values:")
    print((null_counts / row_count) * 100)


In [None]:
from typing import List, Tuple, Optional, Union

def parse_metar_clouds(text: str) -> List[Union[Tuple[str, int, Optional[str]], str]]:
    """
    Extract cloud layers and the NSC / NOSIG markers from a METAR string.

    Cloud groups have the form ``<cover><hhh>[CB|TCU]`` where cover is one of
    FEW/SCT/BKN/OVC and ``hhh`` is the base in hundreds of feet. Groups carrying
    a ``TCU`` suffix are recognised as layers too; previously the trailing word
    boundary made them fail to match and the whole layer was lost.

    Args:
        text: METAR report text.

    Returns:
        A list holding, in this order:
        - one tuple ``(cover, base_height_ft, 'CB' or None)`` per cloud group, in
          report order (the third element is 'CB' only for a CB suffix);
        - the string "NSC" if that token is present;
        - the string "NOSIG" if that token is present.
    """
    pattern = r'\b(FEW|SCT|BKN|OVC)(\d{3})(CB|TCU)?\b'

    results = [
        # Height is reported in hundreds of feet.
        (cover, int(height_str) * 100, 'CB' if suffix == 'CB' else None)
        for cover, height_str, suffix in re.findall(pattern, text)
    ]

    # NSC and NOSIG are standalone tokens, looked up independently of the layers.
    if re.search(r'\bNSC\b', text):
        results.append("NSC")

    if re.search(r'\bNOSIG\b', text):
        results.append("NOSIG")

    return results

In [None]:
def extract_sky_features(metar_string):
    """
    Turn the cloud information of a METAR string into a flat feature dict.

    Keys produced:
      - ``layer{i}_cover``, ``layer{i}_height_ft``, ``layer{i}_cb`` for every
        parsed layer and at least for i = 1..4 (missing layers get
        None / None / 0);
      - ``nsc`` and ``nosig`` flags (0/1); when NSC is present ``layer1_cover``
        is overwritten with 'NSC';
      - ``overcast`` (0/1), set when any parsed layer has cover 'OVC'.
    """
    parsed = parse_metar_clouds(metar_string)
    layers = [entry for entry in parsed if isinstance(entry, tuple)]

    features = {}
    # Always emit at least four layer slots so every row yields the same columns.
    for slot in range(1, max(4, len(layers)) + 1):
        if slot <= len(layers):
            cover, height_ft, suffix = layers[slot - 1]
            cb_flag = 1 if suffix == 'CB' else 0
        else:
            cover, height_ft, cb_flag = None, None, 0
        features[f'layer{slot}_cover'] = cover
        features[f'layer{slot}_height_ft'] = height_ft
        features[f'layer{slot}_cb'] = cb_flag

    has_nsc = "NSC" in parsed
    features['nsc'] = 1 if has_nsc else 0
    features['nosig'] = 1 if "NOSIG" in parsed else 0
    if has_nsc:
        features['layer1_cover'] = 'NSC'

    features['overcast'] = 1 if any(layer[0] == 'OVC' for layer in layers) else 0

    return features

In [None]:
def add_lag_features(df, cols, lags=(1, 2, 3, 6)):
    """
    Add ``<col>_lag<k>`` columns holding each column shifted down by k rows.

    The frame is modified in place and also returned. Lags are counted in rows,
    so the rows are presumably expected to be time-ordered and evenly spaced —
    TODO confirm against the caller.

    Args:
        df: DataFrame to extend.
        cols: names of the columns to lag.
        lags: row offsets to generate; an immutable tuple is used as the default
            so the default can never be altered between calls.

    Returns:
        The same DataFrame object, with the lag columns added.
    """
    for col in cols:
        for lag in lags:
            df[f"{col}_lag{lag}"] = df[col].shift(lag)
    return df

In [None]:
def forecast_temperature(model, latest_data: pd.DataFrame, features: list, horizon=2, step_minutes=30):
    """
    Recursively forecast temperature ``horizon`` steps ahead.

    At every step the model is fed the feature values of the newest row, its
    prediction is recorded, and a synthetic next row is appended whose
    ``temp_c`` is that prediction.

    The synthetic row starts as a copy of the newest row, so features that
    cannot be forecast here (wind, pressure, dew point, ...) keep their last
    known value instead of becoming NaN. Previously only ``temp_c``, the time
    fields and the ``temp_c`` lags were filled, which fed NaN to the model for
    every other feature from the second step onwards. The time-derived columns
    are then recomputed, and every ``<col>_lag<k>`` column whose base column
    exists is rolled forward from that base column.

    NOTE(review): the prediction made from a row is treated as the temperature
    of the FOLLOWING time step; confirm this matches how the training target
    was built.

    Args:
        model: fitted estimator exposing ``predict(X)``.
        latest_data: most recent observations, oldest first. Must contain
            ``valid`` (timestamps), ``temp_c`` and every name in ``features``.
            It is not modified.
        features: column names passed to the model, in training order.
        horizon: number of steps to forecast (1 = next step, 2 = two steps, ...).
        step_minutes: length of one step in minutes (default 30).

    Returns:
        List with one prediction per step; empty when ``horizon`` is 0.
    """
    # Discover lag columns as (lag column, base column, lag in rows).
    lag_pattern = re.compile(r'^(.+)_lag(\d+)$')
    lag_columns = []
    for column in latest_data.columns:
        match = lag_pattern.match(column) if isinstance(column, str) else None
        if match and match.group(1) in latest_data.columns and int(match.group(2)) >= 1:
            lag_columns.append((column, match.group(1), int(match.group(2))))

    # Keep enough history for the largest lag (at least 6, as before) plus the current row.
    max_lag = max((steps for _, _, steps in lag_columns), default=0)
    required_length = max(6, max_lag, horizon) + 1
    temp_data = latest_data.tail(required_length).copy()
    step = pd.Timedelta(minutes=step_minutes)

    preds = []
    for _ in range(horizon):
        # Predict from the newest row's features.
        y_pred = model.predict(temp_data[features].iloc[[-1]])[0]
        preds.append(y_pred)

        # Time-derived features of the synthetic next row.
        next_valid = pd.to_datetime(temp_data['valid'].iloc[-1]) + step
        hour_fraction = (next_valid.hour + next_valid.minute / 60) / 24
        day_of_year = next_valid.dayofyear
        updates = {
            'temp_c': y_pred,
            'valid': next_valid,
            'hour': next_valid.hour,
            'minute': next_valid.minute,
            'hour_fraction': hour_fraction,
            'time_x': np.cos(2 * np.pi * hour_fraction),
            'time_y': np.sin(2 * np.pi * hour_fraction),
            'day_of_year': day_of_year,
            'day_x': np.cos(2 * np.pi * day_of_year / 365),
            'day_y': np.sin(2 * np.pi * day_of_year / 365),
        }

        # Lag k of the next row is the base value k rows back from the end of the
        # current window; with too little history the oldest value is used.
        available = len(temp_data)
        for column, base, steps in lag_columns:
            updates[column] = temp_data[base].iloc[-min(steps, available)]

        # Copy of the newest row (persistence for everything not overwritten).
        next_row = temp_data.iloc[[-1]].assign(**updates)
        # ignore_index avoids assuming an integer index on the caller's frame.
        temp_data = pd.concat([temp_data, next_row], ignore_index=True).tail(required_length)

    return preds

# Extracting from Drive

In [None]:
# Mount Google Drive at /content/drive (uses the google.colab helper, so this
# cell only works inside Colab).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Read avproj_dataset.csv from the mounted Drive into df2.
# NOTE(review): df2 is not referenced by any later cell visible in this notebook — confirm it is still needed.
file_path = '/content/drive/My Drive/Colab_CSV/avproj_dataset.csv'
df2 = pd.read_csv(file_path)
print(f"DataFrame loaded from: {file_path}")

DataFrame loaded from: /content/drive/My Drive/Colab_CSV/avproj_dataset.csv


# Models

# Load Final Model

## Temperature

### Random Forest (RF)

In [None]:
# Load the saved model from Google Drive
# NOTE(review): joblib.load unpickles the file, which can run arbitrary code —
# only load model files that come from a trusted source.
# `file_path` is a notebook-global that is reassigned here.
file_path = '/content/drive/My Drive/Colab_Models/Aviation_Models/rf_temp_forecast.pkl'
rf_final_temp = joblib.load(file_path)
print(f"Model loaded from: {file_path}")

Model loaded from: /content/drive/My Drive/Colab_Models/Aviation_Models/rf_temp_forecast.pkl


### XGBoost (XGB)

In [None]:
# Load the saved XGBoost model from Google Drive
# NOTE(review): joblib.load unpickles the file, which can run arbitrary code —
# only load model files that come from a trusted source.
# `file_path` is a notebook-global that is reassigned here.
file_path = '/content/drive/My Drive/Colab_Models/Aviation_Models/xgb_temp_forecast.pkl'
xgb_final_temp = joblib.load(file_path)
print(f"XGBoost model loaded from: {file_path}")

XGBoost model loaded from: /content/drive/My Drive/Colab_Models/Aviation_Models/xgb_temp_forecast.pkl


# Mock Prediction

## Mock Temperature Prediction

### Input and Preprocessing

In [None]:
from datetime import datetime

# Ask for the timestamp to forecast from. The prompt repeats until the input
# parses, so `input_datetime` is always freshly defined when this cell finishes;
# previously an invalid entry only printed a message and left the variable
# undefined (or holding the value of an earlier run) for the following cells.
while True:
    date_str = input("Enter the date (YYYY-MM-DD): ").strip()
    time_str = input("Enter the time (HH:MM:SS): ").strip()
    input_datetime_str = f"{date_str} {time_str}"

    try:
        input_datetime = datetime.strptime(input_datetime_str, '%Y-%m-%d %H:%M:%S')
    except ValueError:
        print("Invalid date or time format. Please use YYYY-MM-DD for date and HH:MM:SS for time.")
        continue

    print(f"You entered: {input_datetime}")
    break

Enter the date (YYYY-MM-DD): 2025-09-10
Enter the time (HH:MM:SS): 17:30:00
You entered: 2025-09-10 17:30:00


In [None]:
from datetime import timedelta

# --- Download -----------------------------------------------------------------
# Request a window of one day either side of the timestamp entered above
# (`input_datetime` comes from the input cell).
start_datetime = input_datetime - timedelta(days=1)
end_datetime = input_datetime + timedelta(days=1)

# The request takes the window as separate year/month/day query parameters.
year1 = start_datetime.year
month1 = start_datetime.month
day1 = start_datetime.day

year2 = end_datetime.year
month2 = end_datetime.month
day2 = end_datetime.day

url = f"https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?network=IN__ASOS&station=VIAR&data=metar&year1={year1}&month1={month1}&day1={day1}&year2={year2}&month2={month2}&day2={day2}&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=null&trace=0.0001&direct=no&report_type=3&report_type=4"
filename = "metar_forecast_data.csv" # Use a different filename to avoid overwriting
# (connect, read) timeouts keep the cell from hanging on a stalled connection.
response = requests.get(url, timeout=(10, 120))
# Fail loudly on an HTTP error instead of saving the error page as if it were CSV data.
response.raise_for_status()
with open(filename, "wb") as f:
    f.write(response.content)
print("Data downloaded and saved as", filename)

df_forecast = pd.read_csv(filename)

# --- Parse METAR attributes -----------------------------------------------------
# Keep one entry per report: a failed parse (None) becomes an empty dict, i.e. an
# all-NaN row. Dropping failures before the column-wise concat would shift every
# later attribute row onto the wrong METAR; the fill steps below handle the NaNs.
metar_attributes_fc = [
    item if item is not None else {}
    for item in df_forecast['metar'].apply(parse_metar_comprehensive).tolist()
]
attributes_df_fc = pd.DataFrame(metar_attributes_fc, index=df_forecast.index)
df_forecast = pd.concat([df_forecast, attributes_df_fc], axis=1)

# --- Sky / cloud-layer features ---------------------------------------------------
sky_features_fc = df_forecast['metar'].apply(extract_sky_features).tolist()
sky_features_df_fc = pd.DataFrame(sky_features_fc)
df_forecast = df_forecast.drop(columns=['sky', 'cloud'], errors='ignore')
# Remove any pre-existing layer columns so the concat below cannot duplicate them.
sky_feature_columns_to_drop_fc = [col for col in df_forecast.columns if any(f'layer{i}' in col or col in ['overcast', 'nsc', 'nosig'] for i in range(1, 5))]
df_forecast = df_forecast.drop(columns=sky_feature_columns_to_drop_fc, errors='ignore')
df_forecast = pd.concat([df_forecast, sky_features_df_fc], axis=1)

# Columns discarded before feature engineering.
columns_to_drop = [
    'wind_gust',
    'layer1_cb',
    'layer2_cover', 'layer2_height_ft', 'layer2_cb',
    'layer3_cover', 'layer3_height_ft', 'layer3_cb',
    'layer4_cover', 'layer4_height_ft', 'layer4_cb',
    'layer5_cover', 'layer5_height_ft', 'layer5_cb',
    'ceiling_height_ft',
]
df_forecast = df_forecast.drop(columns=columns_to_drop, errors='ignore')

# --- Fill gaps ----------------------------------------------------------------
# Results are assigned back to the frame: `df[col].method(inplace=True)` acts on a
# temporary Series and does nothing under pandas Copy-on-Write.
cols_to_interpolate_linear = ['temp_c', 'dewpt_c', 'pressure_hpa', 'dewpoint_spread', 'wind_spd_kt','vis_m' ]
for col in cols_to_interpolate_linear:
    df_forecast[col] = df_forecast[col].interpolate(method='linear')

cols_to_fill_fb = ['has_fog', 'has_mist', 'has_haze', 'has_rain']
for col in cols_to_fill_fb:
    df_forecast[col] = df_forecast[col].ffill().bfill()

# Missing wind direction is encoded as 360 degrees.
df_forecast['wind_dir_deg'] = df_forecast['wind_dir_deg'].fillna(360)
# NSC reports carry no layer height; a fixed 25000 ft placeholder is used instead.
df_forecast.loc[(df_forecast['nsc'] == 1) & (df_forecast['layer1_height_ft'].isnull()), 'layer1_height_ft'] = 25000
df_forecast['layer1_height_ft'] = df_forecast['layer1_height_ft'].interpolate(method='linear')
df_forecast['flight_category'] = df_forecast['flight_category'].ffill()

# --- Lag features ---------------------------------------------------------------
df_forecast = add_lag_features(df_forecast, ['temp_c', 'dewpt_c', 'vis_m', 'wind_spd_kt'])
cols_to_fill_lag = [col for col in df_forecast.columns if '_lag' in col] # Dynamically get lag columns
# Shifting leaves NaN in the first rows of each lag column: back-fill, then
# forward-fill whatever is left, once for all lag columns.
df_forecast[cols_to_fill_lag] = df_forecast[cols_to_fill_lag].bfill().ffill()

# --- Cyclic encodings -------------------------------------------------------------
df_forecast['valid'] = pd.to_datetime(df_forecast['valid'])
df_forecast['wind_dir_rad'] = np.deg2rad(df_forecast['wind_dir_deg'])
df_forecast['wind_x'] = np.cos(df_forecast['wind_dir_rad'])
df_forecast['wind_y'] = np.sin(df_forecast['wind_dir_rad'])

# Time of day as a point on the unit circle.
df_forecast['hour'] = df_forecast['valid'].dt.hour
df_forecast['minute'] = df_forecast['valid'].dt.minute
df_forecast['hour_fraction'] = (df_forecast['hour'] + df_forecast['minute'] / 60) / 24
df_forecast['time_x'] = np.cos(2 * np.pi * df_forecast['hour_fraction'])
df_forecast['time_y'] = np.sin(2 * np.pi * df_forecast['hour_fraction'])

# Day of year as a point on the unit circle.
df_forecast['day_of_year'] = df_forecast['valid'].dt.dayofyear
df_forecast['day_x'] = np.cos(2 * np.pi * df_forecast['day_of_year'] / 365)
df_forecast['day_y'] = np.sin(2 * np.pi * df_forecast['day_of_year'] / 365)

print("\nForecast data downloaded and preprocessed.")

Data downloaded and saved as metar_forecast_data.csv

Forecast data downloaded and preprocessed.


In [None]:
# Keep only the observations inside the 12-hour window that ends at the
# requested timestamp (both ends inclusive).
twelve_hours_ago = input_datetime - timedelta(hours=12)
valid_times = df_forecast['valid']
window_mask = (valid_times <= input_datetime) & (valid_times >= twelve_hours_ago)
df_forecast_filtered = df_forecast[window_mask].copy()

print(f"Original df_forecast shape: {df_forecast.shape}")
print(f"Filtered df_forecast_filtered shape (last 12 hours): {df_forecast_filtered.shape}")

Original df_forecast shape: (72, 48)
Filtered df_forecast_filtered shape (last 12 hours): (25, 48)


### Forecast

In [None]:
# Forecast with the Random Forest model. More than 6 rows are required so that
# the largest lag (6) is covered by real observations.
if len(df_forecast_filtered) > 6:
    # Features fed to the model, in this order.
    # NOTE(review): presumably identical to the training feature list — confirm against the training notebook.
    features_for_prediction = ['wind_spd_kt', 'pressure_hpa', 'temp_c_lag1', 'temp_c_lag2', 'temp_c_lag3', 'temp_c_lag6',
                'dewpt_c_lag1', 'dewpt_c_lag2', 'dewpt_c_lag3', 'dewpt_c_lag6', 'time_x', 'time_y', 'day_of_year', 'day_x', 'day_y']

    # Two 30-minute steps = one hour ahead; the timestamp below is derived from
    # the same number so the two cannot drift apart.
    forecast_steps = 2
    forecasted_temps = forecast_temperature(rf_final_temp, df_forecast_filtered, features_for_prediction, horizon=forecast_steps)

    # The forecast applies to the last observation time plus the forecast horizon.
    last_timestamp = df_forecast_filtered['valid'].iloc[-1]
    forecast_timestamp = last_timestamp + timedelta(minutes=30 * forecast_steps)

    print(f"\nForecast for {forecast_timestamp}: {forecasted_temps[-1]:.2f} °C")
else:
    print("df_forecast_filtered does not have enough data points for forecasting with the specified lags.")


Forecast for 2025-09-10 18:30:00: 27.04 °C


In [None]:
# Show the most recent observed temperatures for comparison with the forecast.
# The variable name is kept for compatibility although it holds three rows; the
# heading now reports the actual number of rows printed.
last_two_rows = df_forecast_filtered.tail(3)

print(f"Last {len(last_two_rows)} temperature values from filtered forecast data:")
for valid_time, temp_c in zip(last_two_rows['valid'], last_two_rows['temp_c']):
    print(f"Time: {valid_time}, Temperature: {temp_c}")

Last two temperature values from filtered forecast data:
Time: 2025-09-10 16:30:00, Temperature: 27.0
Time: 2025-09-10 17:00:00, Temperature: 27.0
Time: 2025-09-10 17:30:00, Temperature: 27.0


In [None]:
# Forecast with both models from the same input window. More than 6 rows are
# required so that the largest lag (6) is covered by real observations.
if len(df_forecast_filtered) > 6:
    # Features fed to the models, in this order.
    # NOTE(review): presumably identical to the training feature list — confirm against the training notebook.
    features_for_prediction = ['wind_spd_kt', 'pressure_hpa', 'temp_c_lag1', 'temp_c_lag2', 'temp_c_lag3', 'temp_c_lag6',
                'dewpt_c_lag1', 'dewpt_c_lag2', 'dewpt_c_lag3', 'dewpt_c_lag6', 'time_x', 'time_y', 'day_of_year', 'day_x', 'day_y']

    # Two 30-minute steps = one hour ahead; the timestamp below is derived from
    # the same number so the two cannot drift apart.
    forecast_steps = 2

    # Forecast using Random Forest model
    rf_forecasted_temps = forecast_temperature(rf_final_temp, df_forecast_filtered, features_for_prediction, horizon=forecast_steps)

    # Forecast using XGBoost model
    xgb_forecasted_temps = forecast_temperature(xgb_final_temp, df_forecast_filtered, features_for_prediction, horizon=forecast_steps)

    # The forecast applies to the last observation time plus the forecast horizon.
    last_timestamp = df_forecast_filtered['valid'].iloc[-1]
    forecast_timestamp = last_timestamp + timedelta(minutes=30 * forecast_steps)

    print(f"\nRandom Forest Forecast for {forecast_timestamp}: {rf_forecasted_temps[-1]:.2f} °C")
    print(f"XGBoost Forecast for {forecast_timestamp}: {xgb_forecasted_temps[-1]:.2f} °C")
else:
    print("df_forecast_filtered does not have enough data points for forecasting with the specified lags.")


Random Forest Forecast for 2025-09-10 18:30:00: 27.04 °C
XGBoost Forecast for 2025-09-10 18:30:00: 23.89 °C
