In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense



In [11]:
def load_and_preprocess_data(file_paths=None):
    """Load the yearly rainfall workbooks and return one time-indexed frame.

    Each workbook is read with ``pd.read_excel``, reduced to the timestamp
    and the two rain columns, and concatenated. The timestamp column is
    parsed, rows are sorted chronologically, missing rain values are
    replaced with 0, and the timestamp becomes the index.

    Parameters
    ----------
    file_paths : str or iterable of str, optional
        Workbook(s) to load. When omitted, the three MORONG RIZAL
        2022/2023/2024 exports are read from the working directory.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Rain - mm'`` and ``'Rain Rate - mm/h'`` indexed by
        ``'Date & Time'`` in ascending order.

    Raises
    ------
    ValueError
        If ``file_paths`` is an empty iterable.
    """
    if file_paths is None:
        file_paths = (
            'Region_4A_DA_MORONG_RIZAL_1-1-22_12-00_AM_1_Year_1732084536_v2.xlsx',
            'Region_4A_DA_MORONG_RIZAL_1-1-23_12-00_AM_1_Year_1732084578_v2.xlsx',
            'Region_4A_DA_MORONG_RIZAL_1-1-24_12-00_AM_1_Year_1732084607_v2.xlsx',
        )
    elif isinstance(file_paths, str):
        # A bare string would otherwise be iterated character by character.
        file_paths = [file_paths]

    file_paths = list(file_paths)
    if not file_paths:
        raise ValueError("file_paths must contain at least one workbook path")

    cols_needed = ['Date & Time', 'Rain - mm', 'Rain Rate - mm/h']
    rain_cols = ['Rain - mm', 'Rain Rate - mm/h']

    yearly_frames = [pd.read_excel(path)[cols_needed] for path in file_paths]
    combined_df = pd.concat(yearly_frames, ignore_index=True)

    combined_df['Date & Time'] = pd.to_datetime(
        combined_df['Date & Time'], format='%m/%d/%y %I:%M %p'
    )
    combined_df = combined_df.sort_values(by='Date & Time')
    combined_df[rain_cols] = combined_df[rain_cols].fillna(0)

    # Rebind instead of mutating in place so every step yields a new frame.
    combined_df = combined_df.set_index('Date & Time')

    print("Combined Data Head:")
    print(combined_df.head())
    print("\nSummary of Combined Data:")
    print(combined_df.describe())
    return combined_df

In [15]:
def resample_data(df):
    """Down-sample a time-indexed frame to 15-minute, 1-hour and 2-hour means.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a datetime-like index; ``DataFrame.resample`` is called
        directly on it.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(resampled_15min, resampled_1hr, resampled_2hr)``, each holding
        the per-bin mean of every column.
    """
    # 'min' and 'h' replace the 'T' and 'H' aliases, which pandas 2.2
    # deprecated; the lowercase spellings are also accepted by older releases.
    # NOTE(review): every column is averaged. If 'Rain - mm' is a per-interval
    # accumulation, a sum may be the intended aggregate - confirm.
    resampled_15min = df.resample('15min').mean()
    resampled_1hr = df.resample('1h').mean()
    resampled_2hr = df.resample('2h').mean()
    return resampled_15min, resampled_1hr, resampled_2hr

# Creating sequences for LSTM input
def create_sequences(data, n_past, n_future, target_col='Rain Rate - mm/h'):
    """Slice a frame into sliding input windows and mean-of-horizon labels.

    For every start offset ``i`` the input is rows ``i .. i + n_past - 1``
    (all columns) and the label is the mean of ``target_col`` over the
    following ``n_future`` rows.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature frame; must contain ``target_col``.
    n_past : int
        Number of rows in each input window.
    n_future : int
        Number of rows averaged to form each label.
    target_col : str, optional
        Column the label is computed from. Defaults to
        ``'Rain Rate - mm/h'``.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Inputs of shape ``(n_windows, n_past, n_columns)`` and labels of
        shape ``(n_windows,)``. When ``data`` is too short for a single
        window, correctly shaped empty arrays are returned.
    """
    # The last valid start offset is len - n_past - n_future, hence the +1.
    # Without it the final row of `data` never contributes to any label.
    n_windows = len(data) - n_past - n_future + 1
    if n_windows <= 0:
        return np.empty((0, n_past, data.shape[1])), np.empty((0,))

    # Convert once instead of re-slicing the DataFrame on every iteration.
    values = data.to_numpy()
    target = data[target_col]

    sequences = [values[start:start + n_past] for start in range(n_windows)]
    # Series.mean() is kept for the labels so NaNs are skipped as before.
    labels = [
        target.iloc[start + n_past:start + n_past + n_future].mean()
        for start in range(n_windows)
    ]
    return np.array(sequences), np.array(labels)

In [17]:
def build_and_train_model(X, y, input_shape, epochs=20, batch_size=32,
                          validation_split=0.2, verbose=1):
    """Build a two-layer stacked LSTM regressor and fit it on ``X``/``y``.

    Architecture: LSTM(64, returning the full sequence) -> Dropout(0.2) ->
    LSTM(32) -> Dropout(0.2) -> Dense(1). The model is compiled with the
    Adam optimizer and mean-squared-error loss.

    Parameters
    ----------
    X : array-like
        Training inputs passed to ``model.fit``.
    y : array-like
        Training targets passed to ``model.fit``.
    input_shape : tuple
        Shape of one input sample, handed to the first LSTM layer.
    epochs : int, optional
        Number of training epochs (default 20).
    batch_size : int, optional
        Mini-batch size (default 32).
    validation_split : float, optional
        Fraction of the training data Keras holds out for validation
        (default 0.2).
    verbose : int, optional
        Keras progress verbosity (default 1).

    Returns
    -------
    (model, history)
        The fitted ``Sequential`` model and the ``History`` object returned
        by ``model.fit``.
    """
    model = Sequential([
        # return_sequences=True so the second LSTM receives every time step.
        LSTM(64, return_sequences=True, input_shape=input_shape),
        Dropout(0.2),
        LSTM(32, return_sequences=False),
        Dropout(0.2),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')
    history = model.fit(
        X,
        y,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split,
        verbose=verbose,
    )
    return model, history


In [19]:
def evaluate_and_visualize_model(model, X_test, y_test, title):
    """Score a fitted model on held-out data, print metrics and plot the fit.

    Prints RMSE, MAE, R² and an "accuracy" figure defined as
    ``100 * (1 - MAPE)``, then draws actual and predicted values on one
    axis.

    Parameters
    ----------
    model : object
        Anything exposing ``predict(X_test)`` that returns an array.
    X_test : array-like
        Inputs forwarded to ``model.predict``.
    y_test : array-like
        Ground-truth targets, one per prediction.
    title : str
        Label used in the printed header and the plot title.

    Returns
    -------
    numpy.ndarray
        The flattened predictions.
    """
    predictions = model.predict(X_test).flatten()
    y_true = np.asarray(y_test, dtype=float).ravel()

    rmse = np.sqrt(mean_squared_error(y_true, predictions))
    mae = mean_absolute_error(y_true, predictions)
    r2 = r2_score(y_true, predictions)

    # Percentage error is undefined where the actual value is 0, which is
    # every dry interval in rainfall data. Dividing by those zeros turned
    # the whole metric into inf/NaN, so only non-zero actuals are scored.
    nonzero = y_true != 0
    if nonzero.any():
        pct_error = np.abs(
            (y_true[nonzero] - predictions[nonzero]) / y_true[nonzero]
        )
        accuracy = (1 - np.mean(pct_error)) * 100
    else:
        accuracy = float('nan')

    print(f"{title} Metrics:")
    print(f"  RMSE: {rmse:.4f}")
    print(f"  MAE: {mae:.4f}")
    print(f"  R²: {r2:.4f}")
    print(f"  Accuracy: {accuracy:.2f}%")

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(y_true, label='Actual', color='blue', alpha=0.6)
    ax.plot(predictions, label='Predicted', color='red', alpha=0.6)
    ax.set_title(f'{title} - Actual vs Predicted')
    ax.set_xlabel('Time Steps')
    ax.set_ylabel('Rainfall Rate')
    ax.legend()
    ax.grid()
    plt.show()

    return predictions

In [21]:
def predict_future(model, recent_data, n_past):
    """Predict from the most recent ``n_past`` rows of ``recent_data``.

    The trailing ``n_past`` rows are reshaped into a single-sample batch of
    shape ``(1, n_past, n_columns)`` and passed to ``model.predict``.

    Parameters
    ----------
    model : object
        Anything exposing ``predict(batch)`` that returns an array.
    recent_data : pandas.DataFrame or 2-D array-like
        Rows ordered oldest to newest, one column per feature.
    n_past : int
        Number of trailing rows to feed to the model; must be at least 1.

    Returns
    -------
    numpy.ndarray
        The flattened output of ``model.predict``.

    Raises
    ------
    ValueError
        If ``n_past`` is less than 1 or ``recent_data`` has fewer than
        ``n_past`` rows.
    """
    if n_past < 1:
        # A slice of [-0:] would silently select every row.
        raise ValueError(f"n_past must be >= 1, got {n_past}")

    values = np.asarray(recent_data)
    if values.shape[0] < n_past:
        raise ValueError(
            f"recent_data has {values.shape[0]} rows but n_past={n_past} are required"
        )

    recent_sequence = values[-n_past:].reshape(1, n_past, values.shape[1])
    prediction = model.predict(recent_sequence).flatten()
    return prediction