In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/dataset/dataset_harga_beras.xlsx
/kaggle/input/dataset/dataset_hama.xlsx
/kaggle/input/dataset/dataset_rainfall_all.xlsx


**GRID SEARCH PREDIKSI LSTM**

**UNIVARIATE**

**GRID SEARCH 3 TAHUN**

In [2]:
# --- Import Library ---
import pandas as pd
import numpy as np
import itertools
import time
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError

# --- Load Data ---
df = pd.read_excel("/kaggle/input/dataset/dataset_rainfall_all.xlsx", sheet_name="Sheet3")
df['tanggal'] = pd.to_datetime(df['tanggal'], dayfirst=True)
df = df.sort_values('tanggal')
target = 'rr'
df[[target]] = df[[target]].interpolate(method='linear').bfill().ffill()

# --- Normalize ---
scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()
X_scaled = scaler_x.fit_transform(df[[target]])  # univariate input
y_scaled = scaler_y.fit_transform(df[[target]])

# --- Dataset Creator Function ---
def create_dataset(X, y, lookback=12):
    Xs, ys = [], []
    for i in range(lookback, len(X)):
        Xs.append(X[i-lookback:i])
        ys.append(y[i])
    return np.array(Xs), np.array(ys)

# --- Grid Search Parameters ---
param_grid = {
    'lookback': [12, 30],
    'dropout': [0.2, 0.3],
    'batch_size': [16, 32]
}

results = []

# --- Start Grid Search ---
for lookback, dropout, batch_size in itertools.product(param_grid['lookback'], param_grid['dropout'], param_grid['batch_size']):
    # Create dataset
    X_seq, y_seq = create_dataset(X_scaled, y_scaled, lookback=lookback)
    train_size = int(0.8 * len(X_seq))
    X_train, X_test = X_seq[:train_size], X_seq[train_size:]
    y_train, y_test = y_seq[:train_size], y_seq[train_size:]

    # Build model
    model = Sequential([
        LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2])),
        Dropout(dropout),
        Dense(1)
    ])
    model.compile(optimizer=Adam(), loss=MeanSquaredError())

    # Train model
    start_time = time.time()
    model.fit(X_train, y_train, epochs=50, batch_size=batch_size, validation_split=0.1, verbose=0)
    train_time = time.time() - start_time

    # Predict and Evaluate
    y_pred_scaled = model.predict(X_test)
    y_pred = scaler_y.inverse_transform(y_pred_scaled)
    y_true = scaler_y.inverse_transform(y_test)

    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    smape = 100 * np.mean(2 * np.abs(y_pred - y_true) / (np.abs(y_pred) + np.abs(y_true) + 1e-8))

    results.append({
        'Lookback': lookback,
        'Dropout': dropout,
        'Batch Size': batch_size,
        'MAE': mae,
        'R²': r2,
        'SMAPE': smape,
        'Training Time (s)': train_time
    })

# --- Show Results ---
df_results = pd.DataFrame(results).sort_values(by='MAE')
print(df_results)


2025-04-30 16:27:15.366804: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746030435.788171      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746030435.900552      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
I0000 00:00:1746030455.508636      19 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1746030455.509474      19 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability:

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step


  super().__init__(**kwargs)


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step


  super().__init__(**kwargs)


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step


  super().__init__(**kwargs)


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step


  super().__init__(**kwargs)


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step  


  super().__init__(**kwargs)


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step  


  super().__init__(**kwargs)


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step  


  super().__init__(**kwargs)


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step  
   Lookback  Dropout  Batch Size        MAE        R²       SMAPE  \
1        12      0.2          32  14.542596  0.005201  124.326938   
5        30      0.2          32  14.694975  0.005261  124.012540   
2        12      0.3          16  14.916420  0.012662  124.056594   
6        30      0.3          16  14.934864  0.015358  124.142301   
0        12      0.2          16  15.140609  0.015689  123.956303   
3        12      0.3          32  15.183188  0.014280  123.928187   
4        30      0.2          16  15.235019  0.011273  124.140594   
7        30      0.3          32  15.322222  0.016048  124.062871   

   Training Time (s)  
1          12.611589  
5          13.929643  
2          20.855387  
6          22.545747  
0          23.869766  
3          12.200866  
4          22.959343  
7          13.731447  


**GRID SEARCH 5 TAHUN**

In [3]:
# --- Import Library ---
import pandas as pd
import numpy as np
import itertools
import time
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError

# --- Load Data ---
df = pd.read_excel("/kaggle/input/dataset/dataset_rainfall_all.xlsx", sheet_name="Sheet2")
df['tanggal'] = pd.to_datetime(df['tanggal'], dayfirst=True)
df = df.sort_values('tanggal')
target = 'rr'
df[[target]] = df[[target]].interpolate(method='linear').bfill().ffill()

# --- Normalize ---
scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()
X_scaled = scaler_x.fit_transform(df[[target]])  # univariate input
y_scaled = scaler_y.fit_transform(df[[target]])

# --- Dataset Creator Function ---
def create_dataset(X, y, lookback=12):
    Xs, ys = [], []
    for i in range(lookback, len(X)):
        Xs.append(X[i-lookback:i])
        ys.append(y[i])
    return np.array(Xs), np.array(ys)

# --- Grid Search Parameters ---
param_grid = {
    'lookback': [12, 30],
    'dropout': [0.2, 0.3],
    'batch_size': [16, 32]
}

results = []

# --- Start Grid Search ---
for lookback, dropout, batch_size in itertools.product(param_grid['lookback'], param_grid['dropout'], param_grid['batch_size']):
    # Create dataset
    X_seq, y_seq = create_dataset(X_scaled, y_scaled, lookback=lookback)
    train_size = int(0.8 * len(X_seq))
    X_train, X_test = X_seq[:train_size], X_seq[train_size:]
    y_train, y_test = y_seq[:train_size], y_seq[train_size:]

    # Build model
    model = Sequential([
        LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2])),
        Dropout(dropout),
        Dense(1)
    ])
    model.compile(optimizer=Adam(), loss=MeanSquaredError())

    # Train model
    start_time = time.time()
    model.fit(X_train, y_train, epochs=50, batch_size=batch_size, validation_split=0.1, verbose=0)
    train_time = time.time() - start_time

    # Predict and Evaluate
    y_pred_scaled = model.predict(X_test)
    y_pred = scaler_y.inverse_transform(y_pred_scaled)
    y_true = scaler_y.inverse_transform(y_test)

    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    smape = 100 * np.mean(2 * np.abs(y_pred - y_true) / (np.abs(y_pred) + np.abs(y_true) + 1e-8))

    results.append({
        'Lookback': lookback,
        'Dropout': dropout,
        'Batch Size': batch_size,
        'MAE': mae,
        'R²': r2,
        'SMAPE': smape,
        'Training Time (s)': train_time
    })

# --- Show Results ---
df_results = pd.DataFrame(results).sort_values(by='MAE')
print(df_results)


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
   Lookback  Dropout  Batch Size        MAE        R²       SMAPE  \
4        30      0.2          16  15.537996 -0.012262  120.156806   
3        12      0.3          32  15.650492 -0.004449  120.749803   
6        30      0.3          16  15.724326 -0.009287  120.171325   
2        12      0.3          16  15.735584 -0.001782  120.493798   
1        12      0.2          32  15.799854 -0.002462  120.540730   
0        12      0.2          16  15.875744  0.002670  120.232541   
7        30      0.3          32  15.894125 -0.006749  120.252114   
5        30      0.2          32  16.454274 -0.000763  119.673653   

   Training Time (s)  
4          15.742198  
3          10.058932  
6          15.311660  
2          14.288546  
1          10.432821  
0          14.122482  
7           9.840466  
5          10.351324  


**GRID SEARCH 10 TAHUN**

In [4]:
# --- Import Library ---
import pandas as pd
import numpy as np
import itertools
import time
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError

# --- Load Data ---
df = pd.read_excel("/kaggle/input/dataset/dataset_rainfall_all.xlsx", sheet_name="Sheet1")
df['tanggal'] = pd.to_datetime(df['tanggal'], dayfirst=True)
df = df.sort_values('tanggal')
target = 'rr'
df[[target]] = df[[target]].interpolate(method='linear').bfill().ffill()

# --- Normalize ---
scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()
X_scaled = scaler_x.fit_transform(df[[target]])  # univariate input
y_scaled = scaler_y.fit_transform(df[[target]])

# --- Dataset Creator Function ---
def create_dataset(X, y, lookback=12):
    Xs, ys = [], []
    for i in range(lookback, len(X)):
        Xs.append(X[i-lookback:i])
        ys.append(y[i])
    return np.array(Xs), np.array(ys)

# --- Grid Search Parameters ---
param_grid = {
    'lookback': [12, 30],
    'dropout': [0.2, 0.3],
    'batch_size': [16, 32]
}

results = []

# --- Start Grid Search ---
for lookback, dropout, batch_size in itertools.product(param_grid['lookback'], param_grid['dropout'], param_grid['batch_size']):
    # Create dataset
    X_seq, y_seq = create_dataset(X_scaled, y_scaled, lookback=lookback)
    train_size = int(0.8 * len(X_seq))
    X_train, X_test = X_seq[:train_size], X_seq[train_size:]
    y_train, y_test = y_seq[:train_size], y_seq[train_size:]

    # Build model
    model = Sequential([
        LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2])),
        Dropout(dropout),
        Dense(1)
    ])
    model.compile(optimizer=Adam(), loss=MeanSquaredError())

    # Train model
    start_time = time.time()
    model.fit(X_train, y_train, epochs=50, batch_size=batch_size, validation_split=0.1, verbose=0)
    train_time = time.time() - start_time

    # Predict and Evaluate
    y_pred_scaled = model.predict(X_test)
    y_pred = scaler_y.inverse_transform(y_pred_scaled)
    y_true = scaler_y.inverse_transform(y_test)

    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    smape = 100 * np.mean(2 * np.abs(y_pred - y_true) / (np.abs(y_pred) + np.abs(y_true) + 1e-8))

    results.append({
        'Lookback': lookback,
        'Dropout': dropout,
        'Batch Size': batch_size,
        'MAE': mae,
        'R²': r2,
        'SMAPE': smape,
        'Training Time (s)': train_time
    })

# --- Show Results ---
df_results = pd.DataFrame(results).sort_values(by='MAE')
print(df_results)


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
   Lookback  Dropout  Batch Size        MAE        R²       SMAPE  \
0        12      0.2          16  14.638315  0.013059  124.900001   
7        30      0.3          32  14.644496  0.011624  124.866582   
1        12      0.2          32  14.950167  0.017566  124.868286   
2        12      0.3          16  15.139925  0.020320  124.863451   
3        12      0.3          32  15.424836  0.021094  124.921483   
4        30      0.2          16  15.536839  0.022498  124.847321   
5        30      0.2          32  15.628333  0.021428  124.992296   
6        30      0.3          16  15.650111  0.021871  124.939893   

   Training Time (s)  
0          35.032898  
7          22.155160  
1          19.914841  
2          35.362849  
3          19.780876  
4          38.952059  
5          22.471766  
6          38.612366  


---

**MULTIVARIATE**

**GRID SEARCH 3 TAHUN**

In [5]:
# --- Import Library ---
import pandas as pd
import numpy as np
import itertools
import time
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError

# --- Load Data ---
df = pd.read_excel("/kaggle/input/dataset/dataset_rainfall_all.xlsx", sheet_name="Sheet2")
df['tanggal'] = pd.to_datetime(df['tanggal'], dayfirst=True)
df = df.sort_values('tanggal')
features = ['tavg', 'rh_avg', 'ss']
target = 'rr'
df[features + [target]] = df[features + [target]].interpolate(method='linear').bfill().ffill()

# --- Normalize ---
scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()
X_scaled = scaler_x.fit_transform(df[features])
y_scaled = scaler_y.fit_transform(df[[target]])

# --- Dataset Creator Function ---
def create_dataset(X, y, lookback=12):
    Xs, ys = [], []
    for i in range(lookback, len(X)):
        Xs.append(X[i-lookback:i])
        ys.append(y[i])
    return np.array(Xs), np.array(ys)

# --- Grid Search Parameters ---
param_grid = {
    'lookback': [12, 30],
    'dropout': [0.2, 0.3],
    'batch_size': [16, 32]
}

results = []

# --- Start Grid Search ---
for lookback, dropout, batch_size in itertools.product(param_grid['lookback'], param_grid['dropout'], param_grid['batch_size']):
    # Create dataset
    X_seq, y_seq = create_dataset(X_scaled, y_scaled, lookback=lookback)
    train_size = int(0.8 * len(X_seq))
    X_train, X_test = X_seq[:train_size], X_seq[train_size:]
    y_train, y_test = y_seq[:train_size], y_seq[train_size:]

    # Build model
    model = Sequential([
        LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2])),
        Dropout(dropout),
        Dense(1)
    ])
    model.compile(optimizer=Adam(), loss=MeanSquaredError())

    # Train model
    start_time = time.time()
    model.fit(X_train, y_train, epochs=50, batch_size=batch_size, validation_split=0.1, verbose=0)
    train_time = time.time() - start_time

    # Predict and Evaluate
    y_pred_scaled = model.predict(X_test)
    y_pred = scaler_y.inverse_transform(y_pred_scaled)
    y_true = scaler_y.inverse_transform(y_test)

    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    smape = 100 * np.mean(2 * np.abs(y_pred - y_true) / (np.abs(y_pred) + np.abs(y_true) + 1e-8))

    results.append({
        'Lookback': lookback,
        'Dropout': dropout,
        'Batch Size': batch_size,
        'MAE': mae,
        'R²': r2,
        'SMAPE': smape,
        'Training Time (s)': train_time
    })

# --- Show Results ---
df_results = pd.DataFrame(results)
df_results = df_results.sort_values(by='MAE')
print(df_results)

  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
   Lookback  Dropout  Batch Size        MAE        R²       SMAPE  \
3        12      0.3          32  14.338625  0.132163  122.160602   
0        12      0.2          16  14.532080  0.146894  121.811515   
6        30      0.3          16  14.921598  0.150548  120.570099   
1        12      0.2          32  15.047448  0.153085  121.010247   
7        30      0.3          32  16.037856  0.143876  120.411707   
5        30      0.2          32  16.157135  0.144819  120.962233   
4        30      0.2          16  16.380881  0.146116  121.124676   
2        12      0.3          16  16.671095  0.132344  121.619490   

   Training Time (s)  
3           8.985124  
0          14.701552  
6          14.180372  
1           9.425845  
7           9.254487  
5           9.801531  
4          14.264577  
2          15.440305  


**GRID SEARCH 5 TAHUN**

In [6]:
# Import Library 
import pandas as pd
import numpy as np
import itertools
import time
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError

# --- Load Data ---
df = pd.read_excel("/kaggle/input/dataset/dataset_rainfall_all.xlsx", sheet_name="Sheet3")
df['tanggal'] = pd.to_datetime(df['tanggal'], dayfirst=True)
df = df.sort_values('tanggal')
features = ['tavg', 'rh_avg', 'ss']
target = 'rr'
df[features + [target]] = df[features + [target]].interpolate(method='linear').bfill().ffill()

# --- Normalize ---
scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()
X_scaled = scaler_x.fit_transform(df[features])
y_scaled = scaler_y.fit_transform(df[[target]])

# Dataset Creator Function ---
def create_dataset(X, y, lookback=12):
    Xs, ys = [], []
    for i in range(lookback, len(X)):
        Xs.append(X[i-lookback:i])
        ys.append(y[i])
    return np.array(Xs), np.array(ys)

# --- Grid Search Parameters ---
param_grid = {
    'lookback': [12, 30],
    'dropout': [0.2, 0.3],
    'batch_size': [16, 32]
}

results = []

# --- Start Grid Search ---
for lookback, dropout, batch_size in itertools.product(param_grid['lookback'], param_grid['dropout'], param_grid['batch_size']):
    # Create dataset
    X_seq, y_seq = create_dataset(X_scaled, y_scaled, lookback=lookback)
    train_size = int(0.8 * len(X_seq))
    X_train, X_test = X_seq[:train_size], X_seq[train_size:]
    y_train, y_test = y_seq[:train_size], y_seq[train_size:]

    # Build model
    model = Sequential([
        LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2])),
        Dropout(dropout),
        Dense(1)
    ])
    model.compile(optimizer=Adam(), loss=MeanSquaredError())

    # Train model
    start_time = time.time()
    model.fit(X_train, y_train, epochs=50, batch_size=batch_size, validation_split=0.1, verbose=0)
    train_time = time.time() - start_time

    # Predict and Evaluate
    y_pred_scaled = model.predict(X_test)
    y_pred = scaler_y.inverse_transform(y_pred_scaled)
    y_true = scaler_y.inverse_transform(y_test)

    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    smape = 100 * np.mean(2 * np.abs(y_pred - y_true) / (np.abs(y_pred) + np.abs(y_true) + 1e-8))

    results.append({
        'Lookback': lookback,
        'Dropout': dropout,
        'Batch Size': batch_size,
        'MAE': mae,
        'R²': r2,
        'SMAPE': smape,
        'Training Time (s)': train_time
    })

# --- Show Results ---
df_results = pd.DataFrame(results)
df_results = df_results.sort_values(by='MAE')
print(df_results)

  super().__init__(**kwargs)


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step


  super().__init__(**kwargs)


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step


  super().__init__(**kwargs)


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step


  super().__init__(**kwargs)


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step


  super().__init__(**kwargs)


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step  


  super().__init__(**kwargs)


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step  


  super().__init__(**kwargs)


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step  


  super().__init__(**kwargs)


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step  
   Lookback  Dropout  Batch Size        MAE        R²       SMAPE  \
0        12      0.2          16  12.941305  0.163002  124.286437   
6        30      0.3          16  13.067986  0.167662  124.407826   
3        12      0.3          32  13.077436  0.166205  124.307765   
1        12      0.2          32  13.087063  0.168765  124.626095   
4        30      0.2          16  13.246615  0.169458  124.849399   
5        30      0.2          32  14.098236  0.173855  123.791917   
2        12      0.3          16  14.160307  0.173885  123.931785   
7        30      0.3          32  14.359758  0.171865  124.211895   

   Training Time (s)  
0          19.762174  
6          20.439833  
3          11.439328  
1          11.786331  
4          21.313179  
5          12.886794  
2          18.920758  
7          13.145250  


**GRID SEARCH 10 TAHUN**

In [7]:
# --- Import Library ---
import pandas as pd
import numpy as np
import itertools
import time
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError

# --- Load Data ---
df = pd.read_excel("/kaggle/input/dataset/dataset_rainfall_all.xlsx", sheet_name="Sheet1")
df['tanggal'] = pd.to_datetime(df['tanggal'], dayfirst=True)
df = df.sort_values('tanggal')
features = ['tavg', 'rh_avg', 'ss']
target = 'rr'
df[features + [target]] = df[features + [target]].interpolate(method='linear').bfill().ffill()

# --- Normalize ---
scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()
X_scaled = scaler_x.fit_transform(df[features])
y_scaled = scaler_y.fit_transform(df[[target]])

# --- Dataset Creator Function ---
def create_dataset(X, y, lookback=12):
    Xs, ys = [], []
    for i in range(lookback, len(X)):
        Xs.append(X[i-lookback:i])
        ys.append(y[i])
    return np.array(Xs), np.array(ys)

# --- Grid Search Parameters ---
param_grid = {
    'lookback': [12, 30],
    'dropout': [0.2, 0.3],
    'batch_size': [16, 32]
}

results = []

# --- Start Grid Search ---
for lookback, dropout, batch_size in itertools.product(param_grid['lookback'], param_grid['dropout'], param_grid['batch_size']):
    # Create dataset
    X_seq, y_seq = create_dataset(X_scaled, y_scaled, lookback=lookback)
    train_size = int(0.8 * len(X_seq))
    X_train, X_test = X_seq[:train_size], X_seq[train_size:]
    y_train, y_test = y_seq[:train_size], y_seq[train_size:]

    # Build model
    model = Sequential([
        LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2])),
        Dropout(dropout),
        Dense(1)
    ])
    model.compile(optimizer=Adam(), loss=MeanSquaredError())

    # Train model
    start_time = time.time()
    model.fit(X_train, y_train, epochs=50, batch_size=batch_size, validation_split=0.1, verbose=0)
    train_time = time.time() - start_time

    # Predict and Evaluate
    y_pred_scaled = model.predict(X_test)
    y_pred = scaler_y.inverse_transform(y_pred_scaled)
    y_true = scaler_y.inverse_transform(y_test)

    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    smape = 100 * np.mean(2 * np.abs(y_pred - y_true) / (np.abs(y_pred) + np.abs(y_true) + 1e-8))

    results.append({
        'Lookback': lookback,
        'Dropout': dropout,
        'Batch Size': batch_size,
        'MAE': mae,
        'R²': r2,
        'SMAPE': smape,
        'Training Time (s)': train_time
    })

# --- Show Results ---
df_results = pd.DataFrame(results)
df_results = df_results.sort_values(by='MAE')
print(df_results)

  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
   Lookback  Dropout  Batch Size        MAE        R²       SMAPE  \
3        12      0.3          32  12.935038  0.162575  125.756283   
0        12      0.2          16  12.974633  0.167788  125.784869   
5        30      0.2          32  12.996346  0.163461  125.181096   
7        30      0.3          32  13.134091  0.169783  124.808260   
6        30      0.3          16  13.295037  0.173908  124.565303   
2        12      0.3          16  13.549940  0.179643  125.162030   
4        30      0.2          16  13.618660  0.177980  124.795248   
1        12      0.2          32  13.693558  0.180828  125.133210   

   Training Time (s)  
3          18.717557  
0          32.346066  
5          20.232774  
7          20.307832  
6          36.841804  
2          33.300921  
4          35.573031  
1          17.988586  


----


**GRID SEARCH CLASSIFICATION**

In [8]:
# --- Import Libraries ---
import pandas as pd
import numpy as np
import random
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from time import time
import itertools

# --- Set Seed untuk Reproducibility ---
SEED = 42
np.random.seed(SEED)
random.seed(SEED)
tf.random.set_seed(SEED)

# --- Load & Preprocess Data ---
df = pd.read_excel("/kaggle/input/dataset/dataset_rainfall_all.xlsx", sheet_name="Sheet1")
df['tanggal'] = pd.to_datetime(df['tanggal'], dayfirst=True)
df = df.sort_values('tanggal').reset_index(drop=True)

features = ['tavg', 'rh_avg', 'ss']
target = 'rr'

# Interpolate missing values
df[features + [target]] = df[features + [target]].interpolate(method='linear').bfill().ffill()

# Create classification target
df['rr_label'] = df['rr'].apply(lambda x: 1 if x >= 5 else 0)

# Normalize inputs
scaler_x = MinMaxScaler()
X_scaled = scaler_x.fit_transform(df[features])
y_binary = df['rr_label'].values

# Create sequence dataset
def create_dataset(X, y, lookback=12):
    Xs, ys = [], []
    for i in range(lookback, len(X)):
        Xs.append(X[i-lookback:i])
        ys.append(y[i])
    return np.array(Xs), np.array(ys)

lookback = 12
X_seq, y_seq = create_dataset(X_scaled, y_binary, lookback=lookback)
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, shuffle=False, random_state=SEED)

# --- Define Hyperparameter Space ---
param_grid = {
    'units': [64, 128],
    'dropout': [0.2, 0.3],
    'learning_rate': [0.001, 0.0005],
    'batch_size': [32, 64]
}

# --- Grid Search ---
print("Starting Grid Search...")

best_f1 = -1
best_model = None
best_params = {}

# Create all combinations of parameters
param_combinations = list(itertools.product(
    param_grid['units'],
    param_grid['dropout'],
    param_grid['learning_rate'],
    param_grid['batch_size']
))

for idx, (units, dropout, lr, batch_size) in enumerate(param_combinations):
    print(f"\nTraining model {idx+1}/{len(param_combinations)}: units={units}, dropout={dropout}, lr={lr}, batch={batch_size}")
    
    model = Sequential([
        LSTM(units, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
        Dropout(dropout),
        LSTM(units//2),
        Dropout(dropout),
        Dense(1, activation='sigmoid')
    ])
    
    model.compile(optimizer=Adam(learning_rate=lr), loss='binary_crossentropy', metrics=['accuracy'])
    
    early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    
    start_time = time()
    model.fit(X_train, y_train, epochs=100, batch_size=batch_size, validation_split=0.1, callbacks=[early_stop], verbose=0)
    training_time = time() - start_time

    # Evaluate
    y_pred_prob = model.predict(X_test)
    y_pred = (y_pred_prob >= 0.5).astype(int)
    
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, zero_division=0)
    rec = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    
    print(f"Model {idx+1} Results - Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1: {f1:.4f}, Training Time: {training_time:.2f}s")
    
    # Save best model
    if f1 > best_f1:
        best_f1 = f1
        best_model = model
        best_params = {
            'units': units,
            'dropout': dropout,
            'learning_rate': lr,
            'batch_size': batch_size,
            'f1_score': f1
        }

print("\n--- Best Model Summary ---")
print(f"Best Hyperparameters: {best_params}")
print(f"Best F1 Score: {best_f1:.4f}")

# --- Save Best Model ---
best_model.save("best_lstm_classification_model.h5")
print("\nBest LSTM Classification Model saved successfully!")

Starting Grid Search...

Training model 1/16: units=64, dropout=0.2, lr=0.001, batch=32


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 42ms/step
Model 1 Results - Accuracy: 0.6939, Precision: 0.7055, Recall: 0.5740, F1: 0.6330, Training Time: 26.62s

Training model 2/16: units=64, dropout=0.2, lr=0.001, batch=64


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
Model 2 Results - Accuracy: 0.7007, Precision: 0.7107, Recall: 0.5888, F1: 0.6440, Training Time: 16.97s

Training model 3/16: units=64, dropout=0.2, lr=0.0005, batch=32


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 45ms/step
Model 3 Results - Accuracy: 0.6966, Precision: 0.7138, Recall: 0.5680, F1: 0.6326, Training Time: 31.73s

Training model 4/16: units=64, dropout=0.2, lr=0.0005, batch=64


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step
Model 4 Results - Accuracy: 0.6980, Precision: 0.7164, Recall: 0.5680, F1: 0.6337, Training Time: 24.73s

Training model 5/16: units=64, dropout=0.3, lr=0.001, batch=32


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
Model 5 Results - Accuracy: 0.6898, Precision: 0.7115, Recall: 0.5473, F1: 0.6187, Training Time: 24.42s

Training model 6/16: units=64, dropout=0.3, lr=0.001, batch=64


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step
Model 6 Results - Accuracy: 0.6980, Precision: 0.7101, Recall: 0.5799, F1: 0.6384, Training Time: 17.32s

Training model 7/16: units=64, dropout=0.3, lr=0.0005, batch=32


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
Model 7 Results - Accuracy: 0.6966, Precision: 0.7154, Recall: 0.5651, F1: 0.6314, Training Time: 36.53s

Training model 8/16: units=64, dropout=0.3, lr=0.0005, batch=64


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
Model 8 Results - Accuracy: 0.6980, Precision: 0.7071, Recall: 0.5858, F1: 0.6408, Training Time: 27.93s

Training model 9/16: units=128, dropout=0.2, lr=0.001, batch=32


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step
Model 9 Results - Accuracy: 0.6912, Precision: 0.7048, Recall: 0.5651, F1: 0.6273, Training Time: 21.31s

Training model 10/16: units=128, dropout=0.2, lr=0.001, batch=64


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
Model 10 Results - Accuracy: 0.6925, Precision: 0.7105, Recall: 0.5592, F1: 0.6258, Training Time: 14.77s

Training model 11/16: units=128, dropout=0.2, lr=0.0005, batch=32


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
Model 11 Results - Accuracy: 0.6884, Precision: 0.7057, Recall: 0.5533, F1: 0.6202, Training Time: 26.79s

Training model 12/16: units=128, dropout=0.2, lr=0.0005, batch=64


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
Model 12 Results - Accuracy: 0.6925, Precision: 0.7105, Recall: 0.5592, F1: 0.6258, Training Time: 19.01s

Training model 13/16: units=128, dropout=0.3, lr=0.001, batch=32


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step
Model 13 Results - Accuracy: 0.6898, Precision: 0.6993, Recall: 0.5710, F1: 0.6287, Training Time: 21.05s

Training model 14/16: units=128, dropout=0.3, lr=0.001, batch=64


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step
Model 14 Results - Accuracy: 0.6939, Precision: 0.7100, Recall: 0.5651, F1: 0.6293, Training Time: 14.82s

Training model 15/16: units=128, dropout=0.3, lr=0.0005, batch=32


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
Model 15 Results - Accuracy: 0.6925, Precision: 0.7188, Recall: 0.5444, F1: 0.6195, Training Time: 34.08s

Training model 16/16: units=128, dropout=0.3, lr=0.0005, batch=64


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
Model 16 Results - Accuracy: 0.6980, Precision: 0.7086, Recall: 0.5828, F1: 0.6396, Training Time: 20.46s

--- Best Model Summary ---
Best Hyperparameters: {'units': 64, 'dropout': 0.2, 'learning_rate': 0.001, 'batch_size': 64, 'f1_score': 0.6440129449838188}
Best F1 Score: 0.6440

Best LSTM Classification Model saved successfully!
