# Prediksi menggunakan data per hari

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt

# Load data (use the dataset produced by the feature-engineering step).
df = pd.read_csv("dataset_total_prec_per_dayyy.csv")
# Each row is assigned a consecutive calendar day starting at 2022-01-01; an
# existing 'date' column, if any, is overwritten. date_range already yields
# datetime64 values, so no further conversion is required.
# NOTE(review): assumes the CSV holds one row per day, in order, with no gaps -- confirm.
df['date'] = pd.date_range(start='2022-01-01', periods=len(df), freq='D')
df['day_of_week'] = df['date'].dt.dayofweek

# Cyclical encoding of the day of week (period 7).
df['sin_day'] = np.sin(2 * np.pi * df['day_of_week'] / 7)
df['cos_day'] = np.cos(2 * np.pi * df['day_of_week'] / 7)

# Lag features: precipitation observed 1..7 days earlier.
for lag in range(1, 8):
    df[f'lag_{lag}'] = df['total_precipitation_mm'].shift(lag)

# Rolling-window features, computed on the series shifted by one day so that
# every window ends on the PREVIOUS day. An unshifted window would contain the
# current day's precipitation, i.e. the prediction target itself, and together
# with the lag features would let the model reconstruct the target exactly.
past_precip = df['total_precipitation_mm'].shift(1)
df['ma_3'] = past_precip.rolling(window=3).mean()
df['ma_7'] = past_precip.rolling(window=7).mean()
df['ma_14'] = past_precip.rolling(window=14).mean()
df['std_3'] = past_precip.rolling(window=3).std()
df['std_7'] = past_precip.rolling(window=7).std()

# Month and dummy seasonal clustering.
# NOTE(review): month % 4 groups non-adjacent months (e.g. 1, 5, 9); it is a
# placeholder rather than a meteorological season -- confirm this is intended.
df['month'] = df['date'].dt.month
df['season_cluster'] = df['month'] % 4
# dtype=float keeps the indicator columns numeric so the feature matrix built
# from them is a plain float array rather than an object array.
df = pd.get_dummies(df, columns=['season_cluster'], prefix='season', dtype=float)

# Log-transform the target (log1p is defined at 0 mm).
df['precip_log'] = np.log1p(df['total_precipitation_mm'])
# Drop the warm-up rows that have incomplete lag / rolling windows.
df = df.dropna().reset_index(drop=True)

# Features and target.
features = [
    'sin_day', 'cos_day', 'lag_1', 'lag_2', 'lag_3', 'lag_4', 'lag_5', 'lag_6', 'lag_7',
    'ma_3', 'ma_7', 'ma_14', 'std_3', 'std_7'
] + [col for col in df.columns if col.startswith('season_')]
# Force a float matrix: dummy columns may be boolean, which would otherwise
# produce an object-dtype array.
X = df[features].to_numpy(dtype=np.float64)
y = df['precip_log'].values.reshape(-1, 1)

# Chronological train-test split (no shuffling: the last 20% of rows is the test set).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

# Feature normalisation. The scaler is fitted on the training rows only so that
# the min/max of the test period does not leak into training; test values may
# therefore fall slightly outside [0, 1].
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Full scaled matrix, kept under its original name for any later use.
X_scaled = scaler.transform(X)

# Convert to tensors.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Neural network with Dropout and BatchNorm
class RainNN(nn.Module):
    """Fully connected regressor: input -> 128 -> 64 -> 32 -> 1.

    The first two hidden layers use BatchNorm1d, ReLU and Dropout(0.3); the
    third uses ReLU only. The network emits one value per input row.

    Args:
        input_dim: Number of input features. When omitted, the width of the
            module-level ``X_train`` array is used, so ``RainNN()`` behaves as
            it always did.
    """

    def __init__(self, input_dim=None):
        super().__init__()
        if input_dim is None:
            # Backward-compatible default: size the first layer from the
            # training matrix defined at module level.
            input_dim = X_train.shape[1]
        self.model = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        """Return the network output for a batch ``x`` of shape (batch, input_dim)."""
        return self.model(x)

model = RainNN()
criterion = nn.SmoothL1Loss()  # Huber loss
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Halve the learning rate every 100 epochs.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.5)

# Hold out the most recent ~10% of the training rows as a validation set.
# Early stopping must watch data the optimiser does not fit: the training loss
# is noisy under dropout and keeps falling while the model overfits. Taking the
# tail (rather than a random sample) preserves the chronological order.
val_size = max(1, len(X_train_tensor) // 10)
X_fit_tensor, X_val_tensor = X_train_tensor[:-val_size], X_train_tensor[-val_size:]
y_fit_tensor, y_val_tensor = y_train_tensor[:-val_size], y_train_tensor[-val_size:]

# Training loop (full-batch gradient descent with early stopping).
epochs = 2000
best_loss = float('inf')  # best validation loss seen so far
best_state = None         # copy of the weights that achieved best_loss
patience = 30             # epochs without validation improvement before stopping
counter = 0
for epoch in range(epochs):
    model.train()
    outputs = model(X_fit_tensor)
    loss = criterion(outputs, y_fit_tensor)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    scheduler.step()

    # Validation pass: eval mode disables dropout and uses the BatchNorm
    # running statistics; no gradients are needed.
    model.eval()
    with torch.no_grad():
        val_loss = criterion(model(X_val_tensor), y_val_tensor).item()

    if val_loss < best_loss:
        best_loss = val_loss
        # Clone the tensors: state_dict() returns references that later
        # optimiser steps would otherwise overwrite in place.
        best_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }
        counter = 0
    else:
        counter += 1
        if counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

    if (epoch+1) % 50 == 0:
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}, Val Loss: {val_loss:.4f}")

# Roll back to the best validation checkpoint; without this the model would be
# left with the weights from `patience` epochs after its best point.
if best_state is not None:
    model.load_state_dict(best_state)

# Evaluation on the held-out test set.
model.eval()
with torch.no_grad():
    preds_log = model(X_test_tensor)
    # Undo the log1p transform. expm1 of a negative network output is negative,
    # which is not a valid rainfall amount, so predictions are clamped at 0 mm.
    preds = np.maximum(torch.expm1(preds_log).numpy(), 0.0)
    y_actual = np.expm1(y_test_tensor.numpy())
    mse = np.mean((preds - y_actual)**2)
    rmse = np.sqrt(mse)
    print(f"\nTest RMSE: {rmse:.4f} mm")

# Print the first 10 predictions (fewer when the test set is smaller, instead
# of raising IndexError).
for i in range(min(10, len(preds))):
    print(f"Prediksi: {preds[i][0]:.2f} mm, Aktual: {y_actual[i][0]:.2f} mm")

# Plot predicted vs. actual precipitation over the test period.
plt.figure(figsize=(10, 5))
plt.plot(preds, label='Prediksi')
plt.plot(y_actual, label='Aktual')
plt.title('Perbandingan Curah Hujan: Prediksi vs Aktual')
plt.xlabel('Hari ke-n')
plt.ylabel('Curah Hujan (mm)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()